Compare commits

..

15 Commits

Author SHA1 Message Date
Wu Tianwei
928e993084 feat: Sync vibe-workflow (#32047)
Signed-off-by: majiayu000 <1835304752@qq.com>
Co-authored-by: Xiyuan Chen <52963600+GareArc@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
Co-authored-by: wangxiaolei <fatelei@gmail.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: lif <1835304752@qq.com>
Co-authored-by: Stream <Stream_2@qq.com>
Co-authored-by: Junyan Qin <rockchinq@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: 99 <wh2099@pm.me>
Co-authored-by: Ryan <longwei.llw@gmail.com>
Co-authored-by: Longwei Liu <longweiliu@LongweideMacBook-Air.local>
Co-authored-by: longbingljw <longbing.ljw@oceanbase.com>
2026-02-06 15:19:09 +08:00
qiuqiua
559547e9bf feat: cherry pick implementation (#31960) 2026-02-05 10:00:47 +08:00
yyh
365f749ed5 fix: remove staleTime/gcTime overrides from trigger query hooks and use orpc contract (#31863)
2026-02-04 19:33:32 +08:00
wangxiaolei
f686197589 feat: use latest hash to sync draft (#31924) 2026-02-04 19:32:36 +08:00
Coding On Star
f584be9cf0 chore: update CODEOWNERS to specify test file patterns for base components (#31941)
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
2026-02-04 19:29:57 +08:00
QuantumGhost
3bd228ddb7 chore: bump version in docker-compose and package manager to 1.12.1 (#31947) 2026-02-04 19:29:28 +08:00
wangxiaolei
0dfa59b1db fix: fix delete_draft_variables_batch cycle forever (#31934)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-02-04 19:10:27 +08:00
Coding On Star
1e344f773b refactor(web): extract complex components into modular structure with comprehensive tests (#31729)
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 18:35:31 +08:00
-LAN-
bba2040a05 chore: assign code owners for test directories (#31940) 2026-02-04 18:22:14 +08:00
Coding On Star
ad3be1e4d0 fix: include locale in appList query key for localization support in useExploreAppList (#31921)
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
2026-02-04 18:12:30 +08:00
Coding On Star
297dd832aa refactor(datasets): extract hooks and components with comprehensive tests (#31707)
Co-authored-by: CodingOnStar <hanxujiang@dify.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 18:12:17 +08:00
zxhlyh
cc5705cb71 fix: auto summary env (#31930) 2026-02-04 17:47:38 +08:00
wangxiaolei
74b027c41a fix: fix mcp output schema is union type frontend crash (#31779)
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-02-04 17:33:41 +08:00
Stephen Zhou
5f69470ebf test: try fix test, clear test log in CI (#31912) 2026-02-04 17:05:15 +08:00
wangxiaolei
ec7ccd800c fix: fix mcp server status is not right (#31826)
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-02-04 16:55:12 +08:00
224 changed files with 29824 additions and 3771 deletions

View File

@@ -1 +0,0 @@
../../.agents/skills/component-refactoring

View File

@@ -1 +0,0 @@
../../.agents/skills/frontend-code-review

View File

@@ -1 +0,0 @@
../../.agents/skills/frontend-testing

View File

@@ -1 +0,0 @@
../../.agents/skills/orpc-contract-first

.github/CODEOWNERS vendored (7 lines changed)
View File

@@ -24,6 +24,10 @@
/api/services/tools/mcp_tools_manage_service.py @Nov1c444
/api/controllers/mcp/ @Nov1c444
/api/controllers/console/app/mcp_server.py @Nov1c444
# Backend - Tests
/api/tests/ @laipz8200 @QuantumGhost
/api/tests/**/*mcp* @Nov1c444
# Backend - Workflow - Engine (Core graph execution engine)
@@ -234,6 +238,9 @@
# Frontend - Base Components
/web/app/components/base/ @iamjoel @zxhlyh
# Frontend - Base Components Tests
/web/app/components/base/**/*.spec.tsx @hyoban @CodingOnStar
# Frontend - Utils and Hooks
/web/utils/classnames.ts @iamjoel @zxhlyh
/web/utils/time.ts @iamjoel @zxhlyh

View File

@@ -79,29 +79,6 @@ jobs:
find . -name "*.py" -type f -exec sed -i.bak -E 's/"([^"]+)" \| None/Optional["\1"]/g; s/'"'"'([^'"'"']+)'"'"' \| None/Optional['"'"'\1'"'"']/g' {} \;
find . -name "*.py.bak" -type f -delete
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
package_json_file: web/package.json
run_install: false
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: 24
cache: pnpm
cache-dependency-path: ./web/pnpm-lock.yaml
- name: Install web dependencies
run: |
cd web
pnpm install --frozen-lockfile
- name: ESLint autofix
run: |
cd web
pnpm lint:fix || true
# mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter.
- name: mdformat
run: |

View File

@@ -39,7 +39,7 @@ jobs:
run: pnpm install --frozen-lockfile
- name: Run tests
run: pnpm test:coverage
run: pnpm test:ci
- name: Coverage Summary
if: always()

View File

@@ -102,6 +102,8 @@ forbidden_modules =
core.trigger
core.variables
ignore_imports =
core.workflow.nodes.agent.agent_node -> core.db.session_factory
core.workflow.nodes.agent.agent_node -> models.tools
core.workflow.nodes.loop.loop_node -> core.app.workflow.node_factory
core.workflow.graph_engine.command_channels.redis_channel -> extensions.ext_redis
core.workflow.workflow_entry -> core.app.workflow.layers.observability

View File

@@ -739,8 +739,10 @@ def upgrade_db():
click.echo(click.style("Database migration successful!", fg="green"))
except Exception:
except Exception as e:
logger.exception("Failed to execute database migration")
click.echo(click.style(f"Database migration failed: {e}", fg="red"))
raise SystemExit(1)
finally:
lock.release()
else:
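The net effect is that a failed migration is now logged and exits with a non-zero status instead of being silently swallowed, while the lock is still released. A minimal sketch of the resulting flow, with the actual migration call and lock acquisition stubbed out as assumptions:

```python
# Simplified sketch of the upgraded error handling in upgrade_db;
# `run_migrations` and `lock` stand in for the real Flask-Migrate call
# and the lock object used by the command.
import logging

import click

logger = logging.getLogger(__name__)


def upgrade_db_sketch(run_migrations, lock):
    try:
        run_migrations()
        click.echo(click.style("Database migration successful!", fg="green"))
    except Exception as e:
        # New behavior: log the traceback, show the error message, exit non-zero.
        logger.exception("Failed to execute database migration")
        click.echo(click.style(f"Database migration failed: {e}", fg="red"))
        raise SystemExit(1)
    finally:
        lock.release()
```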

View File

@@ -1,3 +1,4 @@
import logging
import uuid
from datetime import datetime
from typing import Any, Literal, TypeAlias
@@ -54,6 +55,8 @@ ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "co
register_enum_models(console_ns, IconType)
_logger = logging.getLogger(__name__)
class AppListQuery(BaseModel):
page: int = Field(default=1, ge=1, le=99999, description="Page number (1-99999)")
@@ -499,6 +502,7 @@ class AppListApi(Resource):
select(Workflow).where(
Workflow.version == Workflow.VERSION_DRAFT,
Workflow.app_id.in_(workflow_capable_app_ids),
Workflow.tenant_id == current_tenant_id,
)
)
.scalars()
@@ -510,12 +514,14 @@ class AppListApi(Resource):
NodeType.TRIGGER_PLUGIN,
}
for workflow in draft_workflows:
node_id = None
try:
for _, node_data in workflow.walk_nodes():
for node_id, node_data in workflow.walk_nodes():
if node_data.get("type") in trigger_node_types:
draft_trigger_app_ids.add(str(workflow.app_id))
break
except Exception:
_logger.exception("error while walking nodes, workflow_id=%s, node_id=%s", workflow.id, node_id)
continue
for app in app_pagination.items:
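A minimal sketch of the new loop, assuming `walk_nodes()` yields `(node_id, node_data)` pairs as the hunk suggests; keeping the last seen `node_id` lets the exception log identify where the walk failed:

```python
# Sketch of the trigger-detection loop with per-node error context.
import logging

_logger = logging.getLogger(__name__)


def collect_trigger_app_ids(draft_workflows, trigger_node_types) -> set[str]:
    draft_trigger_app_ids: set[str] = set()
    for workflow in draft_workflows:
        node_id = None
        try:
            for node_id, node_data in workflow.walk_nodes():
                if node_data.get("type") in trigger_node_types:
                    draft_trigger_app_ids.add(str(workflow.app_id))
                    break
        except Exception:
            # node_id points at the node being processed when the walk failed.
            _logger.exception(
                "error while walking nodes, workflow_id=%s, node_id=%s",
                workflow.id,
                node_id,
            )
            continue
    return draft_trigger_app_ids
```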

View File

@@ -1,8 +1,13 @@
import logging
from collections.abc import Sequence
from typing import Any
from flask_restx import Resource
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
from controllers.console import console_ns
from controllers.console.app.error import (
CompletionRequestError,
@@ -19,6 +24,7 @@ from core.helper.code_executor.python3.python3_code_provider import Python3CodeP
from core.llm_generator.entities import RuleCodeGeneratePayload, RuleGeneratePayload, RuleStructuredOutputPayload
from core.llm_generator.llm_generator import LLMGenerator
from core.model_runtime.errors.invoke import InvokeError
from core.workflow.generator import WorkflowGenerator
from extensions.ext_database import db
from libs.login import current_account_with_tenant, login_required
from models import App
@@ -41,6 +47,30 @@ class InstructionTemplatePayload(BaseModel):
type: str = Field(..., description="Instruction template type")
class PreviousWorkflow(BaseModel):
"""Previous workflow attempt for regeneration context."""
nodes: list[dict[str, Any]] = Field(default_factory=list, description="Previously generated nodes")
edges: list[dict[str, Any]] = Field(default_factory=list, description="Previously generated edges")
warnings: list[str] = Field(default_factory=list, description="Warnings from previous generation")
class FlowchartGeneratePayload(BaseModel):
instruction: str = Field(..., description="Workflow flowchart generation instruction")
model_config_data: dict[str, Any] = Field(..., alias="model_config", description="Model configuration")
available_nodes: list[dict[str, Any]] = Field(default_factory=list, description="Available node types")
existing_nodes: list[dict[str, Any]] = Field(default_factory=list, description="Existing workflow nodes")
existing_edges: list[dict[str, Any]] = Field(default_factory=list, description="Existing workflow edges")
available_tools: list[dict[str, Any]] = Field(default_factory=list, description="Available tools")
selected_node_ids: list[str] = Field(default_factory=list, description="IDs of selected nodes for context")
previous_workflow: PreviousWorkflow | None = Field(default=None, description="Previous workflow for regeneration")
regenerate_mode: bool = Field(default=False, description="Whether this is a regeneration request")
# Language preference for generated content (node titles, descriptions)
language: str | None = Field(default=None, description="Preferred language for generated content")
# Available models that user has configured (for LLM/question-classifier nodes)
available_models: list[dict[str, Any]] = Field(default_factory=list, description="User's configured models")
def reg(cls: type[BaseModel]):
console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
@@ -50,6 +80,7 @@ reg(RuleCodeGeneratePayload)
reg(RuleStructuredOutputPayload)
reg(InstructionGeneratePayload)
reg(InstructionTemplatePayload)
reg(FlowchartGeneratePayload)
reg(ModelConfig)
@@ -240,6 +271,52 @@ class InstructionGenerateApi(Resource):
raise CompletionRequestError(e.description)
@console_ns.route("/flowchart-generate")
class FlowchartGenerateApi(Resource):
@console_ns.doc("generate_workflow_flowchart")
@console_ns.doc(description="Generate workflow flowchart using LLM with intent classification")
@console_ns.expect(console_ns.models[FlowchartGeneratePayload.__name__])
@console_ns.response(200, "Flowchart generated successfully")
@console_ns.response(400, "Invalid request parameters")
@console_ns.response(402, "Provider quota exceeded")
@setup_required
@login_required
@account_initialization_required
def post(self):
args = FlowchartGeneratePayload.model_validate(console_ns.payload)
_, current_tenant_id = current_account_with_tenant()
try:
# Convert PreviousWorkflow to dict if present
previous_workflow_dict = args.previous_workflow.model_dump() if args.previous_workflow else None
result = WorkflowGenerator.generate_workflow_flowchart(
tenant_id=current_tenant_id,
instruction=args.instruction,
model_config=args.model_config_data,
available_nodes=args.available_nodes,
existing_nodes=args.existing_nodes,
existing_edges=args.existing_edges,
available_tools=args.available_tools,
selected_node_ids=args.selected_node_ids,
previous_workflow=previous_workflow_dict,
regenerate_mode=args.regenerate_mode,
preferred_language=args.language,
available_models=args.available_models,
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e:
raise CompletionRequestError(e.description)
return result
@console_ns.route("/instruction-generate/template")
class InstructionGenerationTemplateApi(Resource):
@console_ns.doc("get_instruction_template")

View File

@@ -47,6 +47,7 @@ class DifyNodeFactory(NodeFactory):
code_providers: Sequence[type[CodeNodeProvider]] | None = None,
code_limits: CodeNodeLimits | None = None,
template_renderer: Jinja2TemplateRenderer | None = None,
template_transform_max_output_length: int | None = None,
http_request_http_client: HttpClientProtocol | None = None,
http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
http_request_file_manager: FileManagerProtocol | None = None,
@@ -68,6 +69,9 @@ class DifyNodeFactory(NodeFactory):
max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH,
)
self._template_renderer = template_renderer or CodeExecutorJinja2TemplateRenderer()
self._template_transform_max_output_length = (
template_transform_max_output_length or dify_config.TEMPLATE_TRANSFORM_MAX_LENGTH
)
self._http_request_http_client = http_request_http_client or ssrf_proxy
self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
self._http_request_file_manager = http_request_file_manager or file_manager
@@ -122,6 +126,7 @@ class DifyNodeFactory(NodeFactory):
graph_init_params=self.graph_init_params,
graph_runtime_state=self.graph_runtime_state,
template_renderer=self._template_renderer,
max_output_length=self._template_transform_max_output_length,
)
if node_type == NodeType.HTTP_REQUEST:

View File

@@ -6,7 +6,8 @@ from yarl import URL
from configs import dify_config
from core.helper.download import download_with_size_limit
from core.plugin.entities.marketplace import MarketplacePluginDeclaration
from core.plugin.entities.marketplace import MarketplacePluginDeclaration, MarketplacePluginSnapshot
from extensions.ext_redis import redis_client
marketplace_api_url = URL(str(dify_config.MARKETPLACE_API_URL))
logger = logging.getLogger(__name__)
@@ -43,28 +44,37 @@ def batch_fetch_plugin_by_ids(plugin_ids: list[str]) -> list[dict]:
return data.get("data", {}).get("plugins", [])
def batch_fetch_plugin_manifests_ignore_deserialization_error(
plugin_ids: list[str],
) -> Sequence[MarketplacePluginDeclaration]:
if len(plugin_ids) == 0:
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
result: list[MarketplacePluginDeclaration] = []
for plugin in response.json()["data"]["plugins"]:
try:
result.append(MarketplacePluginDeclaration.model_validate(plugin))
except Exception:
logger.exception(
"Failed to deserialize marketplace plugin manifest for %s", plugin.get("plugin_id", "unknown")
)
return result
def record_install_plugin_event(plugin_unique_identifier: str):
url = str(marketplace_api_url / "api/v1/stats/plugins/install_count")
response = httpx.post(url, json={"unique_identifier": plugin_unique_identifier})
response.raise_for_status()
def fetch_global_plugin_manifest(cache_key_prefix: str, cache_ttl: int) -> None:
"""
Fetch all plugin manifests from marketplace and cache them in Redis.
This should be called once per check cycle to populate the instance-level cache.
Args:
cache_key_prefix: Redis key prefix for caching plugin manifests
cache_ttl: Cache TTL in seconds
Raises:
httpx.HTTPError: If the HTTP request fails
Exception: If any other error occurs during fetching or caching
"""
url = str(marketplace_api_url / "api/v1/dist/plugins/manifest.json")
response = httpx.get(url, headers={"X-Dify-Version": dify_config.project.version}, timeout=30)
response.raise_for_status()
raw_json = response.json()
plugins_data = raw_json.get("plugins", [])
# Parse and cache all plugin snapshots
for plugin_data in plugins_data:
plugin_snapshot = MarketplacePluginSnapshot.model_validate(plugin_data)
redis_client.setex(
name=f"{cache_key_prefix}{plugin_snapshot.plugin_id}",
time=cache_ttl,
value=plugin_snapshot.model_dump_json(),
)
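A possible read path for these cached snapshots; the helper below is hypothetical, since only the write path is added in this diff:

```python
# Hypothetical reader for snapshots cached by fetch_global_plugin_manifest.
from core.plugin.entities.marketplace import MarketplacePluginSnapshot
from extensions.ext_redis import redis_client


def get_cached_plugin_snapshot(cache_key_prefix: str, plugin_id: str) -> MarketplacePluginSnapshot | None:
    raw = redis_client.get(f"{cache_key_prefix}{plugin_id}")
    if raw is None:
        return None  # cache miss: caller may trigger a fresh manifest fetch
    return MarketplacePluginSnapshot.model_validate_json(raw)
```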

View File

@@ -32,6 +32,7 @@ from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
from core.ops.utils import measure_time
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
from core.workflow.generator import WorkflowGenerator
from extensions.ext_database import db
from extensions.ext_storage import storage
from models import App, Message, WorkflowNodeExecutionModel
@@ -285,6 +286,35 @@ class LLMGenerator:
return rule_config
@classmethod
def generate_workflow_flowchart(
cls,
tenant_id: str,
instruction: str,
model_config: dict,
available_nodes: Sequence[dict[str, object]] | None = None,
existing_nodes: Sequence[dict[str, object]] | None = None,
available_tools: Sequence[dict[str, object]] | None = None,
selected_node_ids: Sequence[str] | None = None,
previous_workflow: dict[str, object] | None = None,
regenerate_mode: bool = False,
preferred_language: str | None = None,
available_models: Sequence[dict[str, object]] | None = None,
):
return WorkflowGenerator.generate_workflow_flowchart(
tenant_id=tenant_id,
instruction=instruction,
model_config=model_config,
available_nodes=available_nodes,
existing_nodes=existing_nodes,
available_tools=available_tools,
selected_node_ids=selected_node_ids,
previous_workflow=previous_workflow,
regenerate_mode=regenerate_mode,
preferred_language=preferred_language,
available_models=available_models,
)
@classmethod
def generate_code(
cls,

View File

@@ -143,6 +143,50 @@ Based on task description, please create a well-structured prompt template that
Please generate the full prompt template with at least 300 words and output only the prompt template.
""" # noqa: E501
WORKFLOW_FLOWCHART_PROMPT_TEMPLATE = """
You are an expert workflow designer. Generate a Mermaid flowchart based on the user's request.
Constraints:
- Detect the language of the user's request. Generate all node titles in the same language as the user's input.
- If the input language cannot be determined, use {{PREFERRED_LANGUAGE}} as the fallback language.
- Use only node types listed in <available_nodes>.
- Use only tools listed in <available_tools>. When using a tool node, set type=tool and tool=<tool_key>.
- Tools may include MCP providers (provider_type=mcp). Tool selection still uses tool_key.
- Prefer reusing node titles from <existing_nodes> when possible.
- Output must be valid Mermaid flowchart syntax, no markdown, no extra text.
- First line must be: flowchart LR
- Every node must be declared on its own line using:
<id>["type=<type>|title=<title>|tool=<tool_key>"]
- type is required and must match a type in <available_nodes>.
- title is required for non-tool nodes.
- tool is required only when type=tool, otherwise omit tool.
- Declare all node lines before any edges.
- Edges must use:
<id> --> <id>
<id> -->|true| <id>
<id> -->|false| <id>
- Keep node ids unique and simple (N1, N2, ...).
- For complex orchestration:
- Break the request into stages (ingest, transform, decision, action, output).
- Use IfElse for branching and label edges true/false only.
- Fan-in branches by connecting multiple nodes into a shared downstream node.
- Avoid cycles unless explicitly requested.
- Keep each branch complete with a clear downstream target.
<user_request>
{{TASK_DESCRIPTION}}
</user_request>
<available_nodes>
{{AVAILABLE_NODES}}
</available_nodes>
<existing_nodes>
{{EXISTING_NODES}}
</existing_nodes>
<available_tools>
{{AVAILABLE_TOOLS}}
</available_tools>
"""
RULE_CONFIG_PROMPT_GENERATE_TEMPLATE = """
Here is a task description for which I would like you to create a high-quality prompt template for:
<task_description>

View File

@@ -1,4 +1,4 @@
from pydantic import BaseModel, Field, model_validator
from pydantic import BaseModel, Field, computed_field, model_validator
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.plugin.entities.endpoint import EndpointProviderDeclaration
@@ -48,3 +48,15 @@ class MarketplacePluginDeclaration(BaseModel):
if "tool" in data and not data["tool"]:
del data["tool"]
return data
class MarketplacePluginSnapshot(BaseModel):
org: str
name: str
latest_version: str
latest_package_identifier: str
latest_package_url: str
@computed_field
def plugin_id(self) -> str:
return f"{self.org}/{self.name}"

View File

@@ -112,7 +112,7 @@ class ArrayBooleanVariable(ArrayBooleanSegment, ArrayVariable):
class RAGPipelineVariable(BaseModel):
belong_to_node_id: str = Field(description="belong to which node id, shared means public")
type: str = Field(description="variable type, text-input, paragraph, select, number, file, file-list")
type: str = Field(description="variable type, text-input, paragraph, select, number, file, file-list")
label: str = Field(description="label")
description: str | None = Field(description="description", default="")
variable: str = Field(description="variable key", default="")

View File

@@ -0,0 +1 @@
from .runner import WorkflowGenerator

View File

@@ -0,0 +1,29 @@
"""
Vibe Workflow Generator Configuration Module.
This module centralizes configuration for the Vibe workflow generation feature,
including node schemas, fallback rules, and response templates.
"""
from core.workflow.generator.config.node_schemas import (
BUILTIN_NODE_SCHEMAS,
FALLBACK_RULES,
FIELD_NAME_CORRECTIONS,
NODE_TYPE_ALIASES,
get_builtin_node_schemas,
get_corrected_field_name,
validate_node_schemas,
)
from core.workflow.generator.config.responses import DEFAULT_SUGGESTIONS, OFF_TOPIC_RESPONSES
__all__ = [
"BUILTIN_NODE_SCHEMAS",
"DEFAULT_SUGGESTIONS",
"FALLBACK_RULES",
"FIELD_NAME_CORRECTIONS",
"NODE_TYPE_ALIASES",
"OFF_TOPIC_RESPONSES",
"get_builtin_node_schemas",
"get_corrected_field_name",
"validate_node_schemas",
]

View File

@@ -0,0 +1,501 @@
"""
Unified Node Configuration for Vibe Workflow Generation.
This module centralizes all node-related configuration:
- Node schemas (parameter definitions)
- Fallback rules (keyword-based node type inference)
- Node type aliases (natural language to canonical type mapping)
- Field name corrections (LLM output normalization)
- Validation utilities
Note: These definitions are the single source of truth.
Frontend has a mirrored copy at web/app/components/workflow/hooks/use-workflow-vibe-config.ts
"""
from typing import Any
# =============================================================================
# NODE SCHEMAS
# =============================================================================
# Built-in node schemas with parameter definitions
# These help the model understand what config each node type requires
_HARDCODED_SCHEMAS: dict[str, dict[str, Any]] = {
"http-request": {
"description": "Send HTTP requests to external APIs or fetch web content",
"required": ["url", "method"],
"parameters": {
"url": {
"type": "string",
"description": "Full URL including protocol (https://...)",
"example": "{{#start.url#}} or https://api.example.com/data",
},
"method": {
"type": "enum",
"options": ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"],
"description": "HTTP method",
},
"headers": {
"type": "string",
"description": "HTTP headers as newline-separated 'Key: Value' pairs",
"example": "Content-Type: application/json\nAuthorization: Bearer {{#start.api_key#}}",
},
"params": {
"type": "string",
"description": "URL query parameters as newline-separated 'key: value' pairs",
},
"body": {
"type": "object",
"description": "Request body with type field required",
"example": {"type": "none", "data": []},
},
"authorization": {
"type": "object",
"description": "Authorization config",
"example": {"type": "no-auth"},
},
"timeout": {
"type": "number",
"description": "Request timeout in seconds",
"default": 60,
},
},
"outputs": ["body (response content)", "status_code", "headers"],
},
"code": {
"description": "Execute Python or JavaScript code for custom logic",
"required": ["code", "language"],
"parameters": {
"code": {
"type": "string",
"description": "Code to execute. Must define a main() function that returns a dict.",
},
"language": {
"type": "enum",
"options": ["python3", "javascript"],
},
"variables": {
"type": "array",
"description": "Input variables passed to the code",
"item_schema": {"variable": "string", "value_selector": "array"},
},
"outputs": {
"type": "object",
"description": "Output variable definitions",
},
},
"outputs": ["Variables defined in outputs schema"],
},
"llm": {
"description": "Call a large language model for text generation/processing",
"required": ["prompt_template"],
"parameters": {
"model": {
"type": "object",
"description": "Model configuration (provider, name, mode)",
},
"prompt_template": {
"type": "array",
"description": "Messages for the LLM",
"item_schema": {
"role": "enum: system, user, assistant",
"text": "string - message content, can include {{#node_id.field#}} references",
},
},
"context": {
"type": "object",
"description": "Optional context settings",
},
"memory": {
"type": "object",
"description": "Optional memory/conversation settings",
},
},
"outputs": ["text (generated response)"],
},
"if-else": {
"description": "Conditional branching based on conditions",
"required": ["cases"],
"parameters": {
"cases": {
"type": "array",
"description": "List of condition cases. Each case defines when 'true' branch is taken.",
"item_schema": {
"case_id": "string - unique case identifier (e.g., 'case_1')",
"logical_operator": "enum: and, or - how multiple conditions combine",
"conditions": {
"type": "array",
"item_schema": {
"variable_selector": "array of strings - path to variable, e.g. ['node_id', 'field']",
"comparison_operator": (
"enum: =, ≠, >, <, ≥, ≤, contains, not contains, is, is not, empty, not empty"
),
"value": "string or number - value to compare against",
},
},
},
},
},
"outputs": ["Branches: true (first case conditions met), false (else/no case matched)"],
},
"knowledge-retrieval": {
"description": "Query knowledge base for relevant content",
"required": ["query_variable_selector", "dataset_ids"],
"parameters": {
"query_variable_selector": {
"type": "array",
"description": "Path to query variable, e.g. ['start', 'query']",
},
"dataset_ids": {
"type": "array",
"description": "List of knowledge base IDs to search",
},
"retrieval_mode": {
"type": "enum",
"options": ["single", "multiple"],
},
},
"outputs": ["result (retrieved documents)"],
},
"template-transform": {
"description": "Transform data using Jinja2 templates",
"required": ["template", "variables"],
"parameters": {
"template": {
"type": "string",
"description": "Jinja2 template string. Use {{ variable_name }} to reference variables.",
},
"variables": {
"type": "array",
"description": "Input variables defined for the template",
"item_schema": {
"variable": "string - variable name to use in template",
"value_selector": "array - path to source value, e.g. ['start', 'user_input']",
},
},
},
"outputs": ["output (transformed string)"],
},
"variable-aggregator": {
"description": "Aggregate variables from multiple branches",
"required": ["variables"],
"parameters": {
"variables": {
"type": "array",
"description": "List of variable selectors to aggregate",
"item_schema": "array of strings - path to source variable, e.g. ['node_id', 'field']",
},
},
"outputs": ["output (aggregated value)"],
},
"iteration": {
"description": "Loop over array items",
"required": ["iterator_selector"],
"parameters": {
"iterator_selector": {
"type": "array",
"description": "Path to array variable to iterate",
},
},
"outputs": ["item (current iteration item)", "index (current index)"],
},
"parameter-extractor": {
"description": "Extract structured parameters from user input using LLM",
"required": ["query", "parameters"],
"parameters": {
"model": {
"type": "object",
"description": "Model configuration (provider, name, mode)",
},
"query": {
"type": "array",
"description": "Path to input text to extract parameters from, e.g. ['start', 'user_input']",
},
"parameters": {
"type": "array",
"description": "Parameters to extract from the input",
"item_schema": {
"name": "string - parameter name (required)",
"type": (
"enum: string, number, boolean, array[string], array[number], array[object], array[boolean]"
),
"description": "string - description of what to extract (required)",
"required": "boolean - whether this parameter is required (MUST be specified)",
"options": "array of strings (optional) - for enum-like selection",
},
},
"instruction": {
"type": "string",
"description": "Additional instructions for extraction",
},
"reasoning_mode": {
"type": "enum",
"options": ["function_call", "prompt"],
"description": "How to perform extraction (defaults to function_call)",
},
},
"outputs": ["Extracted parameters as defined in parameters array", "__is_success", "__reason"],
},
"question-classifier": {
"description": "Classify user input into predefined categories using LLM",
"required": ["query", "classes"],
"parameters": {
"model": {
"type": "object",
"description": "Model configuration (provider, name, mode)",
},
"query": {
"type": "array",
"description": "Path to input text to classify, e.g. ['start', 'user_input']",
},
"classes": {
"type": "array",
"description": "Classification categories",
"item_schema": {
"id": "string - unique class identifier",
"name": "string - class name/label",
},
},
"instruction": {
"type": "string",
"description": "Additional instructions for classification",
},
},
"outputs": ["class_name (selected class)"],
},
}
def _get_dynamic_schemas() -> dict[str, dict[str, Any]]:
"""
Dynamically load schemas from node classes.
Uses lazy import to avoid circular dependency.
"""
from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
schemas = {}
for node_type, version_map in NODE_TYPE_CLASSES_MAPPING.items():
# Get the latest version class
node_cls = version_map.get(LATEST_VERSION)
if not node_cls:
continue
# Get schema from the class
schema = node_cls.get_default_config_schema()
if schema:
schemas[node_type.value] = schema
return schemas
# Cache for built-in schemas (populated on first access)
_builtin_schemas_cache: dict[str, dict[str, Any]] | None = None
def get_builtin_node_schemas() -> dict[str, dict[str, Any]]:
"""
Get the complete set of built-in node schemas.
Combines hardcoded schemas with dynamically loaded ones.
Results are cached after first call.
"""
global _builtin_schemas_cache
if _builtin_schemas_cache is None:
_builtin_schemas_cache = {**_HARDCODED_SCHEMAS, **_get_dynamic_schemas()}
return _builtin_schemas_cache
# For backward compatibility - but use get_builtin_node_schemas() for lazy loading
BUILTIN_NODE_SCHEMAS: dict[str, dict[str, Any]] = _HARDCODED_SCHEMAS.copy()
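Callers that want the dynamically derived schemas included should go through the accessor, which merges and caches the result:

```python
from core.workflow.generator.config.node_schemas import get_builtin_node_schemas

# First call merges hardcoded schemas with those derived from
# NODE_TYPE_CLASSES_MAPPING; subsequent calls return the cached dict.
schemas = get_builtin_node_schemas()
assert "http-request" in schemas
assert get_builtin_node_schemas() is schemas  # cached after the first call
```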
# =============================================================================
# FALLBACK RULES
# =============================================================================
# Keyword rules for smart fallback detection
# Maps node type to keywords that suggest using that node type as a fallback
FALLBACK_RULES: dict[str, list[str]] = {
"http-request": [
"http",
"url",
"web",
"scrape",
"scraper",
"fetch",
"api",
"request",
"download",
"upload",
"webhook",
"endpoint",
"rest",
"get",
"post",
],
"code": [
"code",
"script",
"calculate",
"compute",
"process",
"transform",
"parse",
"convert",
"format",
"filter",
"sort",
"math",
"logic",
],
"llm": [
"analyze",
"summarize",
"summary",
"extract",
"classify",
"translate",
"generate",
"write",
"rewrite",
"explain",
"answer",
"chat",
],
}
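A hypothetical helper showing how these keyword rules might be applied; no such function is defined in this module, and the real consumer presumably does something similar:

```python
from core.workflow.generator.config.node_schemas import FALLBACK_RULES


def infer_fallback_node_type(description: str) -> str | None:
    """Return the first node type whose keywords appear in the description."""
    text = description.lower()
    for node_type, keywords in FALLBACK_RULES.items():
        if any(keyword in text for keyword in keywords):
            return node_type
    return None


infer_fallback_node_type("Scrape the pricing page")  # -> "http-request"
infer_fallback_node_type("Summarize the article")    # -> "llm"
```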
# =============================================================================
# NODE TYPE ALIASES
# =============================================================================
# Node type aliases for inference from natural language
# Maps common terms to canonical node type names
NODE_TYPE_ALIASES: dict[str, str] = {
# Start node aliases
"start": "start",
"begin": "start",
"input": "start",
# End node aliases
"end": "end",
"finish": "end",
"output": "end",
# LLM node aliases
"llm": "llm",
"ai": "llm",
"gpt": "llm",
"model": "llm",
"chat": "llm",
# Code node aliases
"code": "code",
"script": "code",
"python": "code",
"javascript": "code",
# HTTP request node aliases
"http-request": "http-request",
"http": "http-request",
"request": "http-request",
"api": "http-request",
"fetch": "http-request",
"webhook": "http-request",
# Conditional node aliases
"if-else": "if-else",
"condition": "if-else",
"branch": "if-else",
"switch": "if-else",
# Loop node aliases
"iteration": "iteration",
"loop": "loop",
"foreach": "iteration",
# Tool node alias
"tool": "tool",
}
# =============================================================================
# FIELD NAME CORRECTIONS
# =============================================================================
# Field name corrections for LLM-generated node configs
# Maps incorrect field names to correct ones for specific node types
FIELD_NAME_CORRECTIONS: dict[str, dict[str, str]] = {
"http-request": {
"text": "body", # LLM might use "text" instead of "body"
"content": "body",
"response": "body",
},
"code": {
"text": "result", # LLM might use "text" instead of "result"
"output": "result",
},
"llm": {
"response": "text",
"answer": "text",
},
}
def get_corrected_field_name(node_type: str, field: str) -> str:
"""
Get the corrected field name for a node type.
Args:
node_type: The type of the node (e.g., "http-request", "code")
field: The field name to correct
Returns:
The corrected field name, or the original if no correction needed
"""
corrections = FIELD_NAME_CORRECTIONS.get(node_type, {})
return corrections.get(field, field)
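Example corrections in practice:

```python
from core.workflow.generator.config.node_schemas import get_corrected_field_name

# LLM-produced references like {{#http_node.text#}} are normalized to the
# field the node actually exposes.
get_corrected_field_name("http-request", "text")  # -> "body"
get_corrected_field_name("llm", "response")       # -> "text"
get_corrected_field_name("llm", "text")           # -> "text" (unchanged)
```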
# =============================================================================
# VALIDATION UTILITIES
# =============================================================================
# Node types that are internal and don't need schemas for LLM generation
_INTERNAL_NODE_TYPES: set[str] = {
# Internal workflow nodes
"answer", # Internal to chatflow
"loop", # Uses iteration internally
"assigner", # Variable assignment utility
"variable-assigner", # Variable assignment utility
"agent", # Agent node (complex, handled separately)
"document-extractor", # Internal document processing
"list-operator", # Internal list operations
# Iteration internal nodes
"iteration-start", # Internal to iteration loop
"loop-start", # Internal to loop
"loop-end", # Internal to loop
# Trigger nodes (not user-creatable via LLM)
"trigger-plugin", # Plugin trigger
"trigger-schedule", # Scheduled trigger
"trigger-webhook", # Webhook trigger
# Other internal nodes
"datasource", # Data source configuration
"human-input", # Human-in-the-loop node
"knowledge-index", # Knowledge indexing node
}
def validate_node_schemas() -> list[str]:
"""
Validate that all registered node types have corresponding schemas.
This function checks if BUILTIN_NODE_SCHEMAS covers all node types
registered in NODE_TYPE_CLASSES_MAPPING, excluding internal node types.
Returns:
List of warning messages for missing schemas (empty if all valid)
"""
from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
schemas = get_builtin_node_schemas()
warnings = []
for node_type in NODE_TYPE_CLASSES_MAPPING:
type_value = node_type.value
if type_value in _INTERNAL_NODE_TYPES:
continue
if type_value not in schemas:
warnings.append(f"Missing schema for node type: {type_value}")
return warnings
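A hypothetical way to surface the result at startup; the diff does not show where this validation is actually called:

```python
import logging

from core.workflow.generator.config import validate_node_schemas

logger = logging.getLogger(__name__)

# Log node types that lack a schema instead of failing the application.
for warning in validate_node_schemas():
    logger.warning(warning)
```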

View File

@@ -0,0 +1,72 @@
"""
Response Templates for Vibe Workflow Generation.
This module defines templates for off-topic responses and default suggestions
to guide users back to workflow-related requests.
"""
# Off-topic response templates for different categories
# Each category has messages in multiple languages
OFF_TOPIC_RESPONSES: dict[str, dict[str, str]] = {
"weather": {
"en": (
"I'm the workflow design assistant - I can't check the weather, "
"but I can help you build AI workflows! For example, I could help you "
"create a workflow that fetches weather data from an API."
),
"zh": "我是工作流设计助手无法查询天气。但我可以帮你创建一个从API获取天气数据的工作流",
},
"math": {
"en": (
"I focus on workflow design rather than calculations. However, "
"if you need calculations in a workflow, I can help you add a Code node "
"that handles math operations!"
),
"zh": "我专注于工作流设计而非计算。但如果您需要在工作流中进行计算,我可以帮您添加一个处理数学运算的代码节点!",
},
"joke": {
"en": (
"While I'd love to share a laugh, I'm specialized in workflow design. "
"How about we create something fun instead - like a workflow that generates jokes using AI?"
),
"zh": "虽然我很想讲笑话但我专门从事工作流设计。不如我们创建一个有趣的东西——比如使用AI生成笑话的工作流",
},
"translation": {
"en": (
"I can't translate directly, but I can help you build a translation workflow! "
"Would you like to create one using an LLM node?"
),
"zh": "我不能直接翻译但我可以帮你构建一个翻译工作流要创建一个使用LLM节点的翻译流程吗",
},
"general_coding": {
"en": (
"I'm specialized in Dify workflow design rather than general coding help. "
"But if you want to add code logic to your workflow, I can help you configure a Code node!"
),
"zh": (
"我专注于Dify工作流设计而非通用编程帮助。但如果您想在工作流中添加代码逻辑我可以帮您配置一个代码节点"
),
},
"default": {
"en": (
"I'm the Dify workflow design assistant. I help create AI automation workflows, "
"but I can't help with general questions. Would you like to create a workflow instead?"
),
"zh": "我是Dify工作流设计助手。我帮助创建AI自动化工作流但无法回答一般性问题。您想创建一个工作流吗",
},
}
# Default suggestions for off-topic requests
# These help guide users towards valid workflow requests
DEFAULT_SUGGESTIONS: dict[str, list[str]] = {
"en": [
"Create a chatbot workflow",
"Build a document summarization pipeline",
"Add email notification to workflow",
],
"zh": [
"创建一个聊天机器人工作流",
"构建文档摘要处理流程",
"添加邮件通知到工作流",
],
}
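A sketch of how a caller might combine the two tables; the helper below is hypothetical, since the selection logic lives elsewhere in the generator:

```python
from core.workflow.generator.config.responses import DEFAULT_SUGGESTIONS, OFF_TOPIC_RESPONSES


def get_off_topic_reply(category: str, language: str) -> tuple[str, list[str]]:
    # Unknown categories fall back to "default"; unknown languages to English.
    responses = OFF_TOPIC_RESPONSES.get(category, OFF_TOPIC_RESPONSES["default"])
    message = responses.get(language, responses["en"])
    suggestions = DEFAULT_SUGGESTIONS.get(language, DEFAULT_SUGGESTIONS["en"])
    return message, suggestions


get_off_topic_reply("weather", "zh")  # Chinese weather deflection + zh suggestions
```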

View File

@@ -0,0 +1,733 @@
# =============================================================================
# NEW FORMAT: depends_on based prompt (for use with GraphBuilder)
# =============================================================================
BUILDER_SYSTEM_PROMPT_V2 = """<role>
You are a Workflow Configuration Engineer.
Your goal is to generate workflow node configurations with dependency declarations.
The graph structure (edges, start/end nodes) will be automatically built from your output.
</role>
<language_rules>
- Detect the language of the user's request automatically (e.g., English, Chinese, Japanese, etc.).
- Generate ALL node titles, descriptions, and user-facing text in the SAME language as the user's input.
- If the input language is ambiguous or cannot be determined (e.g. code-only input),
use {preferred_language} as the target language.
</language_rules>
<inputs>
<plan>
{plan_context}
</plan>
<tool_schemas>
{tool_schemas}
</tool_schemas>
<node_specs>
{builtin_node_specs}
</node_specs>
<available_models>
{available_models}
</available_models>
<workflow_context>
<existing_nodes>
{existing_nodes_context}
</existing_nodes>
<selected_nodes>
{selected_nodes_context}
</selected_nodes>
</workflow_context>
</inputs>
<critical_rules>
1. **DO NOT generate start or end nodes** - they are automatically added
2. **DO NOT generate edges** - they are automatically built from depends_on
3. **Use depends_on array** to declare which nodes must run before this one
4. **Leave depends_on empty []** for nodes that should start immediately (connect to start)
</critical_rules>
<rules>
1. **Configuration**:
- You MUST fill ALL required parameters for every node.
- Use `{{{{#node_id.field#}}}}` syntax to reference outputs from previous nodes in text fields.
2. **Dependency Declaration**:
- Each node has a `depends_on` array listing node IDs that must complete before it runs
- Empty depends_on `[]` means the node runs immediately after start
- Example: `"depends_on": ["fetch_data"]` means this node waits for fetch_data to complete
3. **Variable References**:
- For text fields (like prompts, queries): use string format `{{{{#node_id.field#}}}}`
- Dependencies will be auto-inferred from variable references if not explicitly declared
4. **Tools**:
- ONLY use the tools listed in `<tool_schemas>`.
- If a planned tool is missing from schemas, fallback to `http-request` or `code`.
5. **Model Selection** (CRITICAL):
- For LLM, question-classifier, and parameter-extractor nodes, you MUST include a "model" config.
- You MUST use ONLY models from the `<available_models>` section above.
- Copy the EXACT provider and name values from available_models.
- NEVER use openai/gpt-4o, gpt-3.5-turbo, gpt-4, or any other models unless they appear in available_models.
- If available_models is empty or shows "No models configured", omit the model config entirely.
6. **if-else Branching**:
- Add `true_branch` and `false_branch` in config to specify target node IDs
- Example: `"config": {{"cases": [...], "true_branch": "success_node", "false_branch": "fallback_node"}}`
7. **question-classifier Branching**:
- Add `target` field to each class in the classes array
- Example: `"classes": [{{"id": "tech", "name": "Tech", "target": "tech_handler"}}, ...]`
8. **Node Specifics**:
- For `if-else` comparison_operator, use literal symbols: `≥`, `≤`, `=`, `≠` (NOT `>=` or `==`).
</rules>
<output_format>
Return ONLY a JSON object with a `nodes` array. Each node has:
- id: unique identifier
- type: node type
- title: display name
- config: node configuration
- depends_on: array of node IDs this depends on
```json
{{{{
"nodes": [
{{{{
"id": "fetch_data",
"type": "http-request",
"title": "Fetch Data",
"config": {{"url": "{{{{#start.url#}}}}", "method": "GET"}},
"depends_on": []
}}}},
{{{{
"id": "analyze",
"type": "llm",
"title": "Analyze",
"config": {{"prompt_template": [{{"role": "user", "text": "Analyze: {{{{#fetch_data.body#}}}}"}}]}},
"depends_on": ["fetch_data"]
}}}}
]
}}}}
```
</output_format>
<examples>
<example name="simple_linear">
```json
{{{{
"nodes": [
{{{{
"id": "llm",
"type": "llm",
"title": "Generate Response",
"config": {{{{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Answer: {{{{#start.query#}}}}"}}]
}}}},
"depends_on": []
}}}}
]
}}}}
```
</example>
<example name="parallel_then_merge">
```json
{{{{
"nodes": [
{{{{
"id": "api1",
"type": "http-request",
"title": "Fetch API 1",
"config": {{"url": "https://api1.example.com", "method": "GET"}},
"depends_on": []
}}}},
{{{{
"id": "api2",
"type": "http-request",
"title": "Fetch API 2",
"config": {{"url": "https://api2.example.com", "method": "GET"}},
"depends_on": []
}}}},
{{{{
"id": "merge",
"type": "llm",
"title": "Merge Results",
"config": {{{{
"prompt_template": [{{"role": "user", "text": "Combine: {{{{#api1.body#}}}} and {{{{#api2.body#}}}}"}}]
}}}},
"depends_on": ["api1", "api2"]
}}}}
]
}}}}
```
</example>
<example name="if_else_branching">
```json
{{{{
"nodes": [
{{{{
"id": "check",
"type": "if-else",
"title": "Check Condition",
"config": {{{{
"cases": [{{{{
"case_id": "case_1",
"logical_operator": "and",
"conditions": [{{{{
"variable_selector": ["start", "score"],
"comparison_operator": "",
"value": "60"
}}}}]
}}}}],
"true_branch": "pass_handler",
"false_branch": "fail_handler"
}}}},
"depends_on": []
}}}},
{{{{
"id": "pass_handler",
"type": "llm",
"title": "Pass Response",
"config": {{"prompt_template": [{{"role": "user", "text": "Congratulations!"}}]}},
"depends_on": []
}}}},
{{{{
"id": "fail_handler",
"type": "llm",
"title": "Fail Response",
"config": {{"prompt_template": [{{"role": "user", "text": "Try again."}}]}},
"depends_on": []
}}}}
]
}}}}
```
Note: pass_handler and fail_handler have empty depends_on because their connections come from if-else branches.
</example>
<example name="question_classifier">
```json
{{{{
"nodes": [
{{{{
"id": "classifier",
"type": "question-classifier",
"title": "Classify Intent",
"config": {{{{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"query_variable_selector": ["start", "user_input"],
"classes": [
{{"id": "tech", "name": "Technical", "target": "tech_handler"}},
{{"id": "billing", "name": "Billing", "target": "billing_handler"}},
{{"id": "other", "name": "Other", "target": "other_handler"}}
]
}}}},
"depends_on": []
}}}},
{{{{
"id": "tech_handler",
"type": "llm",
"title": "Tech Support",
"config": {{"prompt_template": [{{"role": "user", "text": "Help with tech: {{{{#start.user_input#}}}}"}}]}},
"depends_on": []
}}}},
{{{{
"id": "billing_handler",
"type": "llm",
"title": "Billing Support",
"config": {{"prompt_template": [{{"role": "user", "text": "Help with billing: {{{{#start.user_input#}}}}"}}]}},
"depends_on": []
}}}},
{{{{
"id": "other_handler",
"type": "llm",
"title": "General Support",
"config": {{"prompt_template": [{{"role": "user", "text": "General help: {{{{#start.user_input#}}}}"}}]}},
"depends_on": []
}}}}
]
}}}}
```
Note: Handler nodes have empty depends_on because their connections come from classifier branches.
</example>
</examples>
"""
BUILDER_USER_PROMPT_V2 = """<instruction>
{instruction}
</instruction>
Generate the workflow nodes configuration. Remember:
1. Do NOT generate start or end nodes
2. Do NOT generate edges - use depends_on instead
3. For if-else: add true_branch/false_branch in config
4. For question-classifier: add target to each class
"""
# =============================================================================
# LEGACY FORMAT: edges-based prompt (backward compatible)
# =============================================================================
BUILDER_SYSTEM_PROMPT = """<role>
You are a Workflow Configuration Engineer.
Your goal is to implement the Architect's plan by generating a precise, runnable Dify Workflow JSON configuration.
</role>
<language_rules>
- Detect the language of the user's request automatically (e.g., English, Chinese, Japanese, etc.).
- Generate ALL node titles, descriptions, and user-facing text in the SAME language as the user's input.
- If the input language is ambiguous or cannot be determined (e.g. code-only input),
use {preferred_language} as the target language.
</language_rules>
<inputs>
<plan>
{plan_context}
</plan>
<tool_schemas>
{tool_schemas}
</tool_schemas>
<node_specs>
{builtin_node_specs}
</node_specs>
<available_models>
{available_models}
</available_models>
<workflow_context>
<existing_nodes>
{existing_nodes_context}
</existing_nodes>
<existing_edges>
{existing_edges_context}
</existing_edges>
<selected_nodes>
{selected_nodes_context}
</selected_nodes>
</workflow_context>
</inputs>
<rules>
1. **Configuration**:
- You MUST fill ALL required parameters for every node.
- Use `{{{{#node_id.field#}}}}` syntax to reference outputs from previous nodes in text fields.
- For 'start' node, define all necessary user inputs.
2. **Variable References**:
- For text fields (like prompts, queries): use string format `{{{{#node_id.field#}}}}`
- For 'end' node outputs: use `value_selector` array format `["node_id", "field"]`
- Example: to reference 'llm' node's 'text' output in end node, use `["llm", "text"]`
3. **Tools**:
- ONLY use the tools listed in `<tool_schemas>`.
- If a planned tool is missing from schemas, fallback to `http-request` or `code`.
4. **Model Selection** (CRITICAL):
- For LLM, question-classifier, and parameter-extractor nodes, you MUST include a "model" config.
- You MUST use ONLY models from the `<available_models>` section above.
- Copy the EXACT provider and name values from available_models.
- NEVER use openai/gpt-4o, gpt-3.5-turbo, gpt-4, or any other models unless they appear in available_models.
- If available_models is empty or shows "No models configured", omit the model config entirely.
5. **Node Specifics**:
- For `if-else` comparison_operator, use literal symbols: `≥`, `≤`, `=`, `≠` (NOT `>=` or `==`).
6. **Modification Mode**:
- If `<existing_nodes>` contains nodes, you are MODIFYING an existing workflow.
- Keep nodes that are NOT mentioned in the user's instruction UNCHANGED.
- Only modify/add/remove nodes that the user explicitly requested.
- Preserve node IDs for unchanged nodes to maintain connections.
- If user says "add X", append new nodes to existing workflow.
- If user says "change Y to Z", only modify that specific node.
- If user says "remove X", exclude that node from output.
**Edge Modification**:
- Use `<existing_edges>` to understand current node connections.
- If user mentions "fix edge", "connect", "link", or "add connection",
review existing_edges and correct missing/wrong connections.
- For multi-branch nodes (if-else, question-classifier),
ensure EACH branch has proper sourceHandle (e.g., "true"/"false") and target.
- Common edge issues to fix:
* Missing edge: Two nodes should connect but don't - add the edge
* Wrong target: Edge points to wrong node - update the target
* Missing sourceHandle: if-else/classifier branches lack sourceHandle - add "true"/"false"
* Disconnected nodes: Node has no incoming or outgoing edges - connect it properly
- When modifying edges, ensure logical flow makes sense (start → middle → end).
- ALWAYS output complete edges array, even if only modifying one edge.
**Validation Feedback** (Automatic Retry):
- If `<validation_feedback>` is present, you are RETRYING after validation errors.
- Focus ONLY on fixing the specific validation issues mentioned.
- Keep everything else from the previous attempt UNCHANGED (preserve node IDs, edges, etc).
- Common validation issues and fixes:
* "Missing required connection" → Add the missing edge
* "Invalid node configuration" → Fix the specific node's config section
* "Type mismatch in variable reference" → Correct the variable selector path
* "Unknown variable" → Update variable reference to existing output
- When fixing, make MINIMAL changes to address each specific error.
7. **Output**:
- Return ONLY the JSON object with `nodes` and `edges`.
- Do NOT generate Mermaid diagrams.
- Do NOT generate explanations.
</rules>
<edge_rules priority="critical">
**EDGES ARE CRITICAL** - Every node except 'end' MUST have at least one outgoing edge.
1. **Linear Flow**: Simple source -> target connection
```
{{"source": "node_a", "target": "node_b"}}
```
2. **question-classifier Branching**: Each class MUST have a separate edge with `sourceHandle` = class `id`
- If you define classes: [{{"id": "cls_refund", "name": "Refund"}}, {{"id": "cls_inquiry", "name": "Inquiry"}}]
- You MUST create edges:
- {{"source": "classifier", "sourceHandle": "cls_refund", "target": "refund_handler"}}
- {{"source": "classifier", "sourceHandle": "cls_inquiry", "target": "inquiry_handler"}}
3. **if-else Branching**: MUST have exactly TWO edges with sourceHandle "true" and "false"
- {{"source": "condition", "sourceHandle": "true", "target": "true_branch"}}
- {{"source": "condition", "sourceHandle": "false", "target": "false_branch"}}
4. **Branch Convergence**: Multiple branches can connect to same downstream node
- Both true_branch and false_branch can connect to the same 'end' node
5. **NEVER leave orphan nodes**: Every node must be connected in the graph
</edge_rules>
<examples>
<example name="simple_linear">
```json
{{
"nodes": [
{{
"id": "start",
"type": "start",
"title": "Start",
"config": {{
"variables": [{{"variable": "query", "label": "Query", "type": "text-input"}}]
}}
}},
{{
"id": "llm",
"type": "llm",
"title": "Generate Response",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Answer: {{{{#start.query#}}}}"}}]
}}
}},
{{
"id": "end",
"type": "end",
"title": "End",
"config": {{
"outputs": [
{{"variable": "result", "value_selector": ["llm", "text"]}}
]
}}
}}
],
"edges": [
{{"source": "start", "target": "llm"}},
{{"source": "llm", "target": "end"}}
]
}}
```
</example>
<example name="question_classifier_branching" description="Customer service with intent classification">
```json
{{
"nodes": [
{{
"id": "start",
"type": "start",
"title": "Start",
"config": {{
"variables": [{{"variable": "user_input", "label": "User Message", "type": "text-input", "required": true}}]
}}
}},
{{
"id": "classifier",
"type": "question-classifier",
"title": "Classify Intent",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"query_variable_selector": ["start", "user_input"],
"classes": [
{{"id": "cls_refund", "name": "Refund Request"}},
{{"id": "cls_inquiry", "name": "Product Inquiry"}},
{{"id": "cls_complaint", "name": "Complaint"}},
{{"id": "cls_other", "name": "Other"}}
],
"instruction": "Classify the user's intent"
}}
}},
{{
"id": "handle_refund",
"type": "llm",
"title": "Handle Refund",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Extract order number and respond: {{{{#start.user_input#}}}}"}}]
}}
}},
{{
"id": "handle_inquiry",
"type": "llm",
"title": "Handle Inquiry",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Answer product question: {{{{#start.user_input#}}}}"}}]
}}
}},
{{
"id": "handle_complaint",
"type": "llm",
"title": "Handle Complaint",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Respond with empathy: {{{{#start.user_input#}}}}"}}]
}}
}},
{{
"id": "handle_other",
"type": "llm",
"title": "Handle Other",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Provide general response: {{{{#start.user_input#}}}}"}}]
}}
}},
{{
"id": "end",
"type": "end",
"title": "End",
"config": {{
"outputs": [{{"variable": "response", "value_selector": ["handle_refund", "text"]}}]
}}
}}
],
"edges": [
{{"source": "start", "target": "classifier"}},
{{"source": "classifier", "sourceHandle": "cls_refund", "target": "handle_refund"}},
{{"source": "classifier", "sourceHandle": "cls_inquiry", "target": "handle_inquiry"}},
{{"source": "classifier", "sourceHandle": "cls_complaint", "target": "handle_complaint"}},
{{"source": "classifier", "sourceHandle": "cls_other", "target": "handle_other"}},
{{"source": "handle_refund", "target": "end"}},
{{"source": "handle_inquiry", "target": "end"}},
{{"source": "handle_complaint", "target": "end"}},
{{"source": "handle_other", "target": "end"}}
]
}}
```
CRITICAL: Notice that each class id (cls_refund, cls_inquiry, etc.) becomes a sourceHandle in the edges!
</example>
<example name="if_else_branching" description="Conditional logic with if-else">
```json
{{
"nodes": [
{{
"id": "start",
"type": "start",
"title": "Start",
"config": {{
"variables": [{{"variable": "years", "label": "Years of Experience", "type": "number", "required": true}}]
}}
}},
{{
"id": "check_experience",
"type": "if-else",
"title": "Check Experience",
"config": {{
"cases": [
{{
"case_id": "case_1",
"logical_operator": "and",
"conditions": [
{{
"variable_selector": ["start", "years"],
"comparison_operator": "",
"value": "3"
}}
]
}}
]
}}
}},
{{
"id": "qualified",
"type": "llm",
"title": "Qualified Response",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Generate qualified candidate response"}}]
}}
}},
{{
"id": "not_qualified",
"type": "llm",
"title": "Not Qualified Response",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Generate rejection response"}}]
}}
}},
{{
"id": "end",
"type": "end",
"title": "End",
"config": {{
"outputs": [{{"variable": "result", "value_selector": ["qualified", "text"]}}]
}}
}}
],
"edges": [
{{"source": "start", "target": "check_experience"}},
{{"source": "check_experience", "sourceHandle": "true", "target": "qualified"}},
{{"source": "check_experience", "sourceHandle": "false", "target": "not_qualified"}},
{{"source": "qualified", "target": "end"}},
{{"source": "not_qualified", "target": "end"}}
]
}}
```
CRITICAL: if-else MUST have exactly two edges with sourceHandle "true" and "false"!
</example>
<example name="parameter_extractor" description="Extract structured data from text">
```json
{{
"nodes": [
{{
"id": "start",
"type": "start",
"title": "Start",
"config": {{
"variables": [{{"variable": "resume", "label": "Resume Text", "type": "paragraph", "required": true}}]
}}
}},
{{
"id": "extract",
"type": "parameter-extractor",
"title": "Extract Info",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"query": ["start", "resume"],
"parameters": [
{{"name": "name", "type": "string", "description": "Candidate name", "required": true}},
{{"name": "years", "type": "number", "description": "Years of experience", "required": true}},
{{"name": "skills", "type": "array[string]", "description": "List of skills", "required": true}}
],
"instruction": "Extract candidate information from resume"
}}
}},
{{
"id": "process",
"type": "llm",
"title": "Process Data",
"config": {{
"model": {{"provider": "openai", "name": "gpt-4o", "mode": "chat"}},
"prompt_template": [{{"role": "user", "text": "Name: {{{{#extract.name#}}}}, Years: {{{{#extract.years#}}}}"}}]
}}
}},
{{
"id": "end",
"type": "end",
"title": "End",
"config": {{
"outputs": [{{"variable": "result", "value_selector": ["process", "text"]}}]
}}
}}
],
"edges": [
{{"source": "start", "target": "extract"}},
{{"source": "extract", "target": "process"}},
{{"source": "process", "target": "end"}}
]
}}
```
</example>
</examples>
<edge_checklist>
Before finalizing, verify:
1. [ ] Every node (except 'end') has at least one outgoing edge
2. [ ] 'start' node has exactly one outgoing edge
3. [ ] 'question-classifier' has one edge per class, each with sourceHandle = class id
4. [ ] 'if-else' has exactly two edges: sourceHandle "true" and sourceHandle "false"
5. [ ] All branches eventually connect to 'end' (directly or through other nodes)
6. [ ] No orphan nodes exist (every node is reachable from 'start')
</edge_checklist>
"""
BUILDER_USER_PROMPT = """<instruction>
{instruction}
</instruction>
Generate the full workflow configuration now. Pay special attention to:
1. Creating edges for ALL branches of question-classifier and if-else nodes
2. Using correct sourceHandle values for branching nodes
3. Ensuring every node is connected in the graph
"""
def format_existing_nodes(nodes: list[dict] | None) -> str:
"""Format existing workflow nodes for context."""
if not nodes:
return "No existing nodes in workflow (creating from scratch)."
lines = []
for node in nodes:
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
title = node.get("title", "Untitled")
lines.append(f"- [{node_id}] {title} ({node_type})")
return "\n".join(lines)
def format_selected_nodes(
selected_ids: list[str] | None,
existing_nodes: list[dict] | None,
) -> str:
"""Format selected nodes for modification context."""
if not selected_ids:
return "No nodes selected (generating new workflow)."
node_map = {n.get("id"): n for n in (existing_nodes or [])}
lines = []
for node_id in selected_ids:
if node_id in node_map:
node = node_map[node_id]
lines.append(f"- [{node_id}] {node.get('title', 'Untitled')} ({node.get('type', 'unknown')})")
else:
lines.append(f"- [{node_id}] (not found in current workflow)")
return "\n".join(lines)
def format_existing_edges(edges: list[dict] | None) -> str:
"""Format existing workflow edges to show connections."""
if not edges:
return "No existing edges (creating new workflow)."
lines = []
for edge in edges:
source = edge.get("source", "unknown")
target = edge.get("target", "unknown")
source_handle = edge.get("sourceHandle", "")
if source_handle:
lines.append(f"- {source} ({source_handle}) -> {target}")
else:
lines.append(f"- {source} -> {target}")
return "\n".join(lines)


@@ -0,0 +1,75 @@
PLANNER_SYSTEM_PROMPT = """<role>
You are an expert Workflow Architect.
Your job is to analyze user requests and plan a high-level automation workflow.
</role>
<task>
1. **Classify Intent**:
- Is the user asking to create an automation/workflow? -> Intent: "generate"
- Is it general chat/weather/jokes? -> Intent: "off_topic"
2. **Plan Steps** (if intent is "generate"):
- Break down the user's goal into logical steps.
- For each step, identify if a specific capability/tool is needed.
- Select the MOST RELEVANT tools from the available_tools list.
- DO NOT configure parameters yet. Just identify the tool.
3. **Output Format**:
Return a JSON object.
</task>
<available_tools>
{tools_summary}
</available_tools>
<response_format>
If intent is "generate":
```json
{{
"intent": "generate",
"plan_thought": "Brief explanation of the plan...",
"steps": [
{{ "step": 1, "description": "Fetch data from URL", "tool": "http-request" }},
{{ "step": 2, "description": "Summarize content", "tool": "llm" }},
{{ "step": 3, "description": "Search for info", "tool": "google_search" }}
],
"required_tool_keys": ["google_search"]
}}
```
(Note: 'http-request', 'llm', and 'code' are built-in; list only external tools in required_tool_keys.)
If intent is "off_topic":
```json
{{
"intent": "off_topic",
"message": "I can only help you build workflows. Try asking me to 'Create a workflow that...'",
"suggestions": ["Scrape a website", "Summarize a PDF"]
}}
```
</response_format>
"""
PLANNER_USER_PROMPT = """<user_request>
{instruction}
</user_request>
"""
def format_tools_for_planner(tools: list[dict]) -> str:
"""Format tools list for planner (Lightweight: Name + Description only)."""
if not tools:
return "No external tools available."
lines = []
for t in tools:
key = t.get("tool_key") or t.get("tool_name")
provider = t.get("provider_id") or t.get("provider", "")
desc = t.get("tool_description") or t.get("description", "")
label = t.get("tool_label") or key
# Format: - [provider/key] Label: Description
full_key = f"{provider}/{key}" if provider else key
lines.append(f"- [{full_key}] {label}: {desc}")
return "\n".join(lines)

File diff suppressed because it is too large


@@ -0,0 +1,349 @@
import json
import logging
import re
from collections.abc import Sequence
import json_repair
from core.model_manager import ModelManager
from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
from core.model_runtime.entities.model_entities import ModelType
from core.workflow.generator.prompts.builder_prompts import (
BUILDER_SYSTEM_PROMPT,
BUILDER_SYSTEM_PROMPT_V2,
BUILDER_USER_PROMPT,
BUILDER_USER_PROMPT_V2,
format_existing_edges,
format_existing_nodes,
format_selected_nodes,
)
from core.workflow.generator.prompts.planner_prompts import (
PLANNER_SYSTEM_PROMPT,
PLANNER_USER_PROMPT,
format_tools_for_planner,
)
from core.workflow.generator.prompts.vibe_prompts import (
format_available_models,
format_available_nodes,
format_available_tools,
parse_vibe_response,
)
from core.workflow.generator.utils.graph_builder import CyclicDependencyError, GraphBuilder
from core.workflow.generator.utils.mermaid_generator import generate_mermaid
from core.workflow.generator.utils.workflow_validator import ValidationHint, WorkflowValidator
logger = logging.getLogger(__name__)
class WorkflowGenerator:
"""
Refactored Vibe Workflow Generator (Planner-Builder Architecture).
Extracts Vibe logic from the monolithic LLMGenerator.
"""
@classmethod
def generate_workflow_flowchart(
cls,
tenant_id: str,
instruction: str,
model_config: dict,
available_nodes: Sequence[dict[str, object]] | None = None,
existing_nodes: Sequence[dict[str, object]] | None = None,
existing_edges: Sequence[dict[str, object]] | None = None,
available_tools: Sequence[dict[str, object]] | None = None,
selected_node_ids: Sequence[str] | None = None,
previous_workflow: dict[str, object] | None = None,
regenerate_mode: bool = False,
preferred_language: str | None = None,
available_models: Sequence[dict[str, object]] | None = None,
use_graph_builder: bool = False,
):
"""
Generates a Dify Workflow Flowchart from natural language instruction.
Pipeline:
1. Planner: Analyze intent & select tools.
2. Context Filter: Filter relevant tools (reduce tokens).
3. Builder: Generate node configurations.
4. Repair: Fix common node/edge issues (NodeRepair, EdgeRepair).
5. Validator: Check for errors & generate friendly hints.
6. Renderer: Deterministic Mermaid generation.
"""
model_manager = ModelManager()
model_instance = model_manager.get_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
provider=model_config.get("provider", ""),
model=model_config.get("name", ""),
)
model_parameters = model_config.get("completion_params", {})
available_tools_list = list(available_tools) if available_tools else []
# Check if this is modification mode (user is refining existing workflow)
has_existing_nodes = existing_nodes and len(list(existing_nodes)) > 0
# --- STEP 1: PLANNER (Skip in modification mode) ---
if has_existing_nodes:
# In modification mode, skip Planner:
# - User intent is clear: modify the existing workflow
# - Tools are already in use (from existing nodes)
# - No need for intent classification or tool selection
plan_data = {"intent": "generate", "steps": [], "required_tool_keys": []}
filtered_tools = available_tools_list # Use all available tools
else:
# In creation mode, run Planner to validate intent and select tools
planner_tools_context = format_tools_for_planner(available_tools_list)
planner_system = PLANNER_SYSTEM_PROMPT.format(tools_summary=planner_tools_context)
planner_user = PLANNER_USER_PROMPT.format(instruction=instruction)
try:
response = model_instance.invoke_llm(
prompt_messages=[
SystemPromptMessage(content=planner_system),
UserPromptMessage(content=planner_user),
],
model_parameters=model_parameters,
stream=False,
)
plan_content = response.message.content
# Reuse parse_vibe_response logic or simple load
plan_data = parse_vibe_response(plan_content)
except Exception as e:
logger.exception("Planner failed")
return {"intent": "error", "error": f"Planning failed: {str(e)}"}
if plan_data.get("intent") == "off_topic":
return {
"intent": "off_topic",
"message": plan_data.get("message", "I can only help with workflow creation."),
"suggestions": plan_data.get("suggestions", []),
}
# --- STEP 2: CONTEXT FILTERING ---
required_tools = plan_data.get("required_tool_keys", [])
filtered_tools = []
if required_tools:
# Simple linear search (optimized version would use a map)
for tool in available_tools_list:
t_key = tool.get("tool_key") or tool.get("tool_name")
provider = tool.get("provider_id") or tool.get("provider")
full_key = f"{provider}/{t_key}" if provider else t_key
# Check if this tool is in required list (match either full key or short name)
if t_key in required_tools or full_key in required_tools:
filtered_tools.append(tool)
else:
# If logic only, no tools needed
filtered_tools = []
# --- STEP 3: BUILDER (with retry loop) ---
MAX_GLOBAL_RETRIES = 2 # Total attempts: 1 initial + 1 retry
workflow_data = None
mermaid_code = None
all_warnings = []
all_fixes = []
retry_count = 0
validation_hints = []
for attempt in range(MAX_GLOBAL_RETRIES):
retry_count = attempt
logger.info("Generation attempt %s/%s", attempt + 1, MAX_GLOBAL_RETRIES)
# Prepare context
tool_schemas = format_available_tools(filtered_tools)
node_specs = format_available_nodes(list(available_nodes) if available_nodes else [])
existing_nodes_context = format_existing_nodes(list(existing_nodes) if existing_nodes else None)
existing_edges_context = format_existing_edges(list(existing_edges) if existing_edges else None)
selected_nodes_context = format_selected_nodes(
list(selected_node_ids) if selected_node_ids else None, list(existing_nodes) if existing_nodes else None
)
# Build retry context
retry_context = ""
# NOTE: Manual regeneration/refinement mode removed;
# only automatic retry on validation errors is handled here.
if attempt > 0 and validation_hints:
severe_issues = [h for h in validation_hints if h.severity == "error"]
if severe_issues:
retry_context = "\n<validation_feedback>\n"
retry_context += "The previous generation had validation errors:\n"
for idx, hint in enumerate(severe_issues[:5], 1):
retry_context += f"{idx}. {hint.message}\n"
retry_context += "\nPlease fix these specific issues while keeping everything else UNCHANGED.\n"
retry_context += "</validation_feedback>\n"
# Select prompt version based on use_graph_builder flag
if use_graph_builder:
builder_system = BUILDER_SYSTEM_PROMPT_V2.format(
plan_context=json.dumps(plan_data.get("steps", []), indent=2),
tool_schemas=tool_schemas,
builtin_node_specs=node_specs,
available_models=format_available_models(list(available_models or [])),
preferred_language=preferred_language or "English",
existing_nodes_context=existing_nodes_context,
selected_nodes_context=selected_nodes_context,
)
builder_user = BUILDER_USER_PROMPT_V2.format(instruction=instruction) + retry_context
else:
builder_system = BUILDER_SYSTEM_PROMPT.format(
plan_context=json.dumps(plan_data.get("steps", []), indent=2),
tool_schemas=tool_schemas,
builtin_node_specs=node_specs,
available_models=format_available_models(list(available_models or [])),
preferred_language=preferred_language or "English",
existing_nodes_context=existing_nodes_context,
existing_edges_context=existing_edges_context,
selected_nodes_context=selected_nodes_context,
)
builder_user = BUILDER_USER_PROMPT.format(instruction=instruction) + retry_context
try:
build_res = model_instance.invoke_llm(
prompt_messages=[
SystemPromptMessage(content=builder_system),
UserPromptMessage(content=builder_user),
],
model_parameters=model_parameters,
stream=False,
)
# Builder output is raw JSON nodes/edges
build_content = build_res.message.content
match = re.search(r"```(?:json)?\s*([\s\S]+?)```", build_content)
if match:
build_content = match.group(1)
workflow_data = json_repair.loads(build_content)
if "nodes" not in workflow_data:
workflow_data["nodes"] = []
# --- GraphBuilder Mode: Build graph from depends_on ---
if use_graph_builder:
try:
# Extract nodes from LLM output (without start/end)
llm_nodes = workflow_data.get("nodes", [])
# Build complete graph with start/end and edges
complete_nodes, edges = GraphBuilder.build_graph(llm_nodes)
workflow_data["nodes"] = complete_nodes
workflow_data["edges"] = edges
logger.info(
"GraphBuilder: built %d nodes, %d edges from %d LLM nodes",
len(complete_nodes),
len(edges),
len(llm_nodes),
)
except CyclicDependencyError as e:
logger.warning("GraphBuilder: cyclic dependency detected: %s", e)
# Add to validation hints for retry
validation_hints.append(
ValidationHint(
node_id="",
field="depends_on",
message=f"Cyclic dependency detected: {e}. Please fix the dependency chain.",
severity="error",
)
)
if attempt == MAX_GLOBAL_RETRIES - 1:
return {
"intent": "error",
"error": "Failed to build workflow: cyclic dependency detected.",
}
continue # Retry with error feedback
except Exception as e:
logger.exception("GraphBuilder failed on attempt %d", attempt + 1)
if attempt == MAX_GLOBAL_RETRIES - 1:
return {"intent": "error", "error": f"Graph building failed: {str(e)}"}
continue
else:
# Legacy mode: edges from LLM output
if "edges" not in workflow_data:
workflow_data["edges"] = []
except Exception as e:
logger.exception("Builder failed on attempt %d", attempt + 1)
if attempt == MAX_GLOBAL_RETRIES - 1:
return {"intent": "error", "error": f"Building failed: {str(e)}"}
continue # Try again
# NOTE: NodeRepair and EdgeRepair have been removed.
# Validation will detect structural issues, and LLM will fix them on retry.
# This is more accurate because LLM understands the workflow context.
# --- STEP 4: RENDERER (Generate Mermaid early for validation) ---
mermaid_code = generate_mermaid(workflow_data)
# --- STEP 5: VALIDATOR ---
is_valid, validation_hints = WorkflowValidator.validate(workflow_data, available_tools_list)
# --- STEP 6: GRAPH VALIDATION (structural checks using graph algorithms) ---
if attempt < MAX_GLOBAL_RETRIES - 1:
try:
from core.workflow.generator.utils.graph_validator import GraphValidator
graph_result = GraphValidator.validate(workflow_data)
if not graph_result.success:
# Convert graph errors to validation hints
for graph_error in graph_result.errors:
validation_hints.append(
ValidationHint(
node_id=graph_error.node_id,
field="edges",
message=f"[Graph] {graph_error.message}",
severity="error",
)
)
# Also add warnings (dead ends) as hints
for graph_warning in graph_result.warnings:
validation_hints.append(
ValidationHint(
node_id=graph_warning.node_id,
field="edges",
message=f"[Graph] {graph_warning.message}",
severity="warning",
)
)
except Exception as e:
logger.warning("Graph validation error: %s", e)
# Collect all validation warnings
all_warnings = [h.message for h in validation_hints]
# Check if we should retry
severe_issues = [h for h in validation_hints if h.severity == "error"]
if not severe_issues or attempt == MAX_GLOBAL_RETRIES - 1:
break
# Has severe errors and retries remaining - continue to next attempt
# Collect all validation warnings
all_warnings = [h.message for h in validation_hints]
# Add stability warning (as requested by user)
stability_warning = "The generated workflow may require debugging."
if preferred_language and preferred_language.startswith("zh"):
stability_warning = "生成的 Workflow 可能需要调试。"
all_warnings.append(stability_warning)
return {
"intent": "generate",
"flowchart": mermaid_code,
"nodes": workflow_data["nodes"],
"edges": workflow_data["edges"],
"message": plan_data.get("plan_thought", "Generated workflow based on your request."),
"warnings": all_warnings,
"tool_recommendations": [], # Legacy field
"error": "",
"fixed_issues": all_fixes, # Track what was auto-fixed
"retry_count": retry_count, # Track how many retries were needed
}
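The return contract above can be exercised roughly as follows; the import path, tenant id, and model settings are assumptions for illustration only:

```python
# NOTE: the module path is assumed; the diff does not show this file's name.
from core.workflow.generator.workflow_generator import WorkflowGenerator

result = WorkflowGenerator.generate_workflow_flowchart(
    tenant_id="tenant-123",  # placeholder
    instruction="Summarize a web page and classify its sentiment",
    model_config={"provider": "openai", "name": "gpt-4o", "completion_params": {}},
    available_tools=[],
    use_graph_builder=True,
)
if result["intent"] == "generate":
    print(result["flowchart"])  # deterministic Mermaid rendering
    print(result["warnings"])   # validation hints plus the stability notice
elif result["intent"] == "off_topic":
    print(result["message"])
```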


@@ -0,0 +1,217 @@
"""
Type definitions for Vibe Workflow Generator.
This module provides:
- TypedDict classes for lightweight type hints (no runtime overhead)
- Pydantic models for runtime validation where needed
Usage:
# For type hints only (no runtime validation):
from core.workflow.generator.types import WorkflowNodeDict, WorkflowEdgeDict
# For runtime validation:
from core.workflow.generator.types import WorkflowNode, WorkflowEdge
"""
from typing import Any, TypedDict
from pydantic import BaseModel, Field
# ============================================================
# TypedDict definitions (lightweight, for type hints only)
# ============================================================
class WorkflowNodeDict(TypedDict, total=False):
"""
Workflow node structure (TypedDict for hints).
Attributes:
id: Unique node identifier
type: Node type (e.g., "start", "end", "llm", "if-else", "http-request")
title: Human-readable node title
config: Node-specific configuration
data: Additional node data
"""
id: str
type: str
title: str
config: dict[str, Any]
data: dict[str, Any]
class WorkflowEdgeDict(TypedDict, total=False):
"""
Workflow edge structure (TypedDict for hints).
Attributes:
source: Source node ID
target: Target node ID
sourceHandle: Branch handle for if-else/question-classifier nodes
"""
source: str
target: str
sourceHandle: str
class AvailableModelDict(TypedDict):
"""
Available model structure.
Attributes:
provider: Model provider (e.g., "openai", "anthropic")
model: Model name (e.g., "gpt-4", "claude-3")
"""
provider: str
model: str
class ToolParameterDict(TypedDict, total=False):
"""
Tool parameter structure.
Attributes:
name: Parameter name
type: Parameter type (e.g., "string", "number", "boolean")
required: Whether parameter is required
human_description: Human-readable description
llm_description: LLM-oriented description
options: Available options for enum-type parameters
"""
name: str
type: str
required: bool
human_description: str | dict[str, str]
llm_description: str
options: list[Any]
class AvailableToolDict(TypedDict, total=False):
"""
Available tool structure.
Attributes:
provider_id: Tool provider ID
provider: Tool provider name (alternative to provider_id)
tool_key: Unique tool key
tool_name: Tool name (alternative to tool_key)
tool_description: Tool description
description: Alternative description field
is_team_authorization: Whether tool is configured/authorized
parameters: List of tool parameters
"""
provider_id: str
provider: str
tool_key: str
tool_name: str
tool_description: str
description: str
is_team_authorization: bool
parameters: list[ToolParameterDict]
class WorkflowDataDict(TypedDict, total=False):
"""
Complete workflow data structure.
Attributes:
nodes: List of workflow nodes
edges: List of workflow edges
warnings: List of warning messages
"""
nodes: list[WorkflowNodeDict]
edges: list[WorkflowEdgeDict]
warnings: list[str]
# ============================================================
# Pydantic models (for runtime validation)
# ============================================================
class WorkflowNode(BaseModel):
"""
Workflow node with runtime validation.
Use this model when you need to validate node data at runtime.
For lightweight type hints without validation, use WorkflowNodeDict.
"""
id: str
type: str
title: str = ""
config: dict[str, Any] = Field(default_factory=dict)
data: dict[str, Any] = Field(default_factory=dict)
class WorkflowEdge(BaseModel):
"""
Workflow edge with runtime validation.
Use this model when you need to validate edge data at runtime.
For lightweight type hints without validation, use WorkflowEdgeDict.
"""
source: str
target: str
sourceHandle: str | None = None
class AvailableModel(BaseModel):
"""
Available model with runtime validation.
Use this model when you need to validate model data at runtime.
For lightweight type hints without validation, use AvailableModelDict.
"""
provider: str
model: str
class ToolParameter(BaseModel):
"""Tool parameter with runtime validation."""
name: str = ""
type: str = "string"
required: bool = False
human_description: str | dict[str, str] = ""
llm_description: str = ""
options: list[Any] = Field(default_factory=list)
class AvailableTool(BaseModel):
"""
Available tool with runtime validation.
Use this model when you need to validate tool data at runtime.
For lightweight type hints without validation, use AvailableToolDict.
"""
provider_id: str = ""
provider: str = ""
tool_key: str = ""
tool_name: str = ""
tool_description: str = ""
description: str = ""
is_team_authorization: bool = False
parameters: list[ToolParameter] = Field(default_factory=list)
class WorkflowData(BaseModel):
"""
Complete workflow data with runtime validation.
Use this model when you need to validate workflow data at runtime.
For lightweight type hints without validation, use WorkflowDataDict.
"""
nodes: list[WorkflowNode] = Field(default_factory=list)
edges: list[WorkflowEdge] = Field(default_factory=list)
warnings: list[str] = Field(default_factory=list)
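As a small sketch, the Pydantic layer can be used to validate an LLM-produced dict before it reaches the graph utilities (the literal below is hypothetical):

```python
from core.workflow.generator.types import WorkflowData

raw = {
    "nodes": [
        {"id": "start", "type": "start"},
        {"id": "llm_1", "type": "llm", "title": "Summarize"},
    ],
    "edges": [{"source": "start", "target": "llm_1"}],
}
workflow = WorkflowData(**raw)         # nested dicts are validated into models
print(workflow.nodes[1].title)         # "Summarize"
print(workflow.edges[0].sourceHandle)  # None (plain edge, no branch handle)
```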


@@ -0,0 +1,384 @@
"""
Edge Repair Utility for Vibe Workflow Generation.
This module provides intelligent edge repair capabilities for generated workflows.
It can detect and fix common edge issues:
- Missing edges between sequential nodes
- Incomplete branches for question-classifier and if-else nodes
- Orphaned nodes without connections
The repair logic is deterministic and doesn't require LLM calls.
"""
import logging
from dataclasses import dataclass, field
from core.workflow.generator.types import WorkflowDataDict, WorkflowEdgeDict, WorkflowNodeDict
logger = logging.getLogger(__name__)
@dataclass
class RepairResult:
"""Result of edge repair operation."""
nodes: list[WorkflowNodeDict]
edges: list[WorkflowEdgeDict]
repairs_made: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
@property
def was_repaired(self) -> bool:
"""Check if any repairs were made."""
return len(self.repairs_made) > 0
class EdgeRepair:
"""
Intelligent edge repair for workflow graphs.
Repairs are applied in order:
1. Infer linear connections from node order (if no edges exist)
2. Add missing branch edges for question-classifier
3. Add missing branch edges for if-else
4. Connect orphaned nodes
"""
@classmethod
def repair(cls, workflow_data: WorkflowDataDict) -> RepairResult:
"""
Repair edges in the workflow data.
Args:
workflow_data: Dict containing 'nodes' and 'edges'
Returns:
RepairResult with repaired nodes, edges, and repair logs
"""
nodes = list(workflow_data.get("nodes", []))
edges = list(workflow_data.get("edges", []))
repairs: list[str] = []
warnings: list[str] = []
logger.info("[EDGE REPAIR] Starting repair process for %s nodes, %s edges", len(nodes), len(edges))
# Build node lookup
node_map = {n.get("id"): n for n in nodes if n.get("id")}
node_ids = set(node_map.keys())
# 1. If no edges at all, infer linear chain
if not edges and len(nodes) > 1:
edges, inferred_repairs = cls._infer_linear_chain(nodes)
repairs.extend(inferred_repairs)
# 2. Build edge index for analysis
outgoing_edges: dict[str, list[WorkflowEdgeDict]] = {}
incoming_edges: dict[str, list[WorkflowEdgeDict]] = {}
for edge in edges:
src = edge.get("source")
tgt = edge.get("target")
if src:
outgoing_edges.setdefault(src, []).append(edge)
if tgt:
incoming_edges.setdefault(tgt, []).append(edge)
# 3. Repair question-classifier branches
for node in nodes:
if node.get("type") == "question-classifier":
new_edges, branch_repairs, branch_warnings = cls._repair_classifier_branches(
node, edges, outgoing_edges, node_ids
)
edges.extend(new_edges)
repairs.extend(branch_repairs)
warnings.extend(branch_warnings)
# Update outgoing index
for edge in new_edges:
outgoing_edges.setdefault(edge.get("source"), []).append(edge)
# 4. Repair if-else branches
for node in nodes:
if node.get("type") == "if-else":
new_edges, branch_repairs, branch_warnings = cls._repair_if_else_branches(
node, edges, outgoing_edges, node_ids
)
edges.extend(new_edges)
repairs.extend(branch_repairs)
warnings.extend(branch_warnings)
# Update outgoing index
for edge in new_edges:
outgoing_edges.setdefault(edge.get("source"), []).append(edge)
# 5. Connect orphaned nodes (nodes with no incoming edge, except start)
new_edges, orphan_repairs = cls._connect_orphaned_nodes(nodes, edges, outgoing_edges, incoming_edges)
edges.extend(new_edges)
repairs.extend(orphan_repairs)
# 6. Connect nodes with no outgoing edge to 'end' (except end nodes)
new_edges, terminal_repairs = cls._connect_terminal_nodes(nodes, edges, outgoing_edges)
edges.extend(new_edges)
repairs.extend(terminal_repairs)
if repairs:
logger.info("[EDGE REPAIR] Completed with %s repairs:", len(repairs))
for i, repair in enumerate(repairs, 1):
logger.info("[EDGE REPAIR] %s. %s", i, repair)
else:
logger.info("[EDGE REPAIR] Completed - no repairs needed")
return RepairResult(
nodes=nodes,
edges=edges,
repairs_made=repairs,
warnings=warnings,
)
@classmethod
def _infer_linear_chain(cls, nodes: list[WorkflowNodeDict]) -> tuple[list[WorkflowEdgeDict], list[str]]:
"""
Infer a linear chain of edges from node order.
This is used when no edges are provided at all.
"""
edges: list[WorkflowEdgeDict] = []
repairs: list[str] = []
# Filter to get ordered node IDs
node_ids = [n.get("id") for n in nodes if n.get("id")]
if len(node_ids) < 2:
return edges, repairs
# Create edges between consecutive nodes
for i in range(len(node_ids) - 1):
src = node_ids[i]
tgt = node_ids[i + 1]
edges.append({"source": src, "target": tgt})
repairs.append(f"Inferred edge: {src} -> {tgt}")
return edges, repairs
@classmethod
def _repair_classifier_branches(
cls,
node: WorkflowNodeDict,
edges: list[WorkflowEdgeDict],
outgoing_edges: dict[str, list[WorkflowEdgeDict]],
valid_node_ids: set[str],
) -> tuple[list[WorkflowEdgeDict], list[str], list[str]]:
"""
Repair missing branches for question-classifier nodes.
For each class that doesn't have an edge, create one pointing to 'end'.
"""
new_edges: list[WorkflowEdgeDict] = []
repairs: list[str] = []
warnings: list[str] = []
node_id = node.get("id")
if not node_id:
return new_edges, repairs, warnings
config = node.get("config", {})
classes = config.get("classes", [])
if not classes:
return new_edges, repairs, warnings
# Get existing sourceHandles for this node
existing_handles = set()
for edge in outgoing_edges.get(node_id, []):
handle = edge.get("sourceHandle")
if handle:
existing_handles.add(handle)
# Find 'end' node as default target
end_node_id = "end"
if "end" not in valid_node_ids:
# Try to find an end node
for nid in valid_node_ids:
if "end" in nid.lower():
end_node_id = nid
break
# Add missing branches
for cls_def in classes:
if not isinstance(cls_def, dict):
continue
cls_id = cls_def.get("id")
cls_name = cls_def.get("name", cls_id)
if cls_id and cls_id not in existing_handles:
new_edge = {
"source": node_id,
"sourceHandle": cls_id,
"target": end_node_id,
}
new_edges.append(new_edge)
repairs.append(f"Added missing branch edge for class '{cls_name}' -> {end_node_id}")
warnings.append(
f"Auto-connected question-classifier branch '{cls_name}' to '{end_node_id}'. "
"You may want to redirect this to a specific handler node."
)
return new_edges, repairs, warnings
@classmethod
def _repair_if_else_branches(
cls,
node: WorkflowNodeDict,
edges: list[WorkflowEdgeDict],
outgoing_edges: dict[str, list[WorkflowEdgeDict]],
valid_node_ids: set[str],
) -> tuple[list[WorkflowEdgeDict], list[str], list[str]]:
"""
Repair missing branches for if-else nodes.
If-else in Dify uses case_id as sourceHandle for each condition,
plus 'false' for the else branch.
"""
new_edges: list[WorkflowEdgeDict] = []
repairs: list[str] = []
warnings: list[str] = []
node_id = node.get("id")
if not node_id:
return new_edges, repairs, warnings
# Get existing sourceHandles
existing_handles = set()
for edge in outgoing_edges.get(node_id, []):
handle = edge.get("sourceHandle")
if handle:
existing_handles.add(handle)
# Find 'end' node as default target
end_node_id = "end"
if "end" not in valid_node_ids:
for nid in valid_node_ids:
if "end" in nid.lower():
end_node_id = nid
break
# Get required branches from config
config = node.get("config", {})
cases = config.get("cases", [])
# Build required handles: each case_id + 'false' for else
required_branches = set()
for case in cases:
case_id = case.get("case_id")
if case_id:
required_branches.add(case_id)
required_branches.add("false") # else branch
# Add missing branches
for branch in required_branches:
if branch not in existing_handles:
new_edge = {
"source": node_id,
"sourceHandle": branch,
"target": end_node_id,
}
new_edges.append(new_edge)
repairs.append(f"Added missing if-else branch '{branch}' -> {end_node_id}")
warnings.append(
f"Auto-connected if-else branch '{branch}' to '{end_node_id}'. "
"You may want to redirect this to a specific handler node."
)
return new_edges, repairs, warnings
@classmethod
def _connect_orphaned_nodes(
cls,
nodes: list[WorkflowNodeDict],
edges: list[WorkflowEdgeDict],
outgoing_edges: dict[str, list[WorkflowEdgeDict]],
incoming_edges: dict[str, list[WorkflowEdgeDict]],
) -> tuple[list[WorkflowEdgeDict], list[str]]:
"""
Connect orphaned nodes to the previous node in sequence.
An orphaned node has no incoming edges and is not a 'start' node.
"""
new_edges: list[WorkflowEdgeDict] = []
repairs: list[str] = []
node_ids = [n.get("id") for n in nodes if n.get("id")]
node_types = {n.get("id"): n.get("type") for n in nodes}
for i, node_id in enumerate(node_ids):
node_type = node_types.get(node_id)
# Skip start nodes - they don't need incoming edges
if node_type == "start":
continue
# Check if node has incoming edges
if node_id not in incoming_edges or not incoming_edges[node_id]:
# Find previous node to connect from
if i > 0:
prev_node_id = node_ids[i - 1]
new_edge = {"source": prev_node_id, "target": node_id}
new_edges.append(new_edge)
repairs.append(f"Connected orphaned node: {prev_node_id} -> {node_id}")
# Update edge indexes for subsequent checks (including outgoing_edges,
# so the terminal-node pass does not add a duplicate edge from prev_node_id)
incoming_edges.setdefault(node_id, []).append(new_edge)
outgoing_edges.setdefault(prev_node_id, []).append(new_edge)
return new_edges, repairs
@classmethod
def _connect_terminal_nodes(
cls,
nodes: list[WorkflowNodeDict],
edges: list[WorkflowEdgeDict],
outgoing_edges: dict[str, list[WorkflowEdgeDict]],
) -> tuple[list[WorkflowEdgeDict], list[str]]:
"""
Connect terminal nodes (no outgoing edges) to 'end'.
A terminal node has no outgoing edges and is not an 'end' node.
This ensures all branches eventually reach 'end'.
"""
new_edges: list[WorkflowEdgeDict] = []
repairs: list[str] = []
# Find end node
end_node_id = None
node_ids = set()
for n in nodes:
nid = n.get("id")
ntype = n.get("type")
if nid:
node_ids.add(nid)
if ntype == "end":
end_node_id = nid
if not end_node_id:
# No end node found, can't connect
return new_edges, repairs
for node in nodes:
node_id = node.get("id")
node_type = node.get("type")
# Skip end nodes
if node_type == "end":
continue
# Skip nodes that already have outgoing edges
if outgoing_edges.get(node_id):
continue
# Connect to end
new_edge = {"source": node_id, "target": end_node_id}
new_edges.append(new_edge)
repairs.append(f"Connected terminal node to end: {node_id} -> {end_node_id}")
# Update for subsequent checks
outgoing_edges.setdefault(node_id, []).append(new_edge)
return new_edges, repairs
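A minimal sketch of the repair pass on a workflow whose final connection is missing; the node dicts are hypothetical and the module path is assumed:

```python
from core.workflow.generator.utils.edge_repair import EdgeRepair  # path assumed

workflow = {
    "nodes": [
        {"id": "start", "type": "start", "title": "Start"},
        {"id": "llm_1", "type": "llm", "title": "Answer"},
        {"id": "end", "type": "end", "title": "End"},
    ],
    "edges": [{"source": "start", "target": "llm_1"}],
}
result = EdgeRepair.repair(workflow)
print(result.was_repaired)   # True
print(result.repairs_made)   # e.g. ['Connected orphaned node: llm_1 -> end']
```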


@@ -0,0 +1,621 @@
"""
GraphBuilder: Automatic workflow graph construction from node list.
This module implements the core logic for building complete workflow graphs
from LLM-generated node lists with dependency declarations.
Key features:
- Automatic start/end node generation
- Dependency inference from variable references
- Topological sorting with cycle detection
- Special handling for branching nodes (if-else, question-classifier)
- Silent error recovery where possible
"""
import json
import logging
import re
import uuid
from collections import defaultdict
from typing import Any
logger = logging.getLogger(__name__)
# Pattern to match variable references like {{#node_id.field#}}
VAR_PATTERN = re.compile(r"\{\{#([^.#]+)\.[^#]+#\}\}")
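# Example: "{{#extract.name#}}" matches and captures "extract" as the upstream node id;
# "{{#sys.query#}}" also matches, but "sys" is filtered out below via SYSTEM_VAR_PREFIXES.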
# System variable prefixes to exclude from dependency inference
SYSTEM_VAR_PREFIXES = {"sys", "start", "env"}
# Node types that have special branching behavior
BRANCHING_NODE_TYPES = {"if-else", "question-classifier"}
# Container node types (iteration, loop) - these have internal subgraphs
# but behave as single-input-single-output nodes in the external graph
CONTAINER_NODE_TYPES = {"iteration", "loop"}
class GraphBuildError(Exception):
"""Raised when graph cannot be built due to unrecoverable errors."""
pass
class CyclicDependencyError(GraphBuildError):
"""Raised when cyclic dependencies are detected."""
pass
class GraphBuilder:
"""
Builds complete workflow graphs from LLM-generated node lists.
This class handles the conversion from a simplified node list format
(with depends_on declarations) to a full workflow graph with nodes and edges.
The LLM only needs to generate:
- Node configurations with depends_on arrays
- Branch targets in config for branching nodes
The GraphBuilder automatically:
- Adds start and end nodes
- Generates all edges from dependencies
- Infers implicit dependencies from variable references
- Handles branching nodes (if-else, question-classifier)
- Validates graph structure (no cycles, proper connectivity)
"""
@classmethod
def build_graph(
cls,
nodes: list[dict[str, Any]],
start_config: dict[str, Any] | None = None,
end_config: dict[str, Any] | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
"""
Build a complete workflow graph from a node list.
Args:
nodes: LLM-generated nodes (without start/end)
start_config: Optional configuration for start node
end_config: Optional configuration for end node
Returns:
Tuple of (complete_nodes, edges) where:
- complete_nodes includes start, user nodes, and end
- edges contains all connections
Raises:
CyclicDependencyError: If cyclic dependencies are detected
GraphBuildError: If graph cannot be built
"""
if not nodes:
# Empty node list - create minimal workflow
start_node = cls._create_start_node([], start_config)
end_node = cls._create_end_node([], end_config)
edge = cls._create_edge("start", "end")
return [start_node, end_node], [edge]
# Build node index for quick lookup
node_map = {node["id"]: node for node in nodes}
# Step 1: Extract explicit dependencies from depends_on
dependencies = cls._extract_explicit_dependencies(nodes)
# Step 2: Infer implicit dependencies from variable references
dependencies = cls._infer_dependencies_from_variables(nodes, dependencies, node_map)
# Step 3: Validate and fix dependencies (remove invalid references)
dependencies = cls._validate_dependencies(dependencies, node_map)
# Step 4: Topological sort (detects cycles)
sorted_node_ids = cls._topological_sort(nodes, dependencies)
# Step 5: Generate start node
start_node = cls._create_start_node(nodes, start_config)
# Step 6: Generate edges
edges = cls._generate_edges(nodes, sorted_node_ids, dependencies, node_map)
# Step 7: Find terminal nodes and generate end node
terminal_nodes = cls._find_terminal_nodes(nodes, dependencies, node_map)
end_node = cls._create_end_node(terminal_nodes, end_config)
# Step 8: Add edges from terminal nodes to end
for terminal_id in terminal_nodes:
edges.append(cls._create_edge(terminal_id, "end"))
# Step 9: Assemble complete node list
all_nodes = [start_node, *nodes, end_node]
return all_nodes, edges
@classmethod
def _extract_explicit_dependencies(
cls,
nodes: list[dict[str, Any]],
) -> dict[str, list[str]]:
"""
Extract explicit dependencies from depends_on field.
Args:
nodes: List of nodes with optional depends_on field
Returns:
Dictionary mapping node_id -> list of dependency node_ids
"""
dependencies: dict[str, list[str]] = {}
for node in nodes:
node_id = node.get("id", "")
depends_on = node.get("depends_on", [])
# Ensure depends_on is a list
if isinstance(depends_on, str):
depends_on = [depends_on] if depends_on else []
elif not isinstance(depends_on, list):
depends_on = []
dependencies[node_id] = list(depends_on)
return dependencies
@classmethod
def _infer_dependencies_from_variables(
cls,
nodes: list[dict[str, Any]],
explicit_deps: dict[str, list[str]],
node_map: dict[str, dict[str, Any]],
) -> dict[str, list[str]]:
"""
Infer implicit dependencies from variable references in config.
Scans node configurations for patterns like {{#node_id.field#}}
and adds those as dependencies if not already declared.
Args:
nodes: List of nodes
explicit_deps: Already extracted explicit dependencies
node_map: Map of node_id -> node for validation
Returns:
Updated dependencies dictionary
"""
for node in nodes:
node_id = node.get("id", "")
config = node.get("config", {})
# Serialize config to search for variable references
try:
config_str = json.dumps(config, ensure_ascii=False)
except (TypeError, ValueError):
continue
# Find all variable references
referenced_nodes = set(VAR_PATTERN.findall(config_str))
# Filter out system variables
referenced_nodes -= SYSTEM_VAR_PREFIXES
# Ensure node_id exists in dependencies
if node_id not in explicit_deps:
explicit_deps[node_id] = []
# Add inferred dependencies
for ref in referenced_nodes:
# Skip self-references (e.g., loop nodes referencing their own outputs)
if ref == node_id:
logger.debug(
"Skipping self-reference: %s -> %s",
node_id,
ref,
)
continue
if ref in node_map and ref not in explicit_deps[node_id]:
explicit_deps[node_id].append(ref)
logger.debug(
"Inferred dependency: %s -> %s (from variable reference)",
node_id,
ref,
)
return explicit_deps
@classmethod
def _validate_dependencies(
cls,
dependencies: dict[str, list[str]],
node_map: dict[str, dict[str, Any]],
) -> dict[str, list[str]]:
"""
Validate dependencies and remove invalid references.
Silent fix: References to non-existent nodes are removed.
Args:
dependencies: Dependencies to validate
node_map: Map of valid node IDs
Returns:
Validated dependencies
"""
valid_deps: dict[str, list[str]] = {}
for node_id, deps in dependencies.items():
valid_deps[node_id] = []
for dep in deps:
if dep in node_map:
valid_deps[node_id].append(dep)
else:
logger.warning(
"Removed invalid dependency: %s -> %s (node does not exist)",
node_id,
dep,
)
return valid_deps
@classmethod
def _topological_sort(
cls,
nodes: list[dict[str, Any]],
dependencies: dict[str, list[str]],
) -> list[str]:
"""
Perform topological sort on nodes based on dependencies.
Uses Kahn's algorithm for cycle detection.
Args:
nodes: List of nodes
dependencies: Dependency graph
Returns:
List of node IDs in topological order
Raises:
CyclicDependencyError: If cyclic dependencies are detected
"""
# Build in-degree map
in_degree: dict[str, int] = defaultdict(int)
reverse_deps: dict[str, list[str]] = defaultdict(list)
node_ids = {node["id"] for node in nodes}
for node_id in node_ids:
in_degree[node_id] = 0
for node_id, deps in dependencies.items():
for dep in deps:
if dep in node_ids:
in_degree[node_id] += 1
reverse_deps[dep].append(node_id)
# Start with nodes that have no dependencies
queue = [nid for nid in node_ids if in_degree[nid] == 0]
sorted_ids: list[str] = []
while queue:
current = queue.pop(0)
sorted_ids.append(current)
for dependent in reverse_deps[current]:
in_degree[dependent] -= 1
if in_degree[dependent] == 0:
queue.append(dependent)
# Check for cycles
if len(sorted_ids) != len(node_ids):
remaining = node_ids - set(sorted_ids)
raise CyclicDependencyError(
f"Cyclic dependency detected involving nodes: {remaining}"
)
return sorted_ids
@classmethod
def _generate_edges(
cls,
nodes: list[dict[str, Any]],
sorted_node_ids: list[str],
dependencies: dict[str, list[str]],
node_map: dict[str, dict[str, Any]],
) -> list[dict[str, Any]]:
"""
Generate all edges based on dependencies and special node handling.
Args:
nodes: List of nodes
sorted_node_ids: Topologically sorted node IDs
dependencies: Dependency graph
node_map: Map of node_id -> node
Returns:
List of edge dictionaries
"""
edges: list[dict[str, Any]] = []
nodes_with_incoming: set[str] = set()
# Track which nodes have outgoing edges from branching
branching_sources: set[str] = set()
# First pass: Handle branching nodes
for node in nodes:
node_id = node.get("id", "")
node_type = node.get("type", "")
if node_type == "if-else":
branch_edges = cls._handle_if_else_node(node)
edges.extend(branch_edges)
branching_sources.add(node_id)
nodes_with_incoming.update(edge["target"] for edge in branch_edges)
elif node_type == "question-classifier":
branch_edges = cls._handle_question_classifier_node(node)
edges.extend(branch_edges)
branching_sources.add(node_id)
nodes_with_incoming.update(edge["target"] for edge in branch_edges)
# Second pass: Generate edges from dependencies
for node_id in sorted_node_ids:
deps = dependencies.get(node_id, [])
if deps:
# Connect from each dependency
for dep_id in deps:
dep_node = node_map.get(dep_id, {})
dep_type = dep_node.get("type", "")
# Skip if dependency is a branching node (edges handled above)
if dep_type in BRANCHING_NODE_TYPES:
continue
edges.append(cls._create_edge(dep_id, node_id))
nodes_with_incoming.add(node_id)
else:
# No dependencies - connect from start
# But skip if this node receives edges from branching nodes
if node_id not in nodes_with_incoming:
edges.append(cls._create_edge("start", node_id))
nodes_with_incoming.add(node_id)
return edges
@classmethod
def _handle_if_else_node(
cls,
node: dict[str, Any],
) -> list[dict[str, Any]]:
"""
Handle if-else node branching.
Expects config to contain true_branch and/or false_branch.
Args:
node: If-else node
Returns:
List of branch edges
"""
edges: list[dict[str, Any]] = []
node_id = node.get("id", "")
config = node.get("config", {})
true_branch = config.get("true_branch")
false_branch = config.get("false_branch")
if true_branch:
edges.append(cls._create_edge(node_id, true_branch, source_handle="true"))
if false_branch:
edges.append(cls._create_edge(node_id, false_branch, source_handle="false"))
# If no branches specified, log warning
if not true_branch and not false_branch:
logger.warning(
"if-else node %s has no branch targets specified",
node_id,
)
return edges
@classmethod
def _handle_question_classifier_node(
cls,
node: dict[str, Any],
) -> list[dict[str, Any]]:
"""
Handle question-classifier node branching.
Expects config.classes to contain class definitions with target fields.
Args:
node: Question-classifier node
Returns:
List of branch edges
"""
edges: list[dict[str, Any]] = []
node_id = node.get("id", "")
config = node.get("config", {})
classes = config.get("classes", [])
if not classes:
logger.warning(
"question-classifier node %s has no classes defined",
node_id,
)
return edges
for cls_def in classes:
class_id = cls_def.get("id", "")
target = cls_def.get("target")
if target:
edges.append(cls._create_edge(node_id, target, source_handle=class_id))
else:
# Silent fix: Connect to end if no target specified
edges.append(cls._create_edge(node_id, "end", source_handle=class_id))
logger.debug(
"question-classifier class %s has no target, connecting to end",
class_id,
)
return edges
@classmethod
def _find_terminal_nodes(
cls,
nodes: list[dict[str, Any]],
dependencies: dict[str, list[str]],
node_map: dict[str, dict[str, Any]],
) -> list[str]:
"""
Find nodes that should connect to the end node.
Terminal nodes are those that:
- Are not dependencies of any other node
- Are not branching nodes (those connect to their branches)
Args:
nodes: List of nodes
dependencies: Dependency graph
node_map: Map of node_id -> node
Returns:
List of terminal node IDs
"""
# Build set of all nodes that are depended upon
depended_upon: set[str] = set()
for deps in dependencies.values():
depended_upon.update(deps)
# Also track nodes that are branch targets
branch_targets: set[str] = set()
branching_nodes: set[str] = set()
for node in nodes:
node_id = node.get("id", "")
node_type = node.get("type", "")
config = node.get("config", {})
if node_type == "if-else":
branching_nodes.add(node_id)
if config.get("true_branch"):
branch_targets.add(config["true_branch"])
if config.get("false_branch"):
branch_targets.add(config["false_branch"])
elif node_type == "question-classifier":
branching_nodes.add(node_id)
for cls_def in config.get("classes", []):
if cls_def.get("target"):
branch_targets.add(cls_def["target"])
# Find terminal nodes
terminal_nodes: list[str] = []
for node in nodes:
node_id = node.get("id", "")
node_type = node.get("type", "")
# Skip branching nodes - they don't connect to end directly
if node_type in BRANCHING_NODE_TYPES:
continue
# Terminal if no other node depends on it (branching nodes were excluded above)
if node_id not in depended_upon:
terminal_nodes.append(node_id)
# If no terminal nodes found (shouldn't happen), use all non-branching nodes
if not terminal_nodes:
terminal_nodes = [
node["id"]
for node in nodes
if node.get("type") not in BRANCHING_NODE_TYPES
]
logger.warning("No terminal nodes found, using all non-branching nodes")
return terminal_nodes
@classmethod
def _create_start_node(
cls,
nodes: list[dict[str, Any]],
config: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""
Create a start node.
Args:
nodes: User nodes (for potential config inference)
config: Optional start node configuration
Returns:
Start node dictionary
"""
return {
"id": "start",
"type": "start",
"title": "Start",
"config": config or {},
"data": {},
}
@classmethod
def _create_end_node(
cls,
terminal_nodes: list[str],
config: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""
Create an end node.
Args:
terminal_nodes: Nodes that will connect to end
config: Optional end node configuration
Returns:
End node dictionary
"""
return {
"id": "end",
"type": "end",
"title": "End",
"config": config or {},
"data": {},
}
@classmethod
def _create_edge(
cls,
source: str,
target: str,
source_handle: str | None = None,
) -> dict[str, Any]:
"""
Create an edge dictionary.
Args:
source: Source node ID
target: Target node ID
source_handle: Optional handle for branching (e.g., "true", "false", class_id)
Returns:
Edge dictionary
"""
edge: dict[str, Any] = {
"id": f"{source}-{target}-{uuid.uuid4().hex[:8]}",
"source": source,
"target": target,
}
if source_handle:
edge["sourceHandle"] = source_handle
else:
edge["sourceHandle"] = "source"
edge["targetHandle"] = "target"
return edge
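A small sketch of the depends_on contract the builder consumes; the two LLM-style nodes below are hypothetical:

```python
from core.workflow.generator.utils.graph_builder import GraphBuilder

llm_nodes = [
    {"id": "fetch", "type": "http-request", "title": "Fetch Page", "config": {"url": "https://example.com"}},
    {
        "id": "summarize",
        "type": "llm",
        "title": "Summarize",
        "depends_on": ["fetch"],
        "config": {"prompt_template": [{"role": "user", "text": "Summarize: {{#fetch.body#}}"}]},
    },
]
nodes, edges = GraphBuilder.build_graph(llm_nodes)
print([n["id"] for n in nodes])
# ['start', 'fetch', 'summarize', 'end']
print([(e["source"], e["target"]) for e in edges])
# [('start', 'fetch'), ('fetch', 'summarize'), ('summarize', 'end')]
```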


@@ -0,0 +1,280 @@
"""
Graph Validator for Workflow Generation
Validates workflow graph structure using graph algorithms:
- Reachability from start node (BFS)
- Reachability to end node (reverse BFS)
- Branch edge validation for if-else and classifier nodes
"""
import time
from collections import deque
from dataclasses import dataclass, field
@dataclass
class GraphError:
"""Represents a structural error in the workflow graph."""
node_id: str
node_type: str
error_type: str # "unreachable", "dead_end", "cycle", "missing_start", "missing_end"
message: str
@dataclass
class GraphValidationResult:
"""Result of graph validation."""
success: bool
errors: list[GraphError] = field(default_factory=list)
warnings: list[GraphError] = field(default_factory=list)
execution_time: float = 0.0
stats: dict = field(default_factory=dict)
class GraphValidator:
"""
Validates workflow graph structure using proper graph algorithms.
Performs:
1. Forward reachability analysis (BFS from start)
2. Backward reachability analysis (reverse BFS from end)
3. Branch edge validation for if-else and classifier nodes
"""
@staticmethod
def _build_adjacency(
nodes: dict[str, dict], edges: list[dict]
) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
"""Build forward and reverse adjacency lists from edges."""
outgoing: dict[str, list[str]] = {node_id: [] for node_id in nodes}
incoming: dict[str, list[str]] = {node_id: [] for node_id in nodes}
for edge in edges:
source = edge.get("source")
target = edge.get("target")
if source in outgoing and target in incoming:
outgoing[source].append(target)
incoming[target].append(source)
return outgoing, incoming
@staticmethod
def _bfs_reachable(start: str, adjacency: dict[str, list[str]]) -> set[str]:
"""BFS to find all nodes reachable from start node."""
if start not in adjacency:
return set()
visited = set()
queue = deque([start])
visited.add(start)
while queue:
current = queue.popleft()
for neighbor in adjacency.get(current, []):
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
return visited
@staticmethod
def validate(workflow_data: dict) -> GraphValidationResult:
"""Validate workflow graph structure."""
start_time = time.time()
errors: list[GraphError] = []
warnings: list[GraphError] = []
nodes_list = workflow_data.get("nodes", [])
edges_list = workflow_data.get("edges", [])
nodes = {n["id"]: n for n in nodes_list if n.get("id")}
# Find start and end nodes
start_node_id = None
end_node_ids = []
for node_id, node in nodes.items():
node_type = node.get("type")
if node_type == "start":
start_node_id = node_id
elif node_type == "end":
end_node_ids.append(node_id)
# Check start node exists
if not start_node_id:
errors.append(
GraphError(
node_id="workflow",
node_type="workflow",
error_type="missing_start",
message="Workflow has no start node",
)
)
# Check end node exists
if not end_node_ids:
errors.append(
GraphError(
node_id="workflow",
node_type="workflow",
error_type="missing_end",
message="Workflow has no end node",
)
)
# If missing start or end, can't do reachability analysis
if not start_node_id or not end_node_ids:
execution_time = time.time() - start_time
return GraphValidationResult(
success=False,
errors=errors,
warnings=warnings,
execution_time=execution_time,
stats={"nodes": len(nodes), "edges": len(edges_list)},
)
# Build adjacency lists
outgoing, incoming = GraphValidator._build_adjacency(nodes, edges_list)
# --- FORWARD REACHABILITY: BFS from start ---
reachable_from_start = GraphValidator._bfs_reachable(start_node_id, outgoing)
# Find unreachable nodes
unreachable_nodes = set(nodes.keys()) - reachable_from_start
for node_id in unreachable_nodes:
node = nodes[node_id]
errors.append(
GraphError(
node_id=node_id,
node_type=node.get("type", "unknown"),
error_type="unreachable",
message=f"Node '{node_id}' is not reachable from start node",
)
)
# --- BACKWARD REACHABILITY: Reverse BFS from end nodes ---
can_reach_end: set[str] = set()
for end_id in end_node_ids:
can_reach_end.update(GraphValidator._bfs_reachable(end_id, incoming))
# Find dead-end nodes (can't reach any end node)
dead_end_nodes = set(nodes.keys()) - can_reach_end
for node_id in dead_end_nodes:
if node_id in unreachable_nodes:
continue
node = nodes[node_id]
warnings.append(
GraphError(
node_id=node_id,
node_type=node.get("type", "unknown"),
error_type="dead_end",
message=f"Node '{node_id}' cannot reach any end node (dead end)",
)
)
# --- Start node has outgoing edges? ---
if not outgoing.get(start_node_id):
errors.append(
GraphError(
node_id=start_node_id,
node_type="start",
error_type="disconnected",
message="Start node has no outgoing connections",
)
)
# --- End nodes have incoming edges? ---
for end_id in end_node_ids:
if not incoming.get(end_id):
errors.append(
GraphError(
node_id=end_id,
node_type="end",
error_type="disconnected",
message="End node has no incoming connections",
)
)
# --- BRANCH EDGE VALIDATION ---
edge_handles: dict[str, set[str]] = {}
for edge in edges_list:
source = edge.get("source")
handle = edge.get("sourceHandle", "")
if source:
if source not in edge_handles:
edge_handles[source] = set()
edge_handles[source].add(handle)
# Check if-else and question-classifier nodes
for node_id, node in nodes.items():
node_type = node.get("type")
if node_type == "if-else":
handles = edge_handles.get(node_id, set())
config = node.get("config", {})
cases = config.get("cases", [])
required_handles = set()
for case in cases:
case_id = case.get("case_id")
if case_id:
required_handles.add(case_id)
required_handles.add("false")
missing = required_handles - handles
for handle in missing:
errors.append(
GraphError(
node_id=node_id,
node_type=node_type,
error_type="missing_branch",
message=f"If-else node '{node_id}' missing edge for branch '{handle}'",
)
)
elif node_type == "question-classifier":
handles = edge_handles.get(node_id, set())
config = node.get("config", {})
classes = config.get("classes", [])
required_handles = set()
for cls in classes:
if isinstance(cls, dict):
cls_id = cls.get("id")
if cls_id:
required_handles.add(cls_id)
missing = required_handles - handles
for handle in missing:
cls_name = handle
for cls in classes:
if isinstance(cls, dict) and cls.get("id") == handle:
cls_name = cls.get("name", handle)
break
errors.append(
GraphError(
node_id=node_id,
node_type=node_type,
error_type="missing_branch",
message=f"Classifier '{node_id}' missing edge for class '{cls_name}'",
)
)
execution_time = time.time() - start_time
success = len(errors) == 0
return GraphValidationResult(
success=success,
errors=errors,
warnings=warnings,
execution_time=execution_time,
stats={
"nodes": len(nodes),
"edges": len(edges_list),
"reachable_from_start": len(reachable_from_start),
"can_reach_end": len(can_reach_end),
"unreachable": len(unreachable_nodes),
"dead_ends": len(dead_end_nodes - unreachable_nodes),
},
)
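A quick sketch of the structural checks on a deliberately broken graph (the node that is never wired in is hypothetical):

```python
from core.workflow.generator.utils.graph_validator import GraphValidator

workflow = {
    "nodes": [
        {"id": "start", "type": "start"},
        {"id": "end", "type": "end"},
        {"id": "orphan", "type": "llm"},
    ],
    "edges": [{"source": "start", "target": "end"}],
}
result = GraphValidator.validate(workflow)
print(result.success)                         # False
print([e.error_type for e in result.errors])  # ['unreachable'] (the orphan node)
print(result.stats["unreachable"])            # 1
```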


@@ -0,0 +1,113 @@
import logging
from core.workflow.generator.types import WorkflowDataDict
logger = logging.getLogger(__name__)
def generate_mermaid(workflow_data: WorkflowDataDict) -> str:
"""
Generate a Mermaid flowchart from workflow data consisting of nodes and edges.
Args:
workflow_data: Dict containing 'nodes' (list) and 'edges' (list)
Returns:
String containing the Mermaid flowchart syntax
"""
nodes = workflow_data.get("nodes", [])
edges = workflow_data.get("edges", [])
lines = ["flowchart TD"]
# 1. Define Nodes
# Format: node_id["title<br/>type"] or similar
# We will use the Vibe Workflow standard format: id["type=TYPE|title=TITLE"]
# Or specifically for tool nodes: id["type=tool|title=TITLE|tool=TOOL_KEY"]
# Map of original IDs to safe Mermaid IDs
id_map = {}
def get_safe_id(original_id: str) -> str:
if original_id == "end":
return "end_node"
if original_id == "subgraph":
return "subgraph_node"
# Mermaid IDs should be alphanumeric.
# If the ID has special chars, we might need to escape or hash, but Vibe usually generates simple IDs.
# We'll trust standard IDs but handle the reserved keyword 'end'.
return original_id
for node in nodes:
node_id = node.get("id")
if not node_id:
continue
safe_id = get_safe_id(node_id)
id_map[node_id] = safe_id
node_type = node.get("type", "unknown")
title = node.get("title", "Untitled")
# Escape quotes in title
safe_title = title.replace('"', "'")
if node_type == "tool":
config = node.get("config", {})
# Try multiple fields for tool reference
tool_ref = (
config.get("tool_key")
or config.get("tool")
or config.get("tool_name")
or node.get("tool_name")
or "unknown"
)
node_def = f'{safe_id}["type={node_type}|title={safe_title}|tool={tool_ref}"]'
else:
node_def = f'{safe_id}["type={node_type}|title={safe_title}"]'
lines.append(f" {node_def}")
# 2. Define Edges
# Format: source --> target
# Track defined nodes to avoid edge errors
defined_node_ids = {n.get("id") for n in nodes if n.get("id")}
for edge in edges:
source = edge.get("source")
target = edge.get("target")
# Skip invalid edges
if not source or not target:
continue
if source not in defined_node_ids or target not in defined_node_ids:
continue
safe_source = id_map.get(source, source)
safe_target = id_map.get(target, target)
# Handle conditional branches (true/false) if present
# In Dify workflow, sourceHandle is often used for this
source_handle = edge.get("sourceHandle")
label = ""
if source_handle == "true":
label = "|true|"
elif source_handle == "false":
label = "|false|"
elif source_handle and source_handle != "source":
# For question-classifier or other multi-path nodes
# Clean up handle for display if needed
safe_handle = str(source_handle).replace('"', "'")
label = f"|{safe_handle}|"
edge_line = f" {safe_source} -->{label} {safe_target}"
lines.append(edge_line)
# Start/End nodes are implicitly handled if they are in the 'nodes' list
# If not, we might need to add them, but usually the Builder should produce them.
result = "\n".join(lines)
return result
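
A minimal usage sketch of generate_mermaid (not part of the diff). The import path is an assumption; node and edge field names follow the function above, and the example also shows how the reserved id 'end' is remapped.

# Minimal sketch, not part of the diff; the module path below is an assumption.
from core.workflow.generator.mermaid import generate_mermaid  # hypothetical path

workflow = {
    "nodes": [
        {"id": "start", "type": "start", "title": "Start"},
        {"id": "end", "type": "end", "title": "End"},
    ],
    "edges": [{"source": "start", "target": "end"}],
}
print(generate_mermaid(workflow))
# flowchart TD
#   start["type=start|title=Start"]
#   end_node["type=end|title=End"]   <- 'end' is remapped to avoid the Mermaid keyword
#   start --> end_node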

View File

@@ -0,0 +1,304 @@
"""
Node Repair Utility for Vibe Workflow Generation.
This module provides intelligent node configuration repair capabilities.
It can detect and fix common node configuration issues:
- Invalid comparison operators in if-else nodes (e.g. '>=' -> '≥')
"""
import copy
import logging
import uuid
from dataclasses import dataclass, field
from core.workflow.generator.types import WorkflowNodeDict
logger = logging.getLogger(__name__)
@dataclass
class NodeRepairResult:
"""Result of node repair operation."""
nodes: list[WorkflowNodeDict]
repairs_made: list[str] = field(default_factory=list)
warnings: list[str] = field(default_factory=list)
@property
def was_repaired(self) -> bool:
"""Check if any repairs were made."""
return len(self.repairs_made) > 0
class NodeRepair:
"""
Intelligent node configuration repair.
"""
OPERATOR_MAP = {
">=": "≥",
"<=": "≤",
"!=": "≠",
"==": "=",
}
TYPE_MAPPING = {
"json": "object",
"dict": "object",
"dictionary": "object",
"float": "number",
"int": "number",
"integer": "number",
"double": "number",
"str": "string",
"text": "string",
"bool": "boolean",
"list": "array[object]",
"array": "array[object]",
}
_REPAIR_HANDLERS = {
"if-else": "_repair_if_else_operators",
"variable-aggregator": "_repair_variable_aggregator_variables",
"code": "_repair_code_node_config",
}
@classmethod
def repair(
cls,
nodes: list[WorkflowNodeDict],
llm_callback=None,
) -> NodeRepairResult:
"""
Repair node configurations.
Args:
nodes: List of node dictionaries
llm_callback: Optional callback(node, issue_desc) -> fixed_config_part
Returns:
NodeRepairResult with repaired nodes and logs
"""
# Deep copy to avoid mutating original
nodes = copy.deepcopy(nodes)
repairs: list[str] = []
warnings: list[str] = []
logger.info("[NODE REPAIR] Starting repair process for %s nodes", len(nodes))
for node in nodes:
node_type = node.get("type")
# 1. Rule-based repairs
handler_name = cls._REPAIR_HANDLERS.get(node_type)
if handler_name:
handler = getattr(cls, handler_name)
# Handlers may or may not accept llm_callback; try the keyword form first,
# then fall back to the plain (node, repairs) signature.
try:
handler(node, repairs, llm_callback=llm_callback)
except TypeError:
# Fallback for handlers that don't accept llm_callback yet
handler(node, repairs)
# Add other node type repairs here as needed
if repairs:
logger.info("[NODE REPAIR] Completed with %s repairs:", len(repairs))
for i, repair in enumerate(repairs, 1):
logger.info("[NODE REPAIR] %s. %s", i, repair)
else:
logger.info("[NODE REPAIR] Completed - no repairs needed")
return NodeRepairResult(
nodes=nodes,
repairs_made=repairs,
warnings=warnings,
)
@classmethod
def _repair_if_else_operators(cls, node: WorkflowNodeDict, repairs: list[str], **kwargs):
"""
Normalize comparison operators in if-else nodes and ensure 'id' fields
exist for cases and conditions (a frontend requirement).
"""
node_id = node.get("id", "unknown")
config = node.get("config", {})
cases = config.get("cases", [])
for case in cases:
# Ensure case_id
if "case_id" not in case:
case["case_id"] = str(uuid.uuid4())
repairs.append(f"Generated missing case_id for case in node '{node_id}'")
conditions = case.get("conditions", [])
for condition in conditions:
# Ensure condition id
if "id" not in condition:
condition["id"] = str(uuid.uuid4())
# Not logging this repair to avoid clutter, as it's a structural fix
# Ensure value type (LLM might return int/float, but we need str/bool/list)
val = condition.get("value")
if isinstance(val, (int, float)) and not isinstance(val, bool):
condition["value"] = str(val)
repairs.append(f"Coerced numeric value to string in node '{node_id}'")
op = condition.get("comparison_operator")
if op in cls.OPERATOR_MAP:
new_op = cls.OPERATOR_MAP[op]
condition["comparison_operator"] = new_op
repairs.append(f"Normalized operator '{op}' to '{new_op}' in node '{node_id}'")
@classmethod
def _repair_variable_aggregator_variables(cls, node: WorkflowNodeDict, repairs: list[str]):
"""
Repair variable-aggregator variables format.
Converts dict format to list[list[str]] format.
Expected: [["node_id", "field"], ["node_id2", "field2"]]
May receive: [{"name": "...", "value_selector": ["node_id", "field"]}, ...]
"""
node_id = node.get("id", "unknown")
config = node.get("config", {})
variables = config.get("variables", [])
if not variables:
return
repaired = False
repaired_variables = []
for var in variables:
if isinstance(var, dict):
# Convert dict format to array format
value_selector = var.get("value_selector") or var.get("selector") or var.get("path")
if isinstance(value_selector, list) and len(value_selector) > 0:
repaired_variables.append(value_selector)
repaired = True
else:
# Try to extract from name field - LLM may generate {"name": "node_id.field"}
name = var.get("name")
if isinstance(name, str) and "." in name:
# Try to parse "node_id.field" format
parts = name.split(".", 1)
if len(parts) == 2:
repaired_variables.append([parts[0], parts[1]])
repaired = True
else:
logger.warning(
"Variable aggregator node '%s' has invalid variable format: %s",
node_id,
var,
)
repaired_variables.append([]) # Empty array as fallback
else:
# If no valid selector or name, skip this variable
logger.warning(
"Variable aggregator node '%s' has invalid variable format: %s",
node_id,
var,
)
# Don't add empty array - skip invalid variables
elif isinstance(var, list):
# Already in correct format
repaired_variables.append(var)
else:
# Unknown format, skip
logger.warning("Variable aggregator node '%s' has unknown variable format: %s", node_id, var)
# Don't add empty array - skip invalid variables
if repaired:
config["variables"] = repaired_variables
repairs.append(f"Repaired variable-aggregator variables format in node '{node_id}'")
@classmethod
def _repair_code_node_config(cls, node: WorkflowNodeDict, repairs: list[str], llm_callback=None):
"""
Repair code node configuration (outputs and variables).
1. Outputs: Converts list format to dict format AND normalizes types.
2. Variables: Ensures value_selector exists.
"""
node_id = node.get("id", "unknown")
config = node.get("config", {})
if "variables" not in config:
config["variables"] = []
# --- Repair Variables ---
variables = config.get("variables")
if isinstance(variables, list):
for var in variables:
if isinstance(var, dict):
# Ensure value_selector exists (frontend crashes if missing)
if "value_selector" not in var:
var["value_selector"] = []
# Not logging trivial repairs
# --- Repair Outputs ---
outputs = config.get("outputs")
if not outputs:
return
# Helper to normalize type
def normalize_type(t: str) -> str:
t_lower = str(t).lower()
return cls.TYPE_MAPPING.get(t_lower, t)
# 1. Handle Dict format (Standard) - Check for invalid types
if isinstance(outputs, dict):
changed = False
for var_name, var_config in outputs.items():
if isinstance(var_config, dict):
original_type = var_config.get("type")
if original_type:
new_type = normalize_type(original_type)
if new_type != original_type:
var_config["type"] = new_type
changed = True
repairs.append(
f"Normalized type '{original_type}' to '{new_type}' "
f"for var '{var_name}' in node '{node_id}'"
)
return
# 2. Handle List format (Repair needed)
if isinstance(outputs, list):
new_outputs = {}
for item in outputs:
if isinstance(item, dict):
var_name = item.get("variable") or item.get("name")
var_type = item.get("type")
if var_name and var_type:
norm_type = normalize_type(var_type)
new_outputs[var_name] = {"type": norm_type}
if norm_type != var_type:
repairs.append(
f"Normalized type '{var_type}' to '{norm_type}' "
f"during list conversion in node '{node_id}'"
)
if new_outputs:
config["outputs"] = new_outputs
repairs.append(f"Repaired code node outputs format in node '{node_id}'")
else:
# Fallback: Try LLM if available
if llm_callback:
try:
# Attempt to fix using LLM
fixed_outputs = llm_callback(
node,
"outputs must be a dictionary like {'var_name': {'type': 'string'}}, "
"but got a list or valid conversion failed.",
)
if isinstance(fixed_outputs, dict) and fixed_outputs:
config["outputs"] = fixed_outputs
repairs.append(f"Repaired code node outputs format using LLM in node '{node_id}'")
return
except Exception as e:
logger.warning("LLM fallback repair failed for node '%s': %s", node_id, e)
# If conversion/LLM failed, set to empty dict
config["outputs"] = {}
repairs.append(f"Reset invalid code node outputs to empty dict in node '{node_id}'")

View File

@@ -0,0 +1,101 @@
from dataclasses import dataclass
from core.workflow.generator.types import AvailableModelDict, AvailableToolDict, WorkflowDataDict
from core.workflow.generator.validation.context import ValidationContext
from core.workflow.generator.validation.engine import ValidationEngine
from core.workflow.generator.validation.rules import Severity
@dataclass
class ValidationHint:
"""Legacy compatibility class for validation hints."""
node_id: str
field: str
message: str
severity: str # 'error', 'warning'
suggestion: str | None = None
node_type: str | None = None # Added for test compatibility
# Alias for potential old code using 'type' instead of 'severity'
@property
def type(self) -> str:
return self.severity
@property
def element_id(self) -> str:
return self.node_id
FriendlyHint = ValidationHint # Alias for backward compatibility
class WorkflowValidator:
"""
Validates the generated workflow configuration (nodes and edges).
Wraps the new ValidationEngine for backward compatibility.
"""
@classmethod
def validate(
cls,
workflow_data: WorkflowDataDict,
available_tools: list[AvailableToolDict],
available_models: list[AvailableModelDict] | None = None,
) -> tuple[bool, list[ValidationHint]]:
"""
Validate workflow data and return validity status and hints.
Args:
workflow_data: Dict containing 'nodes' and 'edges'
available_tools: List of available tool configurations
available_models: List of available models (added for Vibe compat)
Returns:
Tuple of (is_valid, hints), where hints contains both errors and warnings
"""
nodes = workflow_data.get("nodes", [])
edges = workflow_data.get("edges", [])
# Create context
context = ValidationContext(
nodes=nodes,
edges=edges,
available_models=available_models or [],
available_tools=available_tools or [],
)
# Run validation engine
engine = ValidationEngine()
result = engine.validate(context)
# Convert engine errors to legacy hints
hints: list[ValidationHint] = []
error_count = 0
warning_count = 0
for error in result.all_errors:
# Map severity
severity = "error" if error.severity == Severity.ERROR else "warning"
if severity == "error":
error_count += 1
else:
warning_count += 1
# Map field from message or details if possible (heuristic)
field_name = error.details.get("field", "unknown")
hints.append(
ValidationHint(
node_id=error.node_id,
field=field_name,
message=error.message,
severity=severity,
suggestion=error.fix_hint,
node_type=error.node_type,
)
)
return result.is_valid, hints
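
A minimal sketch of the legacy wrapper in use; the import path is an assumption, and the exact hints depend on which rules fire.

# Minimal sketch, not part of the diff; the import path is an assumption.
from core.workflow.generator.workflow_validator import WorkflowValidator  # hypothetical path

is_valid, hints = WorkflowValidator.validate(
    workflow_data={"nodes": [{"id": "llm_1", "type": "llm", "config": {}}], "edges": []},
    available_tools=[],
    available_models=[{"provider": "openai", "model": "gpt-4o"}],
)
for hint in hints:
    print(hint.severity, hint.node_id, hint.message)
# Expect errors such as a missing prompt_template and a missing model config,
# so is_valid is False.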

View File

@@ -0,0 +1,42 @@
"""
Validation Rule Engine for Vibe Workflow Generation.
This module provides a declarative, schema-based validation system for
generated workflow nodes. It classifies errors into fixable (LLM can auto-fix)
and user-required (needs manual intervention) categories.
Usage:
from core.workflow.generator.validation import ValidationEngine, ValidationContext
context = ValidationContext(
available_models=[...],
available_tools=[...],
nodes=[...],
edges=[...],
)
engine = ValidationEngine()
result = engine.validate(context)
# Access classified errors
fixable_errors = result.fixable_errors
user_required_errors = result.user_required_errors
"""
from core.workflow.generator.validation.context import ValidationContext
from core.workflow.generator.validation.engine import ValidationEngine, ValidationResult
from core.workflow.generator.validation.rules import (
RuleCategory,
Severity,
ValidationError,
ValidationRule,
)
__all__ = [
"RuleCategory",
"Severity",
"ValidationContext",
"ValidationEngine",
"ValidationError",
"ValidationResult",
"ValidationRule",
]

View File

@@ -0,0 +1,115 @@
"""
Validation Context for the Rule Engine.
The ValidationContext holds all the data needed for validation:
- Generated nodes and edges
- Available models, tools, and datasets
- Node output schemas for variable reference validation
"""
from dataclasses import dataclass, field
from core.workflow.generator.types import (
AvailableModelDict,
AvailableToolDict,
WorkflowEdgeDict,
WorkflowNodeDict,
)
@dataclass
class ValidationContext:
"""
Context object containing all data needed for validation.
This is passed to each validation rule, providing access to:
- The nodes being validated
- Edge connections between nodes
- Available external resources (models, tools)
"""
# Generated workflow data
nodes: list[WorkflowNodeDict] = field(default_factory=list)
edges: list[WorkflowEdgeDict] = field(default_factory=list)
# Available external resources
available_models: list[AvailableModelDict] = field(default_factory=list)
available_tools: list[AvailableToolDict] = field(default_factory=list)
# Cached lookups (populated lazily)
_node_map: dict[str, WorkflowNodeDict] | None = field(default=None, repr=False)
_model_set: set[tuple[str, str]] | None = field(default=None, repr=False)
_tool_set: set[str] | None = field(default=None, repr=False)
_configured_tool_set: set[str] | None = field(default=None, repr=False)
@property
def node_map(self) -> dict[str, WorkflowNodeDict]:
"""Get a map of node_id -> node for quick lookup."""
if self._node_map is None:
self._node_map = {node.get("id", ""): node for node in self.nodes}
return self._node_map
@property
def model_set(self) -> set[tuple[str, str]]:
"""Get a set of (provider, model_name) tuples for quick lookup."""
if self._model_set is None:
self._model_set = {(m.get("provider", ""), m.get("model", "")) for m in self.available_models}
return self._model_set
@property
def tool_set(self) -> set[str]:
"""Get a set of all tool keys (both configured and unconfigured)."""
if self._tool_set is None:
self._tool_set = set()
for tool in self.available_tools:
provider = tool.get("provider_id") or tool.get("provider", "")
tool_key = tool.get("tool_key") or tool.get("tool_name", "")
if provider and tool_key:
self._tool_set.add(f"{provider}/{tool_key}")
if tool_key:
self._tool_set.add(tool_key)
return self._tool_set
@property
def configured_tool_set(self) -> set[str]:
"""Get a set of configured (authorized) tool keys."""
if self._configured_tool_set is None:
self._configured_tool_set = set()
for tool in self.available_tools:
if not tool.get("is_team_authorization", False):
continue
provider = tool.get("provider_id") or tool.get("provider", "")
tool_key = tool.get("tool_key") or tool.get("tool_name", "")
if provider and tool_key:
self._configured_tool_set.add(f"{provider}/{tool_key}")
if tool_key:
self._configured_tool_set.add(tool_key)
return self._configured_tool_set
def has_model(self, provider: str, model_name: str) -> bool:
"""Check if a model is available."""
return (provider, model_name) in self.model_set
def has_tool(self, tool_key: str) -> bool:
"""Check if a tool exists (configured or not)."""
return tool_key in self.tool_set
def is_tool_configured(self, tool_key: str) -> bool:
"""Check if a tool is configured and ready to use."""
return tool_key in self.configured_tool_set
def get_node(self, node_id: str) -> WorkflowNodeDict | None:
"""Get a node by its ID."""
return self.node_map.get(node_id)
def get_node_ids(self) -> set[str]:
"""Get all node IDs in the workflow."""
return set(self.node_map.keys())
def get_upstream_nodes(self, node_id: str) -> list[str]:
"""Get IDs of nodes that connect to this node (upstream)."""
return [edge.get("source", "") for edge in self.edges if edge.get("target") == node_id]
def get_downstream_nodes(self, node_id: str) -> list[str]:
"""Get IDs of nodes that this node connects to (downstream)."""
return [edge.get("target", "") for edge in self.edges if edge.get("source") == node_id]

View File

@@ -0,0 +1,260 @@
"""
Validation Engine - Core validation logic.
The ValidationEngine orchestrates rule execution and aggregates results.
It provides a clean interface for validating workflow nodes.
"""
import logging
from dataclasses import dataclass, field
from typing import Any
from core.workflow.generator.types import (
AvailableModelDict,
AvailableToolDict,
WorkflowEdgeDict,
WorkflowNodeDict,
)
from core.workflow.generator.validation.context import ValidationContext
from core.workflow.generator.validation.rules import (
RuleCategory,
Severity,
ValidationError,
get_registry,
)
logger = logging.getLogger(__name__)
@dataclass
class ValidationResult:
"""
Result of validation containing all errors classified by fixability.
Attributes:
all_errors: All validation errors found
fixable_errors: Errors that LLM can automatically fix
user_required_errors: Errors that require user intervention
warnings: Non-blocking warnings
stats: Validation statistics
"""
all_errors: list[ValidationError] = field(default_factory=list)
fixable_errors: list[ValidationError] = field(default_factory=list)
user_required_errors: list[ValidationError] = field(default_factory=list)
warnings: list[ValidationError] = field(default_factory=list)
stats: dict[str, int] = field(default_factory=dict)
@property
def has_errors(self) -> bool:
"""Check if there are any errors (excluding warnings)."""
return len(self.fixable_errors) > 0 or len(self.user_required_errors) > 0
@property
def has_fixable_errors(self) -> bool:
"""Check if there are fixable errors."""
return len(self.fixable_errors) > 0
@property
def is_valid(self) -> bool:
"""Check if validation passed (no errors, warnings are OK)."""
return not self.has_errors
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary for API response."""
return {
"fixable": [e.to_dict() for e in self.fixable_errors],
"user_required": [e.to_dict() for e in self.user_required_errors],
"warnings": [e.to_dict() for e in self.warnings],
"all_warnings": [e.message for e in self.all_errors],
"stats": self.stats,
}
def get_error_messages(self) -> list[str]:
"""Get all error messages as strings."""
return [e.message for e in self.all_errors]
def get_fixable_by_node(self) -> dict[str, list[ValidationError]]:
"""Group fixable errors by node ID."""
result: dict[str, list[ValidationError]] = {}
for error in self.fixable_errors:
if error.node_id not in result:
result[error.node_id] = []
result[error.node_id].append(error)
return result
class ValidationEngine:
"""
The main validation engine.
Usage:
engine = ValidationEngine()
context = ValidationContext(nodes=[...], available_models=[...])
result = engine.validate(context)
"""
def __init__(self):
self._registry = get_registry()
def validate(self, context: ValidationContext) -> ValidationResult:
"""
Validate all nodes in the context.
Args:
context: ValidationContext with nodes, edges, and available resources
Returns:
ValidationResult with classified errors
"""
result = ValidationResult()
stats = {
"total_nodes": len(context.nodes),
"total_rules_checked": 0,
"total_errors": 0,
"fixable_count": 0,
"user_required_count": 0,
"warning_count": 0,
}
# Validate each node
for node in context.nodes:
node_type = node.get("type", "unknown")
node_id = node.get("id", "unknown")
# Get applicable rules for this node type
rules = self._registry.get_rules_for_node(node_type)
for rule in rules:
stats["total_rules_checked"] += 1
try:
errors = rule.check(node, context)
for error in errors:
result.all_errors.append(error)
stats["total_errors"] += 1
# Classify by severity and fixability
if error.severity == Severity.WARNING:
result.warnings.append(error)
stats["warning_count"] += 1
elif error.is_fixable:
result.fixable_errors.append(error)
stats["fixable_count"] += 1
else:
result.user_required_errors.append(error)
stats["user_required_count"] += 1
except Exception:
logger.exception(
"Rule '%s' failed for node '%s'",
rule.id,
node_id,
)
# Don't let a rule failure break the entire validation
continue
# Validate edges separately
edge_errors = self._validate_edges(context)
for error in edge_errors:
result.all_errors.append(error)
stats["total_errors"] += 1
if error.is_fixable:
result.fixable_errors.append(error)
stats["fixable_count"] += 1
else:
result.user_required_errors.append(error)
stats["user_required_count"] += 1
result.stats = stats
return result
def _validate_edges(self, context: ValidationContext) -> list[ValidationError]:
"""Validate edge connections."""
errors: list[ValidationError] = []
valid_node_ids = context.get_node_ids()
for edge in context.edges:
source = edge.get("source", "")
target = edge.get("target", "")
if source and source not in valid_node_ids:
errors.append(
ValidationError(
rule_id="edge.source.invalid",
node_id=source,
node_type="edge",
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
message=f"Edge source '{source}' does not exist",
fix_hint="Update edge to reference existing node",
)
)
if target and target not in valid_node_ids:
errors.append(
ValidationError(
rule_id="edge.target.invalid",
node_id=target,
node_type="edge",
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
message=f"Edge target '{target}' does not exist",
fix_hint="Update edge to reference existing node",
)
)
return errors
def validate_single_node(
self,
node: WorkflowNodeDict,
context: ValidationContext,
) -> list[ValidationError]:
"""
Validate a single node.
Useful for incremental validation when a node is added/modified.
"""
node_type = node.get("type", "unknown")
rules = self._registry.get_rules_for_node(node_type)
errors: list[ValidationError] = []
for rule in rules:
try:
errors.extend(rule.check(node, context))
except Exception:
logger.exception("Rule '%s' failed", rule.id)
return errors
def validate_nodes(
nodes: list[WorkflowNodeDict],
edges: list[WorkflowEdgeDict] | None = None,
available_models: list[AvailableModelDict] | None = None,
available_tools: list[AvailableToolDict] | None = None,
) -> ValidationResult:
"""
Convenience function to validate nodes without creating engine/context manually.
Args:
nodes: List of workflow nodes to validate
edges: Optional list of edges
available_models: Optional list of available models
available_tools: Optional list of available tools
Returns:
ValidationResult with classified errors
"""
context = ValidationContext(
nodes=nodes,
edges=edges or [],
available_models=available_models or [],
available_tools=available_tools or [],
)
engine = ValidationEngine()
return engine.validate(context)
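
A minimal sketch, not part of the diff, of the validate_nodes() convenience wrapper above; the http-request rule catches the empty url.

# Minimal sketch of the convenience wrapper.
from core.workflow.generator.validation.engine import validate_nodes

result = validate_nodes(
    nodes=[{"id": "http_1", "type": "http-request", "config": {"url": "", "method": "GET"}}],
)
print(result.is_valid)                              # False: url is required
print([e.rule_id for e in result.fixable_errors])   # ['http.url.required']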

View File

@@ -0,0 +1,947 @@
"""
Validation Rules Definition and Registry.
This module defines:
- ValidationRule: The rule structure
- RuleCategory: Categories of validation rules
- Severity: Error severity levels
- ValidationError: Error output structure
- All built-in validation rules
"""
import re
from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Any
from core.workflow.generator.types import WorkflowNodeDict
if TYPE_CHECKING:
from core.workflow.generator.validation.context import ValidationContext
class RuleCategory(Enum):
"""Categories of validation rules."""
STRUCTURE = "structure" # Field existence, types, formats
SEMANTIC = "semantic" # Variable references, edge connections
REFERENCE = "reference" # External resources (models, tools, datasets)
class Severity(Enum):
"""Severity levels for validation errors."""
ERROR = "error" # Must be fixed
WARNING = "warning" # Should be fixed but not blocking
@dataclass
class ValidationError:
"""
Represents a validation error found during rule execution.
Attributes:
rule_id: The ID of the rule that generated this error
node_id: The ID of the node with the error
node_type: The type of the node
category: The rule category
severity: Error severity
is_fixable: Whether LLM can auto-fix this error
message: Human-readable error message
fix_hint: Hint for LLM to fix the error
details: Additional error details
"""
rule_id: str
node_id: str
node_type: str
category: RuleCategory
severity: Severity
is_fixable: bool
message: str
fix_hint: str = ""
details: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary for API response."""
return {
"rule_id": self.rule_id,
"node_id": self.node_id,
"node_type": self.node_type,
"category": self.category.value,
"severity": self.severity.value,
"is_fixable": self.is_fixable,
"message": self.message,
"fix_hint": self.fix_hint,
"details": self.details,
}
# Type alias for rule check functions
RuleCheckFn = Callable[
[WorkflowNodeDict, "ValidationContext"],
list[ValidationError],
]
@dataclass
class ValidationRule:
"""
A validation rule definition.
Attributes:
id: Unique rule identifier (e.g., "llm.model.required")
node_types: List of node types this rule applies to, or ["*"] for all
category: The rule category
severity: Default severity for errors from this rule
is_fixable: Whether errors from this rule can be auto-fixed by LLM
check: The validation function
description: Human-readable description of what this rule checks
fix_hint: Default hint for fixing errors from this rule
"""
id: str
node_types: list[str]
category: RuleCategory
severity: Severity
is_fixable: bool
check: RuleCheckFn
description: str = ""
fix_hint: str = ""
def applies_to(self, node_type: str) -> bool:
"""Check if this rule applies to a given node type."""
return "*" in self.node_types or node_type in self.node_types
# =============================================================================
# Rule Registry
# =============================================================================
class RuleRegistry:
"""
Registry for validation rules.
Rules are registered here and can be retrieved by category or node type.
"""
def __init__(self):
self._rules: list[ValidationRule] = []
def register(self, rule: ValidationRule) -> None:
"""Register a validation rule."""
self._rules.append(rule)
def get_rules_for_node(self, node_type: str) -> list[ValidationRule]:
"""Get all rules that apply to a given node type."""
return [r for r in self._rules if r.applies_to(node_type)]
def get_rules_by_category(self, category: RuleCategory) -> list[ValidationRule]:
"""Get all rules in a given category."""
return [r for r in self._rules if r.category == category]
def get_all_rules(self) -> list[ValidationRule]:
"""Get all registered rules."""
return list(self._rules)
# Global rule registry instance
_registry = RuleRegistry()
def register_rule(rule: ValidationRule) -> ValidationRule:
"""Decorator/function to register a rule with the global registry."""
_registry.register(rule)
return rule
def get_registry() -> RuleRegistry:
"""Get the global rule registry."""
return _registry
# =============================================================================
# Helper Functions for Rule Implementations
# =============================================================================
# Explicit placeholder value defined in prompt contract
# See: api/core/workflow/generator/prompts/vibe_prompts.py
PLACEHOLDER_VALUE = "__PLACEHOLDER__"
# Variable reference pattern: {{#node_id.field#}}
VARIABLE_REF_PATTERN = re.compile(r"\{\{#([^.#]+)\.([^#]+)#\}\}")
def is_placeholder(value: Any) -> bool:
"""Check if a value appears to be a placeholder."""
if not isinstance(value, str):
return False
return value == PLACEHOLDER_VALUE or PLACEHOLDER_VALUE in value
def extract_variable_refs(text: str) -> list[tuple[str, str]]:
"""
Extract variable references from text.
Returns list of (node_id, field_name) tuples.
"""
return VARIABLE_REF_PATTERN.findall(text)
def check_required_field(
config: dict[str, Any],
field_name: str,
node_id: str,
node_type: str,
rule_id: str,
fix_hint: str = "",
) -> ValidationError | None:
"""Helper to check if a required field exists and is non-empty."""
value = config.get(field_name)
if value is None or value == "" or (isinstance(value, list) and len(value) == 0):
return ValidationError(
rule_id=rule_id,
node_id=node_id,
node_type=node_type,
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': missing required field '{field_name}'",
fix_hint=fix_hint or f"Add '{field_name}' to the node config",
)
return None
# =============================================================================
# Structure Rules - Field existence, types, formats
# =============================================================================
def _check_llm_prompt_template(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that LLM node has prompt_template."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
err = check_required_field(
config,
"prompt_template",
node_id,
"llm",
"llm.prompt_template.required",
"Add prompt_template with system and user messages",
)
if err:
errors.append(err)
return errors
def _check_http_request_url(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that http-request node has url and method."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
# Check url
url = config.get("url", "")
if not url:
errors.append(
ValidationError(
rule_id="http.url.required",
node_id=node_id,
node_type="http-request",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': http-request missing required 'url'",
fix_hint="Add url - use {{#start.url#}} or a concrete URL",
)
)
elif is_placeholder(url):
errors.append(
ValidationError(
rule_id="http.url.placeholder",
node_id=node_id,
node_type="http-request",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': url contains placeholder value",
fix_hint="Replace placeholder with actual URL or variable reference",
)
)
# Check method
method = config.get("method", "")
if not method:
errors.append(
ValidationError(
rule_id="http.method.required",
node_id=node_id,
node_type="http-request",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': http-request missing 'method'",
fix_hint="Add method: GET, POST, PUT, DELETE, or PATCH",
)
)
return errors
def _check_code_node(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that code node has code and language."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
err = check_required_field(
config,
"code",
node_id,
"code",
"code.code.required",
"Add code with a main() function that returns a dict",
)
if err:
errors.append(err)
err = check_required_field(
config,
"language",
node_id,
"code",
"code.language.required",
"Add language: python3 or javascript",
)
if err:
errors.append(err)
return errors
def _check_question_classifier(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that question-classifier has classes."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
err = check_required_field(
config,
"classes",
node_id,
"question-classifier",
"classifier.classes.required",
"Add classes array with id and name for each classification",
)
if err:
errors.append(err)
return errors
def _check_parameter_extractor(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that parameter-extractor has parameters and instruction."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
err = check_required_field(
config,
"parameters",
node_id,
"parameter-extractor",
"extractor.parameters.required",
"Add parameters array with name, type, description fields",
)
if err:
errors.append(err)
else:
# Check individual parameters for required fields
parameters = config.get("parameters", [])
if isinstance(parameters, list):
for i, param in enumerate(parameters):
if isinstance(param, dict):
# Check for 'required' field (boolean)
if "required" not in param:
errors.append(
ValidationError(
rule_id="extractor.param.required_field.missing",
node_id=node_id,
node_type="parameter-extractor",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': parameter[{i}] missing 'required' field",
fix_hint=f"Add 'required': True to parameter '{param.get('name', 'unknown')}'",
details={"param_index": i, "param_name": param.get("name")},
)
)
# instruction is recommended but not strictly required
if not config.get("instruction"):
errors.append(
ValidationError(
rule_id="extractor.instruction.recommended",
node_id=node_id,
node_type="parameter-extractor",
category=RuleCategory.STRUCTURE,
severity=Severity.WARNING,
is_fixable=True,
message=f"Node '{node_id}': parameter-extractor should have 'instruction'",
fix_hint="Add instruction describing what to extract",
)
)
return errors
def _check_knowledge_retrieval(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that knowledge-retrieval has dataset_ids."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
dataset_ids = config.get("dataset_ids", [])
if not dataset_ids:
errors.append(
ValidationError(
rule_id="knowledge.dataset.required",
node_id=node_id,
node_type="knowledge-retrieval",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=False, # User must select knowledge base
message=f"Node '{node_id}': knowledge-retrieval missing 'dataset_ids'",
fix_hint="User must select knowledge bases in the UI",
)
)
else:
# Check for placeholder values
for ds_id in dataset_ids:
if is_placeholder(ds_id):
errors.append(
ValidationError(
rule_id="knowledge.dataset.placeholder",
node_id=node_id,
node_type="knowledge-retrieval",
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=False,
message=f"Node '{node_id}': dataset_ids contains placeholder",
fix_hint="User must replace placeholder with actual knowledge base ID",
details={"placeholder_value": ds_id},
)
)
break
return errors
def _check_end_node(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that end node has outputs defined."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
config = node.get("config", {})
outputs = config.get("outputs", [])
if not outputs:
errors.append(
ValidationError(
rule_id="end.outputs.recommended",
node_id=node_id,
node_type="end",
category=RuleCategory.STRUCTURE,
severity=Severity.WARNING,
is_fixable=True,
message="End node should define output variables",
fix_hint="Add outputs array with variable and value_selector",
)
)
return errors
# =============================================================================
# Semantic Rules - Variable references, edge connections
# =============================================================================
def _check_variable_references(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that variable references point to valid nodes."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
config = node.get("config", {})
# Get all valid node IDs (including 'start' which is always valid)
valid_node_ids = ctx.get_node_ids()
valid_node_ids.add("start")
valid_node_ids.add("sys") # System variables
def check_text_for_refs(text: str, field_path: str) -> None:
if not isinstance(text, str):
return
refs = extract_variable_refs(text)
for ref_node_id, ref_field in refs:
if ref_node_id not in valid_node_ids:
errors.append(
ValidationError(
rule_id="variable.ref.invalid_node",
node_id=node_id,
node_type=node_type,
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': references non-existent node '{ref_node_id}'",
fix_hint=f"Change {{{{#{ref_node_id}.{ref_field}#}}}} to reference a valid node",
details={"field_path": field_path, "invalid_ref": ref_node_id},
)
)
# Check prompt_template for LLM nodes
prompt_template = config.get("prompt_template", [])
if isinstance(prompt_template, list):
for i, msg in enumerate(prompt_template):
if isinstance(msg, dict):
text = msg.get("text", "")
check_text_for_refs(text, f"prompt_template[{i}].text")
# Check instruction field
instruction = config.get("instruction", "")
check_text_for_refs(instruction, "instruction")
# Check url for http-request
url = config.get("url", "")
check_text_for_refs(url, "url")
return errors
# NOTE: _check_node_has_outgoing_edge removed - handled by GraphValidator
# NOTE: _check_node_has_incoming_edge removed - handled by GraphValidator
# NOTE: _check_question_classifier_branches removed - handled by EdgeRepair
# NOTE: _check_if_else_branches removed - handled by EdgeRepair
def _check_if_else_operators(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that if-else comparison operators are valid."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
if node_type != "if-else":
return errors
valid_operators = {
"contains",
"not contains",
"start with",
"end with",
"is",
"is not",
"empty",
"not empty",
"in",
"not in",
"all of",
"=",
"",
">",
"<",
"",
"",
"null",
"not null",
"exists",
"not exists",
}
config = node.get("config", {})
cases = config.get("cases", [])
for case in cases:
conditions = case.get("conditions", [])
for condition in conditions:
op = condition.get("comparison_operator")
if op and op not in valid_operators:
errors.append(
ValidationError(
rule_id="ifelse.operator.invalid",
node_id=node_id,
node_type=node_type,
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
message=f"Invalid operator '{op}' in if-else node",
fix_hint=f"Use one of: {', '.join(sorted(valid_operators))}",
details={"invalid_operator": op, "field": "config.cases.conditions.comparison_operator"},
)
)
return errors
def _check_edge_targets_exist(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that edge targets reference existing nodes."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
valid_node_ids = ctx.get_node_ids()
# Check all outgoing edges from this node
for edge in ctx.edges:
if edge.get("source") == node_id:
target = edge.get("target")
if target and target not in valid_node_ids:
errors.append(
ValidationError(
rule_id="edge.target.invalid",
node_id=node_id,
node_type=node_type,
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
message=f"Edge from '{node_id}' targets non-existent node '{target}'",
fix_hint=f"Change edge target from '{target}' to an existing node",
details={"invalid_target": target, "field": "edges"},
)
)
return errors
# =============================================================================
# Reference Rules - External resources (models, tools, datasets)
# =============================================================================
# Node types that require model configuration
MODEL_REQUIRED_NODE_TYPES = {"llm", "question-classifier", "parameter-extractor"}
def _check_model_config(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that model configuration is valid."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
config = node.get("config", {})
if node_type not in MODEL_REQUIRED_NODE_TYPES:
return errors
model = config.get("model")
# Check if model config exists
if not model:
if ctx.available_models:
errors.append(
ValidationError(
rule_id="model.required",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}' ({node_type}): missing required 'model' configuration",
fix_hint="Add model config using one of the available models",
)
)
else:
errors.append(
ValidationError(
rule_id="model.no_available",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=False,
message=f"Node '{node_id}' ({node_type}): needs model but no models available",
fix_hint="User must configure a model provider first",
)
)
return errors
# Check if model config is valid
if isinstance(model, dict):
provider = model.get("provider", "")
name = model.get("name", "")
# Check for placeholder values
if is_placeholder(provider) or is_placeholder(name):
if ctx.available_models:
errors.append(
ValidationError(
rule_id="model.placeholder",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': model config contains placeholder",
fix_hint="Replace placeholder with actual model from available_models",
)
)
return errors
# Check if model exists in available_models
if ctx.available_models and provider and name:
if not ctx.has_model(provider, name):
errors.append(
ValidationError(
rule_id="model.not_found",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': model '{provider}/{name}' not in available models",
fix_hint="Replace with a model from available_models",
details={"provider": provider, "model": name},
)
)
return errors
def _check_tool_reference(node: WorkflowNodeDict, ctx: "ValidationContext") -> list[ValidationError]:
"""Check that tool references are valid and configured."""
errors: list[ValidationError] = []
node_id = node.get("id", "unknown")
node_type = node.get("type", "unknown")
if node_type != "tool":
return errors
config = node.get("config", {})
tool_ref = (
config.get("tool_key")
or config.get("tool_name")
or config.get("provider_id", "") + "/" + config.get("tool_name", "")
)
if not tool_ref:
errors.append(
ValidationError(
rule_id="tool.key.required",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
message=f"Node '{node_id}': tool node missing tool_key",
fix_hint="Add tool_key from available_tools",
)
)
return errors
# Check if tool exists
if not ctx.has_tool(tool_ref):
errors.append(
ValidationError(
rule_id="tool.not_found",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True, # Can be replaced with http-request fallback
message=f"Node '{node_id}': tool '{tool_ref}' not found",
fix_hint="Use http-request or code node as fallback",
details={"tool_ref": tool_ref},
)
)
elif not ctx.is_tool_configured(tool_ref):
errors.append(
ValidationError(
rule_id="tool.not_configured",
node_id=node_id,
node_type=node_type,
category=RuleCategory.REFERENCE,
severity=Severity.WARNING,
is_fixable=False, # User needs to configure
message=f"Node '{node_id}': tool '{tool_ref}' requires configuration",
fix_hint="Configure the tool in Tools settings",
details={"tool_ref": tool_ref},
)
)
return errors
# =============================================================================
# Register All Rules
# =============================================================================
# Structure Rules
register_rule(
ValidationRule(
id="llm.prompt_template.required",
node_types=["llm"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_llm_prompt_template,
description="LLM node must have prompt_template",
fix_hint="Add prompt_template with system and user messages",
)
)
register_rule(
ValidationRule(
id="http.config.required",
node_types=["http-request"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_http_request_url,
description="HTTP request node must have url and method",
fix_hint="Add url and method to config",
)
)
register_rule(
ValidationRule(
id="code.config.required",
node_types=["code"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_code_node,
description="Code node must have code and language",
fix_hint="Add code with main() function and language",
)
)
register_rule(
ValidationRule(
id="classifier.classes.required",
node_types=["question-classifier"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_question_classifier,
description="Question classifier must have classes",
fix_hint="Add classes array with classification options",
)
)
register_rule(
ValidationRule(
id="extractor.config.required",
node_types=["parameter-extractor"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_parameter_extractor,
description="Parameter extractor must have parameters",
fix_hint="Add parameters array",
)
)
register_rule(
ValidationRule(
id="knowledge.config.required",
node_types=["knowledge-retrieval"],
category=RuleCategory.STRUCTURE,
severity=Severity.ERROR,
is_fixable=False,
check=_check_knowledge_retrieval,
description="Knowledge retrieval must have dataset_ids",
fix_hint="User must select knowledge base",
)
)
register_rule(
ValidationRule(
id="end.outputs.check",
node_types=["end"],
category=RuleCategory.STRUCTURE,
severity=Severity.WARNING,
is_fixable=True,
check=_check_end_node,
description="End node should have outputs",
fix_hint="Add outputs array",
)
)
# Semantic Rules
register_rule(
ValidationRule(
id="variable.references.valid",
node_types=["*"],
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
check=_check_variable_references,
description="Variable references must point to valid nodes",
fix_hint="Fix variable reference to use valid node ID",
)
)
# Edge Validation Rules
# NOTE: Edge connectivity and branch completeness are now handled by:
# - GraphValidator (BFS-based reachability analysis)
# - EdgeRepair (automatic branch edge repair)
register_rule(
ValidationRule(
id="edge.targets.valid",
node_types=["*"],
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
check=_check_edge_targets_exist,
description="Edge targets must reference existing nodes",
fix_hint="Change edge target to an existing node ID",
)
)
# Reference Rules
register_rule(
ValidationRule(
id="model.config.valid",
node_types=["llm", "question-classifier", "parameter-extractor"],
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_model_config,
description="Model configuration must be valid",
fix_hint="Add valid model from available_models",
)
)
register_rule(
ValidationRule(
id="tool.reference.valid",
node_types=["tool"],
category=RuleCategory.REFERENCE,
severity=Severity.ERROR,
is_fixable=True,
check=_check_tool_reference,
description="Tool reference must be valid and configured",
fix_hint="Use valid tool or fallback node",
)
)
register_rule(
ValidationRule(
id="ifelse.operator.valid",
node_types=["if-else"],
category=RuleCategory.SEMANTIC,
severity=Severity.ERROR,
is_fixable=True,
check=_check_if_else_operators,
description="If-else operators must be valid",
fix_hint="Use standard operators like ≥, ≤, =, ≠",
)
)
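
A minimal sketch, not part of the diff, of registering an additional rule through the same registry. The rule id and check logic are illustrative only; they follow the ValidationRule/ValidationError structures defined above.

# Minimal sketch: an illustrative custom rule registered with the global registry.
from core.workflow.generator.validation.rules import (
    RuleCategory,
    Severity,
    ValidationError,
    ValidationRule,
    register_rule,
)


def _check_llm_title(node, ctx):
    # Warn when an LLM node has no human-readable title (hypothetical rule).
    if node.get("title"):
        return []
    return [
        ValidationError(
            rule_id="llm.title.recommended",
            node_id=node.get("id", "unknown"),
            node_type="llm",
            category=RuleCategory.STRUCTURE,
            severity=Severity.WARNING,
            is_fixable=True,
            message="LLM node should have a short descriptive title",
            fix_hint="Add a title summarizing the node's purpose",
        )
    ]


register_rule(
    ValidationRule(
        id="llm.title.recommended",
        node_types=["llm"],
        category=RuleCategory.STRUCTURE,
        severity=Severity.WARNING,
        is_fixable=True,
        check=_check_llm_title,
        description="LLM node should have a title",
    )
)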

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import json
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, Union, cast
from packaging.version import Version
from pydantic import ValidationError
@@ -11,6 +11,7 @@ from sqlalchemy.orm import Session
from core.agent.entities import AgentToolEntity
from core.agent.plugin_entities import AgentStrategyParameter
from core.db.session_factory import session_factory
from core.file import File, FileTransferMethod
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance, ModelManager
@@ -49,6 +50,12 @@ from factories import file_factory
from factories.agent_factory import get_plugin_agent_strategy
from models import ToolFile
from models.model import Conversation
from models.tools import (
ApiToolProvider,
BuiltinToolProvider,
MCPToolProvider,
WorkflowToolProvider,
)
from services.tools.builtin_tools_manage_service import BuiltinToolManageService
from .exc import (
@@ -259,7 +266,7 @@ class AgentNode(Node[AgentNodeData]):
value = cast(list[dict[str, Any]], value)
tool_value = []
for tool in value:
provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN))
provider_type = self._infer_tool_provider_type(tool, self.tenant_id)
setting_params = tool.get("settings", {})
parameters = tool.get("parameters", {})
manual_input_params = [key for key, value in parameters.items() if value is not None]
@@ -748,3 +755,34 @@ class AgentNode(Node[AgentNodeData]):
llm_usage=llm_usage,
)
)
@staticmethod
def _infer_tool_provider_type(tool_config: dict[str, Any], tenant_id: str) -> ToolProviderType:
provider_type_str = tool_config.get("type")
if provider_type_str:
return ToolProviderType(provider_type_str)
provider_id = tool_config.get("provider_name")
if not provider_id:
return ToolProviderType.BUILT_IN
with session_factory.create_session() as session:
provider_map: dict[
type[Union[WorkflowToolProvider, MCPToolProvider, ApiToolProvider, BuiltinToolProvider]],
ToolProviderType,
] = {
WorkflowToolProvider: ToolProviderType.WORKFLOW,
MCPToolProvider: ToolProviderType.MCP,
ApiToolProvider: ToolProviderType.API,
BuiltinToolProvider: ToolProviderType.BUILT_IN,
}
for provider_model, provider_type in provider_map.items():
stmt = select(provider_model).where(
provider_model.id == provider_id,
provider_model.tenant_id == tenant_id,
)
if session.scalar(stmt):
return provider_type
raise AgentNodeError(f"Tool provider with ID '{provider_id}' not found.")

View File

@@ -199,6 +199,14 @@ class Node(Generic[NodeDataT]):
return None
@classmethod
def get_default_config_schema(cls) -> dict[str, Any] | None:
"""
Get the default configuration schema for the node.
Used for LLM generation.
"""
return None
# Global registry populated via __init_subclass__
_registry: ClassVar[dict[NodeType, dict[str, type[Node]]]] = {}

View File

@@ -1,3 +1,5 @@
from typing import Any
from core.workflow.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus
from core.workflow.node_events import NodeRunResult
from core.workflow.nodes.base.node import Node
@@ -9,6 +11,24 @@ class EndNode(Node[EndNodeData]):
node_type = NodeType.END
execution_type = NodeExecutionType.RESPONSE
@classmethod
def get_default_config_schema(cls) -> dict[str, Any] | None:
return {
"description": "Workflow exit point - defines output variables",
"required": ["outputs"],
"parameters": {
"outputs": {
"type": "array",
"description": "Output variables to return",
"item_schema": {
"variable": "string - output variable name",
"type": "enum: string, number, object, array",
"value_selector": "array - path to source value, e.g. ['node_id', 'field']",
},
},
},
}
@classmethod
def version(cls) -> str:
return "1"

View File

@@ -14,6 +14,27 @@ class StartNode(Node[StartNodeData]):
node_type = NodeType.START
execution_type = NodeExecutionType.ROOT
@classmethod
def get_default_config_schema(cls) -> dict[str, Any] | None:
return {
"description": "Workflow entry point - defines input variables",
"required": [],
"parameters": {
"variables": {
"type": "array",
"description": "Input variables for the workflow",
"item_schema": {
"variable": "string - variable name",
"label": "string - display label",
"type": "enum: text-input, paragraph, number, select, file, file-list",
"required": "boolean",
"max_length": "number (optional)",
},
},
},
"outputs": ["All defined variables are available as {{#start.variable_name#}}"],
}
@classmethod
def version(cls) -> str:
return "1"

View File

@@ -1,7 +1,6 @@
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any
from configs import dify_config
from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus
from core.workflow.node_events import NodeRunResult
from core.workflow.nodes.base.node import Node
@@ -16,12 +15,13 @@ if TYPE_CHECKING:
from core.workflow.entities import GraphInitParams
from core.workflow.runtime import GraphRuntimeState
MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH = dify_config.TEMPLATE_TRANSFORM_MAX_LENGTH
DEFAULT_TEMPLATE_TRANSFORM_MAX_OUTPUT_LENGTH = 400_000
class TemplateTransformNode(Node[TemplateTransformNodeData]):
node_type = NodeType.TEMPLATE_TRANSFORM
_template_renderer: Jinja2TemplateRenderer
_max_output_length: int
def __init__(
self,
@@ -31,6 +31,7 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]):
graph_runtime_state: "GraphRuntimeState",
*,
template_renderer: Jinja2TemplateRenderer | None = None,
max_output_length: int | None = None,
) -> None:
super().__init__(
id=id,
@@ -40,6 +41,10 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]):
)
self._template_renderer = template_renderer or CodeExecutorJinja2TemplateRenderer()
if max_output_length is not None and max_output_length <= 0:
raise ValueError("max_output_length must be a positive integer")
self._max_output_length = max_output_length or DEFAULT_TEMPLATE_TRANSFORM_MAX_OUTPUT_LENGTH
@classmethod
def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
"""
@@ -69,11 +74,11 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]):
except TemplateRenderError as e:
return NodeRunResult(inputs=variables, status=WorkflowNodeExecutionStatus.FAILED, error=str(e))
if len(rendered) > MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH:
if len(rendered) > self._max_output_length:
return NodeRunResult(
inputs=variables,
status=WorkflowNodeExecutionStatus.FAILED,
error=f"Output length exceeds {MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH} characters",
error=f"Output length exceeds {self._max_output_length} characters",
)
return NodeRunResult(

View File

@@ -50,6 +50,19 @@ class ToolNode(Node[ToolNodeData]):
def version(cls) -> str:
return "1"
@classmethod
def get_default_config_schema(cls) -> dict[str, Any] | None:
return {
"description": "Execute an external tool",
"required": ["provider_id", "tool_id", "tool_parameters"],
"parameters": {
"provider_id": {"type": "string"},
"provider_type": {"type": "string"},
"tool_id": {"type": "string"},
"tool_parameters": {"type": "object"},
},
}
def _run(self) -> Generator[NodeEventBase, None, None]:
"""
Run the tool node

View File

@@ -10,6 +10,10 @@ import models as models
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
def _is_pg(conn):
return conn.dialect.name == "postgresql"
# revision identifiers, used by Alembic.
revision = '7df29de0f6be'
down_revision = '03ea244985ce'
@@ -19,16 +23,31 @@ depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('tenant_credit_pools',
sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
sa.Column('pool_type', sa.String(length=40), server_default='trial', nullable=False),
sa.Column('quota_limit', sa.BigInteger(), nullable=False),
sa.Column('quota_used', sa.BigInteger(), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
sa.PrimaryKeyConstraint('id', name='tenant_credit_pool_pkey')
)
conn = op.get_bind()
if _is_pg(conn):
op.create_table('tenant_credit_pools',
sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
sa.Column('pool_type', sa.String(length=40), server_default='trial', nullable=False),
sa.Column('quota_limit', sa.BigInteger(), nullable=False),
sa.Column('quota_used', sa.BigInteger(), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
sa.PrimaryKeyConstraint('id', name='tenant_credit_pool_pkey')
)
else:
# For MySQL and other databases, UUID should be generated at application level
op.create_table('tenant_credit_pools',
sa.Column('id', models.types.StringUUID(), nullable=False),
sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
sa.Column('pool_type', sa.String(length=40), server_default='trial', nullable=False),
sa.Column('quota_limit', sa.BigInteger(), nullable=False),
sa.Column('quota_used', sa.BigInteger(), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False),
sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False),
sa.PrimaryKeyConstraint('id', name='tenant_credit_pool_pkey')
)
with op.batch_alter_table('tenant_credit_pools', schema=None) as batch_op:
batch_op.create_index('tenant_credit_pool_pool_type_idx', ['pool_type'], unique=False)
batch_op.create_index('tenant_credit_pool_tenant_id_idx', ['tenant_id'], unique=False)

View File

@@ -2166,7 +2166,9 @@ class TenantCreditPool(TypeBase):
sa.Index("tenant_credit_pool_pool_type_idx", "pool_type"),
)
id: Mapped[str] = mapped_column(StringUUID, primary_key=True, server_default=text("uuid_generate_v4()"), init=False)
id: Mapped[str] = mapped_column(
StringUUID, insert_default=lambda: str(uuid4()), default_factory=lambda: str(uuid4()), init=False
)
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
pool_type: Mapped[str] = mapped_column(String(40), nullable=False, default="trial", server_default="trial")
quota_limit: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
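
A minimal, self-contained sketch (model, table, and engine are stand-ins, not Dify's actual schema) of why the default moves to the application level: with a Python-side default the same mapping works on PostgreSQL, MySQL, or anything else, since no uuid_generate_v4() server default is required. The real model uses the dataclass-style default_factory/insert_default form shown above; this sketch uses the simpler plain-declarative equivalent.

from uuid import uuid4

from sqlalchemy import String, create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class CreditPool(Base):  # illustrative stand-in for TenantCreditPool
    __tablename__ = "credit_pools"

    # UUID generated in Python at insert time, so no DB-side UUID function is needed.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
    pool_type: Mapped[str] = mapped_column(String(40), default="trial")


engine = create_engine("sqlite+pysqlite:///:memory:")  # any dialect behaves the same way here
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(CreditPool())
    session.commit()
    print(session.scalars(select(CreditPool.id)).first())  # a client-generated UUID string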

View File

@@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.12.0"
version = "1.12.1"
requires-python = ">=3.11,<3.13"
dependencies = [

View File

@@ -1,16 +1,24 @@
import logging
import math
import time
import click
import app
from core.helper.marketplace import fetch_global_plugin_manifest
from extensions.ext_database import db
from models.account import TenantPluginAutoUpgradeStrategy
from tasks import process_tenant_plugin_autoupgrade_check_task as check_task
logger = logging.getLogger(__name__)
AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL = 15 * 60 # 15 minutes
MAX_CONCURRENT_CHECK_TASKS = 20
# Import cache constants from the task module
CACHE_REDIS_KEY_PREFIX = check_task.CACHE_REDIS_KEY_PREFIX
CACHE_REDIS_TTL = check_task.CACHE_REDIS_TTL
@app.celery.task(queue="plugin")
def check_upgradable_plugin_task():
@@ -40,6 +48,22 @@ def check_upgradable_plugin_task():
) # make sure all strategies are checked in this interval
batch_interval_time = (AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL / batch_chunk_count) if batch_chunk_count > 0 else 0
if total_strategies == 0:
click.echo(click.style("no strategies to process, skipping plugin manifest fetch.", fg="green"))
return
# Fetch and cache all plugin manifests before processing tenants
# This reduces load on marketplace from 300k requests to 1 request per check cycle
logger.info("fetching global plugin manifest from marketplace")
try:
fetch_global_plugin_manifest(CACHE_REDIS_KEY_PREFIX, CACHE_REDIS_TTL)
logger.info("successfully fetched and cached global plugin manifest")
except Exception as e:
logger.exception("failed to fetch global plugin manifest")
click.echo(click.style(f"failed to fetch global plugin manifest: {e}", fg="red"))
click.echo(click.style("skipping plugin upgrade check for this cycle", fg="yellow"))
return
for i in range(0, total_strategies, MAX_CONCURRENT_CHECK_TASKS):
batch_strategies = strategies[i : i + MAX_CONCURRENT_CHECK_TASKS]
for strategy in batch_strategies:
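
An illustrative sketch of the prefetch-then-read pattern described above. The key prefix and TTL mirror the constants in this diff, but prefetch() and lookup() are assumed helpers, not the real fetch_global_plugin_manifest implementation: one bulk write per check cycle populates Redis, and the per-tenant checks afterwards only read from the cache instead of calling the marketplace.

import json

import redis

CACHE_PREFIX = "plugin_autoupgrade_check_task:cached_plugin_snapshot:"
CACHE_TTL = 60 * 60  # 1 hour, matching CACHE_REDIS_TTL above

client = redis.Redis()  # assumes a local Redis instance


def prefetch(snapshots: dict[str, dict]) -> None:
    # Bulk-populate the cache once per check cycle (one marketplace round trip).
    for plugin_id, snapshot in snapshots.items():
        client.setex(f"{CACHE_PREFIX}{plugin_id}", CACHE_TTL, json.dumps(snapshot))


def lookup(plugin_id: str) -> dict | None:
    # Read-only lookup; a cache miss means the plugin is skipped for this cycle.
    raw = client.get(f"{CACHE_PREFIX}{plugin_id}")
    return json.loads(raw) if raw is not None else None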

View File

@@ -327,6 +327,17 @@ class AccountService:
@staticmethod
def delete_account(account: Account):
"""Delete account. This method only adds a task to the queue for deletion."""
# Queue account deletion sync tasks for all workspaces BEFORE account deletion (enterprise only)
from services.enterprise.account_deletion_sync import sync_account_deletion
sync_success = sync_account_deletion(account_id=account.id, source="account_deleted")
if not sync_success:
logger.warning(
"Enterprise account deletion sync failed for account %s; proceeding with local deletion.",
account.id,
)
# Now proceed with async account deletion
delete_account_task.delay(account.id)
@staticmethod
@@ -1230,6 +1241,19 @@ class TenantService:
if dify_config.BILLING_ENABLED:
BillingService.clean_billing_info_cache(tenant.id)
# Queue account deletion sync task for enterprise backend to reassign resources (enterprise only)
from services.enterprise.account_deletion_sync import sync_workspace_member_removal
sync_success = sync_workspace_member_removal(
workspace_id=tenant.id, member_id=account.id, source="workspace_member_removed"
)
if not sync_success:
logger.warning(
"Enterprise workspace member removal sync failed: workspace_id=%s, member_id=%s",
tenant.id,
account.id,
)
@staticmethod
def update_member_role(tenant: Tenant, member: Account, new_role: str, operator: Account):
"""Update member role"""

View File

@@ -0,0 +1,115 @@
import json
import logging
import uuid
from datetime import UTC, datetime
from redis import RedisError
from configs import dify_config
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.account import TenantAccountJoin
logger = logging.getLogger(__name__)
ACCOUNT_DELETION_SYNC_QUEUE = "enterprise:member:sync:queue"
ACCOUNT_DELETION_SYNC_TASK_TYPE = "sync_member_deletion_from_workspace"
def _queue_task(workspace_id: str, member_id: str, *, source: str) -> bool:
"""
Queue an account deletion sync task to Redis.
Internal helper function. Do not call directly - use the public functions instead.
Args:
workspace_id: The workspace/tenant ID to sync
member_id: The member/account ID that was removed
source: Source of the sync request (for debugging/tracking)
Returns:
bool: True if task was queued successfully, False otherwise
"""
try:
task = {
"task_id": str(uuid.uuid4()),
"workspace_id": workspace_id,
"member_id": member_id,
"retry_count": 0,
"created_at": datetime.now(UTC).isoformat(),
"source": source,
"type": ACCOUNT_DELETION_SYNC_TASK_TYPE,
}
# Push to Redis list (queue) - LPUSH adds to the head, worker consumes from tail with RPOP
redis_client.lpush(ACCOUNT_DELETION_SYNC_QUEUE, json.dumps(task))
logger.info(
"Queued account deletion sync task for workspace %s, member %s, task_id: %s, source: %s",
workspace_id,
member_id,
task["task_id"],
source,
)
return True
except (RedisError, TypeError) as e:
logger.error(
"Failed to queue account deletion sync for workspace %s, member %s: %s",
workspace_id,
member_id,
str(e),
exc_info=True,
)
# Don't raise - we don't want to fail member deletion if queueing fails
return False
def sync_workspace_member_removal(workspace_id: str, member_id: str, *, source: str) -> bool:
"""
Sync a single workspace member removal (enterprise only).
Queues a task for the enterprise backend to reassign resources from the removed member.
Handles enterprise edition check internally. Safe to call in community edition (no-op).
Args:
workspace_id: The workspace/tenant ID
member_id: The member/account ID that was removed
source: Source of the sync request (e.g., "workspace_member_removed")
Returns:
bool: True if task was queued (or skipped in community), False if queueing failed
"""
if not dify_config.ENTERPRISE_ENABLED:
return True
return _queue_task(workspace_id=workspace_id, member_id=member_id, source=source)
def sync_account_deletion(account_id: str, *, source: str) -> bool:
"""
Sync full account deletion across all workspaces (enterprise only).
Fetches all workspace memberships for the account and queues a sync task for each.
Handles enterprise edition check internally. Safe to call in community edition (no-op).
Args:
account_id: The account ID being deleted
source: Source of the sync request (e.g., "account_deleted")
Returns:
bool: True if all tasks were queued (or skipped in community), False if any queueing failed
"""
if not dify_config.ENTERPRISE_ENABLED:
return True
# Fetch all workspaces the account belongs to
workspace_joins = db.session.query(TenantAccountJoin).filter_by(account_id=account_id).all()
# Queue sync task for each workspace
success = True
for join in workspace_joins:
if not _queue_task(workspace_id=join.tenant_id, member_id=account_id, source=source):
success = False
return success
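
A hypothetical consumer sketch (the real enterprise worker is not part of this diff): it drains the same Redis list the producer LPUSHes to, reading from the tail with RPOP so tasks are handled roughly in FIFO order.

import json

import redis

QUEUE = "enterprise:member:sync:queue"  # matches ACCOUNT_DELETION_SYNC_QUEUE above

client = redis.Redis()  # assumes a local Redis instance


def drain_once() -> None:
    raw = client.rpop(QUEUE)
    while raw is not None:
        task = json.loads(raw)
        # A real worker would reassign resources owned by task["member_id"] within
        # task["workspace_id"]; this sketch only prints the payload.
        print(task["task_id"], task["workspace_id"], task["member_id"], task["source"])
        raw = client.rpop(QUEUE)


if __name__ == "__main__":
    drain_once()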

View File

@@ -8,7 +8,6 @@ from sqlalchemy import delete, select
from core.db.session_factory import session_factory
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
@@ -27,7 +26,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
logger.info(click.style(f"Start update document: {document_id}", fg="green"))
start_at = time.perf_counter()
with session_factory.create_session() as session:
with session_factory.create_session() as session, session.begin():
document = session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
if not document:
@@ -36,7 +35,6 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
document.indexing_status = "parsing"
document.processing_started_at = naive_utc_now()
session.commit()
# delete all document segment and index
try:
@@ -56,7 +54,7 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
segment_ids = [segment.id for segment in segments]
segment_delete_stmt = delete(DocumentSegment).where(DocumentSegment.id.in_(segment_ids))
session.execute(segment_delete_stmt)
db.session.commit()
end_at = time.perf_counter()
logger.info(
click.style(

View File

@@ -6,8 +6,8 @@ import typing
import click
from celery import shared_task
from core.helper import marketplace
from core.helper.marketplace import MarketplacePluginDeclaration
from core.helper.marketplace import record_install_plugin_event
from core.plugin.entities.marketplace import MarketplacePluginSnapshot
from core.plugin.entities.plugin import PluginInstallationSource
from core.plugin.impl.plugin import PluginInstaller
from extensions.ext_redis import redis_client
@@ -16,7 +16,7 @@ from models.account import TenantPluginAutoUpgradeStrategy
logger = logging.getLogger(__name__)
RETRY_TIMES_OF_ONE_PLUGIN_IN_ONE_TENANT = 3
CACHE_REDIS_KEY_PREFIX = "plugin_autoupgrade_check_task:cached_plugin_manifests:"
CACHE_REDIS_KEY_PREFIX = "plugin_autoupgrade_check_task:cached_plugin_snapshot:"
CACHE_REDIS_TTL = 60 * 60 # 1 hour
@@ -25,11 +25,11 @@ def _get_redis_cache_key(plugin_id: str) -> str:
return f"{CACHE_REDIS_KEY_PREFIX}{plugin_id}"
def _get_cached_manifest(plugin_id: str) -> typing.Union[MarketplacePluginDeclaration, None, bool]:
def _get_cached_manifest(plugin_id: str) -> typing.Union[MarketplacePluginSnapshot, None, bool]:
"""
Get cached plugin manifest from Redis.
Returns:
- MarketplacePluginDeclaration: if found in cache
- MarketplacePluginSnapshot: if found in cache
- None: if cached as not found (marketplace returned no result)
- False: if not in cache at all
"""
@@ -43,76 +43,31 @@ def _get_cached_manifest(plugin_id: str) -> typing.Union[MarketplacePluginDeclar
if cached_json is None:
return None
return MarketplacePluginDeclaration.model_validate(cached_json)
return MarketplacePluginSnapshot.model_validate(cached_json)
except Exception:
logger.exception("Failed to get cached manifest for plugin %s", plugin_id)
return False
def _set_cached_manifest(plugin_id: str, manifest: typing.Union[MarketplacePluginDeclaration, None]) -> None:
"""
Cache plugin manifest in Redis.
Args:
plugin_id: The plugin ID
manifest: The manifest to cache, or None if not found in marketplace
"""
try:
key = _get_redis_cache_key(plugin_id)
if manifest is None:
# Cache the fact that this plugin was not found
redis_client.setex(key, CACHE_REDIS_TTL, json.dumps(None))
else:
# Cache the manifest data
redis_client.setex(key, CACHE_REDIS_TTL, manifest.model_dump_json())
except Exception:
# If Redis fails, continue without caching
# traceback.print_exc()
logger.exception("Failed to set cached manifest for plugin %s", plugin_id)
def marketplace_batch_fetch_plugin_manifests(
plugin_ids_plain_list: list[str],
) -> list[MarketplacePluginDeclaration]:
"""Fetch plugin manifests with Redis caching support."""
cached_manifests: dict[str, typing.Union[MarketplacePluginDeclaration, None]] = {}
not_cached_plugin_ids: list[str] = []
) -> list[MarketplacePluginSnapshot]:
"""
Fetch plugin manifests from Redis cache only.
This function assumes fetch_global_plugin_manifest() has been called
to pre-populate the cache with all marketplace plugins.
"""
result: list[MarketplacePluginSnapshot] = []
# Check Redis cache for each plugin
for plugin_id in plugin_ids_plain_list:
cached_result = _get_cached_manifest(plugin_id)
if cached_result is False:
# Not in cache, need to fetch
not_cached_plugin_ids.append(plugin_id)
else:
# Either found manifest or cached as None (not found in marketplace)
# At this point, cached_result is either MarketplacePluginDeclaration or None
if isinstance(cached_result, bool):
# This should never happen due to the if condition above, but for type safety
continue
cached_manifests[plugin_id] = cached_result
if not isinstance(cached_result, MarketplacePluginSnapshot):
# cached_result is False (not in cache) or None (cached as not found)
logger.warning("plugin %s not found in cache, skipping", plugin_id)
continue
# Fetch uncached plugins from marketplace
if not_cached_plugin_ids:
manifests = marketplace.batch_fetch_plugin_manifests_ignore_deserialization_error(not_cached_plugin_ids)
# Cache the fetched manifests
for manifest in manifests:
cached_manifests[manifest.plugin_id] = manifest
_set_cached_manifest(manifest.plugin_id, manifest)
# Cache plugins that were not found in marketplace
fetched_plugin_ids = {manifest.plugin_id for manifest in manifests}
for plugin_id in not_cached_plugin_ids:
if plugin_id not in fetched_plugin_ids:
cached_manifests[plugin_id] = None
_set_cached_manifest(plugin_id, None)
# Build result list from cached manifests
result: list[MarketplacePluginDeclaration] = []
for plugin_id in plugin_ids_plain_list:
cached_manifest: typing.Union[MarketplacePluginDeclaration, None] = cached_manifests.get(plugin_id)
if cached_manifest is not None:
result.append(cached_manifest)
result.append(cached_result)
return result
@@ -211,7 +166,7 @@ def process_tenant_plugin_autoupgrade_check_task(
# execute upgrade
new_unique_identifier = manifest.latest_package_identifier
marketplace.record_install_plugin_event(new_unique_identifier)
record_install_plugin_event(new_unique_identifier)
click.echo(
click.style(
f"Upgrade plugin: {original_unique_identifier} -> {new_unique_identifier}",

View File

@@ -259,8 +259,8 @@ def _delete_app_workflow_app_logs(tenant_id: str, app_id: str):
def _delete_app_workflow_archive_logs(tenant_id: str, app_id: str):
def del_workflow_archive_log(workflow_archive_log_id: str):
db.session.query(WorkflowArchiveLog).where(WorkflowArchiveLog.id == workflow_archive_log_id).delete(
def del_workflow_archive_log(session, workflow_archive_log_id: str):
session.query(WorkflowArchiveLog).where(WorkflowArchiveLog.id == workflow_archive_log_id).delete(
synchronize_session=False
)
@@ -420,7 +420,7 @@ def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int:
total_files_deleted = 0
while True:
with session_factory.create_session() as session:
with session_factory.create_session() as session, session.begin():
# Get a batch of draft variable IDs along with their file_ids
query_sql = """
SELECT id, file_id FROM workflow_draft_variables
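
Below is a minimal, self-contained sketch (table, model, and engine are illustrative stand-ins, not Dify's actual schema) of the batch-delete pattern this hunk switches to: each loop iteration opens a session together with session.begin(), so the batch commits automatically when the block exits cleanly and rolls back if an exception is raised.

from sqlalchemy import String, create_engine, delete, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class DraftVariable(Base):  # stand-in for workflow_draft_variables
    __tablename__ = "draft_variables"
    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    app_id: Mapped[str] = mapped_column(String(36), index=True)


engine = create_engine("sqlite+pysqlite:///:memory:")
Base.metadata.create_all(engine)


def delete_in_batches(app_id: str, batch_size: int = 1000) -> int:
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")
    total = 0
    while True:
        # One transaction per batch: commits when the block exits, rolls back on error.
        with Session(engine) as session, session.begin():
            ids = session.scalars(
                select(DraftVariable.id).where(DraftVariable.app_id == app_id).limit(batch_size)
            ).all()
            if not ids:
                return total
            session.execute(delete(DraftVariable).where(DraftVariable.id.in_(ids)))
            total += len(ids)


print(delete_in_batches("app-1", batch_size=3))  # 0 in this empty demo table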

View File

@@ -10,7 +10,10 @@ from models import Tenant
from models.enums import CreatorUserRole
from models.model import App, UploadFile
from models.workflow import WorkflowDraftVariable, WorkflowDraftVariableFile
from tasks.remove_app_and_related_data_task import _delete_draft_variables, delete_draft_variables_batch
from tasks.remove_app_and_related_data_task import (
_delete_draft_variables,
delete_draft_variables_batch,
)
@pytest.fixture
@@ -297,12 +300,18 @@ class TestDeleteDraftVariablesWithOffloadIntegration:
def test_delete_draft_variables_with_offload_data(self, mock_storage, setup_offload_test_data):
data = setup_offload_test_data
app_id = data["app"].id
upload_file_ids = [uf.id for uf in data["upload_files"]]
variable_file_ids = [vf.id for vf in data["variable_files"]]
mock_storage.delete.return_value = None
with session_factory.create_session() as session:
draft_vars_before = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
var_files_before = session.query(WorkflowDraftVariableFile).count()
upload_files_before = session.query(UploadFile).count()
var_files_before = (
session.query(WorkflowDraftVariableFile)
.where(WorkflowDraftVariableFile.id.in_(variable_file_ids))
.count()
)
upload_files_before = session.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).count()
assert draft_vars_before == 3
assert var_files_before == 2
assert upload_files_before == 2
@@ -315,8 +324,12 @@ class TestDeleteDraftVariablesWithOffloadIntegration:
assert draft_vars_after == 0
with session_factory.create_session() as session:
var_files_after = session.query(WorkflowDraftVariableFile).count()
upload_files_after = session.query(UploadFile).count()
var_files_after = (
session.query(WorkflowDraftVariableFile)
.where(WorkflowDraftVariableFile.id.in_(variable_file_ids))
.count()
)
upload_files_after = session.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).count()
assert var_files_after == 0
assert upload_files_after == 0
@@ -329,6 +342,8 @@ class TestDeleteDraftVariablesWithOffloadIntegration:
def test_delete_draft_variables_storage_failure_continues_cleanup(self, mock_storage, setup_offload_test_data):
data = setup_offload_test_data
app_id = data["app"].id
upload_file_ids = [uf.id for uf in data["upload_files"]]
variable_file_ids = [vf.id for vf in data["variable_files"]]
mock_storage.delete.side_effect = [Exception("Storage error"), None]
deleted_count = delete_draft_variables_batch(app_id, batch_size=10)
@@ -339,8 +354,12 @@ class TestDeleteDraftVariablesWithOffloadIntegration:
assert draft_vars_after == 0
with session_factory.create_session() as session:
var_files_after = session.query(WorkflowDraftVariableFile).count()
upload_files_after = session.query(UploadFile).count()
var_files_after = (
session.query(WorkflowDraftVariableFile)
.where(WorkflowDraftVariableFile.id.in_(variable_file_ids))
.count()
)
upload_files_after = session.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).count()
assert var_files_after == 0
assert upload_files_after == 0
@@ -395,3 +414,275 @@ class TestDeleteDraftVariablesWithOffloadIntegration:
if app2_obj:
session.delete(app2_obj)
session.commit()
class TestDeleteDraftVariablesSessionCommit:
"""Test suite to verify session commit behavior in delete_draft_variables_batch."""
@pytest.fixture
def setup_offload_test_data(self, app_and_tenant):
"""Create test data with offload files for session commit tests."""
from core.variables.types import SegmentType
from libs.datetime_utils import naive_utc_now
tenant, app = app_and_tenant
with session_factory.create_session() as session:
upload_file1 = UploadFile(
tenant_id=tenant.id,
storage_type="local",
key="test/file1.json",
name="file1.json",
size=1024,
extension="json",
mime_type="application/json",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=str(uuid.uuid4()),
created_at=naive_utc_now(),
used=False,
)
upload_file2 = UploadFile(
tenant_id=tenant.id,
storage_type="local",
key="test/file2.json",
name="file2.json",
size=2048,
extension="json",
mime_type="application/json",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=str(uuid.uuid4()),
created_at=naive_utc_now(),
used=False,
)
session.add(upload_file1)
session.add(upload_file2)
session.flush()
var_file1 = WorkflowDraftVariableFile(
tenant_id=tenant.id,
app_id=app.id,
user_id=str(uuid.uuid4()),
upload_file_id=upload_file1.id,
size=1024,
length=10,
value_type=SegmentType.STRING,
)
var_file2 = WorkflowDraftVariableFile(
tenant_id=tenant.id,
app_id=app.id,
user_id=str(uuid.uuid4()),
upload_file_id=upload_file2.id,
size=2048,
length=20,
value_type=SegmentType.OBJECT,
)
session.add(var_file1)
session.add(var_file2)
session.flush()
draft_var1 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_1",
name="large_var_1",
value=StringSegment(value="truncated..."),
node_execution_id=str(uuid.uuid4()),
file_id=var_file1.id,
)
draft_var2 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_2",
name="large_var_2",
value=StringSegment(value="truncated..."),
node_execution_id=str(uuid.uuid4()),
file_id=var_file2.id,
)
draft_var3 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_3",
name="regular_var",
value=StringSegment(value="regular_value"),
node_execution_id=str(uuid.uuid4()),
)
session.add(draft_var1)
session.add(draft_var2)
session.add(draft_var3)
session.commit()
data = {
"app": app,
"tenant": tenant,
"upload_files": [upload_file1, upload_file2],
"variable_files": [var_file1, var_file2],
"draft_variables": [draft_var1, draft_var2, draft_var3],
}
yield data
with session_factory.create_session() as session:
for table, ids in [
(WorkflowDraftVariable, [v.id for v in data["draft_variables"]]),
(WorkflowDraftVariableFile, [vf.id for vf in data["variable_files"]]),
(UploadFile, [uf.id for uf in data["upload_files"]]),
]:
cleanup_query = delete(table).where(table.id.in_(ids)).execution_options(synchronize_session=False)
session.execute(cleanup_query)
session.commit()
@pytest.fixture
def setup_commit_test_data(self, app_and_tenant):
"""Create test data for session commit tests."""
tenant, app = app_and_tenant
variable_ids: list[str] = []
with session_factory.create_session() as session:
variables = []
for i in range(10):
var = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id=f"node_{i}",
name=f"var_{i}",
value=StringSegment(value="test_value"),
node_execution_id=str(uuid.uuid4()),
)
session.add(var)
variables.append(var)
session.commit()
variable_ids = [v.id for v in variables]
yield {
"app": app,
"tenant": tenant,
"variable_ids": variable_ids,
}
with session_factory.create_session() as session:
cleanup_query = (
delete(WorkflowDraftVariable)
.where(WorkflowDraftVariable.id.in_(variable_ids))
.execution_options(synchronize_session=False)
)
session.execute(cleanup_query)
session.commit()
def test_session_commit_is_called_after_each_batch(self, setup_commit_test_data):
"""Test that session.begin() is used for automatic transaction management."""
data = setup_commit_test_data
app_id = data["app"].id
# Since session.begin() is used, the transaction is automatically committed
# when the with block exits successfully. We verify this by checking that
# data is actually persisted.
deleted_count = delete_draft_variables_batch(app_id, batch_size=3)
# Verify all data was deleted (proves transaction was committed)
with session_factory.create_session() as session:
remaining_count = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert deleted_count == 10
assert remaining_count == 0
def test_data_persisted_after_batch_deletion(self, setup_commit_test_data):
"""Test that data is actually persisted to database after batch deletion with commits."""
data = setup_commit_test_data
app_id = data["app"].id
variable_ids = data["variable_ids"]
# Verify initial state
with session_factory.create_session() as session:
initial_count = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert initial_count == 10
# Perform deletion with small batch size to force multiple commits
deleted_count = delete_draft_variables_batch(app_id, batch_size=3)
assert deleted_count == 10
# Verify all data is deleted in a new session (proves commits worked)
with session_factory.create_session() as session:
final_count = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert final_count == 0
# Verify specific IDs are deleted
with session_factory.create_session() as session:
remaining_vars = (
session.query(WorkflowDraftVariable).where(WorkflowDraftVariable.id.in_(variable_ids)).count()
)
assert remaining_vars == 0
def test_session_commit_with_empty_dataset(self, setup_commit_test_data):
"""Test session behavior when deleting from an empty dataset."""
nonexistent_app_id = str(uuid.uuid4())
# Should not raise any errors and should return 0
deleted_count = delete_draft_variables_batch(nonexistent_app_id, batch_size=10)
assert deleted_count == 0
def test_session_commit_with_single_batch(self, setup_commit_test_data):
"""Test that commit happens correctly when all data fits in a single batch."""
data = setup_commit_test_data
app_id = data["app"].id
with session_factory.create_session() as session:
initial_count = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert initial_count == 10
# Delete all in a single batch
deleted_count = delete_draft_variables_batch(app_id, batch_size=100)
assert deleted_count == 10
# Verify data is persisted
with session_factory.create_session() as session:
final_count = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert final_count == 0
def test_invalid_batch_size_raises_error(self, setup_commit_test_data):
"""Test that invalid batch size raises ValueError."""
data = setup_commit_test_data
app_id = data["app"].id
with pytest.raises(ValueError, match="batch_size must be positive"):
delete_draft_variables_batch(app_id, batch_size=0)
with pytest.raises(ValueError, match="batch_size must be positive"):
delete_draft_variables_batch(app_id, batch_size=-1)
@patch("extensions.ext_storage.storage")
def test_session_commit_with_offload_data_cleanup(self, mock_storage, setup_offload_test_data):
"""Test that session commits correctly when cleaning up offload data."""
data = setup_offload_test_data
app_id = data["app"].id
upload_file_ids = [uf.id for uf in data["upload_files"]]
mock_storage.delete.return_value = None
# Verify initial state
with session_factory.create_session() as session:
draft_vars_before = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
var_files_before = (
session.query(WorkflowDraftVariableFile)
.where(WorkflowDraftVariableFile.id.in_([vf.id for vf in data["variable_files"]]))
.count()
)
upload_files_before = session.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).count()
assert draft_vars_before == 3
assert var_files_before == 2
assert upload_files_before == 2
# Delete variables with offload data
deleted_count = delete_draft_variables_batch(app_id, batch_size=10)
assert deleted_count == 3
# Verify all data is persisted (deleted) in new session
with session_factory.create_session() as session:
draft_vars_after = session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
var_files_after = (
session.query(WorkflowDraftVariableFile)
.where(WorkflowDraftVariableFile.id.in_([vf.id for vf in data["variable_files"]]))
.count()
)
upload_files_after = session.query(UploadFile).where(UploadFile.id.in_(upload_file_ids)).count()
assert draft_vars_after == 0
assert var_files_after == 0
assert upload_files_after == 0
# Verify storage cleanup was called
assert mock_storage.delete.call_count == 2

View File

@@ -1016,7 +1016,7 @@ class TestAccountService:
def test_delete_account(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test account deletion (should add task to queue).
Test account deletion (should add task to queue and sync to enterprise).
"""
fake = Faker()
email = fake.email()
@@ -1034,10 +1034,18 @@ class TestAccountService:
password=password,
)
with patch("services.account_service.delete_account_task") as mock_delete_task:
with (
patch("services.account_service.delete_account_task") as mock_delete_task,
patch("services.enterprise.account_deletion_sync.sync_account_deletion") as mock_sync,
):
mock_sync.return_value = True
# Delete account
AccountService.delete_account(account)
# Verify sync was called
mock_sync.assert_called_once_with(account_id=account.id, source="account_deleted")
# Verify task was added to queue
mock_delete_task.delay.assert_called_once_with(account.id)
@@ -1716,7 +1724,7 @@ class TestTenantService:
def test_remove_member_from_tenant_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful member removal from tenant.
Test successful member removal from tenant (should sync to enterprise).
"""
fake = Faker()
tenant_name = fake.company()
@@ -1751,7 +1759,15 @@ class TestTenantService:
TenantService.create_tenant_member(tenant, member_account, role="normal")
# Remove member
TenantService.remove_member_from_tenant(tenant, member_account, owner_account)
with patch("services.enterprise.account_deletion_sync.sync_workspace_member_removal") as mock_sync:
mock_sync.return_value = True
TenantService.remove_member_from_tenant(tenant, member_account, owner_account)
# Verify sync was called
mock_sync.assert_called_once_with(
workspace_id=tenant.id, member_id=member_account.id, source="workspace_member_removed"
)
# Verify member was removed
from extensions.ext_database import db

View File

@@ -0,0 +1,182 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from tasks.document_indexing_update_task import document_indexing_update_task
class TestDocumentIndexingUpdateTask:
@pytest.fixture
def mock_external_dependencies(self):
"""Patch external collaborators used by the update task.
- IndexProcessorFactory.init_index_processor().clean(...)
- IndexingRunner.run([...])
"""
with (
patch("tasks.document_indexing_update_task.IndexProcessorFactory") as mock_factory,
patch("tasks.document_indexing_update_task.IndexingRunner") as mock_runner,
):
processor_instance = MagicMock()
mock_factory.return_value.init_index_processor.return_value = processor_instance
runner_instance = MagicMock()
mock_runner.return_value = runner_instance
yield {
"factory": mock_factory,
"processor": processor_instance,
"runner": mock_runner,
"runner_instance": runner_instance,
}
def _create_dataset_document_with_segments(self, db_session_with_containers, *, segment_count: int = 2):
fake = Faker()
# Account and tenant
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db_session_with_containers.add(account)
db_session_with_containers.commit()
tenant = Tenant(name=fake.company(), status="normal")
db_session_with_containers.add(tenant)
db_session_with_containers.commit()
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER,
current=True,
)
db_session_with_containers.add(join)
db_session_with_containers.commit()
# Dataset and document
dataset = Dataset(
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=64),
data_source_type="upload_file",
indexing_technique="high_quality",
created_by=account.id,
)
db_session_with_containers.add(dataset)
db_session_with_containers.commit()
document = Document(
tenant_id=tenant.id,
dataset_id=dataset.id,
position=0,
data_source_type="upload_file",
batch="test_batch",
name=fake.file_name(),
created_from="upload_file",
created_by=account.id,
indexing_status="waiting",
enabled=True,
doc_form="text_model",
)
db_session_with_containers.add(document)
db_session_with_containers.commit()
# Segments
node_ids = []
for i in range(segment_count):
node_id = f"node-{i + 1}"
seg = DocumentSegment(
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
position=i,
content=fake.text(max_nb_chars=32),
answer=None,
word_count=10,
tokens=5,
index_node_id=node_id,
status="completed",
created_by=account.id,
)
db_session_with_containers.add(seg)
node_ids.append(node_id)
db_session_with_containers.commit()
# Refresh to ensure ORM state
db_session_with_containers.refresh(dataset)
db_session_with_containers.refresh(document)
return dataset, document, node_ids
def test_cleans_segments_and_reindexes(self, db_session_with_containers, mock_external_dependencies):
dataset, document, node_ids = self._create_dataset_document_with_segments(db_session_with_containers)
# Act
document_indexing_update_task(dataset.id, document.id)
# Ensure we see committed changes from another session
db_session_with_containers.expire_all()
# Assert document status updated before reindex
updated = db_session_with_containers.query(Document).where(Document.id == document.id).first()
assert updated.indexing_status == "parsing"
assert updated.processing_started_at is not None
# Segments should be deleted
remaining = (
db_session_with_containers.query(DocumentSegment).where(DocumentSegment.document_id == document.id).count()
)
assert remaining == 0
# Assert index processor clean was called with expected args
clean_call = mock_external_dependencies["processor"].clean.call_args
assert clean_call is not None
args, kwargs = clean_call
# args[0] is a Dataset instance (from another session) — validate by id
assert getattr(args[0], "id", None) == dataset.id
# args[1] should contain our node_ids
assert set(args[1]) == set(node_ids)
assert kwargs.get("with_keywords") is True
assert kwargs.get("delete_child_chunks") is True
# Assert indexing runner invoked with the updated document
run_call = mock_external_dependencies["runner_instance"].run.call_args
assert run_call is not None
run_docs = run_call[0][0]
assert len(run_docs) == 1
first = run_docs[0]
assert getattr(first, "id", None) == document.id
def test_clean_error_is_logged_and_indexing_continues(self, db_session_with_containers, mock_external_dependencies):
dataset, document, node_ids = self._create_dataset_document_with_segments(db_session_with_containers)
# Force clean to raise; task should continue to indexing
mock_external_dependencies["processor"].clean.side_effect = Exception("boom")
document_indexing_update_task(dataset.id, document.id)
# Ensure we see committed changes from another session
db_session_with_containers.expire_all()
# Indexing should still be triggered
mock_external_dependencies["runner_instance"].run.assert_called_once()
# Segments should remain (since clean failed before DB delete)
remaining = (
db_session_with_containers.query(DocumentSegment).where(DocumentSegment.document_id == document.id).count()
)
assert remaining > 0
def test_document_not_found_noop(self, db_session_with_containers, mock_external_dependencies):
fake = Faker()
# Act with non-existent document id
document_indexing_update_task(dataset_id=fake.uuid4(), document_id=fake.uuid4())
# Neither processor nor runner should be called
mock_external_dependencies["processor"].clean.assert_not_called()
mock_external_dependencies["runner_instance"].run.assert_not_called()

View File

@@ -0,0 +1,400 @@
"""
Unit tests for GraphBuilder.
Tests the automatic graph construction from node lists with dependency declarations.
"""
import pytest
from core.workflow.generator.utils.graph_builder import (
CyclicDependencyError,
GraphBuilder,
)
class TestGraphBuilderBasic:
"""Basic functionality tests."""
def test_empty_nodes_creates_minimal_workflow(self):
"""Empty node list creates start -> end workflow."""
result_nodes, result_edges = GraphBuilder.build_graph([])
assert len(result_nodes) == 2
assert result_nodes[0]["type"] == "start"
assert result_nodes[1]["type"] == "end"
assert len(result_edges) == 1
assert result_edges[0]["source"] == "start"
assert result_edges[0]["target"] == "end"
def test_simple_linear_workflow(self):
"""Simple linear workflow: start -> fetch -> process -> end."""
nodes = [
{"id": "fetch", "type": "http-request", "depends_on": []},
{"id": "process", "type": "llm", "depends_on": ["fetch"]},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should have: start + 2 user nodes + end = 4
assert len(result_nodes) == 4
assert result_nodes[0]["type"] == "start"
assert result_nodes[-1]["type"] == "end"
# Should have: start->fetch, fetch->process, process->end = 3
assert len(result_edges) == 3
# Verify edge connections
edge_pairs = [(e["source"], e["target"]) for e in result_edges]
assert ("start", "fetch") in edge_pairs
assert ("fetch", "process") in edge_pairs
assert ("process", "end") in edge_pairs
class TestParallelWorkflow:
"""Tests for parallel node handling."""
def test_parallel_workflow(self):
"""Parallel workflow: multiple nodes from start, merging to one."""
nodes = [
{"id": "api1", "type": "http-request", "depends_on": []},
{"id": "api2", "type": "http-request", "depends_on": []},
{"id": "merge", "type": "llm", "depends_on": ["api1", "api2"]},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# start should connect to both api1 and api2
start_edges = [e for e in result_edges if e["source"] == "start"]
assert len(start_edges) == 2
start_targets = {e["target"] for e in start_edges}
assert start_targets == {"api1", "api2"}
# Both api1 and api2 should connect to merge
merge_incoming = [e for e in result_edges if e["target"] == "merge"]
assert len(merge_incoming) == 2
def test_multiple_terminal_nodes(self):
"""Multiple terminal nodes all connect to end."""
nodes = [
{"id": "branch1", "type": "llm", "depends_on": []},
{"id": "branch2", "type": "llm", "depends_on": []},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Both branches should connect to end
end_incoming = [e for e in result_edges if e["target"] == "end"]
assert len(end_incoming) == 2
class TestIfElseWorkflow:
"""Tests for if-else branching."""
def test_if_else_workflow(self):
"""Conditional branching workflow."""
nodes = [
{
"id": "check",
"type": "if-else",
"config": {"true_branch": "success", "false_branch": "fallback"},
"depends_on": [],
},
{"id": "success", "type": "llm", "depends_on": []},
{"id": "fallback", "type": "code", "depends_on": []},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should have true and false branch edges
branch_edges = [e for e in result_edges if e["source"] == "check"]
assert len(branch_edges) == 2
assert any(e.get("sourceHandle") == "true" for e in branch_edges)
assert any(e.get("sourceHandle") == "false" for e in branch_edges)
# Verify targets
true_edge = next(e for e in branch_edges if e.get("sourceHandle") == "true")
false_edge = next(e for e in branch_edges if e.get("sourceHandle") == "false")
assert true_edge["target"] == "success"
assert false_edge["target"] == "fallback"
def test_if_else_missing_branch_no_error(self):
"""if-else with only true branch doesn't error (warning only)."""
nodes = [
{
"id": "check",
"type": "if-else",
"config": {"true_branch": "success"},
"depends_on": [],
},
{"id": "success", "type": "llm", "depends_on": []},
]
# Should not raise
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should have one branch edge
branch_edges = [e for e in result_edges if e["source"] == "check"]
assert len(branch_edges) == 1
assert branch_edges[0].get("sourceHandle") == "true"
class TestQuestionClassifierWorkflow:
"""Tests for question-classifier branching."""
def test_question_classifier_workflow(self):
"""Question classifier with multiple classes."""
nodes = [
{
"id": "classifier",
"type": "question-classifier",
"config": {
"query": ["start", "user_input"],
"classes": [
{"id": "tech", "name": "技术问题", "target": "tech_handler"},
{"id": "sales", "name": "销售咨询", "target": "sales_handler"},
{"id": "other", "name": "其他问题", "target": "other_handler"},
],
},
"depends_on": [],
},
{"id": "tech_handler", "type": "llm", "depends_on": []},
{"id": "sales_handler", "type": "llm", "depends_on": []},
{"id": "other_handler", "type": "llm", "depends_on": []},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should have 3 branch edges from classifier
classifier_edges = [e for e in result_edges if e["source"] == "classifier"]
assert len(classifier_edges) == 3
# Each should use class id as sourceHandle
assert any(e.get("sourceHandle") == "tech" and e["target"] == "tech_handler" for e in classifier_edges)
assert any(e.get("sourceHandle") == "sales" and e["target"] == "sales_handler" for e in classifier_edges)
assert any(e.get("sourceHandle") == "other" and e["target"] == "other_handler" for e in classifier_edges)
def test_question_classifier_missing_target(self):
"""Classes without target connect to end."""
nodes = [
{
"id": "classifier",
"type": "question-classifier",
"config": {
"classes": [
{"id": "known", "name": "已知问题", "target": "handler"},
{"id": "unknown", "name": "未知问题"}, # Missing target
],
},
"depends_on": [],
},
{"id": "handler", "type": "llm", "depends_on": []},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Missing target should connect to end
classifier_edges = [e for e in result_edges if e["source"] == "classifier"]
assert any(e.get("sourceHandle") == "unknown" and e["target"] == "end" for e in classifier_edges)
class TestVariableDependencyInference:
"""Tests for automatic dependency inference from variables."""
def test_variable_dependency_inference(self):
"""Dependencies inferred from variable references."""
nodes = [
{"id": "fetch", "type": "http-request", "depends_on": []},
{
"id": "process",
"type": "llm",
"config": {"prompt_template": [{"text": "{{#fetch.body#}}"}]},
# No explicit depends_on, but references fetch
},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should automatically infer process depends on fetch
assert any(e["source"] == "fetch" and e["target"] == "process" for e in result_edges)
def test_system_variable_not_inferred(self):
"""System variables (sys, start) not inferred as dependencies."""
nodes = [
{
"id": "process",
"type": "llm",
"config": {"prompt_template": [{"text": "{{#sys.query#}} {{#start.input#}}"}]},
"depends_on": [],
},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should connect to start, not create dependency on sys or start
edge_sources = {e["source"] for e in result_edges}
assert "sys" not in edge_sources
assert "start" in edge_sources
class TestCycleDetection:
"""Tests for cyclic dependency detection."""
def test_cyclic_dependency_detected(self):
"""Cyclic dependencies raise error."""
nodes = [
{"id": "a", "type": "llm", "depends_on": ["c"]},
{"id": "b", "type": "llm", "depends_on": ["a"]},
{"id": "c", "type": "llm", "depends_on": ["b"]},
]
with pytest.raises(CyclicDependencyError):
GraphBuilder.build_graph(nodes)
def test_self_dependency_detected(self):
"""Self-dependency raises error."""
nodes = [
{"id": "a", "type": "llm", "depends_on": ["a"]},
]
with pytest.raises(CyclicDependencyError):
GraphBuilder.build_graph(nodes)
class TestErrorRecovery:
"""Tests for silent error recovery."""
def test_invalid_dependency_removed(self):
"""Invalid dependencies (non-existent nodes) are silently removed."""
nodes = [
{"id": "process", "type": "llm", "depends_on": ["nonexistent"]},
]
# Should not raise, invalid dependency silently removed
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Process should connect from start (since invalid dep was removed)
assert any(e["source"] == "start" and e["target"] == "process" for e in result_edges)
def test_depends_on_as_string(self):
"""depends_on as string is converted to list."""
nodes = [
{"id": "fetch", "type": "http-request", "depends_on": []},
{"id": "process", "type": "llm", "depends_on": "fetch"}, # String instead of list
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should work correctly
assert any(e["source"] == "fetch" and e["target"] == "process" for e in result_edges)
class TestContainerNodes:
"""Tests for container nodes (iteration, loop)."""
def test_iteration_node_as_regular_node(self):
"""Iteration nodes behave as regular single-in-single-out nodes."""
nodes = [
{"id": "prepare", "type": "code", "depends_on": []},
{
"id": "loop",
"type": "iteration",
"config": {"iterator_selector": ["prepare", "items"]},
"depends_on": ["prepare"],
},
{"id": "process_result", "type": "llm", "depends_on": ["loop"]},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should have standard edges: start->prepare, prepare->loop, loop->process_result, process_result->end
edge_pairs = [(e["source"], e["target"]) for e in result_edges]
assert ("start", "prepare") in edge_pairs
assert ("prepare", "loop") in edge_pairs
assert ("loop", "process_result") in edge_pairs
assert ("process_result", "end") in edge_pairs
def test_loop_node_as_regular_node(self):
"""Loop nodes behave as regular single-in-single-out nodes."""
nodes = [
{"id": "init", "type": "code", "depends_on": []},
{
"id": "repeat",
"type": "loop",
"config": {"loop_count": 5},
"depends_on": ["init"],
},
{"id": "finish", "type": "llm", "depends_on": ["repeat"]},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Standard edge flow
edge_pairs = [(e["source"], e["target"]) for e in result_edges]
assert ("init", "repeat") in edge_pairs
assert ("repeat", "finish") in edge_pairs
def test_iteration_with_variable_inference(self):
"""Iteration node dependencies can be inferred from iterator_selector."""
nodes = [
{"id": "data_source", "type": "http-request", "depends_on": []},
{
"id": "process_each",
"type": "iteration",
"config": {
"iterator_selector": ["data_source", "items"],
},
# No explicit depends_on, but references data_source
},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Should infer dependency from iterator_selector reference
# Note: iterator_selector format is different from {{#...#}}, so this tests
# that explicit depends_on is properly handled when not provided
# In this case, process_each has no depends_on, so it connects to start
edge_pairs = [(e["source"], e["target"]) for e in result_edges]
# Without explicit depends_on, connects to start
assert ("start", "process_each") in edge_pairs or ("data_source", "process_each") in edge_pairs
def test_loop_node_self_reference_not_cycle(self):
"""Loop nodes referencing their own outputs should not create cycle."""
nodes = [
{"id": "init", "type": "code", "depends_on": []},
{
"id": "my_loop",
"type": "loop",
"config": {
"loop_count": 5,
# Loop node referencing its own output (common pattern)
"prompt": "Previous: {{#my_loop.output#}}, continue...",
},
"depends_on": ["init"],
},
{"id": "finish", "type": "llm", "depends_on": ["my_loop"]},
]
# Should NOT raise CyclicDependencyError
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
# Verify the graph is built correctly
assert len(result_nodes) == 5 # start + 3 + end
edge_pairs = [(e["source"], e["target"]) for e in result_edges]
assert ("init", "my_loop") in edge_pairs
assert ("my_loop", "finish") in edge_pairs
class TestEdgeStructure:
"""Tests for edge structure correctness."""
def test_edge_has_required_fields(self):
"""Edges have all required fields."""
nodes = [
{"id": "node1", "type": "llm", "depends_on": []},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
for edge in result_edges:
assert "id" in edge
assert "source" in edge
assert "target" in edge
assert "sourceHandle" in edge
assert "targetHandle" in edge
def test_edge_id_unique(self):
"""Each edge has a unique ID."""
nodes = [
{"id": "a", "type": "llm", "depends_on": []},
{"id": "b", "type": "llm", "depends_on": []},
{"id": "c", "type": "llm", "depends_on": ["a", "b"]},
]
result_nodes, result_edges = GraphBuilder.build_graph(nodes)
edge_ids = [e["id"] for e in result_edges]
assert len(edge_ids) == len(set(edge_ids)) # All unique

View File

@@ -0,0 +1,287 @@
"""
Unit tests for the Mermaid Generator.
Tests cover:
- Basic workflow rendering
- Reserved word handling ('end' -> 'end_node')
- Question classifier multi-branch edges
- If-else branch labels
- Edge validation and skipping
- Tool node formatting
"""
from core.workflow.generator.utils.mermaid_generator import generate_mermaid
class TestBasicWorkflow:
"""Tests for basic workflow Mermaid generation."""
def test_simple_start_end_workflow(self):
"""Test simple Start → End workflow."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "title": "Start"},
{"id": "end", "type": "end", "title": "End"},
],
"edges": [{"source": "start", "target": "end"}],
}
result = generate_mermaid(workflow_data)
assert "flowchart TD" in result
assert 'start["type=start|title=Start"]' in result
assert 'end_node["type=end|title=End"]' in result
assert "start --> end_node" in result
def test_start_llm_end_workflow(self):
"""Test Start → LLM → End workflow."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "title": "Start"},
{"id": "llm", "type": "llm", "title": "Generate"},
{"id": "end", "type": "end", "title": "End"},
],
"edges": [
{"source": "start", "target": "llm"},
{"source": "llm", "target": "end"},
],
}
result = generate_mermaid(workflow_data)
assert 'llm["type=llm|title=Generate"]' in result
assert "start --> llm" in result
assert "llm --> end_node" in result
def test_empty_workflow(self):
"""Test empty workflow returns minimal output."""
workflow_data = {"nodes": [], "edges": []}
result = generate_mermaid(workflow_data)
assert result == "flowchart TD"
def test_missing_keys_handled(self):
"""Test workflow with missing keys doesn't crash."""
workflow_data = {}
result = generate_mermaid(workflow_data)
assert "flowchart TD" in result
class TestReservedWords:
"""Tests for reserved word handling in node IDs."""
def test_end_node_id_is_replaced(self):
"""Test 'end' node ID is replaced with 'end_node'."""
workflow_data = {
"nodes": [{"id": "end", "type": "end", "title": "End"}],
"edges": [],
}
result = generate_mermaid(workflow_data)
# Should use end_node instead of end
assert "end_node[" in result
assert '"type=end|title=End"' in result
def test_subgraph_node_id_is_replaced(self):
"""Test 'subgraph' node ID is replaced with 'subgraph_node'."""
workflow_data = {
"nodes": [{"id": "subgraph", "type": "code", "title": "Process"}],
"edges": [],
}
result = generate_mermaid(workflow_data)
assert "subgraph_node[" in result
def test_edge_uses_safe_ids(self):
"""Test edges correctly reference safe IDs after replacement."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "title": "Start"},
{"id": "end", "type": "end", "title": "End"},
],
"edges": [{"source": "start", "target": "end"}],
}
result = generate_mermaid(workflow_data)
# Edge should use end_node, not end
assert "start --> end_node" in result
assert "start --> end\n" not in result
class TestBranchEdges:
"""Tests for branching node edge labels."""
def test_question_classifier_source_handles(self):
"""Test question-classifier edges with sourceHandle labels."""
workflow_data = {
"nodes": [
{"id": "classifier", "type": "question-classifier", "title": "Classify"},
{"id": "refund", "type": "llm", "title": "Handle Refund"},
{"id": "inquiry", "type": "llm", "title": "Handle Inquiry"},
],
"edges": [
{"source": "classifier", "target": "refund", "sourceHandle": "refund"},
{"source": "classifier", "target": "inquiry", "sourceHandle": "inquiry"},
],
}
result = generate_mermaid(workflow_data)
assert "classifier -->|refund| refund" in result
assert "classifier -->|inquiry| inquiry" in result
def test_if_else_true_false_handles(self):
"""Test if-else edges with true/false labels."""
workflow_data = {
"nodes": [
{"id": "ifelse", "type": "if-else", "title": "Check"},
{"id": "yes_branch", "type": "llm", "title": "Yes"},
{"id": "no_branch", "type": "llm", "title": "No"},
],
"edges": [
{"source": "ifelse", "target": "yes_branch", "sourceHandle": "true"},
{"source": "ifelse", "target": "no_branch", "sourceHandle": "false"},
],
}
result = generate_mermaid(workflow_data)
assert "ifelse -->|true| yes_branch" in result
assert "ifelse -->|false| no_branch" in result
def test_source_handle_source_is_ignored(self):
"""Test sourceHandle='source' doesn't add label."""
workflow_data = {
"nodes": [
{"id": "llm1", "type": "llm", "title": "LLM 1"},
{"id": "llm2", "type": "llm", "title": "LLM 2"},
],
"edges": [{"source": "llm1", "target": "llm2", "sourceHandle": "source"}],
}
result = generate_mermaid(workflow_data)
# Should be plain arrow without label
assert "llm1 --> llm2" in result
assert "llm1 -->|source|" not in result
class TestEdgeValidation:
"""Tests for edge validation and error handling."""
def test_edge_with_missing_source_is_skipped(self):
"""Test edge with non-existent source node is skipped."""
workflow_data = {
"nodes": [{"id": "end", "type": "end", "title": "End"}],
"edges": [{"source": "nonexistent", "target": "end"}],
}
result = generate_mermaid(workflow_data)
# Should not contain the invalid edge
assert "nonexistent" not in result
assert "-->" not in result or "nonexistent" not in result
def test_edge_with_missing_target_is_skipped(self):
"""Test edge with non-existent target node is skipped."""
workflow_data = {
"nodes": [{"id": "start", "type": "start", "title": "Start"}],
"edges": [{"source": "start", "target": "nonexistent"}],
}
result = generate_mermaid(workflow_data)
# Edge should be skipped
assert "start --> nonexistent" not in result
def test_edge_without_source_or_target_is_skipped(self):
"""Test edge missing source or target is skipped."""
workflow_data = {
"nodes": [{"id": "start", "type": "start", "title": "Start"}],
"edges": [{"source": "start"}, {"target": "start"}, {}],
}
result = generate_mermaid(workflow_data)
# No edges should be rendered
assert result.count("-->") == 0
class TestToolNodes:
"""Tests for tool node formatting."""
def test_tool_node_includes_tool_key(self):
"""Test tool node includes tool_key in label."""
workflow_data = {
"nodes": [
{
"id": "search",
"type": "tool",
"title": "Search",
"config": {"tool_key": "google/search"},
}
],
"edges": [],
}
result = generate_mermaid(workflow_data)
assert 'search["type=tool|title=Search|tool=google/search"]' in result
def test_tool_node_with_tool_name_fallback(self):
"""Test tool node uses tool_name as fallback."""
workflow_data = {
"nodes": [
{
"id": "tool1",
"type": "tool",
"title": "My Tool",
"config": {"tool_name": "my_tool"},
}
],
"edges": [],
}
result = generate_mermaid(workflow_data)
assert "tool=my_tool" in result
def test_tool_node_missing_tool_key_shows_unknown(self):
"""Test tool node without tool_key shows 'unknown'."""
workflow_data = {
"nodes": [{"id": "tool1", "type": "tool", "title": "Tool", "config": {}}],
"edges": [],
}
result = generate_mermaid(workflow_data)
assert "tool=unknown" in result
class TestNodeFormatting:
"""Tests for node label formatting."""
def test_quotes_in_title_are_escaped(self):
"""Test double quotes in title are replaced with single quotes."""
workflow_data = {
"nodes": [{"id": "llm", "type": "llm", "title": 'Say "Hello"'}],
"edges": [],
}
result = generate_mermaid(workflow_data)
# Double quotes should be replaced
assert "Say 'Hello'" in result
assert 'Say "Hello"' not in result
def test_node_without_id_is_skipped(self):
"""Test node without id is skipped."""
workflow_data = {
"nodes": [{"type": "llm", "title": "No ID"}],
"edges": [],
}
result = generate_mermaid(workflow_data)
# Should only have flowchart header
lines = [line for line in result.split("\n") if line.strip()]
assert len(lines) == 1
def test_node_default_values(self):
"""Test node with missing type/title uses defaults."""
workflow_data = {
"nodes": [{"id": "node1"}],
"edges": [],
}
result = generate_mermaid(workflow_data)
assert "type=unknown" in result
assert "title=Untitled" in result

View File

@@ -0,0 +1,81 @@
from core.workflow.generator.utils.node_repair import NodeRepair
class TestNodeRepair:
"""Tests for NodeRepair utility."""
def test_repair_if_else_valid_operators(self):
"""Test that valid operators remain unchanged."""
nodes = [
{
"id": "node1",
"type": "if-else",
"config": {
"cases": [
{
"conditions": [
{"comparison_operator": "", "value": "1"},
{"comparison_operator": "=", "value": "2"},
]
}
]
},
}
]
result = NodeRepair.repair(nodes)
assert result.was_repaired is False
assert result.nodes == nodes
def test_repair_if_else_invalid_operators(self):
"""Test that invalid operators are normalized."""
nodes = [
{
"id": "node1",
"type": "if-else",
"config": {
"cases": [
{
"conditions": [
{"comparison_operator": ">=", "value": "1"},
{"comparison_operator": "<=", "value": "2"},
{"comparison_operator": "!=", "value": "3"},
{"comparison_operator": "==", "value": "4"},
]
}
]
},
}
]
result = NodeRepair.repair(nodes)
assert result.was_repaired is True
assert len(result.repairs_made) == 4
conditions = result.nodes[0]["config"]["cases"][0]["conditions"]
assert conditions[0]["comparison_operator"] == ""
assert conditions[1]["comparison_operator"] == ""
assert conditions[2]["comparison_operator"] == ""
assert conditions[3]["comparison_operator"] == "="
def test_repair_ignores_other_nodes(self):
"""Test that other node types are ignored."""
nodes = [{"id": "node1", "type": "llm", "config": {"some_field": ">="}}]
result = NodeRepair.repair(nodes)
assert result.was_repaired is False
assert result.nodes[0]["config"]["some_field"] == ">="
def test_repair_handles_missing_config(self):
"""Test robustness against missing fields."""
nodes = [
{
"id": "node1",
"type": "if-else",
# Missing config
},
{
"id": "node2",
"type": "if-else",
"config": {}, # Missing cases
},
]
result = NodeRepair.repair(nodes)
assert result.was_repaired is False

View File

@@ -0,0 +1,99 @@
"""
Tests for node schemas validation.
Ensures that the node configuration stays in sync with registered node types.
"""
from core.workflow.generator.config.node_schemas import (
get_builtin_node_schemas,
validate_node_schemas,
)
class TestNodeSchemasValidation:
"""Tests for node schema validation utilities."""
def test_validate_node_schemas_returns_no_warnings(self):
"""Ensure all registered node types have corresponding schemas."""
warnings = validate_node_schemas()
# If this test fails, it means a new node type was added but
# no schema was defined for it in node_schemas.py
assert len(warnings) == 0, (
f"Missing schemas for node types: {warnings}. "
"Please add schemas for these node types in node_schemas.py "
"or add them to _INTERNAL_NODE_TYPES if they don't need schemas."
)
def test_builtin_node_schemas_not_empty(self):
"""Ensure BUILTIN_NODE_SCHEMAS contains expected node types."""
# get_builtin_node_schemas() includes dynamic schemas
all_schemas = get_builtin_node_schemas()
assert len(all_schemas) > 0
# Core node types should always be present
expected_types = ["llm", "code", "http-request", "if-else"]
for node_type in expected_types:
assert node_type in all_schemas, f"Missing schema for core node type: {node_type}"
def test_schema_structure(self):
"""Ensure each schema has required fields."""
all_schemas = get_builtin_node_schemas()
for node_type, schema in all_schemas.items():
assert "description" in schema, f"Missing 'description' in schema for {node_type}"
# 'parameters' is optional but if present should be a dict
if "parameters" in schema:
assert isinstance(schema["parameters"], dict), (
f"'parameters' in schema for {node_type} should be a dict"
)
class TestNodeSchemasMerged:
"""Tests to verify the merged configuration works correctly."""
def test_fallback_rules_available(self):
"""Ensure FALLBACK_RULES is available from node_schemas."""
from core.workflow.generator.config.node_schemas import FALLBACK_RULES
assert len(FALLBACK_RULES) > 0
assert "http-request" in FALLBACK_RULES
assert "code" in FALLBACK_RULES
assert "llm" in FALLBACK_RULES
def test_node_type_aliases_available(self):
"""Ensure NODE_TYPE_ALIASES is available from node_schemas."""
from core.workflow.generator.config.node_schemas import NODE_TYPE_ALIASES
assert len(NODE_TYPE_ALIASES) > 0
assert NODE_TYPE_ALIASES.get("gpt") == "llm"
assert NODE_TYPE_ALIASES.get("api") == "http-request"
def test_field_name_corrections_available(self):
"""Ensure FIELD_NAME_CORRECTIONS is available from node_schemas."""
from core.workflow.generator.config.node_schemas import (
FIELD_NAME_CORRECTIONS,
get_corrected_field_name,
)
assert len(FIELD_NAME_CORRECTIONS) > 0
# Test the helper function
assert get_corrected_field_name("http-request", "text") == "body"
assert get_corrected_field_name("llm", "response") == "text"
assert get_corrected_field_name("code", "unknown") == "unknown"
def test_config_init_exports(self):
"""Ensure config __init__.py exports all needed symbols."""
from core.workflow.generator.config import (
BUILTIN_NODE_SCHEMAS,
FALLBACK_RULES,
FIELD_NAME_CORRECTIONS,
NODE_TYPE_ALIASES,
get_corrected_field_name,
validate_node_schemas,
)
# Just verify imports work
assert BUILTIN_NODE_SCHEMAS is not None
assert FALLBACK_RULES is not None
assert FIELD_NAME_CORRECTIONS is not None
assert NODE_TYPE_ALIASES is not None
assert callable(get_corrected_field_name)
assert callable(validate_node_schemas)
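test_schema_structure pins the shape of each entry returned by get_builtin_node_schemas(): a required description plus an optional parameters dict. A hypothetical entry, with illustrative values not taken from node_schemas.py:

# Illustrative only; the real entries live in
# core.workflow.generator.config.node_schemas and are not reproduced here.
EXAMPLE_SCHEMA_ENTRY = {
    "http-request": {
        "description": "Call an external HTTP API and expose the response.",
        "parameters": {  # optional, but must be a dict when present
            "url": {"type": "string", "required": True},
            "method": {"type": "string", "required": True},
        },
    },
}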

View File

@@ -0,0 +1,172 @@
"""
Unit tests for the Planner Prompts.
Tests cover:
- Tool formatting for planner context
- Edge cases with missing fields
- Empty tool lists
"""
from core.workflow.generator.prompts.planner_prompts import format_tools_for_planner
class TestFormatToolsForPlanner:
"""Tests for format_tools_for_planner function."""
def test_empty_tools_returns_default_message(self):
"""Test empty tools list returns default message."""
result = format_tools_for_planner([])
assert result == "No external tools available."
def test_none_tools_returns_default_message(self):
"""Test None tools list returns default message."""
result = format_tools_for_planner(None)
assert result == "No external tools available."
def test_single_tool_formatting(self):
"""Test single tool is formatted correctly."""
tools = [
{
"provider_id": "google",
"tool_key": "search",
"tool_label": "Google Search",
"tool_description": "Search the web using Google",
}
]
result = format_tools_for_planner(tools)
assert "[google/search]" in result
assert "Google Search" in result
assert "Search the web using Google" in result
def test_multiple_tools_formatting(self):
"""Test multiple tools are formatted correctly."""
tools = [
{
"provider_id": "google",
"tool_key": "search",
"tool_label": "Search",
"tool_description": "Web search",
},
{
"provider_id": "slack",
"tool_key": "send_message",
"tool_label": "Send Message",
"tool_description": "Send a Slack message",
},
]
result = format_tools_for_planner(tools)
lines = result.strip().split("\n")
assert len(lines) == 2
assert "[google/search]" in result
assert "[slack/send_message]" in result
def test_tool_without_provider_uses_key_only(self):
"""Test tool without provider_id uses tool_key only."""
tools = [
{
"tool_key": "my_tool",
"tool_label": "My Tool",
"tool_description": "A custom tool",
}
]
result = format_tools_for_planner(tools)
# Should format as [my_tool] without provider prefix
assert "[my_tool]" in result
assert "My Tool" in result
def test_tool_with_tool_name_fallback(self):
"""Test tool uses tool_name when tool_key is missing."""
tools = [
{
"tool_name": "fallback_tool",
"description": "Fallback description",
}
]
result = format_tools_for_planner(tools)
assert "fallback_tool" in result
assert "Fallback description" in result
def test_tool_with_missing_description(self):
"""Test tool with missing description doesn't crash."""
tools = [
{
"provider_id": "test",
"tool_key": "tool1",
"tool_label": "Tool 1",
}
]
result = format_tools_for_planner(tools)
assert "[test/tool1]" in result
assert "Tool 1" in result
def test_tool_with_all_missing_fields(self):
"""Test tool with all fields missing uses defaults."""
tools = [{}]
result = format_tools_for_planner(tools)
# Should not crash, may produce minimal output
assert isinstance(result, str)
def test_tool_uses_provider_fallback(self):
"""Test tool uses 'provider' when 'provider_id' is missing."""
tools = [
{
"provider": "openai",
"tool_key": "dalle",
"tool_label": "DALL-E",
"tool_description": "Generate images",
}
]
result = format_tools_for_planner(tools)
assert "[openai/dalle]" in result
def test_tool_label_fallback_to_key(self):
"""Test tool_label falls back to tool_key when missing."""
tools = [
{
"provider_id": "test",
"tool_key": "my_key",
"tool_description": "Description here",
}
]
result = format_tools_for_planner(tools)
# Label should fallback to key
assert "my_key" in result
assert "Description here" in result
class TestPlannerPromptConstants:
"""Tests for planner prompt constant availability."""
def test_planner_system_prompt_exists(self):
"""Test PLANNER_SYSTEM_PROMPT is defined."""
from core.workflow.generator.prompts.planner_prompts import PLANNER_SYSTEM_PROMPT
assert PLANNER_SYSTEM_PROMPT is not None
assert len(PLANNER_SYSTEM_PROMPT) > 0
assert "{tools_summary}" in PLANNER_SYSTEM_PROMPT
def test_planner_user_prompt_exists(self):
"""Test PLANNER_USER_PROMPT is defined."""
from core.workflow.generator.prompts.planner_prompts import PLANNER_USER_PROMPT
assert PLANNER_USER_PROMPT is not None
assert "{instruction}" in PLANNER_USER_PROMPT
def test_planner_system_prompt_has_required_sections(self):
"""Test PLANNER_SYSTEM_PROMPT has required XML sections."""
from core.workflow.generator.prompts.planner_prompts import PLANNER_SYSTEM_PROMPT
assert "<role>" in PLANNER_SYSTEM_PROMPT
assert "<task>" in PLANNER_SYSTEM_PROMPT
assert "<available_tools>" in PLANNER_SYSTEM_PROMPT
assert "<response_format>" in PLANNER_SYSTEM_PROMPT

View File

@@ -0,0 +1,510 @@
"""
Unit tests for the Validation Rule Engine.
Tests cover:
- Structure rules (required fields, types, formats)
- Semantic rules (variable references, edge connections)
- Reference rules (model exists, tool configured, dataset valid)
- ValidationEngine integration
"""
from core.workflow.generator.validation import (
ValidationContext,
ValidationEngine,
)
from core.workflow.generator.validation.rules import (
extract_variable_refs,
is_placeholder,
)
class TestPlaceholderDetection:
"""Tests for placeholder detection utility."""
def test_detects_please_select(self):
assert is_placeholder("PLEASE_SELECT_YOUR_MODEL") is True
def test_detects_your_prefix(self):
assert is_placeholder("YOUR_API_KEY") is True
def test_detects_todo(self):
assert is_placeholder("TODO: fill this in") is True
def test_detects_placeholder(self):
assert is_placeholder("PLACEHOLDER_VALUE") is True
def test_detects_example_prefix(self):
assert is_placeholder("EXAMPLE_URL") is True
def test_detects_replace_prefix(self):
assert is_placeholder("REPLACE_WITH_ACTUAL") is True
def test_case_insensitive(self):
assert is_placeholder("please_select") is True
assert is_placeholder("Please_Select") is True
def test_valid_values_not_detected(self):
assert is_placeholder("https://api.example.com") is False
assert is_placeholder("gpt-4") is False
assert is_placeholder("my_variable") is False
def test_non_string_returns_false(self):
assert is_placeholder(123) is False
assert is_placeholder(None) is False
assert is_placeholder(["list"]) is False
class TestVariableRefExtraction:
"""Tests for variable reference extraction."""
def test_extracts_simple_ref(self):
refs = extract_variable_refs("Hello {{#start.query#}}")
assert refs == [("start", "query")]
def test_extracts_multiple_refs(self):
refs = extract_variable_refs("{{#node1.output#}} and {{#node2.text#}}")
assert refs == [("node1", "output"), ("node2", "text")]
def test_extracts_nested_field(self):
refs = extract_variable_refs("{{#http_request.body#}}")
assert refs == [("http_request", "body")]
def test_no_refs_returns_empty(self):
refs = extract_variable_refs("No references here")
assert refs == []
def test_handles_malformed_refs(self):
refs = extract_variable_refs("{{#invalid}} and {{incomplete#}}")
assert refs == []
class TestValidationContext:
"""Tests for ValidationContext."""
def test_node_map_lookup(self):
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start"},
{"id": "llm_1", "type": "llm"},
]
)
assert ctx.get_node("start") == {"id": "start", "type": "start"}
assert ctx.get_node("nonexistent") is None
def test_model_set(self):
ctx = ValidationContext(
available_models=[
{"provider": "openai", "model": "gpt-4"},
{"provider": "anthropic", "model": "claude-3"},
]
)
assert ctx.has_model("openai", "gpt-4") is True
assert ctx.has_model("anthropic", "claude-3") is True
assert ctx.has_model("openai", "gpt-3.5") is False
def test_tool_set(self):
ctx = ValidationContext(
available_tools=[
{"provider_id": "google", "tool_key": "search", "is_team_authorization": True},
{"provider_id": "slack", "tool_key": "send_message", "is_team_authorization": False},
]
)
assert ctx.has_tool("google/search") is True
assert ctx.has_tool("search") is True
assert ctx.is_tool_configured("google/search") is True
assert ctx.is_tool_configured("slack/send_message") is False
def test_upstream_downstream_nodes(self):
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start"},
{"id": "llm", "type": "llm"},
{"id": "end", "type": "end"},
],
edges=[
{"source": "start", "target": "llm"},
{"source": "llm", "target": "end"},
],
)
assert ctx.get_upstream_nodes("llm") == ["start"]
assert ctx.get_downstream_nodes("llm") == ["end"]
class TestStructureRules:
"""Tests for structure validation rules."""
def test_llm_missing_prompt_template(self):
ctx = ValidationContext(nodes=[{"id": "llm_1", "type": "llm", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
assert result.has_errors
errors = [e for e in result.all_errors if e.rule_id == "llm.prompt_template.required"]
assert len(errors) == 1
assert errors[0].is_fixable is True
def test_llm_with_prompt_template_passes(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {
"prompt_template": [
{"role": "system", "text": "You are helpful"},
{"role": "user", "text": "Hello"},
]
},
}
]
)
engine = ValidationEngine()
result = engine.validate(ctx)
# No prompt_template errors
errors = [e for e in result.all_errors if "prompt_template" in e.rule_id]
assert len(errors) == 0
def test_http_request_missing_url(self):
ctx = ValidationContext(nodes=[{"id": "http_1", "type": "http-request", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "http.url" in e.rule_id]
assert len(errors) == 1
assert errors[0].is_fixable is True
def test_http_request_placeholder_url(self):
ctx = ValidationContext(
nodes=[
{
"id": "http_1",
"type": "http-request",
"config": {"url": "PLEASE_SELECT_YOUR_URL", "method": "GET"},
}
]
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "placeholder" in e.rule_id]
assert len(errors) == 1
def test_code_node_missing_fields(self):
ctx = ValidationContext(nodes=[{"id": "code_1", "type": "code", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
error_rules = {e.rule_id for e in result.all_errors}
assert "code.code.required" in error_rules
assert "code.language.required" in error_rules
def test_knowledge_retrieval_missing_dataset(self):
ctx = ValidationContext(nodes=[{"id": "kb_1", "type": "knowledge-retrieval", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "knowledge.dataset" in e.rule_id]
assert len(errors) == 1
assert errors[0].is_fixable is False # User must configure
class TestSemanticRules:
"""Tests for semantic validation rules."""
def test_valid_variable_reference(self):
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start", "config": {}},
{
"id": "llm_1",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Process: {{#start.query#}}"}]},
},
]
)
engine = ValidationEngine()
result = engine.validate(ctx)
# No variable reference errors
errors = [e for e in result.all_errors if "variable.ref" in e.rule_id]
assert len(errors) == 0
def test_invalid_variable_reference(self):
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start", "config": {}},
{
"id": "llm_1",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Process: {{#nonexistent.field#}}"}]},
},
]
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "variable.ref" in e.rule_id]
assert len(errors) == 1
assert "nonexistent" in errors[0].message
def test_edge_validation(self):
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start", "config": {}},
{"id": "end", "type": "end", "config": {}},
],
edges=[
{"source": "start", "target": "end"},
{"source": "nonexistent", "target": "end"},
],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "edge" in e.rule_id]
assert len(errors) == 1
assert "nonexistent" in errors[0].message
class TestReferenceRules:
"""Tests for reference validation rules (models, tools)."""
def test_llm_missing_model_with_available(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Hi"}]},
}
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if e.rule_id == "model.required"]
assert len(errors) == 1
assert errors[0].is_fixable is True
def test_llm_missing_model_no_available(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Hi"}]},
}
],
available_models=[], # No models available
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if e.rule_id == "model.no_available"]
assert len(errors) == 1
assert errors[0].is_fixable is False
def test_llm_with_valid_model(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {
"prompt_template": [{"role": "user", "text": "Hi"}],
"model": {"provider": "openai", "name": "gpt-4"},
},
}
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if "model" in e.rule_id]
assert len(errors) == 0
def test_llm_with_invalid_model(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {
"prompt_template": [{"role": "user", "text": "Hi"}],
"model": {"provider": "openai", "name": "gpt-99"},
},
}
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if e.rule_id == "model.not_found"]
assert len(errors) == 1
assert errors[0].is_fixable is True
def test_tool_node_not_found(self):
ctx = ValidationContext(
nodes=[
{
"id": "tool_1",
"type": "tool",
"config": {"tool_key": "nonexistent/tool"},
}
],
available_tools=[],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if e.rule_id == "tool.not_found"]
assert len(errors) == 1
def test_tool_node_not_configured(self):
ctx = ValidationContext(
nodes=[
{
"id": "tool_1",
"type": "tool",
"config": {"tool_key": "google/search"},
}
],
available_tools=[{"provider_id": "google", "tool_key": "search", "is_team_authorization": False}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
errors = [e for e in result.all_errors if e.rule_id == "tool.not_configured"]
assert len(errors) == 1
assert errors[0].is_fixable is False
class TestValidationResult:
"""Tests for ValidationResult classification."""
def test_has_errors(self):
ctx = ValidationContext(nodes=[{"id": "llm_1", "type": "llm", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
assert result.has_errors is True
assert result.is_valid is False
def test_has_fixable_errors(self):
ctx = ValidationContext(
nodes=[
{
"id": "llm_1",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Hi"}]},
}
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
assert result.has_fixable_errors is True
assert len(result.fixable_errors) > 0
def test_get_fixable_by_node(self):
ctx = ValidationContext(
nodes=[
{"id": "llm_1", "type": "llm", "config": {}},
{"id": "http_1", "type": "http-request", "config": {}},
]
)
engine = ValidationEngine()
result = engine.validate(ctx)
by_node = result.get_fixable_by_node()
assert "llm_1" in by_node
assert "http_1" in by_node
def test_to_dict(self):
ctx = ValidationContext(nodes=[{"id": "llm_1", "type": "llm", "config": {}}])
engine = ValidationEngine()
result = engine.validate(ctx)
d = result.to_dict()
assert "fixable" in d
assert "user_required" in d
assert "warnings" in d
assert "all_warnings" in d
assert "stats" in d
class TestIntegration:
"""Integration tests for the full validation pipeline."""
def test_complete_workflow_validation(self):
"""Test validation of a complete workflow."""
ctx = ValidationContext(
nodes=[
{
"id": "start",
"type": "start",
"config": {"variables": [{"variable": "query", "type": "text-input"}]},
},
{
"id": "llm_1",
"type": "llm",
"config": {
"model": {"provider": "openai", "name": "gpt-4"},
"prompt_template": [{"role": "user", "text": "{{#start.query#}}"}],
},
},
{
"id": "end",
"type": "end",
"config": {"outputs": [{"variable": "result", "value_selector": ["llm_1", "text"]}]},
},
],
edges=[
{"source": "start", "target": "llm_1"},
{"source": "llm_1", "target": "end"},
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
# Should have no errors
assert result.is_valid is True
assert len(result.fixable_errors) == 0
assert len(result.user_required_errors) == 0
def test_workflow_with_multiple_errors(self):
"""Test workflow with multiple types of errors."""
ctx = ValidationContext(
nodes=[
{"id": "start", "type": "start", "config": {}},
{
"id": "llm_1",
"type": "llm",
"config": {}, # Missing prompt_template and model
},
{
"id": "kb_1",
"type": "knowledge-retrieval",
"config": {"dataset_ids": ["PLEASE_SELECT_YOUR_DATASET"]},
},
{"id": "end", "type": "end", "config": {}},
],
available_models=[{"provider": "openai", "model": "gpt-4"}],
)
engine = ValidationEngine()
result = engine.validate(ctx)
# Should have multiple errors
assert result.has_errors is True
assert len(result.fixable_errors) >= 2 # model, prompt_template
assert len(result.user_required_errors) >= 1 # dataset placeholder
# Check stats
assert result.stats["total_nodes"] == 4
assert result.stats["total_errors"] >= 3

View File

@@ -0,0 +1,434 @@
"""
Unit tests for the Vibe Workflow Validator.
Tests cover:
- Basic validation function
- User-friendly validation hints
- Edge cases and error handling
"""
from core.workflow.generator.utils.workflow_validator import ValidationHint, WorkflowValidator
class TestValidationHint:
"""Tests for ValidationHint dataclass."""
def test_hint_creation(self):
"""Test creating a validation hint."""
hint = ValidationHint(
node_id="llm_1",
field="model",
message="Model is not configured",
severity="error",
)
assert hint.node_id == "llm_1"
assert hint.field == "model"
assert hint.message == "Model is not configured"
assert hint.severity == "error"
def test_hint_with_suggestion(self):
"""Test hint with suggestion."""
hint = ValidationHint(
node_id="http_1",
field="url",
message="URL is required",
severity="error",
suggestion="Add a valid URL like https://api.example.com",
)
assert hint.suggestion is not None
class TestWorkflowValidatorBasic:
"""Tests for basic validation scenarios."""
def test_empty_workflow_is_valid(self):
"""Test empty workflow passes validation."""
workflow_data = {"nodes": [], "edges": []}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
# Empty but valid structure
assert is_valid is True
assert len(hints) == 0
def test_minimal_valid_workflow(self):
"""Test minimal Start → End workflow."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{"id": "end", "type": "end", "config": {}},
],
"edges": [{"source": "start", "target": "end"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
assert is_valid is True
def test_complete_workflow_with_llm(self):
"""Test complete workflow with LLM node."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {"variables": []}},
{
"id": "llm",
"type": "llm",
"config": {
"model": {"provider": "openai", "name": "gpt-4"},
"prompt_template": [{"role": "user", "text": "Hello"}],
},
},
{"id": "end", "type": "end", "config": {"outputs": []}},
],
"edges": [
{"source": "start", "target": "llm"},
{"source": "llm", "target": "end"},
],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
# Should pass with no critical errors
errors = [h for h in hints if h.severity == "error"]
assert len(errors) == 0
class TestVariableReferenceValidation:
"""Tests for variable reference validation."""
def test_valid_variable_reference(self):
"""Test valid variable reference passes."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "llm",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "Query: {{#start.query#}}"}]},
},
],
"edges": [{"source": "start", "target": "llm"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
ref_errors = [h for h in hints if "reference" in h.message.lower()]
assert len(ref_errors) == 0
def test_invalid_variable_reference(self):
"""Test invalid variable reference generates hint."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "llm",
"type": "llm",
"config": {"prompt_template": [{"role": "user", "text": "{{#nonexistent.field#}}"}]},
},
],
"edges": [{"source": "start", "target": "llm"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
# Should have a hint about invalid reference
ref_hints = [h for h in hints if "nonexistent" in h.message or "reference" in h.message.lower()]
assert len(ref_hints) >= 1
class TestEdgeValidation:
"""Tests for edge validation."""
def test_edge_with_invalid_source(self):
"""Test edge with non-existent source generates hint."""
workflow_data = {
"nodes": [{"id": "end", "type": "end", "config": {}}],
"edges": [{"source": "nonexistent", "target": "end"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
# Should have hint about invalid edge
edge_hints = [h for h in hints if "edge" in h.message.lower() or "source" in h.message.lower()]
assert len(edge_hints) >= 1
def test_edge_with_invalid_target(self):
"""Test edge with non-existent target generates hint."""
workflow_data = {
"nodes": [{"id": "start", "type": "start", "config": {}}],
"edges": [{"source": "start", "target": "nonexistent"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
edge_hints = [h for h in hints if "edge" in h.message.lower() or "target" in h.message.lower()]
assert len(edge_hints) >= 1
class TestToolValidation:
"""Tests for tool node validation."""
def test_tool_node_found_in_available(self):
"""Test tool node that exists in available tools."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "tool1",
"type": "tool",
"config": {"tool_key": "google/search"},
},
{"id": "end", "type": "end", "config": {}},
],
"edges": [{"source": "start", "target": "tool1"}, {"source": "tool1", "target": "end"}],
}
available_tools = [{"provider_id": "google", "tool_key": "search", "is_team_authorization": True}]
is_valid, hints = WorkflowValidator.validate(workflow_data, available_tools)
tool_errors = [h for h in hints if h.severity == "error" and "tool" in h.message.lower()]
assert len(tool_errors) == 0
def test_tool_node_not_found(self):
"""Test tool node not in available tools generates hint."""
workflow_data = {
"nodes": [
{
"id": "tool1",
"type": "tool",
"config": {"tool_key": "unknown/tool"},
}
],
"edges": [],
}
available_tools = []
is_valid, hints = WorkflowValidator.validate(workflow_data, available_tools)
tool_hints = [h for h in hints if "tool" in h.message.lower()]
assert len(tool_hints) >= 1
class TestQuestionClassifierValidation:
"""Tests for question-classifier node validation."""
def test_question_classifier_with_classes(self):
"""Test question-classifier with valid classes."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "classifier",
"type": "question-classifier",
"config": {
"classes": [
{"id": "class1", "name": "Class 1"},
{"id": "class2", "name": "Class 2"},
],
"model": {"provider": "openai", "name": "gpt-4", "mode": "chat"},
},
},
{"id": "h1", "type": "llm", "config": {}},
{"id": "h2", "type": "llm", "config": {}},
{"id": "end", "type": "end", "config": {}},
],
"edges": [
{"source": "start", "target": "classifier"},
{"source": "classifier", "sourceHandle": "class1", "target": "h1"},
{"source": "classifier", "sourceHandle": "class2", "target": "h2"},
{"source": "h1", "target": "end"},
{"source": "h2", "target": "end"},
],
}
available_models = [{"provider": "openai", "model": "gpt-4", "mode": "chat"}]
is_valid, hints = WorkflowValidator.validate(workflow_data, [], available_models=available_models)
class_errors = [h for h in hints if "class" in h.message.lower() and h.severity == "error"]
assert len(class_errors) == 0
def test_question_classifier_missing_classes(self):
"""Test question-classifier without classes generates hint."""
workflow_data = {
"nodes": [
{
"id": "classifier",
"type": "question-classifier",
"config": {"model": {"provider": "openai", "name": "gpt-4", "mode": "chat"}},
}
],
"edges": [],
}
available_models = [{"provider": "openai", "model": "gpt-4", "mode": "chat"}]
is_valid, hints = WorkflowValidator.validate(workflow_data, [], available_models=available_models)
# Should have hint about missing classes
class_hints = [h for h in hints if "class" in h.message.lower()]
assert len(class_hints) >= 1
class TestHttpRequestValidation:
"""Tests for HTTP request node validation."""
def test_http_request_with_url(self):
"""Test HTTP request with valid URL."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "http",
"type": "http-request",
"config": {"url": "https://api.example.com", "method": "GET"},
},
{"id": "end", "type": "end", "config": {}},
],
"edges": [{"source": "start", "target": "http"}, {"source": "http", "target": "end"}],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
url_errors = [h for h in hints if "url" in h.message.lower() and h.severity == "error"]
assert len(url_errors) == 0
def test_http_request_missing_url(self):
"""Test HTTP request without URL generates hint."""
workflow_data = {
"nodes": [
{
"id": "http",
"type": "http-request",
"config": {"method": "GET"},
}
],
"edges": [],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
url_hints = [h for h in hints if "url" in h.message.lower()]
assert len(url_hints) >= 1
class TestParameterExtractorValidation:
"""Tests for parameter-extractor node validation."""
def test_parameter_extractor_valid_params(self):
"""Test parameter-extractor with valid parameters."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "extractor",
"type": "parameter-extractor",
"config": {
"instruction": "Extract info",
"parameters": [
{
"name": "name",
"type": "string",
"description": "Name",
"required": True,
}
],
"model": {"provider": "openai", "name": "gpt-4", "mode": "chat"},
},
},
{"id": "end", "type": "end", "config": {}},
],
"edges": [{"source": "start", "target": "extractor"}, {"source": "extractor", "target": "end"}],
}
available_models = [{"provider": "openai", "model": "gpt-4", "mode": "chat"}]
is_valid, hints = WorkflowValidator.validate(workflow_data, [], available_models=available_models)
errors = [h for h in hints if h.severity == "error"]
assert len(errors) == 0
def test_parameter_extractor_missing_required_field(self):
"""Test parameter-extractor missing 'required' field in parameter item."""
workflow_data = {
"nodes": [
{
"id": "extractor",
"type": "parameter-extractor",
"config": {
"instruction": "Extract info",
"parameters": [
{
"name": "name",
"type": "string",
"description": "Name",
# Missing 'required'
}
],
"model": {"provider": "openai", "name": "gpt-4", "mode": "chat"},
},
}
],
"edges": [],
}
available_models = [{"provider": "openai", "model": "gpt-4", "mode": "chat"}]
is_valid, hints = WorkflowValidator.validate(workflow_data, [], available_models=available_models)
errors = [h for h in hints if "required" in h.message and h.severity == "error"]
assert len(errors) >= 1
assert "parameter-extractor" in errors[0].node_type
class TestIfElseValidation:
"""Tests for if-else node validation."""
def test_if_else_valid_operators(self):
"""Test if-else with valid operators."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "ifelse",
"type": "if-else",
"config": {
"cases": [{"case_id": "c1", "conditions": [{"comparison_operator": "", "value": "1"}]}]
},
},
{"id": "t", "type": "llm", "config": {}},
{"id": "f", "type": "llm", "config": {}},
{"id": "end", "type": "end", "config": {}},
],
"edges": [
{"source": "start", "target": "ifelse"},
{"source": "ifelse", "sourceHandle": "true", "target": "t"},
{"source": "ifelse", "sourceHandle": "false", "target": "f"},
{"source": "t", "target": "end"},
{"source": "f", "target": "end"},
],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
errors = [h for h in hints if h.severity == "error"]
# Other node types in this fixture may surface unrelated hints (the LLM nodes
# carry no model config), so filter specifically for operator errors.
operator_errors = [h for h in errors if "operator" in h.message]
assert len(operator_errors) == 0
def test_if_else_invalid_operators(self):
"""Test if-else with invalid operators."""
workflow_data = {
"nodes": [
{"id": "start", "type": "start", "config": {}},
{
"id": "ifelse",
"type": "if-else",
"config": {
"cases": [{"case_id": "c1", "conditions": [{"comparison_operator": ">=", "value": "1"}]}]
},
},
{"id": "t", "type": "llm", "config": {}},
{"id": "f", "type": "llm", "config": {}},
{"id": "end", "type": "end", "config": {}},
],
"edges": [
{"source": "start", "target": "ifelse"},
{"source": "ifelse", "sourceHandle": "true", "target": "t"},
{"source": "ifelse", "sourceHandle": "false", "target": "f"},
{"source": "t", "target": "end"},
{"source": "f", "target": "end"},
],
}
is_valid, hints = WorkflowValidator.validate(workflow_data, [])
operator_errors = [h for h in hints if "operator" in h.message and h.severity == "error"]
assert len(operator_errors) > 0
assert "" in operator_errors[0].suggestion

View File

@@ -0,0 +1,197 @@
from unittest.mock import MagicMock, patch
import pytest
from core.tools.entities.tool_entities import ToolProviderType
from core.workflow.nodes.agent.agent_node import AgentNode
class TestInferToolProviderType:
"""Test cases for AgentNode._infer_tool_provider_type method."""
def test_infer_type_from_config_workflow(self):
"""Test inferring workflow provider type from config."""
tool_config = {
"type": "workflow",
"provider_name": "workflow-provider-id",
}
tenant_id = "test-tenant"
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.WORKFLOW
def test_infer_type_from_config_builtin(self):
"""Test inferring builtin provider type from config."""
tool_config = {
"type": "builtin",
"provider_name": "builtin-provider-id",
}
tenant_id = "test-tenant"
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.BUILT_IN
def test_infer_type_from_config_api(self):
"""Test inferring API provider type from config."""
tool_config = {
"type": "api",
"provider_name": "api-provider-id",
}
tenant_id = "test-tenant"
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.API
def test_infer_type_from_config_mcp(self):
"""Test inferring MCP provider type from config."""
tool_config = {
"type": "mcp",
"provider_name": "mcp-provider-id",
}
tenant_id = "test-tenant"
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.MCP
def test_infer_type_invalid_config_value_raises_error(self):
"""Test that invalid type value in config raises ValueError."""
tool_config = {
"type": "invalid-type",
"provider_name": "workflow-provider-id",
}
tenant_id = "test-tenant"
with pytest.raises(ValueError):
AgentNode._infer_tool_provider_type(tool_config, tenant_id)
def test_infer_workflow_type_from_database(self):
"""Test inferring workflow provider type from database."""
tool_config = {
"provider_name": "workflow-provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# First query (WorkflowToolProvider) returns a result
mock_session.scalar.return_value = True
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.WORKFLOW
# Should only query once (after finding WorkflowToolProvider)
assert mock_session.scalar.call_count == 1
def test_infer_mcp_type_from_database(self):
"""Test inferring MCP provider type from database."""
tool_config = {
"provider_name": "mcp-provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# First query (WorkflowToolProvider) returns None
# Second query (MCPToolProvider) returns a result
mock_session.scalar.side_effect = [None, True]
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.MCP
assert mock_session.scalar.call_count == 2
def test_infer_api_type_from_database(self):
"""Test inferring API provider type from database."""
tool_config = {
"provider_name": "api-provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# First query (WorkflowToolProvider) returns None
# Second query (MCPToolProvider) returns None
# Third query (ApiToolProvider) returns a result
mock_session.scalar.side_effect = [None, None, True]
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.API
assert mock_session.scalar.call_count == 3
def test_infer_builtin_type_from_database(self):
"""Test inferring builtin provider type from database."""
tool_config = {
"provider_name": "builtin-provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# First three queries return None
# Fourth query (BuiltinToolProvider) returns a result
mock_session.scalar.side_effect = [None, None, None, True]
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.BUILT_IN
assert mock_session.scalar.call_count == 4
def test_infer_type_default_when_not_found(self):
"""Test raising AgentNodeError when provider is not found in database."""
tool_config = {
"provider_name": "unknown-provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# All queries return None
mock_session.scalar.return_value = None
# Current implementation raises AgentNodeError when provider not found
from core.workflow.nodes.agent.exc import AgentNodeError
with pytest.raises(AgentNodeError, match="Tool provider with ID 'unknown-provider-id' not found"):
AgentNode._infer_tool_provider_type(tool_config, tenant_id)
def test_infer_type_default_when_no_provider_name(self):
"""Test defaulting to BUILT_IN when provider_name is missing."""
tool_config = {}
tenant_id = "test-tenant"
result = AgentNode._infer_tool_provider_type(tool_config, tenant_id)
assert result == ToolProviderType.BUILT_IN
def test_infer_type_database_exception_propagates(self):
"""Test that database exception propagates (current implementation doesn't catch it)."""
tool_config = {
"provider_name": "provider-id",
}
tenant_id = "test-tenant"
with patch("core.db.session_factory.session_factory.create_session") as mock_create_session:
mock_session = MagicMock()
mock_create_session.return_value.__enter__.return_value = mock_session
# Database query raises exception
mock_session.scalar.side_effect = Exception("Database error")
# Current implementation doesn't catch exceptions, so it propagates
with pytest.raises(Exception, match="Database error"):
AgentNode._infer_tool_provider_type(tool_config, tenant_id)
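Taken together, the database-path tests encode a fixed probe order for inferring the provider type; a rough sketch of that chain (illustrative, not the node's actual query code):

from core.tools.entities.tool_entities import ToolProviderType

# Probe order inferred from the mocked scalar() side effects above; the real
# lookup issues one query per provider table and raises AgentNodeError when
# every probe misses.
PROBE_ORDER = [
    ("WorkflowToolProvider", ToolProviderType.WORKFLOW),
    ("MCPToolProvider", ToolProviderType.MCP),
    ("ApiToolProvider", ToolProviderType.API),
    ("BuiltinToolProvider", ToolProviderType.BUILT_IN),
]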

View File

@@ -217,7 +217,6 @@ class TestTemplateTransformNode:
@patch(
"core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template"
)
@patch("core.workflow.nodes.template_transform.template_transform_node.MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH", 10)
def test_run_output_length_exceeds_limit(
self, mock_execute, basic_node_data, mock_graph, mock_graph_runtime_state, graph_init_params
):
@@ -231,6 +230,7 @@ class TestTemplateTransformNode:
graph_init_params=graph_init_params,
graph=mock_graph,
graph_runtime_state=mock_graph_runtime_state,
max_output_length=10,
)
result = node._run()

View File

@@ -0,0 +1,276 @@
"""Unit tests for account deletion synchronization.
This test module verifies the enterprise account deletion sync functionality,
including Redis queuing, error handling, and community vs enterprise behavior.
"""
from unittest.mock import MagicMock, patch
import pytest
from redis import RedisError
from services.enterprise.account_deletion_sync import (
_queue_task,
sync_account_deletion,
sync_workspace_member_removal,
)
class TestQueueTask:
"""Unit tests for the _queue_task helper function."""
@pytest.fixture
def mock_redis_client(self):
"""Mock redis_client for testing."""
with patch("services.enterprise.account_deletion_sync.redis_client") as mock_redis:
yield mock_redis
@pytest.fixture
def mock_uuid(self):
"""Mock UUID generation for predictable task IDs."""
with patch("services.enterprise.account_deletion_sync.uuid.uuid4") as mock_uuid_gen:
mock_uuid_gen.return_value = MagicMock(hex="test-task-id-1234")
yield mock_uuid_gen
def test_queue_task_success(self, mock_redis_client, mock_uuid):
"""Test successful task queueing to Redis."""
# Arrange
workspace_id = "ws-123"
member_id = "member-456"
source = "test_source"
# Act
result = _queue_task(workspace_id=workspace_id, member_id=member_id, source=source)
# Assert
assert result is True
mock_redis_client.lpush.assert_called_once()
# Verify the task payload structure
call_args = mock_redis_client.lpush.call_args[0]
assert call_args[0] == "enterprise:member:sync:queue"
import json
task_data = json.loads(call_args[1])
assert task_data["workspace_id"] == workspace_id
assert task_data["member_id"] == member_id
assert task_data["source"] == source
assert task_data["type"] == "sync_member_deletion_from_workspace"
assert task_data["retry_count"] == 0
assert "task_id" in task_data
assert "created_at" in task_data
def test_queue_task_redis_error(self, mock_redis_client, caplog):
"""Test handling of Redis connection errors."""
# Arrange
mock_redis_client.lpush.side_effect = RedisError("Connection failed")
# Act
result = _queue_task(workspace_id="ws-123", member_id="member-456", source="test_source")
# Assert
assert result is False
assert "Failed to queue account deletion sync" in caplog.text
def test_queue_task_type_error(self, mock_redis_client, caplog):
"""Test handling of JSON serialization errors."""
# Arrange
mock_redis_client.lpush.side_effect = TypeError("Cannot serialize")
# Act
result = _queue_task(workspace_id="ws-123", member_id="member-456", source="test_source")
# Assert
assert result is False
assert "Failed to queue account deletion sync" in caplog.text
class TestSyncWorkspaceMemberRemoval:
"""Unit tests for sync_workspace_member_removal function."""
@pytest.fixture
def mock_queue_task(self):
"""Mock _queue_task for testing."""
with patch("services.enterprise.account_deletion_sync._queue_task") as mock_queue:
mock_queue.return_value = True
yield mock_queue
def test_sync_workspace_member_removal_enterprise_enabled(self, mock_queue_task):
"""Test sync when ENTERPRISE_ENABLED is True."""
# Arrange
workspace_id = "ws-123"
member_id = "member-456"
source = "workspace_member_removed"
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_workspace_member_removal(workspace_id=workspace_id, member_id=member_id, source=source)
# Assert
assert result is True
mock_queue_task.assert_called_once_with(workspace_id=workspace_id, member_id=member_id, source=source)
def test_sync_workspace_member_removal_enterprise_disabled(self, mock_queue_task):
"""Test sync when ENTERPRISE_ENABLED is False (community edition)."""
# Arrange
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = False
# Act
result = sync_workspace_member_removal(workspace_id="ws-123", member_id="member-456", source="test_source")
# Assert
assert result is True
mock_queue_task.assert_not_called()
def test_sync_workspace_member_removal_queue_failure(self, mock_queue_task):
"""Test handling of queue task failures."""
# Arrange
mock_queue_task.return_value = False
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_workspace_member_removal(workspace_id="ws-123", member_id="member-456", source="test_source")
# Assert
assert result is False
class TestSyncAccountDeletion:
"""Unit tests for sync_account_deletion function."""
@pytest.fixture
def mock_db_session(self):
"""Mock database session for testing."""
with patch("services.enterprise.account_deletion_sync.db.session") as mock_session:
yield mock_session
@pytest.fixture
def mock_queue_task(self):
"""Mock _queue_task for testing."""
with patch("services.enterprise.account_deletion_sync._queue_task") as mock_queue:
mock_queue.return_value = True
yield mock_queue
def test_sync_account_deletion_enterprise_disabled(self, mock_db_session, mock_queue_task):
"""Test sync when ENTERPRISE_ENABLED is False (community edition)."""
# Arrange
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = False
# Act
result = sync_account_deletion(account_id="acc-123", source="account_deleted")
# Assert
assert result is True
mock_db_session.query.assert_not_called()
mock_queue_task.assert_not_called()
def test_sync_account_deletion_multiple_workspaces(self, mock_db_session, mock_queue_task):
"""Test sync for account with multiple workspace memberships."""
# Arrange
account_id = "acc-123"
# Mock workspace joins
mock_join1 = MagicMock()
mock_join1.tenant_id = "tenant-1"
mock_join2 = MagicMock()
mock_join2.tenant_id = "tenant-2"
mock_join3 = MagicMock()
mock_join3.tenant_id = "tenant-3"
mock_query = MagicMock()
mock_query.filter_by.return_value.all.return_value = [mock_join1, mock_join2, mock_join3]
mock_db_session.query.return_value = mock_query
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_account_deletion(account_id=account_id, source="account_deleted")
# Assert
assert result is True
assert mock_queue_task.call_count == 3
# Verify each workspace was queued
mock_queue_task.assert_any_call(workspace_id="tenant-1", member_id=account_id, source="account_deleted")
mock_queue_task.assert_any_call(workspace_id="tenant-2", member_id=account_id, source="account_deleted")
mock_queue_task.assert_any_call(workspace_id="tenant-3", member_id=account_id, source="account_deleted")
def test_sync_account_deletion_no_workspaces(self, mock_db_session, mock_queue_task):
"""Test sync for account with no workspace memberships."""
# Arrange
mock_query = MagicMock()
mock_query.filter_by.return_value.all.return_value = []
mock_db_session.query.return_value = mock_query
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_account_deletion(account_id="acc-123", source="account_deleted")
# Assert
assert result is True
mock_queue_task.assert_not_called()
def test_sync_account_deletion_partial_failure(self, mock_db_session, mock_queue_task):
"""Test sync when some tasks fail to queue."""
# Arrange
account_id = "acc-123"
# Mock workspace joins
mock_join1 = MagicMock()
mock_join1.tenant_id = "tenant-1"
mock_join2 = MagicMock()
mock_join2.tenant_id = "tenant-2"
mock_join3 = MagicMock()
mock_join3.tenant_id = "tenant-3"
mock_query = MagicMock()
mock_query.filter_by.return_value.all.return_value = [mock_join1, mock_join2, mock_join3]
mock_db_session.query.return_value = mock_query
# Mock queue_task to fail for second workspace
def queue_side_effect(workspace_id, member_id, source):
return workspace_id != "tenant-2"
mock_queue_task.side_effect = queue_side_effect
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_account_deletion(account_id=account_id, source="account_deleted")
# Assert
assert result is False # Should return False if any task fails
assert mock_queue_task.call_count == 3
def test_sync_account_deletion_all_failures(self, mock_db_session, mock_queue_task):
"""Test sync when all tasks fail to queue."""
# Arrange
mock_join = MagicMock()
mock_join.tenant_id = "tenant-1"
mock_query = MagicMock()
mock_query.filter_by.return_value.all.return_value = [mock_join]
mock_db_session.query.return_value = mock_query
mock_queue_task.return_value = False
with patch("services.enterprise.account_deletion_sync.dify_config") as mock_config:
mock_config.ENTERPRISE_ENABLED = True
# Act
result = sync_account_deletion(account_id="acc-123", source="account_deleted")
# Assert
assert result is False
mock_queue_task.assert_called_once()
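test_queue_task_success pins the queue name and the payload fields; an example task as it would sit in Redis, with illustrative values:

import json

# Queue name and field names come from the assertions above; the concrete
# values (and the created_at format) are illustrative.
example_task = {
    "task_id": "test-task-id-1234",
    "type": "sync_member_deletion_from_workspace",
    "workspace_id": "ws-123",
    "member_id": "member-456",
    "source": "account_deleted",
    "retry_count": 0,
    "created_at": "2026-02-06T00:00:00Z",
}
# redis_client.lpush("enterprise:member:sync:queue", json.dumps(example_task))
print(json.dumps(example_task))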

View File

@@ -350,7 +350,7 @@ class TestDeleteWorkflowArchiveLogs:
mock_query.where.return_value = mock_delete_query
mock_db.session.query.return_value = mock_query
delete_func("log-1")
delete_func(mock_db.session, "log-1")
mock_db.session.query.assert_called_once_with(WorkflowArchiveLog)
mock_query.where.assert_called_once()

api/uv.lock (generated)
View File

@@ -1368,7 +1368,7 @@ wheels = [
[[package]]
name = "dify-api"
version = "1.12.0"
version = "1.12.1"
source = { virtual = "." }
dependencies = [
{ name = "aliyun-log-python-sdk" },

View File

@@ -21,7 +21,7 @@ services:
# API service
api:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -132,7 +132,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.12.0
image: langgenius/dify-web:1.12.1
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@@ -707,7 +707,7 @@ services:
# API service
api:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -749,7 +749,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -788,7 +788,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.12.0
image: langgenius/dify-api:1.12.1
restart: always
environment:
# Use the shared environment variables.
@@ -818,7 +818,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.12.0
image: langgenius/dify-web:1.12.1
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@@ -10,9 +10,15 @@ type VersionSelectorProps = {
versionLen: number
value: number
onChange: (index: number) => void
contentClassName?: string
}
const VersionSelector: React.FC<VersionSelectorProps> = ({ versionLen, value, onChange }) => {
const VersionSelector: React.FC<VersionSelectorProps> = ({
versionLen,
value,
onChange,
contentClassName,
}) => {
const { t } = useTranslation()
const [isOpen, {
setFalse: handleOpenFalse,
@@ -64,6 +70,7 @@ const VersionSelector: React.FC<VersionSelectorProps> = ({ versionLen, value, on
</PortalToFollowElemTrigger>
<PortalToFollowElemContent className={cn(
'z-[99]',
contentClassName,
)}
>
<div

View File

@@ -1,3 +1,4 @@
import type { App } from '@/types/app'
import { fireEvent, render, screen, waitFor } from '@testing-library/react'
import { useRouter } from 'next/navigation'
import { afterAll, beforeEach, describe, expect, it, vi } from 'vitest'
@@ -13,8 +14,8 @@ import { getRedirection } from '@/utils/app-redirection'
import CreateAppModal from './index'
vi.mock('ahooks', () => ({
useDebounceFn: (fn: (...args: any[]) => any) => {
const run = (...args: any[]) => fn(...args)
useDebounceFn: <T extends (...args: unknown[]) => unknown>(fn: T) => {
const run = (...args: Parameters<T>) => fn(...args)
const cancel = vi.fn()
const flush = vi.fn()
return { run, cancel, flush }
@@ -83,7 +84,7 @@ describe('CreateAppModal', () => {
beforeEach(() => {
vi.clearAllMocks()
mockUseRouter.mockReturnValue({ push: mockPush } as any)
mockUseRouter.mockReturnValue({ push: mockPush } as unknown as ReturnType<typeof useRouter>)
mockUseProviderContext.mockReturnValue({
plan: {
type: AppModeEnum.ADVANCED_CHAT,
@@ -92,10 +93,10 @@ describe('CreateAppModal', () => {
reset: {},
},
enableBilling: true,
} as any)
} as unknown as ReturnType<typeof useProviderContext>)
mockUseAppContext.mockReturnValue({
isCurrentWorkspaceEditor: true,
} as any)
} as unknown as ReturnType<typeof useAppContext>)
mockSetItem.mockClear()
Object.defineProperty(window, 'localStorage', {
value: {
@@ -118,8 +119,8 @@ describe('CreateAppModal', () => {
})
it('creates an app, notifies success, and fires callbacks', async () => {
const mockApp = { id: 'app-1', mode: AppModeEnum.ADVANCED_CHAT }
mockCreateApp.mockResolvedValue(mockApp as any)
const mockApp: Partial<App> = { id: 'app-1', mode: AppModeEnum.ADVANCED_CHAT }
mockCreateApp.mockResolvedValue(mockApp as App)
const { onClose, onSuccess } = renderModal()
const nameInput = screen.getByPlaceholderText('app.newApp.appNamePlaceholder')

View File

@@ -4,7 +4,7 @@ import type { FC } from 'react'
import { RiQuestionLine } from '@remixicon/react'
import { useBoolean } from 'ahooks'
import * as React from 'react'
import { useEffect, useRef, useState } from 'react'
import { useCallback, useEffect, useRef, useState } from 'react'
import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem'
import { cn } from '@/utils/classnames'
import { tooltipManager } from './TooltipManager'
@@ -61,6 +61,20 @@ const Tooltip: FC<TooltipProps> = ({
isHoverTriggerRef.current = isHoverTrigger
}, [isHoverTrigger])
const closeTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
const clearCloseTimeout = useCallback(() => {
if (closeTimeoutRef.current) {
clearTimeout(closeTimeoutRef.current)
closeTimeoutRef.current = null
}
}, [])
useEffect(() => {
return () => {
clearCloseTimeout()
}
}, [clearCloseTimeout])
const close = () => setOpen(false)
const handleLeave = (isTrigger: boolean) => {
@@ -71,7 +85,9 @@ const Tooltip: FC<TooltipProps> = ({
// give time to move to the popup
if (needsDelay) {
setTimeout(() => {
clearCloseTimeout()
closeTimeoutRef.current = setTimeout(() => {
closeTimeoutRef.current = null
if (!isHoverPopupRef.current && !isHoverTriggerRef.current) {
setOpen(false)
tooltipManager.clear(close)
@@ -79,6 +95,7 @@ const Tooltip: FC<TooltipProps> = ({
}, 300)
}
else {
clearCloseTimeout()
setOpen(false)
tooltipManager.clear(close)
}
@@ -95,6 +112,7 @@ const Tooltip: FC<TooltipProps> = ({
onClick={() => triggerMethod === 'click' && setOpen(v => !v)}
onMouseEnter={() => {
if (triggerMethod === 'hover') {
clearCloseTimeout()
setHoverTrigger()
tooltipManager.register(close)
setOpen(true)
@@ -115,7 +133,12 @@ const Tooltip: FC<TooltipProps> = ({
!noDecoration && 'system-xs-regular relative max-w-[300px] break-words rounded-md bg-components-panel-bg px-3 py-2 text-left text-text-tertiary shadow-lg',
popupClassName,
)}
onMouseEnter={() => triggerMethod === 'hover' && setHoverPopup()}
onMouseEnter={() => {
if (triggerMethod === 'hover') {
clearCloseTimeout()
setHoverPopup()
}
}}
onMouseLeave={() => triggerMethod === 'hover' && handleLeave(false)}
>
{popupContent}

View File

@@ -216,13 +216,22 @@ describe('image-uploader utils', () => {
type FileCallback = (file: MockFile) => void
type EntriesCallback = (entries: FileSystemEntry[]) => void
// Helper to create mock FileSystemEntry with required properties
const createMockEntry = (props: {
isFile: boolean
isDirectory: boolean
name?: string
file?: (callback: FileCallback) => void
createReader?: () => { readEntries: (callback: EntriesCallback) => void }
}): FileSystemEntry => props as unknown as FileSystemEntry
it('should resolve with file array for file entry', async () => {
const mockFile: MockFile = { name: 'test.png' }
const mockEntry = {
const mockEntry = createMockEntry({
isFile: true,
isDirectory: false,
file: (callback: FileCallback) => callback(mockFile),
}
})
const result = await traverseFileEntry(mockEntry)
expect(result).toHaveLength(1)
@@ -232,11 +241,11 @@ describe('image-uploader utils', () => {
it('should resolve with file array with prefix for nested file', async () => {
const mockFile: MockFile = { name: 'test.png' }
const mockEntry = {
const mockEntry = createMockEntry({
isFile: true,
isDirectory: false,
file: (callback: FileCallback) => callback(mockFile),
}
})
const result = await traverseFileEntry(mockEntry, 'folder/')
expect(result).toHaveLength(1)
@@ -244,24 +253,24 @@ describe('image-uploader utils', () => {
})
it('should resolve empty array for unknown entry type', async () => {
const mockEntry = {
const mockEntry = createMockEntry({
isFile: false,
isDirectory: false,
}
})
const result = await traverseFileEntry(mockEntry)
expect(result).toEqual([])
})
it('should handle directory with no files', async () => {
const mockEntry = {
const mockEntry = createMockEntry({
isFile: false,
isDirectory: true,
name: 'empty-folder',
createReader: () => ({
readEntries: (callback: EntriesCallback) => callback([]),
}),
}
})
const result = await traverseFileEntry(mockEntry)
expect(result).toEqual([])
@@ -271,20 +280,20 @@ describe('image-uploader utils', () => {
const mockFile1: MockFile = { name: 'file1.png' }
const mockFile2: MockFile = { name: 'file2.png' }
const mockFileEntry1 = {
const mockFileEntry1 = createMockEntry({
isFile: true,
isDirectory: false,
file: (callback: FileCallback) => callback(mockFile1),
}
})
const mockFileEntry2 = {
const mockFileEntry2 = createMockEntry({
isFile: true,
isDirectory: false,
file: (callback: FileCallback) => callback(mockFile2),
}
})
let readCount = 0
const mockEntry = {
const mockEntry = createMockEntry({
isFile: false,
isDirectory: true,
name: 'folder',
@@ -292,14 +301,14 @@ describe('image-uploader utils', () => {
readEntries: (callback: EntriesCallback) => {
if (readCount === 0) {
readCount++
callback([mockFileEntry1, mockFileEntry2] as unknown as FileSystemEntry[])
callback([mockFileEntry1, mockFileEntry2])
}
else {
callback([])
}
},
}),
}
})
const result = await traverseFileEntry(mockEntry)
expect(result).toHaveLength(2)

View File

@@ -18,17 +18,17 @@ type FileWithPath = {
relativePath?: string
} & File
export const traverseFileEntry = (entry: any, prefix = ''): Promise<FileWithPath[]> => {
export const traverseFileEntry = (entry: FileSystemEntry, prefix = ''): Promise<FileWithPath[]> => {
return new Promise((resolve) => {
if (entry.isFile) {
entry.file((file: FileWithPath) => {
(entry as FileSystemFileEntry).file((file: FileWithPath) => {
file.relativePath = `${prefix}${file.name}`
resolve([file])
})
}
else if (entry.isDirectory) {
const reader = entry.createReader()
const entries: any[] = []
const reader = (entry as FileSystemDirectoryEntry).createReader()
const entries: FileSystemEntry[] = []
const read = () => {
reader.readEntries(async (results: FileSystemEntry[]) => {
if (!results.length) {

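A usage sketch for the now strictly typed traverseFileEntry (the import path and the hand-built entry are assumptions; the cast mirrors the createMockEntry helper in the test above).

import { traverseFileEntry } from './utils' // path assumed

// Minimal FileSystemFileEntry-like object; real entries come from DataTransferItem.webkitGetAsEntry().
const fakeEntry = {
  isFile: true,
  isDirectory: false,
  name: 'photo.png',
  file: (cb: (file: File) => void) => cb(new File(['bytes'], 'photo.png')),
} as unknown as FileSystemEntry

traverseFileEntry(fakeEntry, 'album/').then((files) => {
  // Each resolved file carries relativePath, e.g. 'album/photo.png'.
  console.log(files.map(f => f.relativePath))
})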
View File

@@ -0,0 +1,218 @@
'use client'
import { useDebounceFn } from 'ahooks'
import { useRouter } from 'next/navigation'
import { useCallback, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import { ToastContext } from '@/app/components/base/toast'
import { usePluginDependencies } from '@/app/components/workflow/plugin-dependency/hooks'
import {
DSLImportMode,
DSLImportStatus,
} from '@/models/app'
import { useImportPipelineDSL, useImportPipelineDSLConfirm } from '@/service/use-pipeline'
export enum CreateFromDSLModalTab {
FROM_FILE = 'from-file',
FROM_URL = 'from-url',
}
export type UseDSLImportOptions = {
activeTab?: CreateFromDSLModalTab
dslUrl?: string
onSuccess?: () => void
onClose?: () => void
}
export type DSLVersions = {
importedVersion: string
systemVersion: string
}
export const useDSLImport = ({
activeTab = CreateFromDSLModalTab.FROM_FILE,
dslUrl = '',
onSuccess,
onClose,
}: UseDSLImportOptions) => {
const { push } = useRouter()
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const [currentFile, setDSLFile] = useState<File>()
const [fileContent, setFileContent] = useState<string>()
const [currentTab, setCurrentTab] = useState(activeTab)
const [dslUrlValue, setDslUrlValue] = useState(dslUrl)
const [showConfirmModal, setShowConfirmModal] = useState(false)
const [versions, setVersions] = useState<DSLVersions>()
const [importId, setImportId] = useState<string>()
const [isConfirming, setIsConfirming] = useState(false)
const { handleCheckPluginDependencies } = usePluginDependencies()
const isCreatingRef = useRef(false)
const { mutateAsync: importDSL } = useImportPipelineDSL()
const { mutateAsync: importDSLConfirm } = useImportPipelineDSLConfirm()
const readFile = useCallback((file: File) => {
const reader = new FileReader()
reader.onload = (event) => {
const content = event.target?.result
setFileContent(content as string)
}
reader.readAsText(file)
}, [])
const handleFile = useCallback((file?: File) => {
setDSLFile(file)
if (file)
readFile(file)
if (!file)
setFileContent('')
}, [readFile])
const onCreate = useCallback(async () => {
if (currentTab === CreateFromDSLModalTab.FROM_FILE && !currentFile)
return
if (currentTab === CreateFromDSLModalTab.FROM_URL && !dslUrlValue)
return
if (isCreatingRef.current)
return
isCreatingRef.current = true
let response
if (currentTab === CreateFromDSLModalTab.FROM_FILE) {
response = await importDSL({
mode: DSLImportMode.YAML_CONTENT,
yaml_content: fileContent || '',
})
}
if (currentTab === CreateFromDSLModalTab.FROM_URL) {
response = await importDSL({
mode: DSLImportMode.YAML_URL,
yaml_url: dslUrlValue || '',
})
}
if (!response) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
isCreatingRef.current = false
return
}
const { id, status, pipeline_id, dataset_id, imported_dsl_version, current_dsl_version } = response
if (status === DSLImportStatus.COMPLETED || status === DSLImportStatus.COMPLETED_WITH_WARNINGS) {
onSuccess?.()
onClose?.()
notify({
type: status === DSLImportStatus.COMPLETED ? 'success' : 'warning',
message: t(status === DSLImportStatus.COMPLETED ? 'creation.successTip' : 'creation.caution', { ns: 'datasetPipeline' }),
children: status === DSLImportStatus.COMPLETED_WITH_WARNINGS && t('newApp.appCreateDSLWarning', { ns: 'app' }),
})
if (pipeline_id)
await handleCheckPluginDependencies(pipeline_id, true)
push(`/datasets/${dataset_id}/pipeline`)
isCreatingRef.current = false
}
else if (status === DSLImportStatus.PENDING) {
setVersions({
importedVersion: imported_dsl_version ?? '',
systemVersion: current_dsl_version ?? '',
})
onClose?.()
setTimeout(() => {
setShowConfirmModal(true)
}, 300)
setImportId(id)
isCreatingRef.current = false
}
else {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
isCreatingRef.current = false
}
}, [
currentTab,
currentFile,
dslUrlValue,
fileContent,
importDSL,
notify,
t,
onSuccess,
onClose,
handleCheckPluginDependencies,
push,
])
const { run: handleCreateApp } = useDebounceFn(onCreate, { wait: 300 })
const onDSLConfirm = useCallback(async () => {
if (!importId)
return
setIsConfirming(true)
const response = await importDSLConfirm(importId)
setIsConfirming(false)
if (!response) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
return
}
const { status, pipeline_id, dataset_id } = response
if (status === DSLImportStatus.COMPLETED) {
onSuccess?.()
setShowConfirmModal(false)
notify({
type: 'success',
message: t('creation.successTip', { ns: 'datasetPipeline' }),
})
if (pipeline_id)
await handleCheckPluginDependencies(pipeline_id, true)
push(`/datasets/${dataset_id}/pipeline`)
}
else if (status === DSLImportStatus.FAILED) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
}
}, [importId, importDSLConfirm, notify, t, onSuccess, handleCheckPluginDependencies, push])
const handleCancelConfirm = useCallback(() => {
setShowConfirmModal(false)
}, [])
const buttonDisabled = useMemo(() => {
if (currentTab === CreateFromDSLModalTab.FROM_FILE)
return !currentFile
if (currentTab === CreateFromDSLModalTab.FROM_URL)
return !dslUrlValue
return false
}, [currentTab, currentFile, dslUrlValue])
return {
// State
currentFile,
currentTab,
dslUrlValue,
showConfirmModal,
versions,
buttonDisabled,
isConfirming,
// Actions
setCurrentTab,
setDslUrlValue,
handleFile,
handleCreateApp,
onDSLConfirm,
handleCancelConfirm,
}
}

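A minimal consumer sketch for the extracted hook (the form component below is illustrative; the real caller is the CreateFromDSLModal refactor in the next diff).

import { CreateFromDSLModalTab, useDSLImport } from './hooks/use-dsl-import'

// Hypothetical caller: URL tab only, to show the returned state and actions.
const ImportPipelineForm = ({ onClose }: { onClose: () => void }) => {
  const {
    currentTab,
    dslUrlValue,
    buttonDisabled,
    setDslUrlValue,
    handleCreateApp,
  } = useDSLImport({ activeTab: CreateFromDSLModalTab.FROM_URL, onClose })

  return (
    <form onSubmit={(e) => { e.preventDefault(); handleCreateApp() }}>
      {currentTab === CreateFromDSLModalTab.FROM_URL && (
        <input value={dslUrlValue} onChange={e => setDslUrlValue(e.target.value)} />
      )}
      <button type="submit" disabled={buttonDisabled}>Import</button>
    </form>
  )
}

export default ImportPipelineForm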
View File

@@ -1,24 +1,18 @@
'use client'
import { useDebounceFn, useKeyPress } from 'ahooks'
import { useKeyPress } from 'ahooks'
import { noop } from 'es-toolkit/function'
import { useRouter } from 'next/navigation'
import { useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Modal from '@/app/components/base/modal'
import { ToastContext } from '@/app/components/base/toast'
import { usePluginDependencies } from '@/app/components/workflow/plugin-dependency/hooks'
import {
DSLImportMode,
DSLImportStatus,
} from '@/models/app'
import { useImportPipelineDSL, useImportPipelineDSLConfirm } from '@/service/use-pipeline'
import DSLConfirmModal from './dsl-confirm-modal'
import Header from './header'
import { CreateFromDSLModalTab, useDSLImport } from './hooks/use-dsl-import'
import Tab from './tab'
import Uploader from './uploader'
export { CreateFromDSLModalTab }
type CreateFromDSLModalProps = {
show: boolean
onSuccess?: () => void
@@ -27,11 +21,6 @@ type CreateFromDSLModalProps = {
dslUrl?: string
}
export enum CreateFromDSLModalTab {
FROM_FILE = 'from-file',
FROM_URL = 'from-url',
}
const CreateFromDSLModal = ({
show,
onSuccess,
@@ -39,149 +28,33 @@ const CreateFromDSLModal = ({
activeTab = CreateFromDSLModalTab.FROM_FILE,
dslUrl = '',
}: CreateFromDSLModalProps) => {
const { push } = useRouter()
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const [currentFile, setDSLFile] = useState<File>()
const [fileContent, setFileContent] = useState<string>()
const [currentTab, setCurrentTab] = useState(activeTab)
const [dslUrlValue, setDslUrlValue] = useState(dslUrl)
const [showErrorModal, setShowErrorModal] = useState(false)
const [versions, setVersions] = useState<{ importedVersion: string, systemVersion: string }>()
const [importId, setImportId] = useState<string>()
const { handleCheckPluginDependencies } = usePluginDependencies()
const readFile = (file: File) => {
const reader = new FileReader()
reader.onload = function (event) {
const content = event.target?.result
setFileContent(content as string)
}
reader.readAsText(file)
}
const handleFile = (file?: File) => {
setDSLFile(file)
if (file)
readFile(file)
if (!file)
setFileContent('')
}
const isCreatingRef = useRef(false)
const { mutateAsync: importDSL } = useImportPipelineDSL()
const onCreate = async () => {
if (currentTab === CreateFromDSLModalTab.FROM_FILE && !currentFile)
return
if (currentTab === CreateFromDSLModalTab.FROM_URL && !dslUrlValue)
return
if (isCreatingRef.current)
return
isCreatingRef.current = true
let response
if (currentTab === CreateFromDSLModalTab.FROM_FILE) {
response = await importDSL({
mode: DSLImportMode.YAML_CONTENT,
yaml_content: fileContent || '',
})
}
if (currentTab === CreateFromDSLModalTab.FROM_URL) {
response = await importDSL({
mode: DSLImportMode.YAML_URL,
yaml_url: dslUrlValue || '',
})
}
if (!response) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
isCreatingRef.current = false
return
}
const { id, status, pipeline_id, dataset_id, imported_dsl_version, current_dsl_version } = response
if (status === DSLImportStatus.COMPLETED || status === DSLImportStatus.COMPLETED_WITH_WARNINGS) {
if (onSuccess)
onSuccess()
if (onClose)
onClose()
notify({
type: status === DSLImportStatus.COMPLETED ? 'success' : 'warning',
message: t(status === DSLImportStatus.COMPLETED ? 'creation.successTip' : 'creation.caution', { ns: 'datasetPipeline' }),
children: status === DSLImportStatus.COMPLETED_WITH_WARNINGS && t('newApp.appCreateDSLWarning', { ns: 'app' }),
})
if (pipeline_id)
await handleCheckPluginDependencies(pipeline_id, true)
push(`/datasets/${dataset_id}/pipeline`)
isCreatingRef.current = false
}
else if (status === DSLImportStatus.PENDING) {
setVersions({
importedVersion: imported_dsl_version ?? '',
systemVersion: current_dsl_version ?? '',
})
if (onClose)
onClose()
setTimeout(() => {
setShowErrorModal(true)
}, 300)
setImportId(id)
isCreatingRef.current = false
}
else {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
isCreatingRef.current = false
}
}
const { run: handleCreateApp } = useDebounceFn(onCreate, { wait: 300 })
useKeyPress('esc', () => {
if (show && !showErrorModal)
onClose()
const {
currentFile,
currentTab,
dslUrlValue,
showConfirmModal,
versions,
buttonDisabled,
isConfirming,
setCurrentTab,
setDslUrlValue,
handleFile,
handleCreateApp,
onDSLConfirm,
handleCancelConfirm,
} = useDSLImport({
activeTab,
dslUrl,
onSuccess,
onClose,
})
const { mutateAsync: importDSLConfirm } = useImportPipelineDSLConfirm()
const onDSLConfirm = async () => {
if (!importId)
return
const response = await importDSLConfirm(importId)
if (!response) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
return
}
const { status, pipeline_id, dataset_id } = response
if (status === DSLImportStatus.COMPLETED) {
if (onSuccess)
onSuccess()
if (onClose)
onClose()
notify({
type: 'success',
message: t('creation.successTip', { ns: 'datasetPipeline' }),
})
if (pipeline_id)
await handleCheckPluginDependencies(pipeline_id, true)
push(`datasets/${dataset_id}/pipeline`)
}
else if (status === DSLImportStatus.FAILED) {
notify({ type: 'error', message: t('creation.errorTip', { ns: 'datasetPipeline' }) })
}
}
const buttonDisabled = useMemo(() => {
if (currentTab === CreateFromDSLModalTab.FROM_FILE)
return !currentFile
if (currentTab === CreateFromDSLModalTab.FROM_URL)
return !dslUrlValue
return false
}, [currentTab, currentFile, dslUrlValue])
useKeyPress('esc', () => {
if (show && !showConfirmModal)
onClose()
})
return (
<>
@@ -196,29 +69,25 @@ const CreateFromDSLModal = ({
setCurrentTab={setCurrentTab}
/>
<div className="px-6 py-4">
{
currentTab === CreateFromDSLModalTab.FROM_FILE && (
<Uploader
className="mt-0"
file={currentFile}
updateFile={handleFile}
/>
)
}
{
currentTab === CreateFromDSLModalTab.FROM_URL && (
<div>
<div className="system-md-semibold leading6 mb-1 text-text-secondary">
DSL URL
</div>
<Input
placeholder={t('importFromDSLUrlPlaceholder', { ns: 'app' }) || ''}
value={dslUrlValue}
onChange={e => setDslUrlValue(e.target.value)}
/>
{currentTab === CreateFromDSLModalTab.FROM_FILE && (
<Uploader
className="mt-0"
file={currentFile}
updateFile={handleFile}
/>
)}
{currentTab === CreateFromDSLModalTab.FROM_URL && (
<div>
<div className="system-md-semibold leading6 mb-1 text-text-secondary">
DSL URL
</div>
)
}
<Input
placeholder={t('importFromDSLUrlPlaceholder', { ns: 'app' }) || ''}
value={dslUrlValue}
onChange={e => setDslUrlValue(e.target.value)}
/>
</div>
)}
</div>
<div className="flex justify-end gap-x-2 p-6 pt-5">
<Button onClick={onClose}>
@@ -234,32 +103,14 @@ const CreateFromDSLModal = ({
</Button>
</div>
</Modal>
<Modal
isShow={showErrorModal}
onClose={() => setShowErrorModal(false)}
className="w-[480px]"
>
<div className="flex flex-col items-start gap-2 self-stretch pb-4">
<div className="title-2xl-semi-bold text-text-primary">{t('newApp.appCreateDSLErrorTitle', { ns: 'app' })}</div>
<div className="system-md-regular flex grow flex-col text-text-secondary">
<div>{t('newApp.appCreateDSLErrorPart1', { ns: 'app' })}</div>
<div>{t('newApp.appCreateDSLErrorPart2', { ns: 'app' })}</div>
<br />
<div>
{t('newApp.appCreateDSLErrorPart3', { ns: 'app' })}
<span className="system-md-medium">{versions?.importedVersion}</span>
</div>
<div>
{t('newApp.appCreateDSLErrorPart4', { ns: 'app' })}
<span className="system-md-medium">{versions?.systemVersion}</span>
</div>
</div>
</div>
<div className="flex items-start justify-end gap-2 self-stretch pt-6">
<Button variant="secondary" onClick={() => setShowErrorModal(false)}>{t('newApp.Cancel', { ns: 'app' })}</Button>
<Button variant="primary" destructive onClick={onDSLConfirm}>{t('newApp.Confirm', { ns: 'app' })}</Button>
</div>
</Modal>
{showConfirmModal && (
<DSLConfirmModal
versions={versions}
onCancel={handleCancelConfirm}
onConfirm={onDSLConfirm}
confirmDisabled={isConfirming}
/>
)}
</>
)
}

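The refactor hands the version-mismatch dialog to DSLConfirmModal, whose source is not part of this diff; the props shape below is only inferred from the call site above, and the optionality of each field is a guess.

import type { DSLVersions } from './hooks/use-dsl-import'

// Inferred from <DSLConfirmModal versions onCancel onConfirm confirmDisabled />; not the actual component contract.
type DSLConfirmModalProps = {
  versions?: DSLVersions
  onCancel: () => void
  onConfirm: () => void
  confirmDisabled?: boolean
}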
View File

@@ -0,0 +1,334 @@
import type { FileListItemProps } from './file-list-item'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { fireEvent, render, screen } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { PROGRESS_COMPLETE, PROGRESS_ERROR, PROGRESS_NOT_STARTED } from '../constants'
import FileListItem from './file-list-item'
// Mock theme hook - can be changed per test
let mockTheme = 'light'
vi.mock('@/hooks/use-theme', () => ({
default: () => ({ theme: mockTheme }),
}))
// Mock theme types
vi.mock('@/types/app', () => ({
Theme: { dark: 'dark', light: 'light' },
}))
// Mock SimplePieChart with dynamic import handling
vi.mock('next/dynamic', () => ({
default: () => {
const DynamicComponent = ({ percentage, stroke, fill }: { percentage: number, stroke: string, fill: string }) => (
<div data-testid="pie-chart" data-percentage={percentage} data-stroke={stroke} data-fill={fill}>
Pie Chart:
{' '}
{percentage}
%
</div>
)
DynamicComponent.displayName = 'SimplePieChart'
return DynamicComponent
},
}))
// Mock DocumentFileIcon
vi.mock('@/app/components/datasets/common/document-file-icon', () => ({
default: ({ name, extension, size }: { name: string, extension: string, size: string }) => (
<div data-testid="document-icon" data-name={name} data-extension={extension} data-size={size}>
Document Icon
</div>
),
}))
describe('FileListItem', () => {
const createMockFile = (overrides: Partial<File> = {}): File => ({
name: 'test-document.pdf',
size: 1024 * 100, // 100KB
type: 'application/pdf',
lastModified: Date.now(),
...overrides,
} as File)
const createMockFileItem = (overrides: Partial<FileItem> = {}): FileItem => ({
fileID: 'file-123',
file: createMockFile(overrides.file as Partial<File>),
progress: PROGRESS_NOT_STARTED,
...overrides,
})
const defaultProps: FileListItemProps = {
fileItem: createMockFileItem(),
onPreview: vi.fn(),
onRemove: vi.fn(),
}
beforeEach(() => {
vi.clearAllMocks()
mockTheme = 'light'
})
describe('rendering', () => {
it('should render the file item container', () => {
const { container } = render(<FileListItem {...defaultProps} />)
const item = container.firstChild as HTMLElement
expect(item).toHaveClass('flex', 'h-12', 'items-center', 'rounded-lg')
})
it('should render document icon with correct props', () => {
render(<FileListItem {...defaultProps} />)
const icon = screen.getByTestId('document-icon')
expect(icon).toBeInTheDocument()
expect(icon).toHaveAttribute('data-name', 'test-document.pdf')
expect(icon).toHaveAttribute('data-extension', 'pdf')
expect(icon).toHaveAttribute('data-size', 'xl')
})
it('should render file name', () => {
render(<FileListItem {...defaultProps} />)
expect(screen.getByText('test-document.pdf')).toBeInTheDocument()
})
it('should render file extension in uppercase via CSS class', () => {
render(<FileListItem {...defaultProps} />)
const extensionSpan = screen.getByText('pdf')
expect(extensionSpan).toBeInTheDocument()
expect(extensionSpan).toHaveClass('uppercase')
})
it('should render file size', () => {
render(<FileListItem {...defaultProps} />)
// Default mock file is 100KB (1024 * 100 bytes)
expect(screen.getByText('100.00 KB')).toBeInTheDocument()
})
it('should render delete button', () => {
const { container } = render(<FileListItem {...defaultProps} />)
const deleteButton = container.querySelector('.cursor-pointer')
expect(deleteButton).toBeInTheDocument()
})
})
describe('progress states', () => {
it('should show progress chart when uploading (0-99)', () => {
const fileItem = createMockFileItem({ progress: 50 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const pieChart = screen.getByTestId('pie-chart')
expect(pieChart).toBeInTheDocument()
expect(pieChart).toHaveAttribute('data-percentage', '50')
})
it('should show progress chart at 0%', () => {
const fileItem = createMockFileItem({ progress: 0 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const pieChart = screen.getByTestId('pie-chart')
expect(pieChart).toHaveAttribute('data-percentage', '0')
})
it('should not show progress chart when complete (100)', () => {
const fileItem = createMockFileItem({ progress: PROGRESS_COMPLETE })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.queryByTestId('pie-chart')).not.toBeInTheDocument()
})
it('should not show progress chart when not started (-1)', () => {
const fileItem = createMockFileItem({ progress: PROGRESS_NOT_STARTED })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.queryByTestId('pie-chart')).not.toBeInTheDocument()
})
})
describe('error state', () => {
it('should show error indicator when progress is PROGRESS_ERROR', () => {
const fileItem = createMockFileItem({ progress: PROGRESS_ERROR })
const { container } = render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const errorIndicator = container.querySelector('.text-text-destructive')
expect(errorIndicator).toBeInTheDocument()
})
it('should not show error indicator when not in error state', () => {
const { container } = render(<FileListItem {...defaultProps} />)
const errorIndicator = container.querySelector('.text-text-destructive')
expect(errorIndicator).not.toBeInTheDocument()
})
})
describe('theme handling', () => {
it('should use correct chart color for light theme', () => {
mockTheme = 'light'
const fileItem = createMockFileItem({ progress: 50 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const pieChart = screen.getByTestId('pie-chart')
expect(pieChart).toHaveAttribute('data-stroke', '#296dff')
expect(pieChart).toHaveAttribute('data-fill', '#296dff')
})
it('should use correct chart color for dark theme', () => {
mockTheme = 'dark'
const fileItem = createMockFileItem({ progress: 50 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const pieChart = screen.getByTestId('pie-chart')
expect(pieChart).toHaveAttribute('data-stroke', '#5289ff')
expect(pieChart).toHaveAttribute('data-fill', '#5289ff')
})
})
describe('event handlers', () => {
it('should call onPreview when item is clicked with file id', () => {
const onPreview = vi.fn()
const fileItem = createMockFileItem({
file: createMockFile({ id: 'uploaded-id' } as Partial<File>),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} onPreview={onPreview} />)
const item = screen.getByText('test-document.pdf').closest('[class*="flex h-12"]')!
fireEvent.click(item)
expect(onPreview).toHaveBeenCalledTimes(1)
expect(onPreview).toHaveBeenCalledWith(fileItem.file)
})
it('should not call onPreview when file has no id', () => {
const onPreview = vi.fn()
const fileItem = createMockFileItem()
render(<FileListItem {...defaultProps} fileItem={fileItem} onPreview={onPreview} />)
const item = screen.getByText('test-document.pdf').closest('[class*="flex h-12"]')!
fireEvent.click(item)
expect(onPreview).not.toHaveBeenCalled()
})
it('should call onRemove when delete button is clicked', () => {
const onRemove = vi.fn()
const fileItem = createMockFileItem()
const { container } = render(<FileListItem {...defaultProps} fileItem={fileItem} onRemove={onRemove} />)
const deleteButton = container.querySelector('.cursor-pointer')!
fireEvent.click(deleteButton)
expect(onRemove).toHaveBeenCalledTimes(1)
expect(onRemove).toHaveBeenCalledWith('file-123')
})
it('should stop propagation when delete button is clicked', () => {
const onPreview = vi.fn()
const onRemove = vi.fn()
const fileItem = createMockFileItem({
file: createMockFile({ id: 'uploaded-id' } as Partial<File>),
})
const { container } = render(<FileListItem {...defaultProps} fileItem={fileItem} onPreview={onPreview} onRemove={onRemove} />)
const deleteButton = container.querySelector('.cursor-pointer')!
fireEvent.click(deleteButton)
expect(onRemove).toHaveBeenCalledTimes(1)
expect(onPreview).not.toHaveBeenCalled()
})
})
describe('file type handling', () => {
it('should handle files with multiple dots in name', () => {
const fileItem = createMockFileItem({
file: createMockFile({ name: 'my.document.file.docx' }),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByText('my.document.file.docx')).toBeInTheDocument()
expect(screen.getByText('docx')).toBeInTheDocument()
})
it('should handle files without extension', () => {
const fileItem = createMockFileItem({
file: createMockFile({ name: 'README' }),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
// File name appears once, and extension area shows empty string
expect(screen.getByText('README')).toBeInTheDocument()
})
it('should handle various file extensions', () => {
const extensions = ['txt', 'md', 'json', 'csv', 'xlsx']
extensions.forEach((ext) => {
const fileItem = createMockFileItem({
file: createMockFile({ name: `file.${ext}` }),
})
const { unmount } = render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByText(ext)).toBeInTheDocument()
unmount()
})
})
})
describe('file size display', () => {
it('should display size in KB for small files', () => {
const fileItem = createMockFileItem({
file: createMockFile({ size: 5 * 1024 }),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByText('5.00 KB')).toBeInTheDocument()
})
it('should display size in MB for larger files', () => {
const fileItem = createMockFileItem({
file: createMockFile({ size: 5 * 1024 * 1024 }),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByText('5.00 MB')).toBeInTheDocument()
})
})
describe('upload progress values', () => {
it('should show chart at progress 1', () => {
const fileItem = createMockFileItem({ progress: 1 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByTestId('pie-chart')).toBeInTheDocument()
})
it('should show chart at progress 99', () => {
const fileItem = createMockFileItem({ progress: 99 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.getByTestId('pie-chart')).toHaveAttribute('data-percentage', '99')
})
it('should not show chart at progress 100', () => {
const fileItem = createMockFileItem({ progress: 100 })
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
expect(screen.queryByTestId('pie-chart')).not.toBeInTheDocument()
})
})
describe('styling', () => {
it('should have proper shadow styling', () => {
const { container } = render(<FileListItem {...defaultProps} />)
const item = container.firstChild as HTMLElement
expect(item).toHaveClass('shadow-xs')
})
it('should have proper border styling', () => {
const { container } = render(<FileListItem {...defaultProps} />)
const item = container.firstChild as HTMLElement
expect(item).toHaveClass('border', 'border-components-panel-border')
})
it('should truncate long file names', () => {
const longFileName = 'this-is-a-very-long-file-name-that-should-be-truncated.pdf'
const fileItem = createMockFileItem({
file: createMockFile({ name: longFileName }),
})
render(<FileListItem {...defaultProps} fileItem={fileItem} />)
const nameElement = screen.getByText(longFileName)
expect(nameElement).toHaveClass('truncate')
})
})
})

View File

@@ -0,0 +1,89 @@
'use client'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { RiDeleteBinLine, RiErrorWarningFill } from '@remixicon/react'
import dynamic from 'next/dynamic'
import { useMemo } from 'react'
import DocumentFileIcon from '@/app/components/datasets/common/document-file-icon'
import useTheme from '@/hooks/use-theme'
import { Theme } from '@/types/app'
import { formatFileSize, getFileExtension } from '@/utils/format'
import { PROGRESS_COMPLETE, PROGRESS_ERROR } from '../constants'
const SimplePieChart = dynamic(() => import('@/app/components/base/simple-pie-chart'), { ssr: false })
export type FileListItemProps = {
fileItem: FileItem
onPreview: (file: File) => void
onRemove: (fileID: string) => void
}
const FileListItem = ({
fileItem,
onPreview,
onRemove,
}: FileListItemProps) => {
const { theme } = useTheme()
const chartColor = useMemo(() => theme === Theme.dark ? '#5289ff' : '#296dff', [theme])
const isUploading = fileItem.progress >= 0 && fileItem.progress < PROGRESS_COMPLETE
const isError = fileItem.progress === PROGRESS_ERROR
const handleClick = () => {
if (fileItem.file?.id)
onPreview(fileItem.file)
}
const handleRemove = (e: React.MouseEvent) => {
e.stopPropagation()
onRemove(fileItem.fileID)
}
return (
<div
onClick={handleClick}
className="flex h-12 max-w-[640px] items-center rounded-lg border border-components-panel-border bg-components-panel-on-panel-item-bg text-xs leading-3 text-text-tertiary shadow-xs"
>
<div className="flex w-12 shrink-0 items-center justify-center">
<DocumentFileIcon
size="xl"
className="shrink-0"
name={fileItem.file.name}
extension={getFileExtension(fileItem.file.name)}
/>
</div>
<div className="flex shrink grow flex-col gap-0.5">
<div className="flex w-full">
<div className="w-0 grow truncate text-sm leading-4 text-text-secondary">
{fileItem.file.name}
</div>
</div>
<div className="w-full truncate leading-3 text-text-tertiary">
<span className="uppercase">{getFileExtension(fileItem.file.name)}</span>
<span className="px-1 text-text-quaternary">·</span>
<span>{formatFileSize(fileItem.file.size)}</span>
</div>
</div>
<div className="flex w-16 shrink-0 items-center justify-end gap-1 pr-3">
{isUploading && (
<SimplePieChart
percentage={fileItem.progress}
stroke={chartColor}
fill={chartColor}
animationDuration={0}
/>
)}
{isError && (
<RiErrorWarningFill className="size-4 text-text-destructive" />
)}
<span
className="flex h-6 w-6 cursor-pointer items-center justify-center"
onClick={handleRemove}
>
<RiDeleteBinLine className="size-4 text-text-tertiary" />
</span>
</div>
</div>
)
}
export default FileListItem

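A rendering sketch for the new FileListItem (values are illustrative; the FileItem literal uses the same three fields the test factory above builds).

import type { CustomFile as File, FileItem } from '@/models/datasets'
import FileListItem from './file-list-item'

const fileItem: FileItem = {
  fileID: 'file-42',
  file: { name: 'report.pdf', size: 2048 } as File,
  progress: 37, // 0-99 shows the pie chart, 100 hides it, -2 shows the error icon
}

const Example = () => (
  <FileListItem
    fileItem={fileItem}
    onPreview={file => console.log('preview', file.id)}
    onRemove={fileID => console.log('remove', fileID)}
  />
)

export default Example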
View File

@@ -0,0 +1,210 @@
import type { RefObject } from 'react'
import type { UploadDropzoneProps } from './upload-dropzone'
import { fireEvent, render, screen } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import UploadDropzone from './upload-dropzone'
// Helper to create mock ref objects for testing
const createMockRef = <T,>(value: T | null = null): RefObject<T | null> => ({ current: value })
// Mock react-i18next
vi.mock('react-i18next', () => ({
useTranslation: () => ({
t: (key: string, options?: Record<string, unknown>) => {
const translations: Record<string, string> = {
'stepOne.uploader.button': 'Drag and drop files, or',
'stepOne.uploader.buttonSingleFile': 'Drag and drop file, or',
'stepOne.uploader.browse': 'Browse',
'stepOne.uploader.tip': 'Supports {{supportTypes}}, Max {{size}}MB each, up to {{batchCount}} files at a time, {{totalCount}} files total',
}
let result = translations[key] || key
if (options && typeof options === 'object') {
Object.entries(options).forEach(([k, v]) => {
result = result.replace(`{{${k}}}`, String(v))
})
}
return result
},
}),
}))
describe('UploadDropzone', () => {
const defaultProps: UploadDropzoneProps = {
dropRef: createMockRef<HTMLDivElement>() as RefObject<HTMLDivElement | null>,
dragRef: createMockRef<HTMLDivElement>() as RefObject<HTMLDivElement | null>,
fileUploaderRef: createMockRef<HTMLInputElement>() as RefObject<HTMLInputElement | null>,
dragging: false,
supportBatchUpload: true,
supportTypesShowNames: 'PDF, DOCX, TXT',
fileUploadConfig: {
file_size_limit: 15,
batch_count_limit: 5,
file_upload_limit: 10,
},
acceptTypes: ['.pdf', '.docx', '.txt'],
onSelectFile: vi.fn(),
onFileChange: vi.fn(),
}
beforeEach(() => {
vi.clearAllMocks()
})
describe('rendering', () => {
it('should render the dropzone container', () => {
const { container } = render(<UploadDropzone {...defaultProps} />)
const dropzone = container.querySelector('[class*="border-dashed"]')
expect(dropzone).toBeInTheDocument()
})
it('should render hidden file input', () => {
render(<UploadDropzone {...defaultProps} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
expect(input).toBeInTheDocument()
expect(input).toHaveClass('hidden')
expect(input).toHaveAttribute('type', 'file')
})
it('should render upload icon', () => {
render(<UploadDropzone {...defaultProps} />)
const icon = document.querySelector('svg')
expect(icon).toBeInTheDocument()
})
it('should render browse label when extensions are allowed', () => {
render(<UploadDropzone {...defaultProps} />)
expect(screen.getByText('Browse')).toBeInTheDocument()
})
it('should not render browse label when no extensions allowed', () => {
render(<UploadDropzone {...defaultProps} acceptTypes={[]} />)
expect(screen.queryByText('Browse')).not.toBeInTheDocument()
})
it('should render file size and count limits', () => {
render(<UploadDropzone {...defaultProps} />)
const tipText = screen.getByText(/Supports.*Max.*15MB/i)
expect(tipText).toBeInTheDocument()
})
})
describe('file input configuration', () => {
it('should allow multiple files when supportBatchUpload is true', () => {
render(<UploadDropzone {...defaultProps} supportBatchUpload={true} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
expect(input).toHaveAttribute('multiple')
})
it('should not allow multiple files when supportBatchUpload is false', () => {
render(<UploadDropzone {...defaultProps} supportBatchUpload={false} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
expect(input).not.toHaveAttribute('multiple')
})
it('should set accept attribute with correct types', () => {
render(<UploadDropzone {...defaultProps} acceptTypes={['.pdf', '.docx']} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
expect(input).toHaveAttribute('accept', '.pdf,.docx')
})
})
describe('text content', () => {
it('should show batch upload text when supportBatchUpload is true', () => {
render(<UploadDropzone {...defaultProps} supportBatchUpload={true} />)
expect(screen.getByText(/Drag and drop files/i)).toBeInTheDocument()
})
it('should show single file text when supportBatchUpload is false', () => {
render(<UploadDropzone {...defaultProps} supportBatchUpload={false} />)
expect(screen.getByText(/Drag and drop file/i)).toBeInTheDocument()
})
})
describe('dragging state', () => {
it('should apply dragging styles when dragging is true', () => {
const { container } = render(<UploadDropzone {...defaultProps} dragging={true} />)
const dropzone = container.querySelector('[class*="border-components-dropzone-border-accent"]')
expect(dropzone).toBeInTheDocument()
})
it('should render drag overlay when dragging', () => {
const dragRef = createMockRef<HTMLDivElement>()
render(<UploadDropzone {...defaultProps} dragging={true} dragRef={dragRef as RefObject<HTMLDivElement | null>} />)
const overlay = document.querySelector('.absolute.left-0.top-0')
expect(overlay).toBeInTheDocument()
})
it('should not render drag overlay when not dragging', () => {
render(<UploadDropzone {...defaultProps} dragging={false} />)
const overlay = document.querySelector('.absolute.left-0.top-0')
expect(overlay).not.toBeInTheDocument()
})
})
describe('event handlers', () => {
it('should call onSelectFile when browse label is clicked', () => {
const onSelectFile = vi.fn()
render(<UploadDropzone {...defaultProps} onSelectFile={onSelectFile} />)
const browseLabel = screen.getByText('Browse')
fireEvent.click(browseLabel)
expect(onSelectFile).toHaveBeenCalledTimes(1)
})
it('should call onFileChange when files are selected', () => {
const onFileChange = vi.fn()
render(<UploadDropzone {...defaultProps} onFileChange={onFileChange} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
fireEvent.change(input, { target: { files: [file] } })
expect(onFileChange).toHaveBeenCalledTimes(1)
})
})
describe('refs', () => {
it('should attach dropRef to drop container', () => {
const dropRef = createMockRef<HTMLDivElement>()
render(<UploadDropzone {...defaultProps} dropRef={dropRef as RefObject<HTMLDivElement | null>} />)
expect(dropRef.current).toBeInstanceOf(HTMLDivElement)
})
it('should attach fileUploaderRef to input element', () => {
const fileUploaderRef = createMockRef<HTMLInputElement>()
render(<UploadDropzone {...defaultProps} fileUploaderRef={fileUploaderRef as RefObject<HTMLInputElement | null>} />)
expect(fileUploaderRef.current).toBeInstanceOf(HTMLInputElement)
})
it('should attach dragRef to overlay when dragging', () => {
const dragRef = createMockRef<HTMLDivElement>()
render(<UploadDropzone {...defaultProps} dragging={true} dragRef={dragRef as RefObject<HTMLDivElement | null>} />)
expect(dragRef.current).toBeInstanceOf(HTMLDivElement)
})
})
describe('styling', () => {
it('should have base dropzone styling', () => {
const { container } = render(<UploadDropzone {...defaultProps} />)
const dropzone = container.querySelector('[class*="border-dashed"]')
expect(dropzone).toBeInTheDocument()
expect(dropzone).toHaveClass('rounded-xl')
})
it('should have cursor-pointer on browse label', () => {
render(<UploadDropzone {...defaultProps} />)
const browseLabel = screen.getByText('Browse')
expect(browseLabel).toHaveClass('cursor-pointer')
})
})
describe('accessibility', () => {
it('should have an accessible file input', () => {
render(<UploadDropzone {...defaultProps} />)
const input = document.getElementById('fileUploader') as HTMLInputElement
expect(input).toHaveAttribute('id', 'fileUploader')
})
})
})

View File

@@ -0,0 +1,84 @@
'use client'
import type { RefObject } from 'react'
import type { FileUploadConfig } from '../hooks/use-file-upload'
import { RiUploadCloud2Line } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import { cn } from '@/utils/classnames'
export type UploadDropzoneProps = {
dropRef: RefObject<HTMLDivElement | null>
dragRef: RefObject<HTMLDivElement | null>
fileUploaderRef: RefObject<HTMLInputElement | null>
dragging: boolean
supportBatchUpload: boolean
supportTypesShowNames: string
fileUploadConfig: FileUploadConfig
acceptTypes: string[]
onSelectFile: () => void
onFileChange: (e: React.ChangeEvent<HTMLInputElement>) => void
}
const UploadDropzone = ({
dropRef,
dragRef,
fileUploaderRef,
dragging,
supportBatchUpload,
supportTypesShowNames,
fileUploadConfig,
acceptTypes,
onSelectFile,
onFileChange,
}: UploadDropzoneProps) => {
const { t } = useTranslation()
return (
<>
<input
ref={fileUploaderRef}
id="fileUploader"
className="hidden"
type="file"
multiple={supportBatchUpload}
accept={acceptTypes.join(',')}
onChange={onFileChange}
/>
<div
ref={dropRef}
className={cn(
'relative mb-2 box-border flex min-h-20 max-w-[640px] flex-col items-center justify-center gap-1 rounded-xl border border-dashed border-components-dropzone-border bg-components-dropzone-bg px-4 py-3 text-xs leading-4 text-text-tertiary',
dragging && 'border-components-dropzone-border-accent bg-components-dropzone-bg-accent',
)}
>
<div className="flex min-h-5 items-center justify-center text-sm leading-4 text-text-secondary">
<RiUploadCloud2Line className="mr-2 size-5" />
<span>
{supportBatchUpload
? t('stepOne.uploader.button', { ns: 'datasetCreation' })
: t('stepOne.uploader.buttonSingleFile', { ns: 'datasetCreation' })}
{acceptTypes.length > 0 && (
<label
className="ml-1 cursor-pointer text-text-accent"
onClick={onSelectFile}
>
{t('stepOne.uploader.browse', { ns: 'datasetCreation' })}
</label>
)}
</span>
</div>
<div>
{t('stepOne.uploader.tip', {
ns: 'datasetCreation',
size: fileUploadConfig.file_size_limit,
supportTypes: supportTypesShowNames,
batchCount: fileUploadConfig.batch_count_limit,
totalCount: fileUploadConfig.file_upload_limit,
})}
</div>
{dragging && <div ref={dragRef} className="absolute left-0 top-0 h-full w-full" />}
</div>
</>
)
}
export default UploadDropzone

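A wiring sketch connecting UploadDropzone to the useFileUpload hook whose return shape appears in the tests that follow; the exact prop mapping is an assumption, not taken from any file in this diff.

import { useFileUpload } from '../hooks/use-file-upload'
import UploadDropzone from './upload-dropzone'

// Hypothetical container: forwards hook state and handlers straight into the dropzone.
const UploaderExample = (options: Parameters<typeof useFileUpload>[0]) => {
  const {
    dropRef, dragRef, fileUploaderRef,
    dragging, hideUpload,
    acceptTypes, supportTypesShowNames, fileUploadConfig,
    selectHandle, fileChangeHandle,
  } = useFileUpload(options)

  if (hideUpload)
    return null

  return (
    <UploadDropzone
      dropRef={dropRef}
      dragRef={dragRef}
      fileUploaderRef={fileUploaderRef}
      dragging={dragging}
      supportBatchUpload={!!options.supportBatchUpload}
      supportTypesShowNames={supportTypesShowNames}
      fileUploadConfig={fileUploadConfig}
      acceptTypes={acceptTypes}
      onSelectFile={selectHandle}
      onFileChange={fileChangeHandle}
    />
  )
}

export default UploaderExample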
View File

@@ -0,0 +1,3 @@
export const PROGRESS_NOT_STARTED = -1
export const PROGRESS_ERROR = -2
export const PROGRESS_COMPLETE = 100

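These sentinels partition FileItem.progress into states; the predicates below restate how FileListItem reads them (helper names are illustrative, and the import path matches the sibling components shown above).

import { PROGRESS_COMPLETE, PROGRESS_ERROR, PROGRESS_NOT_STARTED } from '../constants'

// 0..99 -> uploading (pie chart), 100 -> complete, -1 -> queued, -2 -> failed (error icon)
const isUploading = (progress: number) => progress >= 0 && progress < PROGRESS_COMPLETE
const isError = (progress: number) => progress === PROGRESS_ERROR
const isNotStarted = (progress: number) => progress === PROGRESS_NOT_STARTED

export { isError, isNotStarted, isUploading }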
View File

@@ -0,0 +1,921 @@
import type { ReactNode } from 'react'
import type { CustomFile, FileItem } from '@/models/datasets'
import { act, render, renderHook, waitFor } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { ToastContext } from '@/app/components/base/toast'
import { PROGRESS_COMPLETE, PROGRESS_ERROR, PROGRESS_NOT_STARTED } from '../constants'
// Import after mocks
import { useFileUpload } from './use-file-upload'
// Mock notify function
const mockNotify = vi.fn()
const mockClose = vi.fn()
// Mock ToastContext
vi.mock('use-context-selector', async () => {
const actual = await vi.importActual<typeof import('use-context-selector')>('use-context-selector')
return {
...actual,
useContext: vi.fn(() => ({ notify: mockNotify, close: mockClose })),
}
})
// Mock upload service
const mockUpload = vi.fn()
vi.mock('@/service/base', () => ({
upload: (...args: unknown[]) => mockUpload(...args),
}))
// Mock file upload config
const mockFileUploadConfig = {
file_size_limit: 15,
batch_count_limit: 5,
file_upload_limit: 10,
}
const mockSupportTypes = {
allowed_extensions: ['pdf', 'docx', 'txt', 'md'],
}
vi.mock('@/service/use-common', () => ({
useFileUploadConfig: () => ({ data: mockFileUploadConfig }),
useFileSupportTypes: () => ({ data: mockSupportTypes }),
}))
// Mock i18n
vi.mock('react-i18next', () => ({
useTranslation: () => ({
t: (key: string) => key,
}),
}))
// Mock locale
vi.mock('@/context/i18n', () => ({
useLocale: () => 'en-US',
}))
vi.mock('@/i18n-config/language', () => ({
LanguagesSupported: ['en-US', 'zh-Hans'],
}))
// Mock config
vi.mock('@/config', () => ({
IS_CE_EDITION: false,
}))
// Mock file upload error message
vi.mock('@/app/components/base/file-uploader/utils', () => ({
getFileUploadErrorMessage: (_e: unknown, defaultMsg: string) => defaultMsg,
}))
const createWrapper = () => {
return ({ children }: { children: ReactNode }) => (
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
{children}
</ToastContext.Provider>
)
}
describe('useFileUpload', () => {
const defaultOptions = {
fileList: [] as FileItem[],
prepareFileList: vi.fn(),
onFileUpdate: vi.fn(),
onFileListUpdate: vi.fn(),
onPreview: vi.fn(),
supportBatchUpload: true,
}
beforeEach(() => {
vi.clearAllMocks()
mockUpload.mockReset()
// Default mock to return a resolved promise to avoid unhandled rejections
mockUpload.mockResolvedValue({ id: 'default-id' })
mockNotify.mockReset()
})
describe('initialization', () => {
it('should initialize with default values', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
expect(result.current.dragging).toBe(false)
expect(result.current.hideUpload).toBe(false)
expect(result.current.dropRef.current).toBeNull()
expect(result.current.dragRef.current).toBeNull()
expect(result.current.fileUploaderRef.current).toBeNull()
})
it('should set hideUpload true when not batch upload and has files', () => {
const { result } = renderHook(
() => useFileUpload({
...defaultOptions,
supportBatchUpload: false,
fileList: [{ fileID: 'file-1', file: {} as CustomFile, progress: 100 }],
}),
{ wrapper: createWrapper() },
)
expect(result.current.hideUpload).toBe(true)
})
it('should compute acceptTypes correctly', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
expect(result.current.acceptTypes).toEqual(['.pdf', '.docx', '.txt', '.md'])
})
it('should compute supportTypesShowNames correctly', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
expect(result.current.supportTypesShowNames).toContain('PDF')
expect(result.current.supportTypesShowNames).toContain('DOCX')
expect(result.current.supportTypesShowNames).toContain('TXT')
// 'md' is mapped to 'markdown' in the extensionMap
expect(result.current.supportTypesShowNames).toContain('MARKDOWN')
})
it('should set batch limit to 1 when not batch upload', () => {
const { result } = renderHook(
() => useFileUpload({
...defaultOptions,
supportBatchUpload: false,
}),
{ wrapper: createWrapper() },
)
expect(result.current.fileUploadConfig.batch_count_limit).toBe(1)
expect(result.current.fileUploadConfig.file_upload_limit).toBe(1)
})
})
describe('selectHandle', () => {
it('should trigger click on file input', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
const mockClick = vi.fn()
const mockInput = { click: mockClick } as unknown as HTMLInputElement
Object.defineProperty(result.current.fileUploaderRef, 'current', {
value: mockInput,
writable: true,
})
act(() => {
result.current.selectHandle()
})
expect(mockClick).toHaveBeenCalled()
})
it('should do nothing when file input ref is null', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
expect(() => {
act(() => {
result.current.selectHandle()
})
}).not.toThrow()
})
})
describe('handlePreview', () => {
it('should call onPreview when file has id', () => {
const onPreview = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onPreview }),
{ wrapper: createWrapper() },
)
const mockFile = { id: 'file-123', name: 'test.pdf', size: 1024 } as CustomFile
act(() => {
result.current.handlePreview(mockFile)
})
expect(onPreview).toHaveBeenCalledWith(mockFile)
})
it('should not call onPreview when file has no id', () => {
const onPreview = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onPreview }),
{ wrapper: createWrapper() },
)
const mockFile = { name: 'test.pdf', size: 1024 } as CustomFile
act(() => {
result.current.handlePreview(mockFile)
})
expect(onPreview).not.toHaveBeenCalled()
})
})
describe('removeFile', () => {
it('should call onFileListUpdate with filtered list', () => {
const onFileListUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileListUpdate }),
{ wrapper: createWrapper() },
)
act(() => {
result.current.removeFile('file-to-remove')
})
expect(onFileListUpdate).toHaveBeenCalled()
})
it('should clear file input value', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
const mockInput = { value: 'some-file' } as HTMLInputElement
Object.defineProperty(result.current.fileUploaderRef, 'current', {
value: mockInput,
writable: true,
})
act(() => {
result.current.removeFile('file-123')
})
expect(mockInput.value).toBe('')
})
})
describe('fileChangeHandle', () => {
it('should handle valid files', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id' })
const prepareFileList = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, prepareFileList }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
expect(prepareFileList).toHaveBeenCalled()
})
})
it('should limit files to batch count', () => {
const prepareFileList = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, prepareFileList }),
{ wrapper: createWrapper() },
)
const files = Array.from({ length: 10 }, (_, i) =>
new File(['content'], `file${i}.pdf`, { type: 'application/pdf' }))
const event = {
target: { files },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
// Should be called with at most batch_count_limit files
if (prepareFileList.mock.calls.length > 0) {
const calledFiles = prepareFileList.mock.calls[0][0]
expect(calledFiles.length).toBeLessThanOrEqual(mockFileUploadConfig.batch_count_limit)
}
})
it('should reject invalid file types', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.exe', { type: 'application/x-msdownload' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
expect(mockNotify).toHaveBeenCalledWith(
expect.objectContaining({ type: 'error' }),
)
})
it('should reject files exceeding size limit', () => {
const { result } = renderHook(
() => useFileUpload(defaultOptions),
{ wrapper: createWrapper() },
)
// Create a file larger than the limit (15MB)
const largeFile = new File([new ArrayBuffer(20 * 1024 * 1024)], 'large.pdf', { type: 'application/pdf' })
const event = {
target: { files: [largeFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
expect(mockNotify).toHaveBeenCalledWith(
expect.objectContaining({ type: 'error' }),
)
})
it('should handle null files', () => {
const prepareFileList = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, prepareFileList }),
{ wrapper: createWrapper() },
)
const event = {
target: { files: null },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
expect(prepareFileList).not.toHaveBeenCalled()
})
})
describe('drag and drop handlers', () => {
const TestDropzone = ({ options }: { options: typeof defaultOptions }) => {
const {
dropRef,
dragRef,
dragging,
} = useFileUpload(options)
return (
<div>
<div ref={dropRef} data-testid="dropzone">
{dragging && <div ref={dragRef} data-testid="drag-overlay" />}
</div>
<span data-testid="dragging">{String(dragging)}</span>
</div>
)
}
it('should set dragging true on dragenter', async () => {
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={defaultOptions} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dragEnterEvent = new Event('dragenter', { bubbles: true, cancelable: true })
dropzone.dispatchEvent(dragEnterEvent)
})
expect(getByTestId('dragging').textContent).toBe('true')
})
it('should handle dragover event', async () => {
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={defaultOptions} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dragOverEvent = new Event('dragover', { bubbles: true, cancelable: true })
dropzone.dispatchEvent(dragOverEvent)
})
expect(dropzone).toBeInTheDocument()
})
it('should set dragging false on dragleave from drag overlay', async () => {
const { getByTestId, queryByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={defaultOptions} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dragEnterEvent = new Event('dragenter', { bubbles: true, cancelable: true })
dropzone.dispatchEvent(dragEnterEvent)
})
expect(getByTestId('dragging').textContent).toBe('true')
const dragOverlay = queryByTestId('drag-overlay')
if (dragOverlay) {
await act(async () => {
const dragLeaveEvent = new Event('dragleave', { bubbles: true, cancelable: true })
Object.defineProperty(dragLeaveEvent, 'target', { value: dragOverlay })
dropzone.dispatchEvent(dragLeaveEvent)
})
}
})
it('should handle drop with files', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id' })
const prepareFileList = vi.fn()
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: [{
getAsFile: () => mockFile,
webkitGetAsEntry: () => null,
}],
},
})
dropzone.dispatchEvent(dropEvent)
})
await waitFor(() => {
expect(prepareFileList).toHaveBeenCalled()
})
})
it('should handle drop without dataTransfer', async () => {
const prepareFileList = vi.fn()
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', { value: null })
dropzone.dispatchEvent(dropEvent)
})
expect(prepareFileList).not.toHaveBeenCalled()
})
it('should limit to single file on drop when supportBatchUpload is false', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id' })
const prepareFileList = vi.fn()
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, supportBatchUpload: false, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
const files = [
new File(['content1'], 'test1.pdf', { type: 'application/pdf' }),
new File(['content2'], 'test2.pdf', { type: 'application/pdf' }),
]
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: files.map(f => ({
getAsFile: () => f,
webkitGetAsEntry: () => null,
})),
},
})
dropzone.dispatchEvent(dropEvent)
})
await waitFor(() => {
if (prepareFileList.mock.calls.length > 0) {
const calledFiles = prepareFileList.mock.calls[0][0]
expect(calledFiles.length).toBe(1)
}
})
})
it('should handle drop with FileSystemFileEntry', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id' })
const prepareFileList = vi.fn()
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: [{
getAsFile: () => mockFile,
webkitGetAsEntry: () => ({
isFile: true,
isDirectory: false,
file: (callback: (file: File) => void) => callback(mockFile),
}),
}],
},
})
dropzone.dispatchEvent(dropEvent)
})
await waitFor(() => {
expect(prepareFileList).toHaveBeenCalled()
})
})
it('should handle drop with FileSystemDirectoryEntry', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id' })
const prepareFileList = vi.fn()
const mockFile = new File(['content'], 'nested.pdf', { type: 'application/pdf' })
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
let callCount = 0
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: [{
getAsFile: () => null,
webkitGetAsEntry: () => ({
isFile: false,
isDirectory: true,
name: 'folder',
createReader: () => ({
readEntries: (callback: (entries: Array<{ isFile: boolean, isDirectory: boolean, name?: string, file?: (cb: (f: File) => void) => void }>) => void) => {
// First call returns file entry, second call returns empty (signals end)
if (callCount === 0) {
callCount++
callback([{
isFile: true,
isDirectory: false,
name: 'nested.pdf',
file: (cb: (f: File) => void) => cb(mockFile),
}])
}
else {
callback([])
}
},
}),
}),
}],
},
})
dropzone.dispatchEvent(dropEvent)
})
await waitFor(() => {
expect(prepareFileList).toHaveBeenCalled()
})
})
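// Editorial sketch (not part of this PR): the directory-drop mock above hand-rolls the
// FileSystemDirectoryEntry contract that the hook's traverseFileEntry relies on -- readEntries must be
// called repeatedly, and an empty batch signals the end of the listing. A hypothetical factory like
// this could share that setup between tests; the name makeMockDirectoryEntry is illustrative only.
const makeMockDirectoryEntry = (name: string, files: File[]) => {
  let drained = false
  return {
    isFile: false,
    isDirectory: true,
    name,
    createReader: () => ({
      readEntries: (callback: (entries: Array<Record<string, unknown>>) => void) => {
        if (drained) {
          // An empty result set tells the reader loop to stop and resolve the collected files
          callback([])
          return
        }
        drained = true
        callback(files.map(file => ({
          isFile: true,
          isDirectory: false,
          name: file.name,
          file: (cb: (f: File) => void) => cb(file),
        })))
      },
    }),
  }
}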
it('should handle drop with empty directory', async () => {
const prepareFileList = vi.fn()
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: [{
getAsFile: () => null,
webkitGetAsEntry: () => ({
isFile: false,
isDirectory: true,
name: 'empty-folder',
createReader: () => ({
readEntries: (callback: (entries: never[]) => void) => {
callback([])
},
}),
}),
}],
},
})
dropzone.dispatchEvent(dropEvent)
})
// Should not prepare file list if no valid files
await new Promise(resolve => setTimeout(resolve, 100))
expect(prepareFileList).not.toHaveBeenCalled()
})
it('should handle entry that is neither file nor directory', async () => {
const prepareFileList = vi.fn()
const { getByTestId } = await act(async () =>
render(
<ToastContext.Provider value={{ notify: mockNotify, close: mockClose }}>
<TestDropzone options={{ ...defaultOptions, prepareFileList }} />
</ToastContext.Provider>,
),
)
const dropzone = getByTestId('dropzone')
await act(async () => {
const dropEvent = new Event('drop', { bubbles: true, cancelable: true }) as Event & { dataTransfer: DataTransfer | null }
Object.defineProperty(dropEvent, 'dataTransfer', {
value: {
items: [{
getAsFile: () => null,
webkitGetAsEntry: () => ({
isFile: false,
isDirectory: false,
}),
}],
},
})
dropzone.dispatchEvent(dropEvent)
})
// Should not throw and should not prepare any files for an unrecognized entry
await new Promise(resolve => setTimeout(resolve, 100))
expect(prepareFileList).not.toHaveBeenCalled()
})
})
describe('file upload', () => {
it('should call upload with correct parameters', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id', name: 'test.pdf' })
const onFileUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileUpdate }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
expect(mockUpload).toHaveBeenCalled()
})
})
it('should update progress during upload', async () => {
let progressCallback: ((e: ProgressEvent) => void) | undefined
mockUpload.mockImplementation(async (options: { onprogress: (e: ProgressEvent) => void }) => {
progressCallback = options.onprogress
return { id: 'uploaded-id' }
})
const onFileUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileUpdate }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
expect(mockUpload).toHaveBeenCalled()
})
expect(progressCallback).toBeDefined()
act(() => {
progressCallback!({
lengthComputable: true,
loaded: 50,
total: 100,
} as ProgressEvent)
})
expect(onFileUpdate).toHaveBeenCalled()
})
it('should handle upload error', async () => {
mockUpload.mockRejectedValue(new Error('Upload failed'))
const onFileUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileUpdate }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
expect(mockNotify).toHaveBeenCalledWith(
expect.objectContaining({ type: 'error' }),
)
})
})
it('should update file with PROGRESS_COMPLETE on success', async () => {
mockUpload.mockResolvedValue({ id: 'uploaded-id', name: 'test.pdf' })
const onFileUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileUpdate }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
const completeCalls = onFileUpdate.mock.calls.filter(
([, progress]) => progress === PROGRESS_COMPLETE,
)
expect(completeCalls.length).toBeGreaterThan(0)
})
})
it('should update file with PROGRESS_ERROR on failure', async () => {
mockUpload.mockRejectedValue(new Error('Upload failed'))
const onFileUpdate = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, onFileUpdate }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
const errorCalls = onFileUpdate.mock.calls.filter(
([, progress]) => progress === PROGRESS_ERROR,
)
expect(errorCalls.length).toBeGreaterThan(0)
})
})
})
describe('file count validation', () => {
it('should reject when total files exceed limit', () => {
const existingFiles: FileItem[] = Array.from({ length: 8 }, (_, i) => ({
fileID: `existing-${i}`,
file: { name: `existing-${i}.pdf`, size: 1024 } as CustomFile,
progress: 100,
}))
const { result } = renderHook(
() => useFileUpload({
...defaultOptions,
fileList: existingFiles,
}),
{ wrapper: createWrapper() },
)
const files = Array.from({ length: 5 }, (_, i) =>
new File(['content'], `new-${i}.pdf`, { type: 'application/pdf' }))
const event = {
target: { files },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
expect(mockNotify).toHaveBeenCalledWith(
expect.objectContaining({ type: 'error' }),
)
})
})
describe('progress constants', () => {
it('should use PROGRESS_NOT_STARTED for new files', async () => {
mockUpload.mockResolvedValue({ id: 'file-id' })
const prepareFileList = vi.fn()
const { result } = renderHook(
() => useFileUpload({ ...defaultOptions, prepareFileList }),
{ wrapper: createWrapper() },
)
const mockFile = new File(['content'], 'test.pdf', { type: 'application/pdf' })
const event = {
target: { files: [mockFile] },
} as unknown as React.ChangeEvent<HTMLInputElement>
act(() => {
result.current.fileChangeHandle(event)
})
await waitFor(() => {
expect(prepareFileList).toHaveBeenCalled()
const files = prepareFileList.mock.calls[0][0]
expect(files[0].progress).toBe(PROGRESS_NOT_STARTED)
})
})
})
})

View File

@@ -0,0 +1,351 @@
'use client'
import type { RefObject } from 'react'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import { getFileUploadErrorMessage } from '@/app/components/base/file-uploader/utils'
import { ToastContext } from '@/app/components/base/toast'
import { IS_CE_EDITION } from '@/config'
import { useLocale } from '@/context/i18n'
import { LanguagesSupported } from '@/i18n-config/language'
import { upload } from '@/service/base'
import { useFileSupportTypes, useFileUploadConfig } from '@/service/use-common'
import { getFileExtension } from '@/utils/format'
import { PROGRESS_COMPLETE, PROGRESS_ERROR, PROGRESS_NOT_STARTED } from '../constants'
export type FileUploadConfig = {
file_size_limit: number
batch_count_limit: number
file_upload_limit: number
}
export type UseFileUploadOptions = {
fileList: FileItem[]
prepareFileList: (files: FileItem[]) => void
onFileUpdate: (fileItem: FileItem, progress: number, list: FileItem[]) => void
onFileListUpdate?: (files: FileItem[]) => void
onPreview: (file: File) => void
supportBatchUpload?: boolean
/**
* Optional list of allowed file extensions. If not provided, fetches from API.
* Pass this when you need custom extension filtering instead of using the global config.
*/
allowedExtensions?: string[]
}
export type UseFileUploadReturn = {
// Refs
dropRef: RefObject<HTMLDivElement | null>
dragRef: RefObject<HTMLDivElement | null>
fileUploaderRef: RefObject<HTMLInputElement | null>
// State
dragging: boolean
// Config
fileUploadConfig: FileUploadConfig
acceptTypes: string[]
supportTypesShowNames: string
hideUpload: boolean
// Handlers
selectHandle: () => void
fileChangeHandle: (e: React.ChangeEvent<HTMLInputElement>) => void
removeFile: (fileID: string) => void
handlePreview: (file: File) => void
}
type FileWithPath = {
relativePath?: string
} & File
export const useFileUpload = ({
fileList,
prepareFileList,
onFileUpdate,
onFileListUpdate,
onPreview,
supportBatchUpload = false,
allowedExtensions,
}: UseFileUploadOptions): UseFileUploadReturn => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const locale = useLocale()
const [dragging, setDragging] = useState(false)
const dropRef = useRef<HTMLDivElement>(null)
const dragRef = useRef<HTMLDivElement>(null)
const fileUploaderRef = useRef<HTMLInputElement>(null)
const fileListRef = useRef<FileItem[]>([])
// In single-file mode, hide the dropzone once a file has already been added
const hideUpload = !supportBatchUpload && fileList.length > 0
const { data: fileUploadConfigResponse } = useFileUploadConfig()
const { data: supportFileTypesResponse } = useFileSupportTypes()
// Use provided allowedExtensions or fetch from API
const supportTypes = useMemo(
() => allowedExtensions ?? supportFileTypesResponse?.allowed_extensions ?? [],
[allowedExtensions, supportFileTypesResponse?.allowed_extensions],
)
const supportTypesShowNames = useMemo(() => {
const extensionMap: { [key: string]: string } = {
md: 'markdown',
pptx: 'pptx',
htm: 'html',
xlsx: 'xlsx',
docx: 'docx',
}
return [...supportTypes]
.map(item => extensionMap[item] || item)
.map(item => item.toLowerCase())
.filter((item, index, self) => self.indexOf(item) === index)
.map(item => item.toUpperCase())
.join(locale !== LanguagesSupported[1] ? ', ' : '、 ')
}, [supportTypes, locale])
const acceptTypes = useMemo(() => supportTypes.map((ext: string) => `.${ext}`), [supportTypes])
const fileUploadConfig = useMemo(() => ({
file_size_limit: fileUploadConfigResponse?.file_size_limit ?? 15,
batch_count_limit: supportBatchUpload ? (fileUploadConfigResponse?.batch_count_limit ?? 5) : 1,
file_upload_limit: supportBatchUpload ? (fileUploadConfigResponse?.file_upload_limit ?? 5) : 1,
}), [fileUploadConfigResponse, supportBatchUpload])
const isValid = useCallback((file: File) => {
const { size } = file
const ext = `.${getFileExtension(file.name)}`
const isValidType = acceptTypes.includes(ext.toLowerCase())
if (!isValidType)
notify({ type: 'error', message: t('stepOne.uploader.validation.typeError', { ns: 'datasetCreation' }) })
const isValidSize = size <= fileUploadConfig.file_size_limit * 1024 * 1024
if (!isValidSize)
notify({ type: 'error', message: t('stepOne.uploader.validation.size', { ns: 'datasetCreation', size: fileUploadConfig.file_size_limit }) })
return isValidType && isValidSize
}, [fileUploadConfig, notify, t, acceptTypes])
const fileUpload = useCallback(async (fileItem: FileItem): Promise<FileItem> => {
const formData = new FormData()
formData.append('file', fileItem.file)
const onProgress = (e: ProgressEvent) => {
if (e.lengthComputable) {
const percent = Math.floor(e.loaded / e.total * 100)
onFileUpdate(fileItem, percent, fileListRef.current)
}
}
return upload({
xhr: new XMLHttpRequest(),
data: formData,
onprogress: onProgress,
}, false, undefined, '?source=datasets')
.then((res) => {
const completeFile = {
fileID: fileItem.fileID,
file: res as unknown as File,
progress: PROGRESS_NOT_STARTED,
}
const index = fileListRef.current.findIndex(item => item.fileID === fileItem.fileID)
fileListRef.current[index] = completeFile
onFileUpdate(completeFile, PROGRESS_COMPLETE, fileListRef.current)
return Promise.resolve({ ...completeFile })
})
.catch((e) => {
const errorMessage = getFileUploadErrorMessage(e, t('stepOne.uploader.failed', { ns: 'datasetCreation' }), t)
notify({ type: 'error', message: errorMessage })
onFileUpdate(fileItem, PROGRESS_ERROR, fileListRef.current)
return Promise.resolve({ ...fileItem })
})
.finally()
}, [notify, onFileUpdate, t])
const uploadBatchFiles = useCallback((bFiles: FileItem[]) => {
bFiles.forEach(bf => (bf.progress = 0))
return Promise.all(bFiles.map(fileUpload))
}, [fileUpload])
const uploadMultipleFiles = useCallback(async (files: FileItem[]) => {
// Upload in chunks of batch_count_limit, awaiting each batch before starting the next
const batchCountLimit = fileUploadConfig.batch_count_limit
const length = files.length
let start = 0
let end = 0
while (start < length) {
if (start + batchCountLimit > length)
end = length
else
end = start + batchCountLimit
const bFiles = files.slice(start, end)
await uploadBatchFiles(bFiles)
start = end
}
}, [fileUploadConfig, uploadBatchFiles])
const initialUpload = useCallback((files: File[]) => {
const filesCountLimit = fileUploadConfig.file_upload_limit
if (!files.length)
return false
if (files.length + fileList.length > filesCountLimit && !IS_CE_EDITION) {
notify({ type: 'error', message: t('stepOne.uploader.validation.filesNumber', { ns: 'datasetCreation', filesNumber: filesCountLimit }) })
return false
}
const preparedFiles = files.map((file, index) => ({
fileID: `file${index}-${Date.now()}`,
file,
progress: PROGRESS_NOT_STARTED,
}))
const newFiles = [...fileListRef.current, ...preparedFiles]
prepareFileList(newFiles)
fileListRef.current = newFiles
uploadMultipleFiles(preparedFiles)
}, [prepareFileList, uploadMultipleFiles, notify, t, fileList, fileUploadConfig])
const traverseFileEntry = useCallback(
(entry: FileSystemEntry, prefix = ''): Promise<FileWithPath[]> => {
return new Promise((resolve) => {
if (entry.isFile) {
(entry as FileSystemFileEntry).file((file: FileWithPath) => {
file.relativePath = `${prefix}${file.name}`
resolve([file])
})
}
else if (entry.isDirectory) {
const reader = (entry as FileSystemDirectoryEntry).createReader()
const entries: FileSystemEntry[] = []
const read = () => {
reader.readEntries(async (results: FileSystemEntry[]) => {
if (!results.length) {
const files = await Promise.all(
entries.map(ent =>
traverseFileEntry(ent, `${prefix}${entry.name}/`),
),
)
resolve(files.flat())
}
else {
entries.push(...results)
read()
}
})
}
read()
}
else {
resolve([])
}
})
},
[],
)
const handleDragEnter = useCallback((e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
if (e.target !== dragRef.current)
setDragging(true)
}, [])
const handleDragOver = useCallback((e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
}, [])
const handleDragLeave = useCallback((e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
if (e.target === dragRef.current)
setDragging(false)
}, [])
const handleDrop = useCallback(
async (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
setDragging(false)
if (!e.dataTransfer)
return
const nested = await Promise.all(
Array.from(e.dataTransfer.items).map((it) => {
const entry = (it as DataTransferItem & { webkitGetAsEntry?: () => FileSystemEntry | null }).webkitGetAsEntry?.()
if (entry)
return traverseFileEntry(entry)
const f = it.getAsFile?.()
return f ? Promise.resolve([f as FileWithPath]) : Promise.resolve([])
}),
)
let files = nested.flat()
if (!supportBatchUpload)
files = files.slice(0, 1)
files = files.slice(0, fileUploadConfig.batch_count_limit)
const valid = files.filter(isValid)
initialUpload(valid)
},
[initialUpload, isValid, supportBatchUpload, traverseFileEntry, fileUploadConfig],
)
const selectHandle = useCallback(() => {
if (fileUploaderRef.current)
fileUploaderRef.current.click()
}, [])
const removeFile = useCallback((fileID: string) => {
if (fileUploaderRef.current)
fileUploaderRef.current.value = ''
fileListRef.current = fileListRef.current.filter(item => item.fileID !== fileID)
onFileListUpdate?.([...fileListRef.current])
}, [onFileListUpdate])
const fileChangeHandle = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
let files = Array.from(e.target.files ?? []) as File[]
files = files.slice(0, fileUploadConfig.batch_count_limit)
initialUpload(files.filter(isValid))
}, [isValid, initialUpload, fileUploadConfig])
const handlePreview = useCallback((file: File) => {
if (file?.id)
onPreview(file)
}, [onPreview])
useEffect(() => {
const dropArea = dropRef.current
dropArea?.addEventListener('dragenter', handleDragEnter)
dropArea?.addEventListener('dragover', handleDragOver)
dropArea?.addEventListener('dragleave', handleDragLeave)
dropArea?.addEventListener('drop', handleDrop)
return () => {
dropArea?.removeEventListener('dragenter', handleDragEnter)
dropArea?.removeEventListener('dragover', handleDragOver)
dropArea?.removeEventListener('dragleave', handleDragLeave)
dropArea?.removeEventListener('drop', handleDrop)
}
}, [handleDragEnter, handleDragOver, handleDragLeave, handleDrop])
return {
// Refs
dropRef,
dragRef,
fileUploaderRef,
// State
dragging,
// Config
fileUploadConfig,
acceptTypes,
supportTypesShowNames,
hideUpload,
// Handlers
selectHandle,
fileChangeHandle,
removeFile,
handlePreview,
}
}
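A minimal consumption sketch for the hook above, assuming a hypothetical CsvOnlyUploader component that passes allowedExtensions instead of the API-provided list, keeps its FileItem list in local state, and wires the returned refs and handlers to the DOM. Judging from the pre-refactor component further down, the progress constants appear to map to -1 (not started), 100 (complete) and -2 (error); everything outside the hook's own API here is illustrative, not part of the PR.

'use client'
import { useState } from 'react'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { useFileUpload } from './use-file-upload'

// Hypothetical consumer: restricts uploads to CSV files via allowedExtensions
const CsvOnlyUploader = ({ onPreview }: { onPreview: (file: File) => void }) => {
  const [fileList, setFileList] = useState<FileItem[]>([])
  const {
    dropRef,
    dragRef,
    fileUploaderRef,
    dragging,
    acceptTypes,
    hideUpload,
    selectHandle,
    fileChangeHandle,
    removeFile,
    handlePreview,
  } = useFileUpload({
    fileList,
    prepareFileList: setFileList,                                     // replace the list when new files are prepared
    onFileUpdate: (_item, _progress, list) => setFileList([...list]), // copy so progress changes trigger a re-render
    onFileListUpdate: setFileList,
    onPreview,
    supportBatchUpload: true,
    allowedExtensions: ['csv'],                                       // custom filtering instead of the global config
  })

  return (
    <div ref={dropRef}>
      {!hideUpload && (
        <input
          ref={fileUploaderRef}
          className="hidden"
          type="file"
          multiple
          accept={acceptTypes.join(',')}
          onChange={fileChangeHandle}
        />
      )}
      <button onClick={selectHandle}>Browse</button>
      {dragging && <div ref={dragRef} />}
      {fileList.map(item => (
        <div key={item.fileID} onClick={() => handlePreview(item.file)}>
          {item.file.name}
          <button onClick={() => removeFile(item.fileID)}>Remove</button>
        </div>
      ))}
    </div>
  )
}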

View File

@@ -0,0 +1,278 @@
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { fireEvent, render, screen } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { PROGRESS_NOT_STARTED } from './constants'
import FileUploader from './index'
// Mock react-i18next
vi.mock('react-i18next', () => ({
useTranslation: () => ({
t: (key: string) => {
const translations: Record<string, string> = {
'stepOne.uploader.title': 'Upload Files',
'stepOne.uploader.button': 'Drag and drop files, or',
'stepOne.uploader.buttonSingleFile': 'Drag and drop file, or',
'stepOne.uploader.browse': 'Browse',
'stepOne.uploader.tip': 'Supports various file types',
}
return translations[key] || key
},
}),
}))
// Mock ToastContext
const mockNotify = vi.fn()
vi.mock('use-context-selector', async () => {
const actual = await vi.importActual<typeof import('use-context-selector')>('use-context-selector')
return {
...actual,
useContext: vi.fn(() => ({ notify: mockNotify })),
}
})
// Mock services
vi.mock('@/service/base', () => ({
upload: vi.fn().mockResolvedValue({ id: 'uploaded-id' }),
}))
vi.mock('@/service/use-common', () => ({
useFileUploadConfig: () => ({
data: { file_size_limit: 15, batch_count_limit: 5, file_upload_limit: 10 },
}),
useFileSupportTypes: () => ({
data: { allowed_extensions: ['pdf', 'docx', 'txt'] },
}),
}))
vi.mock('@/context/i18n', () => ({
useLocale: () => 'en-US',
}))
vi.mock('@/i18n-config/language', () => ({
LanguagesSupported: ['en-US', 'zh-Hans'],
}))
vi.mock('@/config', () => ({
IS_CE_EDITION: false,
}))
vi.mock('@/app/components/base/file-uploader/utils', () => ({
getFileUploadErrorMessage: () => 'Upload error',
}))
// Mock theme
vi.mock('@/hooks/use-theme', () => ({
default: () => ({ theme: 'light' }),
}))
vi.mock('@/types/app', () => ({
Theme: { dark: 'dark', light: 'light' },
}))
// Mock DocumentFileIcon - uses relative path from file-list-item.tsx
vi.mock('@/app/components/datasets/common/document-file-icon', () => ({
default: ({ extension }: { extension: string }) => <div data-testid="document-icon">{extension}</div>,
}))
// Mock SimplePieChart
vi.mock('next/dynamic', () => ({
default: () => {
const Component = ({ percentage }: { percentage: number }) => (
<div data-testid="pie-chart">
{percentage}
%
</div>
)
return Component
},
}))
describe('FileUploader', () => {
const createMockFile = (overrides: Partial<File> = {}): File => ({
name: 'test.pdf',
size: 1024,
type: 'application/pdf',
...overrides,
} as File)
const createMockFileItem = (overrides: Partial<FileItem> = {}): FileItem => ({
fileID: `file-${Date.now()}`,
file: createMockFile(overrides.file as Partial<File>),
progress: PROGRESS_NOT_STARTED,
...overrides,
})
const defaultProps = {
fileList: [] as FileItem[],
prepareFileList: vi.fn(),
onFileUpdate: vi.fn(),
onFileListUpdate: vi.fn(),
onPreview: vi.fn(),
supportBatchUpload: true,
}
beforeEach(() => {
vi.clearAllMocks()
})
describe('rendering', () => {
it('should render the component', () => {
render(<FileUploader {...defaultProps} />)
expect(screen.getByText('Upload Files')).toBeInTheDocument()
})
it('should render dropzone when no files', () => {
render(<FileUploader {...defaultProps} />)
expect(screen.getByText(/Drag and drop files/i)).toBeInTheDocument()
})
it('should render browse button', () => {
render(<FileUploader {...defaultProps} />)
expect(screen.getByText('Browse')).toBeInTheDocument()
})
it('should apply custom title className', () => {
render(<FileUploader {...defaultProps} titleClassName="custom-class" />)
const title = screen.getByText('Upload Files')
expect(title).toHaveClass('custom-class')
})
})
describe('file list rendering', () => {
it('should render file items when fileList has items', () => {
const fileList = [
createMockFileItem({ file: createMockFile({ name: 'file1.pdf' }) }),
createMockFileItem({ file: createMockFile({ name: 'file2.pdf' }) }),
]
render(<FileUploader {...defaultProps} fileList={fileList} />)
expect(screen.getByText('file1.pdf')).toBeInTheDocument()
expect(screen.getByText('file2.pdf')).toBeInTheDocument()
})
it('should render document icons for files', () => {
const fileList = [createMockFileItem()]
render(<FileUploader {...defaultProps} fileList={fileList} />)
expect(screen.getByTestId('document-icon')).toBeInTheDocument()
})
})
describe('batch upload mode', () => {
it('should show dropzone with batch upload enabled', () => {
render(<FileUploader {...defaultProps} supportBatchUpload={true} />)
expect(screen.getByText(/Drag and drop files/i)).toBeInTheDocument()
})
it('should show single file text when batch upload disabled', () => {
render(<FileUploader {...defaultProps} supportBatchUpload={false} />)
expect(screen.getByText(/Drag and drop file/i)).toBeInTheDocument()
})
it('should hide dropzone when not batch upload and has files', () => {
const fileList = [createMockFileItem()]
render(<FileUploader {...defaultProps} supportBatchUpload={false} fileList={fileList} />)
expect(screen.queryByText(/Drag and drop/i)).not.toBeInTheDocument()
})
})
describe('event handlers', () => {
it('should handle file preview click', () => {
const onPreview = vi.fn()
const fileItem = createMockFileItem({
file: createMockFile({ id: 'file-id' } as Partial<File>),
})
const { container } = render(<FileUploader {...defaultProps} fileList={[fileItem]} onPreview={onPreview} />)
// Find the file list item container by its class pattern
const fileElement = container.querySelector('[class*="flex h-12"]')
if (fileElement)
fireEvent.click(fileElement)
expect(onPreview).toHaveBeenCalledWith(fileItem.file)
})
it('should handle file remove click', () => {
const onFileListUpdate = vi.fn()
const fileItem = createMockFileItem()
const { container } = render(
<FileUploader {...defaultProps} fileList={[fileItem]} onFileListUpdate={onFileListUpdate} />,
)
// Find the delete button (the span with cursor-pointer containing the icon)
const deleteButtons = container.querySelectorAll('[class*="cursor-pointer"]')
// Get the last one which should be the delete button (not the browse label)
const deleteButton = deleteButtons[deleteButtons.length - 1]
if (deleteButton)
fireEvent.click(deleteButton)
expect(onFileListUpdate).toHaveBeenCalled()
})
it('should handle browse button click', () => {
render(<FileUploader {...defaultProps} />)
// The browse label should trigger file input click
const browseLabel = screen.getByText('Browse')
expect(browseLabel).toHaveClass('cursor-pointer')
})
})
describe('upload progress', () => {
it('should show progress chart for uploading files', () => {
const fileItem = createMockFileItem({ progress: 50 })
render(<FileUploader {...defaultProps} fileList={[fileItem]} />)
expect(screen.getByTestId('pie-chart')).toBeInTheDocument()
expect(screen.getByText('50%')).toBeInTheDocument()
})
it('should not show progress chart for completed files', () => {
const fileItem = createMockFileItem({ progress: 100 })
render(<FileUploader {...defaultProps} fileList={[fileItem]} />)
expect(screen.queryByTestId('pie-chart')).not.toBeInTheDocument()
})
it('should not show progress chart for not started files', () => {
const fileItem = createMockFileItem({ progress: PROGRESS_NOT_STARTED })
render(<FileUploader {...defaultProps} fileList={[fileItem]} />)
expect(screen.queryByTestId('pie-chart')).not.toBeInTheDocument()
})
})
describe('multiple files', () => {
it('should render all files in the list', () => {
const fileList = [
createMockFileItem({ fileID: 'f1', file: createMockFile({ name: 'doc1.pdf' }) }),
createMockFileItem({ fileID: 'f2', file: createMockFile({ name: 'doc2.docx' }) }),
createMockFileItem({ fileID: 'f3', file: createMockFile({ name: 'doc3.txt' }) }),
]
render(<FileUploader {...defaultProps} fileList={fileList} />)
expect(screen.getByText('doc1.pdf')).toBeInTheDocument()
expect(screen.getByText('doc2.docx')).toBeInTheDocument()
expect(screen.getByText('doc3.txt')).toBeInTheDocument()
})
})
describe('styling', () => {
it('should have correct container width', () => {
const { container } = render(<FileUploader {...defaultProps} />)
const wrapper = container.firstChild as HTMLElement
expect(wrapper).toHaveClass('w-[640px]')
})
it('should have proper spacing', () => {
const { container } = render(<FileUploader {...defaultProps} />)
const wrapper = container.firstChild as HTMLElement
expect(wrapper).toHaveClass('mb-5')
})
})
})

View File

@@ -1,23 +1,10 @@
'use client'
import type { CustomFile as File, FileItem } from '@/models/datasets'
import { RiDeleteBinLine, RiUploadCloud2Line } from '@remixicon/react'
import * as React from 'react'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import { getFileUploadErrorMessage } from '@/app/components/base/file-uploader/utils'
import SimplePieChart from '@/app/components/base/simple-pie-chart'
import { ToastContext } from '@/app/components/base/toast'
import { IS_CE_EDITION } from '@/config'
import { useLocale } from '@/context/i18n'
import useTheme from '@/hooks/use-theme'
import { LanguagesSupported } from '@/i18n-config/language'
import { upload } from '@/service/base'
import { useFileSupportTypes, useFileUploadConfig } from '@/service/use-common'
import { Theme } from '@/types/app'
import { cn } from '@/utils/classnames'
import DocumentFileIcon from '../../common/document-file-icon'
import FileListItem from './components/file-list-item'
import UploadDropzone from './components/upload-dropzone'
import { useFileUpload } from './hooks/use-file-upload'
type IFileUploaderProps = {
fileList: FileItem[]
@@ -39,358 +26,62 @@ const FileUploader = ({
supportBatchUpload = false,
}: IFileUploaderProps) => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const locale = useLocale()
const [dragging, setDragging] = useState(false)
const dropRef = useRef<HTMLDivElement>(null)
const dragRef = useRef<HTMLDivElement>(null)
const fileUploader = useRef<HTMLInputElement>(null)
const hideUpload = !supportBatchUpload && fileList.length > 0
const { data: fileUploadConfigResponse } = useFileUploadConfig()
const { data: supportFileTypesResponse } = useFileSupportTypes()
const supportTypes = supportFileTypesResponse?.allowed_extensions || []
const supportTypesShowNames = (() => {
const extensionMap: { [key: string]: string } = {
md: 'markdown',
pptx: 'pptx',
htm: 'html',
xlsx: 'xlsx',
docx: 'docx',
}
return [...supportTypes]
.map(item => extensionMap[item] || item) // map to standardized extension
.map(item => item.toLowerCase()) // convert to lower case
.filter((item, index, self) => self.indexOf(item) === index) // remove duplicates
.map(item => item.toUpperCase()) // convert to upper case
.join(locale !== LanguagesSupported[1] ? ', ' : '、 ')
})()
const ACCEPTS = supportTypes.map((ext: string) => `.${ext}`)
const fileUploadConfig = useMemo(() => ({
file_size_limit: fileUploadConfigResponse?.file_size_limit ?? 15,
batch_count_limit: supportBatchUpload ? (fileUploadConfigResponse?.batch_count_limit ?? 5) : 1,
file_upload_limit: supportBatchUpload ? (fileUploadConfigResponse?.file_upload_limit ?? 5) : 1,
}), [fileUploadConfigResponse, supportBatchUpload])
const fileListRef = useRef<FileItem[]>([])
// utils
const getFileType = (currentFile: File) => {
if (!currentFile)
return ''
const arr = currentFile.name.split('.')
return arr[arr.length - 1]
}
const getFileSize = (size: number) => {
if (size / 1024 < 10)
return `${(size / 1024).toFixed(2)}KB`
return `${(size / 1024 / 1024).toFixed(2)}MB`
}
const isValid = useCallback((file: File) => {
const { size } = file
const ext = `.${getFileType(file)}`
const isValidType = ACCEPTS.includes(ext.toLowerCase())
if (!isValidType)
notify({ type: 'error', message: t('stepOne.uploader.validation.typeError', { ns: 'datasetCreation' }) })
const isValidSize = size <= fileUploadConfig.file_size_limit * 1024 * 1024
if (!isValidSize)
notify({ type: 'error', message: t('stepOne.uploader.validation.size', { ns: 'datasetCreation', size: fileUploadConfig.file_size_limit }) })
return isValidType && isValidSize
}, [fileUploadConfig, notify, t, ACCEPTS])
const fileUpload = useCallback(async (fileItem: FileItem): Promise<FileItem> => {
const formData = new FormData()
formData.append('file', fileItem.file)
const onProgress = (e: ProgressEvent) => {
if (e.lengthComputable) {
const percent = Math.floor(e.loaded / e.total * 100)
onFileUpdate(fileItem, percent, fileListRef.current)
}
}
return upload({
xhr: new XMLHttpRequest(),
data: formData,
onprogress: onProgress,
}, false, undefined, '?source=datasets')
.then((res) => {
const completeFile = {
fileID: fileItem.fileID,
file: res as unknown as File,
progress: -1,
}
const index = fileListRef.current.findIndex(item => item.fileID === fileItem.fileID)
fileListRef.current[index] = completeFile
onFileUpdate(completeFile, 100, fileListRef.current)
return Promise.resolve({ ...completeFile })
})
.catch((e) => {
const errorMessage = getFileUploadErrorMessage(e, t('stepOne.uploader.failed', { ns: 'datasetCreation' }), t)
notify({ type: 'error', message: errorMessage })
onFileUpdate(fileItem, -2, fileListRef.current)
return Promise.resolve({ ...fileItem })
})
.finally()
}, [fileListRef, notify, onFileUpdate, t])
const uploadBatchFiles = useCallback((bFiles: FileItem[]) => {
bFiles.forEach(bf => (bf.progress = 0))
return Promise.all(bFiles.map(fileUpload))
}, [fileUpload])
const uploadMultipleFiles = useCallback(async (files: FileItem[]) => {
const batchCountLimit = fileUploadConfig.batch_count_limit
const length = files.length
let start = 0
let end = 0
while (start < length) {
if (start + batchCountLimit > length)
end = length
else
end = start + batchCountLimit
const bFiles = files.slice(start, end)
await uploadBatchFiles(bFiles)
start = end
}
}, [fileUploadConfig, uploadBatchFiles])
const initialUpload = useCallback((files: File[]) => {
const filesCountLimit = fileUploadConfig.file_upload_limit
if (!files.length)
return false
if (files.length + fileList.length > filesCountLimit && !IS_CE_EDITION) {
notify({ type: 'error', message: t('stepOne.uploader.validation.filesNumber', { ns: 'datasetCreation', filesNumber: filesCountLimit }) })
return false
}
const preparedFiles = files.map((file, index) => ({
fileID: `file${index}-${Date.now()}`,
file,
progress: -1,
}))
const newFiles = [...fileListRef.current, ...preparedFiles]
prepareFileList(newFiles)
fileListRef.current = newFiles
uploadMultipleFiles(preparedFiles)
}, [prepareFileList, uploadMultipleFiles, notify, t, fileList, fileUploadConfig])
const handleDragEnter = (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
if (e.target !== dragRef.current)
setDragging(true)
}
const handleDragOver = (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
}
const handleDragLeave = (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
if (e.target === dragRef.current)
setDragging(false)
}
type FileWithPath = {
relativePath?: string
} & File
const traverseFileEntry = useCallback(
(entry: any, prefix = ''): Promise<FileWithPath[]> => {
return new Promise((resolve) => {
if (entry.isFile) {
entry.file((file: FileWithPath) => {
file.relativePath = `${prefix}${file.name}`
resolve([file])
})
}
else if (entry.isDirectory) {
const reader = entry.createReader()
const entries: any[] = []
const read = () => {
reader.readEntries(async (results: FileSystemEntry[]) => {
if (!results.length) {
const files = await Promise.all(
entries.map(ent =>
traverseFileEntry(ent, `${prefix}${entry.name}/`),
),
)
resolve(files.flat())
}
else {
entries.push(...results)
read()
}
})
}
read()
}
else {
resolve([])
}
})
},
[],
)
const handleDrop = useCallback(
async (e: DragEvent) => {
e.preventDefault()
e.stopPropagation()
setDragging(false)
if (!e.dataTransfer)
return
const nested = await Promise.all(
Array.from(e.dataTransfer.items).map((it) => {
const entry = (it as any).webkitGetAsEntry?.()
if (entry)
return traverseFileEntry(entry)
const f = it.getAsFile?.()
return f ? Promise.resolve([f]) : Promise.resolve([])
}),
)
let files = nested.flat()
if (!supportBatchUpload)
files = files.slice(0, 1)
files = files.slice(0, fileUploadConfig.batch_count_limit)
const valid = files.filter(isValid)
initialUpload(valid)
},
[initialUpload, isValid, supportBatchUpload, traverseFileEntry, fileUploadConfig],
)
const selectHandle = () => {
if (fileUploader.current)
fileUploader.current.click()
}
const removeFile = (fileID: string) => {
if (fileUploader.current)
fileUploader.current.value = ''
fileListRef.current = fileListRef.current.filter(item => item.fileID !== fileID)
onFileListUpdate?.([...fileListRef.current])
}
const fileChangeHandle = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
let files = Array.from(e.target.files ?? []) as File[]
files = files.slice(0, fileUploadConfig.batch_count_limit)
initialUpload(files.filter(isValid))
}, [isValid, initialUpload, fileUploadConfig])
const { theme } = useTheme()
const chartColor = useMemo(() => theme === Theme.dark ? '#5289ff' : '#296dff', [theme])
useEffect(() => {
dropRef.current?.addEventListener('dragenter', handleDragEnter)
dropRef.current?.addEventListener('dragover', handleDragOver)
dropRef.current?.addEventListener('dragleave', handleDragLeave)
dropRef.current?.addEventListener('drop', handleDrop)
return () => {
dropRef.current?.removeEventListener('dragenter', handleDragEnter)
dropRef.current?.removeEventListener('dragover', handleDragOver)
dropRef.current?.removeEventListener('dragleave', handleDragLeave)
dropRef.current?.removeEventListener('drop', handleDrop)
}
}, [handleDrop])
const {
dropRef,
dragRef,
fileUploaderRef,
dragging,
fileUploadConfig,
acceptTypes,
supportTypesShowNames,
hideUpload,
selectHandle,
fileChangeHandle,
removeFile,
handlePreview,
} = useFileUpload({
fileList,
prepareFileList,
onFileUpdate,
onFileListUpdate,
onPreview,
supportBatchUpload,
})
return (
<div className="mb-5 w-[640px]">
<div className={cn('mb-1 text-sm font-semibold leading-6 text-text-secondary', titleClassName)}>
{t('stepOne.uploader.title', { ns: 'datasetCreation' })}
</div>
{!hideUpload && (
<input
ref={fileUploader}
id="fileUploader"
className="hidden"
type="file"
multiple={supportBatchUpload}
accept={ACCEPTS.join(',')}
onChange={fileChangeHandle}
<UploadDropzone
dropRef={dropRef}
dragRef={dragRef}
fileUploaderRef={fileUploaderRef}
dragging={dragging}
supportBatchUpload={supportBatchUpload}
supportTypesShowNames={supportTypesShowNames}
fileUploadConfig={fileUploadConfig}
acceptTypes={acceptTypes}
onSelectFile={selectHandle}
onFileChange={fileChangeHandle}
/>
)}
<div className={cn('mb-1 text-sm font-semibold leading-6 text-text-secondary', titleClassName)}>{t('stepOne.uploader.title', { ns: 'datasetCreation' })}</div>
{!hideUpload && (
<div ref={dropRef} className={cn('relative mb-2 box-border flex min-h-20 max-w-[640px] flex-col items-center justify-center gap-1 rounded-xl border border-dashed border-components-dropzone-border bg-components-dropzone-bg px-4 py-3 text-xs leading-4 text-text-tertiary', dragging && 'border-components-dropzone-border-accent bg-components-dropzone-bg-accent')}>
<div className="flex min-h-5 items-center justify-center text-sm leading-4 text-text-secondary">
<RiUploadCloud2Line className="mr-2 size-5" />
<span>
{supportBatchUpload ? t('stepOne.uploader.button', { ns: 'datasetCreation' }) : t('stepOne.uploader.buttonSingleFile', { ns: 'datasetCreation' })}
{supportTypes.length > 0 && (
<label className="ml-1 cursor-pointer text-text-accent" onClick={selectHandle}>{t('stepOne.uploader.browse', { ns: 'datasetCreation' })}</label>
)}
</span>
</div>
<div>
{t('stepOne.uploader.tip', {
ns: 'datasetCreation',
size: fileUploadConfig.file_size_limit,
supportTypes: supportTypesShowNames,
batchCount: fileUploadConfig.batch_count_limit,
totalCount: fileUploadConfig.file_upload_limit,
})}
</div>
{dragging && <div ref={dragRef} className="absolute left-0 top-0 h-full w-full" />}
{fileList.length > 0 && (
<div className="max-w-[640px] cursor-default space-y-1">
{fileList.map(fileItem => (
<FileListItem
key={fileItem.fileID}
fileItem={fileItem}
onPreview={handlePreview}
onRemove={removeFile}
/>
))}
</div>
)}
<div className="max-w-[640px] cursor-default space-y-1">
{fileList.map((fileItem, index) => (
<div
key={`${fileItem.fileID}-${index}`}
onClick={() => fileItem.file?.id && onPreview(fileItem.file)}
className={cn(
'flex h-12 max-w-[640px] items-center rounded-lg border border-components-panel-border bg-components-panel-on-panel-item-bg text-xs leading-3 text-text-tertiary shadow-xs',
// 'border-state-destructive-border bg-state-destructive-hover',
)}
>
<div className="flex w-12 shrink-0 items-center justify-center">
<DocumentFileIcon
size="xl"
className="shrink-0"
name={fileItem.file.name}
extension={getFileType(fileItem.file)}
/>
</div>
<div className="flex shrink grow flex-col gap-0.5">
<div className="flex w-full">
<div className="w-0 grow truncate text-sm leading-4 text-text-secondary">{fileItem.file.name}</div>
</div>
<div className="w-full truncate leading-3 text-text-tertiary">
<span className="uppercase">{getFileType(fileItem.file)}</span>
<span className="px-1 text-text-quaternary">·</span>
<span>{getFileSize(fileItem.file.size)}</span>
{/* <span className='px-1 text-text-quaternary'>·</span>
<span>10k characters</span> */}
</div>
</div>
<div className="flex w-16 shrink-0 items-center justify-end gap-1 pr-3">
{/* <span className="flex justify-center items-center w-6 h-6 cursor-pointer">
<RiErrorWarningFill className='size-4 text-text-warning' />
</span> */}
{(fileItem.progress < 100 && fileItem.progress >= 0) && (
// <div className={s.percent}>{`${fileItem.progress}%`}</div>
<SimplePieChart percentage={fileItem.progress} stroke={chartColor} fill={chartColor} animationDuration={0} />
)}
<span
className="flex h-6 w-6 cursor-pointer items-center justify-center"
onClick={(e) => {
e.stopPropagation()
removeFile(fileItem.fileID)
}}
>
<RiDeleteBinLine className="size-4 text-text-tertiary" />
</span>
</div>
</div>
))}
</div>
</div>
)
}

View File

@@ -0,0 +1,262 @@
import type { SimpleDocumentDetail } from '@/models/datasets'
import { render } from '@testing-library/react'
import { describe, expect, it } from 'vitest'
import { DataSourceType } from '@/models/datasets'
import { DatasourceType } from '@/models/pipeline'
import DocumentSourceIcon from './document-source-icon'
const createMockDoc = (overrides: Record<string, unknown> = {}): SimpleDocumentDetail => ({
id: 'doc-1',
position: 1,
data_source_type: DataSourceType.FILE,
data_source_info: {},
data_source_detail_dict: {},
dataset_process_rule_id: 'rule-1',
dataset_id: 'dataset-1',
batch: 'batch-1',
name: 'test-document.txt',
created_from: 'web',
created_by: 'user-1',
created_at: Date.now(),
tokens: 100,
indexing_status: 'completed',
error: null,
enabled: true,
disabled_at: null,
disabled_by: null,
archived: false,
archived_reason: null,
archived_by: null,
archived_at: null,
updated_at: Date.now(),
doc_type: null,
doc_metadata: undefined,
doc_language: 'en',
display_status: 'available',
word_count: 100,
hit_count: 10,
doc_form: 'text_model',
...overrides,
}) as unknown as SimpleDocumentDetail
describe('DocumentSourceIcon', () => {
describe('Rendering', () => {
it('should render without crashing', () => {
const doc = createMockDoc()
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
})
describe('Local File Icon', () => {
it('should render FileTypeIcon for FILE data source type', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.FILE,
data_source_info: {
upload_file: { extension: 'pdf' },
},
})
const { container } = render(<DocumentSourceIcon doc={doc} fileType="pdf" />)
const icon = container.querySelector('svg, img')
expect(icon).toBeInTheDocument()
})
it('should render FileTypeIcon for localFile data source type', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.localFile,
created_from: 'rag-pipeline',
data_source_info: {
extension: 'docx',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
const icon = container.querySelector('svg, img')
expect(icon).toBeInTheDocument()
})
it('should use extension from upload_file for legacy data source', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.FILE,
created_from: 'web',
data_source_info: {
upload_file: { extension: 'txt' },
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should use fileType prop as fallback for extension', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.FILE,
created_from: 'web',
data_source_info: {},
})
const { container } = render(<DocumentSourceIcon doc={doc} fileType="csv" />)
expect(container.firstChild).toBeInTheDocument()
})
})
describe('Notion Icon', () => {
it('should render NotionIcon for NOTION data source type', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.NOTION,
created_from: 'web',
data_source_info: {
notion_page_icon: 'https://notion.so/icon.png',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should render NotionIcon for onlineDocument data source type', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDocument,
created_from: 'rag-pipeline',
data_source_info: {
page: { page_icon: 'https://notion.so/icon.png' },
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should use page_icon for rag-pipeline created documents', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.NOTION,
created_from: 'rag-pipeline',
data_source_info: {
page: { page_icon: 'https://notion.so/custom-icon.png' },
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
})
describe('Web Crawl Icon', () => {
it('should render globe icon for WEB data source type', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.WEB,
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
const icon = container.querySelector('svg')
expect(icon).toBeInTheDocument()
expect(icon).toHaveClass('mr-1.5')
expect(icon).toHaveClass('size-4')
})
it('should render globe icon for websiteCrawl data source type', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.websiteCrawl,
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
const icon = container.querySelector('svg')
expect(icon).toBeInTheDocument()
})
})
describe('Online Drive Icon', () => {
it('should render FileTypeIcon for onlineDrive data source type', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDrive,
data_source_info: {
name: 'document.xlsx',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should extract extension from file name', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDrive,
data_source_info: {
name: 'spreadsheet.xlsx',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should handle file name without extension', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDrive,
data_source_info: {
name: 'noextension',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should handle empty file name', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDrive,
data_source_info: {
name: '',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should handle hidden files (starting with dot)', () => {
const doc = createMockDoc({
data_source_type: DatasourceType.onlineDrive,
data_source_info: {
name: '.gitignore',
},
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
})
describe('Unknown Data Source Type', () => {
it('should return null for unknown data source type', () => {
const doc = createMockDoc({
data_source_type: 'unknown',
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeNull()
})
})
describe('Edge Cases', () => {
it('should handle undefined data_source_info', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.FILE,
data_source_info: undefined,
})
const { container } = render(<DocumentSourceIcon doc={doc} />)
expect(container.firstChild).toBeInTheDocument()
})
it('should memoize the component', () => {
const doc = createMockDoc()
const { rerender, container } = render(<DocumentSourceIcon doc={doc} />)
const firstRender = container.innerHTML
rerender(<DocumentSourceIcon doc={doc} />)
expect(container.innerHTML).toBe(firstRender)
})
})
})

View File

@@ -0,0 +1,100 @@
import type { FC } from 'react'
import type { LegacyDataSourceInfo, LocalFileInfo, OnlineDocumentInfo, OnlineDriveInfo, SimpleDocumentDetail } from '@/models/datasets'
import { RiGlobalLine } from '@remixicon/react'
import * as React from 'react'
import FileTypeIcon from '@/app/components/base/file-uploader/file-type-icon'
import NotionIcon from '@/app/components/base/notion-icon'
import { extensionToFileType } from '@/app/components/datasets/hit-testing/utils/extension-to-file-type'
import { DataSourceType } from '@/models/datasets'
import { DatasourceType } from '@/models/pipeline'
type DocumentSourceIconProps = {
doc: SimpleDocumentDetail
fileType?: string
}
const isLocalFile = (dataSourceType: DataSourceType | DatasourceType) => {
return dataSourceType === DatasourceType.localFile || dataSourceType === DataSourceType.FILE
}
const isOnlineDocument = (dataSourceType: DataSourceType | DatasourceType) => {
return dataSourceType === DatasourceType.onlineDocument || dataSourceType === DataSourceType.NOTION
}
const isWebsiteCrawl = (dataSourceType: DataSourceType | DatasourceType) => {
return dataSourceType === DatasourceType.websiteCrawl || dataSourceType === DataSourceType.WEB
}
const isOnlineDrive = (dataSourceType: DataSourceType | DatasourceType) => {
return dataSourceType === DatasourceType.onlineDrive
}
const isCreateFromRAGPipeline = (createdFrom: string) => {
return createdFrom === 'rag-pipeline'
}
const getFileExtension = (fileName: string): string => {
if (!fileName)
return ''
const parts = fileName.split('.')
if (parts.length <= 1 || (parts[0] === '' && parts.length === 2))
return ''
return parts[parts.length - 1].toLowerCase()
}
const DocumentSourceIcon: FC<DocumentSourceIconProps> = React.memo(({
doc,
fileType,
}) => {
if (isOnlineDocument(doc.data_source_type)) {
return (
<NotionIcon
className="mr-1.5"
type="page"
src={
isCreateFromRAGPipeline(doc.created_from)
? (doc.data_source_info as OnlineDocumentInfo).page.page_icon
: (doc.data_source_info as LegacyDataSourceInfo).notion_page_icon
}
/>
)
}
if (isLocalFile(doc.data_source_type)) {
return (
<FileTypeIcon
type={
extensionToFileType(
isCreateFromRAGPipeline(doc.created_from)
? (doc?.data_source_info as LocalFileInfo)?.extension
: ((doc?.data_source_info as LegacyDataSourceInfo)?.upload_file?.extension ?? fileType),
)
}
className="mr-1.5"
/>
)
}
if (isOnlineDrive(doc.data_source_type)) {
return (
<FileTypeIcon
type={
extensionToFileType(
getFileExtension((doc?.data_source_info as unknown as OnlineDriveInfo)?.name),
)
}
className="mr-1.5"
/>
)
}
if (isWebsiteCrawl(doc.data_source_type)) {
return <RiGlobalLine className="mr-1.5 size-4" />
}
return null
})
DocumentSourceIcon.displayName = 'DocumentSourceIcon'
export default DocumentSourceIcon
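
A minimal usage sketch, assuming a caller (such as the document table row) that already holds a SimpleDocumentDetail; the DocumentNameCell name and the 'txt' fallback passed via fileType are illustrative, not part of the PR.

import type { SimpleDocumentDetail } from '@/models/datasets'
import * as React from 'react'
import DocumentSourceIcon from './document-source-icon'

// Renders the source icon next to the document name; fileType is only consulted as a fallback
// when a legacy data_source_info carries no upload_file extension.
const DocumentNameCell = ({ doc }: { doc: SimpleDocumentDetail }) => (
  <div className="flex items-center">
    <DocumentSourceIcon doc={doc} fileType="txt" />
    <span className="grow truncate">{doc.name}</span>
  </div>
)

export default DocumentNameCell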

View File

@@ -0,0 +1,342 @@
import type { ReactNode } from 'react'
import type { SimpleDocumentDetail } from '@/models/datasets'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { fireEvent, render, screen } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { DataSourceType } from '@/models/datasets'
import DocumentTableRow from './document-table-row'
const mockPush = vi.fn()
vi.mock('next/navigation', () => ({
useRouter: () => ({
push: mockPush,
}),
}))
const createTestQueryClient = () => new QueryClient({
defaultOptions: {
queries: { retry: false, gcTime: 0 },
mutations: { retry: false },
},
})
const createWrapper = () => {
const queryClient = createTestQueryClient()
return ({ children }: { children: ReactNode }) => (
<QueryClientProvider client={queryClient}>
<table>
<tbody>
{children}
</tbody>
</table>
</QueryClientProvider>
)
}
type LocalDoc = SimpleDocumentDetail & { percent?: number }
const createMockDoc = (overrides: Record<string, unknown> = {}): LocalDoc => ({
id: 'doc-1',
position: 1,
data_source_type: DataSourceType.FILE,
data_source_info: {},
data_source_detail_dict: {
upload_file: { name: 'test.txt', extension: 'txt' },
},
dataset_process_rule_id: 'rule-1',
dataset_id: 'dataset-1',
batch: 'batch-1',
name: 'test-document.txt',
created_from: 'web',
created_by: 'user-1',
created_at: Date.now(),
tokens: 100,
indexing_status: 'completed',
error: null,
enabled: true,
disabled_at: null,
disabled_by: null,
archived: false,
archived_reason: null,
archived_by: null,
archived_at: null,
updated_at: Date.now(),
doc_type: null,
doc_metadata: undefined,
doc_language: 'en',
display_status: 'available',
word_count: 500,
hit_count: 10,
doc_form: 'text_model',
...overrides,
}) as unknown as LocalDoc
// Helper to find the custom checkbox div (Checkbox component renders as a div, not a native checkbox)
const findCheckbox = (container: HTMLElement): HTMLElement | null => {
return container.querySelector('[class*="shadow-xs"]')
}
describe('DocumentTableRow', () => {
const defaultProps = {
doc: createMockDoc(),
index: 0,
datasetId: 'dataset-1',
isSelected: false,
isGeneralMode: true,
isQAMode: false,
embeddingAvailable: true,
selectedIds: [],
onSelectOne: vi.fn(),
onSelectedIdChange: vi.fn(),
onShowRenameModal: vi.fn(),
onUpdate: vi.fn(),
}
beforeEach(() => {
vi.clearAllMocks()
})
describe('Rendering', () => {
it('should render without crashing', () => {
render(<DocumentTableRow {...defaultProps} />, { wrapper: createWrapper() })
expect(screen.getByText('test-document.txt')).toBeInTheDocument()
})
it('should render index number correctly', () => {
render(<DocumentTableRow {...defaultProps} index={5} />, { wrapper: createWrapper() })
expect(screen.getByText('6')).toBeInTheDocument()
})
it('should render document name with tooltip', () => {
render(<DocumentTableRow {...defaultProps} />, { wrapper: createWrapper() })
expect(screen.getByText('test-document.txt')).toBeInTheDocument()
})
it('should render checkbox element', () => {
const { container } = render(<DocumentTableRow {...defaultProps} />, { wrapper: createWrapper() })
const checkbox = findCheckbox(container)
expect(checkbox).toBeInTheDocument()
})
})
describe('Selection', () => {
it('should show check icon when isSelected is true', () => {
const { container } = render(<DocumentTableRow {...defaultProps} isSelected />, { wrapper: createWrapper() })
// When selected, the checkbox should have a check icon (RiCheckLine svg)
const checkbox = findCheckbox(container)
expect(checkbox).toBeInTheDocument()
const checkIcon = checkbox?.querySelector('svg')
expect(checkIcon).toBeInTheDocument()
})
it('should not show check icon when isSelected is false', () => {
const { container } = render(<DocumentTableRow {...defaultProps} isSelected={false} />, { wrapper: createWrapper() })
const checkbox = findCheckbox(container)
expect(checkbox).toBeInTheDocument()
// When not selected, there should be no check icon inside the checkbox
const checkIcon = checkbox?.querySelector('svg')
expect(checkIcon).not.toBeInTheDocument()
})
it('should call onSelectOne when checkbox is clicked', () => {
const onSelectOne = vi.fn()
const { container } = render(<DocumentTableRow {...defaultProps} onSelectOne={onSelectOne} />, { wrapper: createWrapper() })
const checkbox = findCheckbox(container)
if (checkbox) {
fireEvent.click(checkbox)
expect(onSelectOne).toHaveBeenCalledWith('doc-1')
}
})
it('should stop propagation when checkbox container is clicked', () => {
const { container } = render(<DocumentTableRow {...defaultProps} />, { wrapper: createWrapper() })
// Click the div containing the checkbox (which has stopPropagation)
const checkboxContainer = container.querySelector('td')?.querySelector('div')
if (checkboxContainer) {
fireEvent.click(checkboxContainer)
expect(mockPush).not.toHaveBeenCalled()
}
})
})
describe('Row Navigation', () => {
it('should navigate to document detail on row click', () => {
render(<DocumentTableRow {...defaultProps} />, { wrapper: createWrapper() })
const row = screen.getByRole('row')
fireEvent.click(row)
expect(mockPush).toHaveBeenCalledWith('/datasets/dataset-1/documents/doc-1')
})
it('should navigate with correct datasetId and documentId', () => {
render(
<DocumentTableRow
{...defaultProps}
datasetId="custom-dataset"
doc={createMockDoc({ id: 'custom-doc' })}
/>,
{ wrapper: createWrapper() },
)
const row = screen.getByRole('row')
fireEvent.click(row)
expect(mockPush).toHaveBeenCalledWith('/datasets/custom-dataset/documents/custom-doc')
})
})
describe('Word Count Display', () => {
it('should display word count less than 1000 as is', () => {
const doc = createMockDoc({ word_count: 500 })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByText('500')).toBeInTheDocument()
})
it('should display word count 1000 or more in k format', () => {
const doc = createMockDoc({ word_count: 1500 })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByText('1.5k')).toBeInTheDocument()
})
it('should display 0 with empty style when word_count is 0', () => {
const doc = createMockDoc({ word_count: 0 })
const { container } = render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
const zeroCells = container.querySelectorAll('.text-text-tertiary')
expect(zeroCells.length).toBeGreaterThan(0)
})
it('should handle undefined word_count', () => {
const doc = createMockDoc({ word_count: undefined as unknown as number })
const { container } = render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(container).toBeInTheDocument()
})
})
describe('Hit Count Display', () => {
it('should display hit count less than 1000 as is', () => {
const doc = createMockDoc({ hit_count: 100 })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByText('100')).toBeInTheDocument()
})
it('should display hit count 1000 or more in k format', () => {
const doc = createMockDoc({ hit_count: 2500 })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByText('2.5k')).toBeInTheDocument()
})
it('should display 0 with empty style when hit_count is 0', () => {
const doc = createMockDoc({ hit_count: 0 })
const { container } = render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
const zeroCells = container.querySelectorAll('.text-text-tertiary')
expect(zeroCells.length).toBeGreaterThan(0)
})
})
describe('Chunking Mode', () => {
it('should render ChunkingModeLabel with general mode', () => {
render(<DocumentTableRow {...defaultProps} isGeneralMode isQAMode={false} />, { wrapper: createWrapper() })
// ChunkingModeLabel should be rendered
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should render ChunkingModeLabel with QA mode', () => {
render(<DocumentTableRow {...defaultProps} isGeneralMode={false} isQAMode />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
})
describe('Summary Status', () => {
it('should render SummaryStatus when summary_index_status is present', () => {
const doc = createMockDoc({ summary_index_status: 'completed' })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should not render SummaryStatus when summary_index_status is absent', () => {
const doc = createMockDoc({ summary_index_status: undefined })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
})
describe('Rename Action', () => {
it('should call onShowRenameModal when rename button is clicked', () => {
const onShowRenameModal = vi.fn()
const { container } = render(
<DocumentTableRow {...defaultProps} onShowRenameModal={onShowRenameModal} />,
{ wrapper: createWrapper() },
)
// Find the rename button via its hover-action container classes (the div wrapping the RiEditLine icon)
const renameButtons = container.querySelectorAll('.cursor-pointer.rounded-md')
if (renameButtons.length > 0) {
fireEvent.click(renameButtons[0])
expect(onShowRenameModal).toHaveBeenCalledWith(defaultProps.doc)
expect(mockPush).not.toHaveBeenCalled()
}
})
})
describe('Operations', () => {
it('should pass selectedIds to Operations component', () => {
render(<DocumentTableRow {...defaultProps} selectedIds={['doc-1', 'doc-2']} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should pass onSelectedIdChange to Operations component', () => {
const onSelectedIdChange = vi.fn()
render(<DocumentTableRow {...defaultProps} onSelectedIdChange={onSelectedIdChange} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
})
describe('Document Source Icon', () => {
it('should render with FILE data source type', () => {
const doc = createMockDoc({ data_source_type: DataSourceType.FILE })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should render with NOTION data source type', () => {
const doc = createMockDoc({
data_source_type: DataSourceType.NOTION,
data_source_info: { notion_page_icon: 'icon.png' },
})
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should render with WEB data source type', () => {
const doc = createMockDoc({ data_source_type: DataSourceType.WEB })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
})
describe('Edge Cases', () => {
it('should handle document with very long name', () => {
const doc = createMockDoc({ name: `${'a'.repeat(500)}.txt` })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByRole('row')).toBeInTheDocument()
})
it('should handle document with special characters in name', () => {
const doc = createMockDoc({ name: '<script>test</script>.txt' })
render(<DocumentTableRow {...defaultProps} doc={doc} />, { wrapper: createWrapper() })
expect(screen.getByText('<script>test</script>.txt')).toBeInTheDocument()
})
it('should memoize the component', () => {
const wrapper = createWrapper()
const { rerender } = render(<DocumentTableRow {...defaultProps} />, { wrapper })
rerender(<DocumentTableRow {...defaultProps} />)
expect(screen.getByRole('row')).toBeInTheDocument()
})
})
})

View File

@@ -0,0 +1,152 @@
import type { FC } from 'react'
import type { SimpleDocumentDetail } from '@/models/datasets'
import { RiEditLine } from '@remixicon/react'
import { pick } from 'es-toolkit/object'
import { useRouter } from 'next/navigation'
import * as React from 'react'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import Checkbox from '@/app/components/base/checkbox'
import Tooltip from '@/app/components/base/tooltip'
import ChunkingModeLabel from '@/app/components/datasets/common/chunking-mode-label'
import Operations from '@/app/components/datasets/documents/components/operations'
import SummaryStatus from '@/app/components/datasets/documents/detail/completed/common/summary-status'
import StatusItem from '@/app/components/datasets/documents/status-item'
import useTimestamp from '@/hooks/use-timestamp'
import { DataSourceType } from '@/models/datasets'
import { formatNumber } from '@/utils/format'
import DocumentSourceIcon from './document-source-icon'
import { renderTdValue } from './utils'
type LocalDoc = SimpleDocumentDetail & { percent?: number }
type DocumentTableRowProps = {
doc: LocalDoc
index: number
datasetId: string
isSelected: boolean
isGeneralMode: boolean
isQAMode: boolean
embeddingAvailable: boolean
selectedIds: string[]
onSelectOne: (docId: string) => void
onSelectedIdChange: (ids: string[]) => void
onShowRenameModal: (doc: LocalDoc) => void
onUpdate: () => void
}
const renderCount = (count: number | undefined) => {
if (!count)
return renderTdValue(0, true)
if (count < 1000)
return count
return `${formatNumber((count / 1000).toFixed(1))}k`
}
const DocumentTableRow: FC<DocumentTableRowProps> = React.memo(({
doc,
index,
datasetId,
isSelected,
isGeneralMode,
isQAMode,
embeddingAvailable,
selectedIds,
onSelectOne,
onSelectedIdChange,
onShowRenameModal,
onUpdate,
}) => {
const { t } = useTranslation()
const { formatTime } = useTimestamp()
const router = useRouter()
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
const handleRowClick = useCallback(() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}, [router, datasetId, doc.id])
const handleCheckboxClick = useCallback((e: React.MouseEvent) => {
e.stopPropagation()
}, [])
const handleRenameClick = useCallback((e: React.MouseEvent) => {
e.stopPropagation()
onShowRenameModal(doc)
}, [doc, onShowRenameModal])
return (
<tr
className="h-8 cursor-pointer border-b border-divider-subtle hover:bg-background-default-hover"
onClick={handleRowClick}
>
<td className="text-left align-middle text-xs text-text-tertiary">
<div className="flex items-center" onClick={handleCheckboxClick}>
<Checkbox
className="mr-2 shrink-0"
checked={isSelected}
onCheck={() => onSelectOne(doc.id)}
/>
{index + 1}
</div>
</td>
<td>
<div className="group mr-6 flex max-w-[460px] items-center hover:mr-0">
<div className="flex shrink-0 items-center">
<DocumentSourceIcon doc={doc} fileType={fileType} />
</div>
<Tooltip popupContent={doc.name}>
<span className="grow-1 truncate text-sm">{doc.name}</span>
</Tooltip>
{doc.summary_index_status && (
<div className="ml-1 hidden shrink-0 group-hover:flex">
<SummaryStatus status={doc.summary_index_status} />
</div>
)}
<div className="hidden shrink-0 group-hover:ml-auto group-hover:flex">
<Tooltip popupContent={t('list.table.rename', { ns: 'datasetDocuments' })}>
<div
className="cursor-pointer rounded-md p-1 hover:bg-state-base-hover"
onClick={handleRenameClick}
>
<RiEditLine className="h-4 w-4 text-text-tertiary" />
</div>
</Tooltip>
</div>
</div>
</td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className="text-[13px] text-text-secondary">
{formatTime(doc.created_at, t('dateTimeFormat', { ns: 'datasetHitTesting' }) as string)}
</td>
<td>
<StatusItem status={doc.display_status} />
</td>
<td>
<Operations
selectedIds={selectedIds}
onSelectedIdChange={onSelectedIdChange}
embeddingAvailable={embeddingAvailable}
datasetId={datasetId}
detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form', 'display_status'])}
onUpdate={onUpdate}
/>
</td>
</tr>
)
})
DocumentTableRow.displayName = 'DocumentTableRow'
export default DocumentTableRow

View File

@@ -0,0 +1,4 @@
export { default as DocumentSourceIcon } from './document-source-icon'
export { default as DocumentTableRow } from './document-table-row'
export { default as SortHeader } from './sort-header'
export { renderTdValue } from './utils'

View File

@@ -0,0 +1,124 @@
import { fireEvent, render, screen } from '@testing-library/react'
import { describe, expect, it, vi } from 'vitest'
import SortHeader from './sort-header'
describe('SortHeader', () => {
const defaultProps = {
field: 'name' as const,
label: 'File Name',
currentSortField: null,
sortOrder: 'desc' as const,
onSort: vi.fn(),
}
describe('rendering', () => {
it('should render the label', () => {
render(<SortHeader {...defaultProps} />)
expect(screen.getByText('File Name')).toBeInTheDocument()
})
it('should render the sort icon', () => {
const { container } = render(<SortHeader {...defaultProps} />)
const icon = container.querySelector('svg')
expect(icon).toBeInTheDocument()
})
})
describe('inactive state', () => {
it('should have disabled text color when not active', () => {
const { container } = render(<SortHeader {...defaultProps} />)
const icon = container.querySelector('svg')
expect(icon).toHaveClass('text-text-disabled')
})
it('should not be rotated when not active', () => {
const { container } = render(<SortHeader {...defaultProps} />)
const icon = container.querySelector('svg')
expect(icon).not.toHaveClass('rotate-180')
})
})
describe('active state', () => {
it('should have tertiary text color when active', () => {
const { container } = render(
<SortHeader {...defaultProps} currentSortField="name" />,
)
const icon = container.querySelector('svg')
expect(icon).toHaveClass('text-text-tertiary')
})
it('should not be rotated when active and desc', () => {
const { container } = render(
<SortHeader {...defaultProps} currentSortField="name" sortOrder="desc" />,
)
const icon = container.querySelector('svg')
expect(icon).not.toHaveClass('rotate-180')
})
it('should be rotated when active and asc', () => {
const { container } = render(
<SortHeader {...defaultProps} currentSortField="name" sortOrder="asc" />,
)
const icon = container.querySelector('svg')
expect(icon).toHaveClass('rotate-180')
})
})
describe('interaction', () => {
it('should call onSort when clicked', () => {
const onSort = vi.fn()
render(<SortHeader {...defaultProps} onSort={onSort} />)
fireEvent.click(screen.getByText('File Name'))
expect(onSort).toHaveBeenCalledWith('name')
})
it('should call onSort with correct field', () => {
const onSort = vi.fn()
render(<SortHeader {...defaultProps} field="word_count" onSort={onSort} />)
fireEvent.click(screen.getByText('File Name'))
expect(onSort).toHaveBeenCalledWith('word_count')
})
})
describe('different fields', () => {
it('should work with word_count field', () => {
render(
<SortHeader
{...defaultProps}
field="word_count"
label="Words"
currentSortField="word_count"
/>,
)
expect(screen.getByText('Words')).toBeInTheDocument()
})
it('should work with hit_count field', () => {
render(
<SortHeader
{...defaultProps}
field="hit_count"
label="Hit Count"
currentSortField="hit_count"
/>,
)
expect(screen.getByText('Hit Count')).toBeInTheDocument()
})
it('should work with created_at field', () => {
render(
<SortHeader
{...defaultProps}
field="created_at"
label="Upload Time"
currentSortField="created_at"
/>,
)
expect(screen.getByText('Upload Time')).toBeInTheDocument()
})
})
})

View File

@@ -0,0 +1,44 @@
import type { FC } from 'react'
import type { SortField, SortOrder } from '../hooks'
import { RiArrowDownLine } from '@remixicon/react'
import * as React from 'react'
import { cn } from '@/utils/classnames'
type SortHeaderProps = {
field: Exclude<SortField, null>
label: string
currentSortField: SortField
sortOrder: SortOrder
onSort: (field: SortField) => void
}
const SortHeader: FC<SortHeaderProps> = React.memo(({
field,
label,
currentSortField,
sortOrder,
onSort,
}) => {
const isActive = currentSortField === field
const isDesc = isActive && sortOrder === 'desc'
return (
<div
className="flex cursor-pointer items-center hover:text-text-secondary"
onClick={() => onSort(field)}
>
{label}
<RiArrowDownLine
className={cn(
'ml-0.5 h-3 w-3 transition-all',
isActive ? 'text-text-tertiary' : 'text-text-disabled',
isActive && !isDesc ? 'rotate-180' : '',
)}
/>
</div>
)
})
SortHeader.displayName = 'SortHeader'
export default SortHeader
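
A brief usage sketch: a hypothetical header cell that keeps its own sort state and passes it down. The wrapper component and its state wiring below are illustrative assumptions, not part of this diff; SortField and SortOrder are the types exported from the sibling hooks module imported above.

import type { SortField, SortOrder } from '../hooks'
import { useState } from 'react'
import SortHeader from './sort-header'

// Illustrative wrapper: toggles the order when the active field is clicked again, otherwise switches field.
const WordCountHeader = () => {
  const [sortField, setSortField] = useState<SortField>(null)
  const [sortOrder, setSortOrder] = useState<SortOrder>('desc')
  const handleSort = (field: SortField) => {
    if (field === sortField) {
      setSortOrder(prev => (prev === 'desc' ? 'asc' : 'desc'))
    }
    else {
      setSortField(field)
      setSortOrder('desc')
    }
  }
  return (
    <SortHeader
      field="word_count"
      label="Words"
      currentSortField={sortField}
      sortOrder={sortOrder}
      onSort={handleSort}
    />
  )
}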

View File

@@ -0,0 +1,90 @@
import { render, screen } from '@testing-library/react'
import { describe, expect, it } from 'vitest'
import { renderTdValue } from './utils'
describe('renderTdValue', () => {
describe('Rendering', () => {
it('should render string value correctly', () => {
const { container } = render(<>{renderTdValue('test value')}</>)
expect(screen.getByText('test value')).toBeInTheDocument()
expect(container.querySelector('div')).toHaveClass('text-text-secondary')
})
it('should render number value correctly', () => {
const { container } = render(<>{renderTdValue(42)}</>)
expect(screen.getByText('42')).toBeInTheDocument()
expect(container.querySelector('div')).toHaveClass('text-text-secondary')
})
it('should render zero correctly', () => {
const { container } = render(<>{renderTdValue(0)}</>)
expect(screen.getByText('0')).toBeInTheDocument()
expect(container.querySelector('div')).toHaveClass('text-text-secondary')
})
})
describe('Null and undefined handling', () => {
it('should render dash for null value', () => {
render(<>{renderTdValue(null)}</>)
expect(screen.getByText('-')).toBeInTheDocument()
})
it('should render dash for null value with empty style', () => {
const { container } = render(<>{renderTdValue(null, true)}</>)
expect(screen.getByText('-')).toBeInTheDocument()
expect(container.querySelector('div')).toHaveClass('text-text-tertiary')
})
})
describe('Empty style', () => {
it('should apply text-text-tertiary class when isEmptyStyle is true', () => {
const { container } = render(<>{renderTdValue('value', true)}</>)
expect(container.querySelector('div')).toHaveClass('text-text-tertiary')
})
it('should apply text-text-secondary class when isEmptyStyle is false', () => {
const { container } = render(<>{renderTdValue('value', false)}</>)
expect(container.querySelector('div')).toHaveClass('text-text-secondary')
})
it('should apply text-text-secondary class when isEmptyStyle is not provided', () => {
const { container } = render(<>{renderTdValue('value')}</>)
expect(container.querySelector('div')).toHaveClass('text-text-secondary')
})
})
describe('Edge Cases', () => {
it('should handle empty string', () => {
render(<>{renderTdValue('')}</>)
// Empty string should still render but with no visible text
const div = document.querySelector('div')
expect(div).toBeInTheDocument()
})
it('should handle large numbers', () => {
render(<>{renderTdValue(1234567890)}</>)
expect(screen.getByText('1234567890')).toBeInTheDocument()
})
it('should handle negative numbers', () => {
render(<>{renderTdValue(-42)}</>)
expect(screen.getByText('-42')).toBeInTheDocument()
})
it('should handle special characters in string', () => {
render(<>{renderTdValue('<script>alert("xss")</script>')}</>)
expect(screen.getByText('<script>alert("xss")</script>')).toBeInTheDocument()
})
it('should handle unicode characters', () => {
render(<>{renderTdValue('Test Unicode: \u4E2D\u6587')}</>)
expect(screen.getByText('Test Unicode: \u4E2D\u6587')).toBeInTheDocument()
})
it('should handle very long strings', () => {
const longString = 'a'.repeat(1000)
render(<>{renderTdValue(longString)}</>)
expect(screen.getByText(longString)).toBeInTheDocument()
})
})
})

View File

@@ -0,0 +1,16 @@
import type { ReactNode } from 'react'
import { cn } from '@/utils/classnames'
import s from '../../../style.module.css'
export const renderTdValue = (value: string | number | null, isEmptyStyle = false): ReactNode => {
const className = cn(
isEmptyStyle ? 'text-text-tertiary' : 'text-text-secondary',
s.tdValue,
)
return (
<div className={className}>
{value ?? '-'}
</div>
)
}
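
For quick reference, the helper's two styling branches (the shared s.tdValue module class is abbreviated as "…" in the comments):

import { renderTdValue } from './utils'

renderTdValue(1500)        // renders <div class="text-text-secondary …">1500</div>
renderTdValue(null, true)  // renders <div class="text-text-tertiary …">-</div>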

View File

@@ -0,0 +1,4 @@
export { useDocumentActions } from './use-document-actions'
export { useDocumentSelection } from './use-document-selection'
export { useDocumentSort } from './use-document-sort'
export type { SortField, SortOrder } from './use-document-sort'

View File

@@ -0,0 +1,438 @@
import type { ReactNode } from 'react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { act, renderHook, waitFor } from '@testing-library/react'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { DocumentActionType } from '@/models/datasets'
import * as useDocument from '@/service/knowledge/use-document'
import { useDocumentActions } from './use-document-actions'
vi.mock('@/service/knowledge/use-document')
const mockUseDocumentArchive = vi.mocked(useDocument.useDocumentArchive)
const mockUseDocumentSummary = vi.mocked(useDocument.useDocumentSummary)
const mockUseDocumentEnable = vi.mocked(useDocument.useDocumentEnable)
const mockUseDocumentDisable = vi.mocked(useDocument.useDocumentDisable)
const mockUseDocumentDelete = vi.mocked(useDocument.useDocumentDelete)
const mockUseDocumentBatchRetryIndex = vi.mocked(useDocument.useDocumentBatchRetryIndex)
const mockUseDocumentDownloadZip = vi.mocked(useDocument.useDocumentDownloadZip)
const createTestQueryClient = () => new QueryClient({
defaultOptions: {
queries: { retry: false },
mutations: { retry: false },
},
})
const createWrapper = () => {
const queryClient = createTestQueryClient()
return ({ children }: { children: ReactNode }) => (
<QueryClientProvider client={queryClient}>
{children}
</QueryClientProvider>
)
}
describe('useDocumentActions', () => {
const mockMutateAsync = vi.fn()
beforeEach(() => {
vi.clearAllMocks()
// Setup all mocks with default values
const createMockMutation = () => ({
mutateAsync: mockMutateAsync,
isPending: false,
isError: false,
isSuccess: false,
isIdle: true,
data: undefined,
error: null,
mutate: vi.fn(),
reset: vi.fn(),
status: 'idle' as const,
variables: undefined,
context: undefined,
failureCount: 0,
failureReason: null,
submittedAt: 0,
})
mockUseDocumentArchive.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentArchive>)
mockUseDocumentSummary.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentSummary>)
mockUseDocumentEnable.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentEnable>)
mockUseDocumentDisable.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentDisable>)
mockUseDocumentDelete.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentDelete>)
mockUseDocumentBatchRetryIndex.mockReturnValue(createMockMutation() as unknown as ReturnType<typeof useDocument.useDocumentBatchRetryIndex>)
mockUseDocumentDownloadZip.mockReturnValue({
...createMockMutation(),
isPending: false,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
})
describe('handleAction', () => {
it('should call archive mutation when archive action is triggered', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.archive)()
})
expect(mockMutateAsync).toHaveBeenCalledWith({
datasetId: 'ds1',
documentIds: ['doc1'],
})
})
it('should call onUpdate on successful action', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.enable)()
})
await waitFor(() => {
expect(onUpdate).toHaveBeenCalled()
})
})
it('should call onClearSelection on delete action', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.delete)()
})
await waitFor(() => {
expect(onClearSelection).toHaveBeenCalled()
})
})
})
describe('handleBatchReIndex', () => {
it('should call retry index mutation', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1', 'doc2'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchReIndex()
})
expect(mockMutateAsync).toHaveBeenCalledWith({
datasetId: 'ds1',
documentIds: ['doc1', 'doc2'],
})
})
it('should call onClearSelection on success', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchReIndex()
})
await waitFor(() => {
expect(onClearSelection).toHaveBeenCalled()
expect(onUpdate).toHaveBeenCalled()
})
})
})
describe('handleBatchDownload', () => {
it('should not proceed when already downloading', async () => {
mockUseDocumentDownloadZip.mockReturnValue({
mutateAsync: mockMutateAsync,
isPending: true,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: ['doc1'],
onUpdate: vi.fn(),
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchDownload()
})
expect(mockMutateAsync).not.toHaveBeenCalled()
})
it('should call download mutation with downloadable ids', async () => {
const mockBlob = new Blob(['test'])
mockMutateAsync.mockResolvedValue(mockBlob)
mockUseDocumentDownloadZip.mockReturnValue({
mutateAsync: mockMutateAsync,
isPending: false,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1', 'doc2'],
downloadableSelectedIds: ['doc1'],
onUpdate: vi.fn(),
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchDownload()
})
expect(mockMutateAsync).toHaveBeenCalledWith({
datasetId: 'ds1',
documentIds: ['doc1'],
})
})
})
describe('isDownloadingZip', () => {
it('should reflect isPending state from mutation', () => {
mockUseDocumentDownloadZip.mockReturnValue({
mutateAsync: mockMutateAsync,
isPending: true,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: [],
downloadableSelectedIds: [],
onUpdate: vi.fn(),
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
expect(result.current.isDownloadingZip).toBe(true)
})
})
describe('error handling', () => {
it('should show error toast when handleAction fails', async () => {
mockMutateAsync.mockRejectedValue(new Error('Action failed'))
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.archive)()
})
// onUpdate should not be called on error
expect(onUpdate).not.toHaveBeenCalled()
})
it('should show error toast when handleBatchReIndex fails', async () => {
mockMutateAsync.mockRejectedValue(new Error('Re-index failed'))
const onUpdate = vi.fn()
const onClearSelection = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection,
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchReIndex()
})
// onUpdate and onClearSelection should not be called on error
expect(onUpdate).not.toHaveBeenCalled()
expect(onClearSelection).not.toHaveBeenCalled()
})
it('should show error toast when handleBatchDownload fails', async () => {
mockMutateAsync.mockRejectedValue(new Error('Download failed'))
mockUseDocumentDownloadZip.mockReturnValue({
mutateAsync: mockMutateAsync,
isPending: false,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: ['doc1'],
onUpdate: vi.fn(),
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchDownload()
})
// Mutation was called but failed
expect(mockMutateAsync).toHaveBeenCalled()
})
it('should show error toast when handleBatchDownload returns null blob', async () => {
mockMutateAsync.mockResolvedValue(null)
mockUseDocumentDownloadZip.mockReturnValue({
mutateAsync: mockMutateAsync,
isPending: false,
} as unknown as ReturnType<typeof useDocument.useDocumentDownloadZip>)
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: ['doc1'],
onUpdate: vi.fn(),
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleBatchDownload()
})
// Mutation was called but returned null
expect(mockMutateAsync).toHaveBeenCalled()
})
})
describe('all action types', () => {
it('should handle summary action', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.summary)()
})
expect(mockMutateAsync).toHaveBeenCalled()
await waitFor(() => {
expect(onUpdate).toHaveBeenCalled()
})
})
it('should handle disable action', async () => {
mockMutateAsync.mockResolvedValue({ result: 'success' })
const onUpdate = vi.fn()
const { result } = renderHook(
() => useDocumentActions({
datasetId: 'ds1',
selectedIds: ['doc1'],
downloadableSelectedIds: [],
onUpdate,
onClearSelection: vi.fn(),
}),
{ wrapper: createWrapper() },
)
await act(async () => {
await result.current.handleAction(DocumentActionType.disable)()
})
expect(mockMutateAsync).toHaveBeenCalled()
await waitFor(() => {
expect(onUpdate).toHaveBeenCalled()
})
})
})
})

View File

@@ -0,0 +1,126 @@
import type { CommonResponse } from '@/models/common'
import { useCallback, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Toast from '@/app/components/base/toast'
import { DocumentActionType } from '@/models/datasets'
import {
useDocumentArchive,
useDocumentBatchRetryIndex,
useDocumentDelete,
useDocumentDisable,
useDocumentDownloadZip,
useDocumentEnable,
useDocumentSummary,
} from '@/service/knowledge/use-document'
import { asyncRunSafe } from '@/utils'
import { downloadBlob } from '@/utils/download'
type UseDocumentActionsOptions = {
datasetId: string
selectedIds: string[]
downloadableSelectedIds: string[]
onUpdate: () => void
onClearSelection: () => void
}
/**
* Generate a random ZIP filename for bulk document downloads.
* We intentionally avoid leaking dataset info in the exported archive name.
*/
const generateDocsZipFileName = (): string => {
const randomPart = (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function')
? crypto.randomUUID()
: `${Date.now().toString(36)}${Math.random().toString(36).slice(2, 10)}`
return `${randomPart}-docs.zip`
}
export const useDocumentActions = ({
datasetId,
selectedIds,
downloadableSelectedIds,
onUpdate,
onClearSelection,
}: UseDocumentActionsOptions) => {
const { t } = useTranslation()
const { mutateAsync: archiveDocument } = useDocumentArchive()
const { mutateAsync: generateSummary } = useDocumentSummary()
const { mutateAsync: enableDocument } = useDocumentEnable()
const { mutateAsync: disableDocument } = useDocumentDisable()
const { mutateAsync: deleteDocument } = useDocumentDelete()
const { mutateAsync: retryIndexDocument } = useDocumentBatchRetryIndex()
const { mutateAsync: requestDocumentsZip, isPending: isDownloadingZip } = useDocumentDownloadZip()
type SupportedActionType
= | typeof DocumentActionType.archive
| typeof DocumentActionType.summary
| typeof DocumentActionType.enable
| typeof DocumentActionType.disable
| typeof DocumentActionType.delete
const actionMutationMap = useMemo(() => ({
[DocumentActionType.archive]: archiveDocument,
[DocumentActionType.summary]: generateSummary,
[DocumentActionType.enable]: enableDocument,
[DocumentActionType.disable]: disableDocument,
[DocumentActionType.delete]: deleteDocument,
} as const), [archiveDocument, generateSummary, enableDocument, disableDocument, deleteDocument])
const handleAction = useCallback((actionName: SupportedActionType) => {
return async () => {
const opApi = actionMutationMap[actionName]
if (!opApi)
return
const [e] = await asyncRunSafe<CommonResponse>(
opApi({ datasetId, documentIds: selectedIds }),
)
if (!e) {
if (actionName === DocumentActionType.delete)
onClearSelection()
Toast.notify({ type: 'success', message: t('actionMsg.modifiedSuccessfully', { ns: 'common' }) })
onUpdate()
}
else {
Toast.notify({ type: 'error', message: t('actionMsg.modifiedUnsuccessfully', { ns: 'common' }) })
}
}
}, [actionMutationMap, datasetId, selectedIds, onClearSelection, onUpdate, t])
const handleBatchReIndex = useCallback(async () => {
const [e] = await asyncRunSafe<CommonResponse>(
retryIndexDocument({ datasetId, documentIds: selectedIds }),
)
if (!e) {
onClearSelection()
Toast.notify({ type: 'success', message: t('actionMsg.modifiedSuccessfully', { ns: 'common' }) })
onUpdate()
}
else {
Toast.notify({ type: 'error', message: t('actionMsg.modifiedUnsuccessfully', { ns: 'common' }) })
}
}, [retryIndexDocument, datasetId, selectedIds, onClearSelection, onUpdate, t])
const handleBatchDownload = useCallback(async () => {
if (isDownloadingZip)
return
const [e, blob] = await asyncRunSafe(
requestDocumentsZip({ datasetId, documentIds: downloadableSelectedIds }),
)
if (e || !blob) {
Toast.notify({ type: 'error', message: t('actionMsg.downloadUnsuccessfully', { ns: 'common' }) })
return
}
downloadBlob({ data: blob, fileName: generateDocsZipFileName() })
}, [datasetId, downloadableSelectedIds, isDownloadingZip, requestDocumentsZip, t])
return {
handleAction,
handleBatchReIndex,
handleBatchDownload,
isDownloadingZip,
}
}
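
A minimal usage sketch of the hook from a hypothetical batch-action toolbar. The component, its props, and the button markup are illustrative assumptions; only the hook API and DocumentActionType values come from the files above.

import { DocumentActionType } from '@/models/datasets'
import { useDocumentActions } from './use-document-actions'

type BatchActionBarProps = {
  datasetId: string
  selectedIds: string[]
  downloadableSelectedIds: string[]
  onUpdate: () => void
  onClearSelection: () => void
}

// Illustrative toolbar: handleAction(type) returns an async callback, so it can be bound directly as a handler.
const BatchActionBar = ({ datasetId, selectedIds, downloadableSelectedIds, onUpdate, onClearSelection }: BatchActionBarProps) => {
  const { handleAction, handleBatchReIndex, handleBatchDownload, isDownloadingZip } = useDocumentActions({
    datasetId,
    selectedIds,
    downloadableSelectedIds,
    onUpdate,
    onClearSelection,
  })
  return (
    <div className="flex items-center gap-1">
      <button type="button" onClick={handleAction(DocumentActionType.archive)}>Archive</button>
      <button type="button" onClick={handleBatchReIndex}>Retry indexing</button>
      <button type="button" onClick={handleBatchDownload} disabled={isDownloadingZip}>Download as ZIP</button>
    </div>
  )
}

export default BatchActionBar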

View File

@@ -0,0 +1,317 @@
import type { SimpleDocumentDetail } from '@/models/datasets'
import { act, renderHook } from '@testing-library/react'
import { describe, expect, it, vi } from 'vitest'
import { DataSourceType } from '@/models/datasets'
import { useDocumentSelection } from './use-document-selection'
type LocalDoc = SimpleDocumentDetail & { percent?: number }
const createMockDocument = (overrides: Partial<LocalDoc> = {}): LocalDoc => ({
id: 'doc1',
name: 'Test Document',
data_source_type: DataSourceType.FILE,
data_source_info: {},
data_source_detail_dict: {},
word_count: 100,
hit_count: 10,
created_at: 1000000,
position: 1,
doc_form: 'text_model',
enabled: true,
archived: false,
display_status: 'available',
created_from: 'api',
...overrides,
} as LocalDoc)
describe('useDocumentSelection', () => {
describe('isAllSelected', () => {
it('should return false when documents is empty', () => {
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: [],
selectedIds: [],
onSelectedIdChange,
}),
)
expect(result.current.isAllSelected).toBe(false)
})
it('should return true when all documents are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1', 'doc2'],
onSelectedIdChange,
}),
)
expect(result.current.isAllSelected).toBe(true)
})
it('should return false when not all documents are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1'],
onSelectedIdChange,
}),
)
expect(result.current.isAllSelected).toBe(false)
})
})
describe('isSomeSelected', () => {
it('should return false when no documents are selected', () => {
const docs = [createMockDocument({ id: 'doc1' })]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: [],
onSelectedIdChange,
}),
)
expect(result.current.isSomeSelected).toBe(false)
})
it('should return true when some documents are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1'],
onSelectedIdChange,
}),
)
expect(result.current.isSomeSelected).toBe(true)
})
})
describe('onSelectAll', () => {
it('should select all documents when none are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: [],
onSelectedIdChange,
}),
)
act(() => {
result.current.onSelectAll()
})
expect(onSelectedIdChange).toHaveBeenCalledWith(['doc1', 'doc2'])
})
it('should deselect all when all are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1', 'doc2'],
onSelectedIdChange,
}),
)
act(() => {
result.current.onSelectAll()
})
expect(onSelectedIdChange).toHaveBeenCalledWith([])
})
it('should add to existing selection when some are selected', () => {
const docs = [
createMockDocument({ id: 'doc1' }),
createMockDocument({ id: 'doc2' }),
createMockDocument({ id: 'doc3' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1'],
onSelectedIdChange,
}),
)
act(() => {
result.current.onSelectAll()
})
expect(onSelectedIdChange).toHaveBeenCalledWith(['doc1', 'doc2', 'doc3'])
})
})
describe('onSelectOne', () => {
it('should add document to selection when not selected', () => {
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: [],
selectedIds: [],
onSelectedIdChange,
}),
)
act(() => {
result.current.onSelectOne('doc1')
})
expect(onSelectedIdChange).toHaveBeenCalledWith(['doc1'])
})
it('should remove document from selection when already selected', () => {
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: [],
selectedIds: ['doc1', 'doc2'],
onSelectedIdChange,
}),
)
act(() => {
result.current.onSelectOne('doc1')
})
expect(onSelectedIdChange).toHaveBeenCalledWith(['doc2'])
})
})
describe('hasErrorDocumentsSelected', () => {
it('should return false when no error documents are selected', () => {
const docs = [
createMockDocument({ id: 'doc1', display_status: 'available' }),
createMockDocument({ id: 'doc2', display_status: 'error' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1'],
onSelectedIdChange,
}),
)
expect(result.current.hasErrorDocumentsSelected).toBe(false)
})
it('should return true when an error document is selected', () => {
const docs = [
createMockDocument({ id: 'doc1', display_status: 'available' }),
createMockDocument({ id: 'doc2', display_status: 'error' }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc2'],
onSelectedIdChange,
}),
)
expect(result.current.hasErrorDocumentsSelected).toBe(true)
})
})
describe('downloadableSelectedIds', () => {
it('should return only FILE type documents from selection', () => {
const docs = [
createMockDocument({ id: 'doc1', data_source_type: DataSourceType.FILE }),
createMockDocument({ id: 'doc2', data_source_type: DataSourceType.NOTION }),
createMockDocument({ id: 'doc3', data_source_type: DataSourceType.FILE }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1', 'doc2', 'doc3'],
onSelectedIdChange,
}),
)
expect(result.current.downloadableSelectedIds).toEqual(['doc1', 'doc3'])
})
it('should return empty array when no FILE documents selected', () => {
const docs = [
createMockDocument({ id: 'doc1', data_source_type: DataSourceType.NOTION }),
createMockDocument({ id: 'doc2', data_source_type: DataSourceType.WEB }),
]
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: docs,
selectedIds: ['doc1', 'doc2'],
onSelectedIdChange,
}),
)
expect(result.current.downloadableSelectedIds).toEqual([])
})
})
describe('clearSelection', () => {
it('should call onSelectedIdChange with empty array', () => {
const onSelectedIdChange = vi.fn()
const { result } = renderHook(() =>
useDocumentSelection({
documents: [],
selectedIds: ['doc1', 'doc2'],
onSelectedIdChange,
}),
)
act(() => {
result.current.clearSelection()
})
expect(onSelectedIdChange).toHaveBeenCalledWith([])
})
})
})
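
The implementation of this hook is among the files hidden below. Purely as a reading aid, here is a rough reconstruction of its observable behavior as pinned down by the spec above; it is not the actual source, which may differ in structure and memoization.

import type { SimpleDocumentDetail } from '@/models/datasets'
import { useCallback, useMemo } from 'react'
import { DataSourceType } from '@/models/datasets'

type LocalDoc = SimpleDocumentDetail & { percent?: number }

// Reconstruction from the spec above, for orientation only.
export const useDocumentSelection = ({ documents, selectedIds, onSelectedIdChange }: {
  documents: LocalDoc[]
  selectedIds: string[]
  onSelectedIdChange: (ids: string[]) => void
}) => {
  const isAllSelected = documents.length > 0 && documents.every(doc => selectedIds.includes(doc.id))
  const isSomeSelected = documents.some(doc => selectedIds.includes(doc.id))
  const onSelectAll = useCallback(() => {
    // Select-all is additive when only some rows are selected; it clears only from a fully selected state.
    onSelectedIdChange(isAllSelected ? [] : Array.from(new Set([...selectedIds, ...documents.map(doc => doc.id)])))
  }, [documents, isAllSelected, onSelectedIdChange, selectedIds])
  const onSelectOne = useCallback((docId: string) => {
    onSelectedIdChange(selectedIds.includes(docId)
      ? selectedIds.filter(id => id !== docId)
      : [...selectedIds, docId])
  }, [onSelectedIdChange, selectedIds])
  const hasErrorDocumentsSelected = documents.some(doc => selectedIds.includes(doc.id) && doc.display_status === 'error')
  const downloadableSelectedIds = useMemo(
    () => selectedIds.filter(id => documents.find(doc => doc.id === id)?.data_source_type === DataSourceType.FILE),
    [documents, selectedIds],
  )
  const clearSelection = useCallback(() => onSelectedIdChange([]), [onSelectedIdChange])
  return { isAllSelected, isSomeSelected, onSelectAll, onSelectOne, hasErrorDocumentsSelected, downloadableSelectedIds, clearSelection }
}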

Some files were not shown because too many files have changed in this diff.