Mirror of https://github.com/langgenius/dify.git (synced 2026-02-11 19:14:00 +00:00)

Compare commits: 6 commits, 2-11-fix-l ... main
| Author | SHA1 | Date |
|---|---|---|
| | 32350f7a04 | |
| | c730fec1e4 | |
| | b4fec9b7aa | |
| | 7e0bccbbf0 | |
| | 2f87ecc0ce | |
| | 5b4c7b2a40 | |
@@ -553,6 +553,8 @@ WORKFLOW_LOG_CLEANUP_ENABLED=false
WORKFLOW_LOG_RETENTION_DAYS=30
# Batch size for workflow log cleanup operations (default: 100)
WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100
# Comma-separated list of workflow IDs to clean logs for
WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS=

# App configuration
APP_MAX_EXECUTION_TIME=1200
@@ -715,6 +717,7 @@ ANNOTATION_IMPORT_MAX_CONCURRENT=5
# Sandbox expired records clean configuration
SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL=200
SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30
SANDBOX_EXPIRED_RECORDS_CLEAN_TASK_LOCK_TTL=90000

@@ -1314,6 +1314,9 @@ class WorkflowLogConfig(BaseSettings):
    WORKFLOW_LOG_CLEANUP_BATCH_SIZE: int = Field(
        default=100, description="Batch size for workflow run log cleanup operations"
    )
    WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS: str = Field(
        default="", description="Comma-separated list of workflow IDs to clean logs for"
    )


class SwaggerUIConfig(BaseSettings):
@@ -1344,6 +1347,10 @@ class SandboxExpiredRecordsCleanConfig(BaseSettings):
        description="Maximum number of records to process in each batch",
        default=1000,
    )
    SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL: PositiveInt = Field(
        description="Maximum interval in milliseconds between batches",
        default=200,
    )
    SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: PositiveInt = Field(
        description="Retention days for sandbox expired workflow_run records and message records",
        default=30,
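For readers unfamiliar with pydantic-settings, a minimal sketch (toy class name and override value invented for illustration; not Dify's actual config wiring) of how a Field like the ones above picks up its value from the environment:

```python
import os

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class SandboxCleanDemoConfig(BaseSettings):
    # Mirrors one of the fields added above; the default applies when the env var is unset.
    SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL: PositiveInt = Field(
        description="Maximum interval in milliseconds between batches",
        default=200,
    )


print(SandboxCleanDemoConfig().SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL)  # 200

os.environ["SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL"] = "500"
print(SandboxCleanDemoConfig().SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL)  # 500
```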
@@ -1,6 +1,7 @@
import urllib.parse

import httpx
from flask_restx import Resource
from pydantic import BaseModel, Field

import services
@@ -10,12 +11,12 @@ from controllers.common.errors import (
    RemoteFileUploadError,
    UnsupportedFileTypeError,
)
from controllers.fastopenapi import console_router
from controllers.console import console_ns
from core.file import helpers as file_helpers
from core.helper import ssrf_proxy
from extensions.ext_database import db
from fields.file_fields import FileWithSignedUrl, RemoteFileInfo
from libs.login import current_account_with_tenant
from libs.login import current_account_with_tenant, login_required
from services.file_service import FileService

@@ -23,69 +24,73 @@ class RemoteFileUploadPayload(BaseModel):
url: str = Field(..., description="URL to fetch")


@console_router.get(
"/remote-files/<path:url>",
response_model=RemoteFileInfo,
tags=["console"],
)
def get_remote_file_info(url: str) -> RemoteFileInfo:
decoded_url = urllib.parse.unquote(url)
resp = ssrf_proxy.head(decoded_url)
if resp.status_code != httpx.codes.OK:
resp = ssrf_proxy.get(decoded_url, timeout=3)
resp.raise_for_status()
return RemoteFileInfo(
file_type=resp.headers.get("Content-Type", "application/octet-stream"),
file_length=int(resp.headers.get("Content-Length", 0)),
)


@console_router.post(
"/remote-files/upload",
response_model=FileWithSignedUrl,
tags=["console"],
status_code=201,
)
def upload_remote_file(payload: RemoteFileUploadPayload) -> FileWithSignedUrl:
url = payload.url

try:
resp = ssrf_proxy.head(url=url)
@console_ns.route("/remote-files/<path:url>")
class GetRemoteFileInfo(Resource):
@login_required
def get(self, url: str):
decoded_url = urllib.parse.unquote(url)
resp = ssrf_proxy.head(decoded_url)
if resp.status_code != httpx.codes.OK:
resp = ssrf_proxy.get(url=url, timeout=3, follow_redirects=True)
if resp.status_code != httpx.codes.OK:
raise RemoteFileUploadError(f"Failed to fetch file from {url}: {resp.text}")
except httpx.RequestError as e:
raise RemoteFileUploadError(f"Failed to fetch file from {url}: {str(e)}")
resp = ssrf_proxy.get(decoded_url, timeout=3)
resp.raise_for_status()
return RemoteFileInfo(
file_type=resp.headers.get("Content-Type", "application/octet-stream"),
file_length=int(resp.headers.get("Content-Length", 0)),
).model_dump(mode="json")

file_info = helpers.guess_file_info_from_response(resp)

if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
raise FileTooLargeError
@console_ns.route("/remote-files/upload")
class RemoteFileUpload(Resource):
@login_required
def post(self):
payload = RemoteFileUploadPayload.model_validate(console_ns.payload)
url = payload.url

content = resp.content if resp.request.method == "GET" else ssrf_proxy.get(url).content
# Try to fetch remote file metadata/content first
try:
resp = ssrf_proxy.head(url=url)
if resp.status_code != httpx.codes.OK:
resp = ssrf_proxy.get(url=url, timeout=3, follow_redirects=True)
if resp.status_code != httpx.codes.OK:
# Normalize into a user-friendly error message expected by tests
raise RemoteFileUploadError(f"Failed to fetch file from {url}: {resp.text}")
except httpx.RequestError as e:
raise RemoteFileUploadError(f"Failed to fetch file from {url}: {str(e)}")

try:
user, _ = current_account_with_tenant()
upload_file = FileService(db.engine).upload_file(
filename=file_info.filename,
content=content,
mimetype=file_info.mimetype,
user=user,
source_url=url,
file_info = helpers.guess_file_info_from_response(resp)

# Enforce file size limit with 400 (Bad Request) per tests' expectation
if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
raise FileTooLargeError()

# Load content if needed
content = resp.content if resp.request.method == "GET" else ssrf_proxy.get(url).content

try:
user, _ = current_account_with_tenant()
upload_file = FileService(db.engine).upload_file(
filename=file_info.filename,
content=content,
mimetype=file_info.mimetype,
user=user,
source_url=url,
)
except services.errors.file.FileTooLargeError as file_too_large_error:
raise FileTooLargeError(file_too_large_error.description)
except services.errors.file.UnsupportedFileTypeError:
raise UnsupportedFileTypeError()

# Success: return created resource with 201 status
return (
FileWithSignedUrl(
id=upload_file.id,
name=upload_file.name,
size=upload_file.size,
extension=upload_file.extension,
url=file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
mime_type=upload_file.mime_type,
created_by=upload_file.created_by,
created_at=int(upload_file.created_at.timestamp()),
).model_dump(mode="json"),
201,
)
except services.errors.file.FileTooLargeError as file_too_large_error:
raise FileTooLargeError(file_too_large_error.description)
except services.errors.file.UnsupportedFileTypeError:
raise UnsupportedFileTypeError()

return FileWithSignedUrl(
id=upload_file.id,
name=upload_file.name,
size=upload_file.size,
extension=upload_file.extension,
url=file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
mime_type=upload_file.mime_type,
created_by=upload_file.created_by,
created_at=int(upload_file.created_at.timestamp()),
)
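As a rough usage sketch for the two endpoints above (not part of the change; the base URL, the auth header, and CSRF handling are assumptions that depend on the deployment), note that the GET route expects the target URL to be percent-encoded into the path, exactly as the tests below do:

```python
import urllib.parse

import httpx

BASE = "http://localhost:5001/console/api"  # assumed local console API
headers = {"Authorization": "Bearer <console access token>"}  # placeholder auth

# GET /remote-files/<path:url> with the remote URL percent-encoded into the path.
target = urllib.parse.quote("http://example.com/file.txt", safe="")
info = httpx.get(f"{BASE}/remote-files/{target}", headers=headers)
print(info.json())  # {"file_type": "...", "file_length": ...}

# POST /remote-files/upload fetches the remote file server-side and stores it.
created = httpx.post(
    f"{BASE}/remote-files/upload",
    headers=headers,
    json={"url": "http://example.com/file.txt"},
)
print(created.status_code)  # 201 on success
```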
@@ -0,0 +1,39 @@
"""fix index to optimize message clean job performance

Revision ID: fce013ca180e
Revises: f55813ffe2c8
Create Date: 2026-02-11 15:49:17.603638

"""
from alembic import op
import models as models
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'fce013ca180e'
down_revision = 'f55813ffe2c8'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('messages', schema=None) as batch_op:
        batch_op.drop_index(batch_op.f('message_created_at_idx'))

    with op.batch_alter_table('saved_messages', schema=None) as batch_op:
        batch_op.create_index('saved_message_message_id_idx', ['message_id'], unique=False)

    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('saved_messages', schema=None) as batch_op:
        batch_op.drop_index('saved_message_message_id_idx')

    with op.batch_alter_table('messages', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('message_created_at_idx'), ['created_at'], unique=False)

    # ### end Alembic commands ###
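This revision is normally applied through the api service's migration command (typically a Flask-Migrate style `flask db upgrade`); as a language-consistent sketch, the equivalent programmatic Alembic call would look roughly like this (the alembic.ini path and working directory are assumptions):

```python
from alembic import command
from alembic.config import Config

# Assumed config location inside the api service's migrations setup.
cfg = Config("alembic.ini")

command.upgrade(cfg, "fce013ca180e")      # apply the index changes above
# command.downgrade(cfg, "f55813ffe2c8")  # revert to the previous revision
```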
@@ -1040,7 +1040,6 @@ class Message(Base):
        Index("message_end_user_idx", "app_id", "from_source", "from_end_user_id"),
        Index("message_account_idx", "app_id", "from_source", "from_account_id"),
        Index("message_workflow_run_id_idx", "conversation_id", "workflow_run_id"),
        Index("message_created_at_idx", "created_at"),
        Index("message_app_mode_idx", "app_mode"),
        Index("message_created_at_id_idx", "created_at", "id"),
    )
@@ -16,6 +16,7 @@ class SavedMessage(TypeBase):
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="saved_message_pkey"),
        sa.Index("saved_message_message_idx", "app_id", "message_id", "created_by_role", "created_by"),
        sa.Index("saved_message_message_id_idx", "message_id"),
    )

    id: Mapped[str] = mapped_column(
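The new saved_message_message_id_idx lines up with the cleanup job further down, which deletes SavedMessage rows by message_id in bulk. A standalone sketch of that query shape (toy model and literal IDs, simplified columns; not Dify's actual SavedMessage class):

```python
import sqlalchemy as sa
from sqlalchemy import delete
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class SavedMsg(Base):  # toy stand-in for the SavedMessage model
    __tablename__ = "saved_messages"
    id: Mapped[str] = mapped_column(sa.String, primary_key=True)
    message_id: Mapped[str] = mapped_column(sa.String, index=True)  # the new index


# The cleanup cascade deletes saved messages by message_id in bulk; an index on
# message_id lets this predicate be an index lookup rather than a full scan.
stmt = delete(SavedMsg).where(SavedMsg.message_id.in_(["msg-1", "msg-2"]))
print(stmt)
```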
@@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.12.1"
version = "1.13.0"
requires-python = ">=3.11,<3.13"

dependencies = [
@@ -264,9 +264,15 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
        batch_size: int,
        run_types: Sequence[WorkflowType] | None = None,
        tenant_ids: Sequence[str] | None = None,
        workflow_ids: Sequence[str] | None = None,
    ) -> Sequence[WorkflowRun]:
        """
        Fetch ended workflow runs in a time window for archival and clean batching.

        Optional filters:
        - run_types
        - tenant_ids
        - workflow_ids
        """
        ...
@@ -386,6 +386,7 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
        batch_size: int,
        run_types: Sequence[WorkflowType] | None = None,
        tenant_ids: Sequence[str] | None = None,
        workflow_ids: Sequence[str] | None = None,
    ) -> Sequence[WorkflowRun]:
        """
        Fetch ended workflow runs in a time window for archival and clean batching.
@@ -394,7 +395,7 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
        - created_at in [start_from, end_before)
        - type in run_types (when provided)
        - status is an ended state
        - optional tenant_id filter and cursor (last_seen) for pagination
        - optional tenant_id, workflow_id filters and cursor (last_seen) for pagination
        """
        with self._session_maker() as session:
            stmt = (
@@ -417,6 +418,9 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
            if tenant_ids:
                stmt = stmt.where(WorkflowRun.tenant_id.in_(tenant_ids))

            if workflow_ids:
                stmt = stmt.where(WorkflowRun.workflow_id.in_(workflow_ids))

            if last_seen:
                stmt = stmt.where(
                    or_(
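The hunk is cut off at the or_ call. As an illustration only (toy model, not the repository's exact code), the (created_at, id) keyset cursor it builds typically expands to the following predicate, paired with the matching ORDER BY:

```python
import datetime

from sqlalchemy import DateTime, String, and_, or_, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Run(Base):  # toy stand-in for WorkflowRun
    __tablename__ = "runs"
    id: Mapped[str] = mapped_column(String, primary_key=True)
    created_at: Mapped[datetime.datetime] = mapped_column(DateTime)


def runs_after(last_seen: tuple[datetime.datetime, str], batch_size: int):
    """Keyset predicate: rows strictly after the (created_at, id) cursor."""
    last_created_at, last_id = last_seen
    return (
        select(Run)
        .where(
            or_(
                Run.created_at > last_created_at,
                and_(Run.created_at == last_created_at, Run.id > last_id),
            )
        )
        .order_by(Run.created_at, Run.id)
        .limit(batch_size)
    )


print(runs_after((datetime.datetime(2026, 1, 1), "run-123"), batch_size=100))
```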
@@ -4,7 +4,6 @@ import time
from collections.abc import Sequence

import click
from sqlalchemy import select
from sqlalchemy.orm import Session, sessionmaker

import app
@@ -13,6 +12,7 @@ from extensions.ext_database import db
from models.model import (
AppAnnotationHitHistory,
Conversation,
DatasetRetrieverResource,
Message,
MessageAgentThought,
MessageAnnotation,
@@ -20,7 +20,10 @@ from models.model import (
MessageFeedback,
MessageFile,
)
from models.workflow import ConversationVariable, WorkflowAppLog, WorkflowNodeExecutionModel, WorkflowRun
from models.web import SavedMessage
from models.workflow import ConversationVariable, WorkflowRun
from repositories.factory import DifyAPIRepositoryFactory
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository

logger = logging.getLogger(__name__)

@@ -29,8 +32,15 @@ MAX_RETRIES = 3
BATCH_SIZE = dify_config.WORKFLOW_LOG_CLEANUP_BATCH_SIZE


@app.celery.task(queue="dataset")
def clean_workflow_runlogs_precise():
def _get_specific_workflow_ids() -> list[str]:
workflow_ids_str = dify_config.WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS.strip()
if not workflow_ids_str:
return []
return [wid.strip() for wid in workflow_ids_str.split(",") if wid.strip()]


@app.celery.task(queue="retention")
def clean_workflow_runlogs_precise() -> None:
"""Clean expired workflow run logs with retry mechanism and complete message cascade"""

click.echo(click.style("Start clean workflow run logs (precise mode with complete cascade).", fg="green"))
@@ -39,48 +49,48 @@ def clean_workflow_runlogs_precise():
retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS
cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
session_factory = sessionmaker(db.engine, expire_on_commit=False)
workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_factory)
workflow_ids = _get_specific_workflow_ids()
workflow_ids_filter = workflow_ids or None

try:
with session_factory.begin() as session:
total_workflow_runs = session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count()
if total_workflow_runs == 0:
logger.info("No expired workflow run logs found")
return
logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)

total_deleted = 0
failed_batches = 0
batch_count = 0
last_seen: tuple[datetime.datetime, str] | None = None
while True:
run_rows = workflow_run_repo.get_runs_batch_by_time_range(
start_from=None,
end_before=cutoff_date,
last_seen=last_seen,
batch_size=BATCH_SIZE,
workflow_ids=workflow_ids_filter,
)

if not run_rows:
if batch_count == 0:
logger.info("No expired workflow run logs found")
break

last_seen = (run_rows[-1].created_at, run_rows[-1].id)
batch_count += 1
with session_factory.begin() as session:
workflow_run_ids = session.scalars(
select(WorkflowRun.id)
.where(WorkflowRun.created_at < cutoff_date)
.order_by(WorkflowRun.created_at, WorkflowRun.id)
.limit(BATCH_SIZE)
).all()
success = _delete_batch(session, workflow_run_repo, run_rows, failed_batches)

if not workflow_run_ids:
if success:
total_deleted += len(run_rows)
failed_batches = 0
else:
failed_batches += 1
if failed_batches >= MAX_RETRIES:
logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
break

batch_count += 1

success = _delete_batch(session, workflow_run_ids, failed_batches)

if success:
total_deleted += len(workflow_run_ids)
failed_batches = 0
else:
failed_batches += 1
if failed_batches >= MAX_RETRIES:
logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
break
else:
# Calculate incremental delay times: 5, 10, 15 minutes
retry_delay_minutes = failed_batches * 5
logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
time.sleep(retry_delay_minutes * 60)
continue
# Calculate incremental delay times: 5, 10, 15 minutes
retry_delay_minutes = failed_batches * 5
logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
time.sleep(retry_delay_minutes * 60)
continue

logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)

@@ -93,10 +103,16 @@ def clean_workflow_runlogs_precise():
click.echo(click.style(f"Cleaned workflow run logs from db success latency: {execution_time:.2f}s", fg="green"))


def _delete_batch(session: Session, workflow_run_ids: Sequence[str], attempt_count: int) -> bool:
def _delete_batch(
session: Session,
workflow_run_repo,
workflow_runs: Sequence[WorkflowRun],
attempt_count: int,
) -> bool:
"""Delete a single batch of workflow runs and all related data within a nested transaction."""
try:
with session.begin_nested():
workflow_run_ids = [run.id for run in workflow_runs]
message_data = (
session.query(Message.id, Message.conversation_id)
.where(Message.workflow_run_id.in_(workflow_run_ids))
@@ -107,11 +123,13 @@ def _delete_batch(session: Session, workflow_run_ids: Sequence[str], attempt_cou
if message_id_list:
message_related_models = [
AppAnnotationHitHistory,
DatasetRetrieverResource,
MessageAgentThought,
MessageChain,
MessageFile,
MessageAnnotation,
MessageFeedback,
SavedMessage,
]
for model in message_related_models:
session.query(model).where(model.message_id.in_(message_id_list)).delete(synchronize_session=False)  # type: ignore
@@ -122,14 +140,6 @@ def _delete_batch(session: Session, workflow_run_ids: Sequence[str], attempt_cou
synchronize_session=False
)

session.query(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(workflow_run_ids)).delete(
synchronize_session=False
)

session.query(WorkflowNodeExecutionModel).where(
WorkflowNodeExecutionModel.workflow_run_id.in_(workflow_run_ids)
).delete(synchronize_session=False)

if conversation_id_list:
session.query(ConversationVariable).where(
ConversationVariable.conversation_id.in_(conversation_id_list)
@@ -139,7 +149,22 @@ def _delete_batch(session: Session, workflow_run_ids: Sequence[str], attempt_cou
synchronize_session=False
)

session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids)).delete(synchronize_session=False)
def _delete_node_executions(active_session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
run_ids = [run.id for run in runs]
repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(
session_maker=sessionmaker(bind=active_session.get_bind(), expire_on_commit=False)
)
return repo.delete_by_runs(active_session, run_ids)

def _delete_trigger_logs(active_session: Session, run_ids: Sequence[str]) -> int:
trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(active_session)
return trigger_repo.delete_by_run_ids(run_ids)

workflow_run_repo.delete_runs_with_related(
workflow_runs,
delete_node_executions=_delete_node_executions,
delete_trigger_logs=_delete_trigger_logs,
)

return True
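_delete_batch wraps each cascade in session.begin_nested(). A small generic sketch of that savepoint pattern (in-memory SQLite with the documented pysqlite workaround, toy table, not Dify's models) shows why one failed batch rolls back cleanly without discarding earlier work in the same outer transaction:

```python
from sqlalchemy import String, create_engine, delete, event
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Record(Base):  # toy table standing in for a batch of workflow run rows
    __tablename__ = "records"
    id: Mapped[str] = mapped_column(String, primary_key=True)


engine = create_engine("sqlite://")


# Documented pysqlite recipe so SAVEPOINT behaves; a server database does not need this.
@event.listens_for(engine, "connect")
def _do_connect(dbapi_connection, connection_record):
    dbapi_connection.isolation_level = None


@event.listens_for(engine, "begin")
def _do_begin(conn):
    conn.exec_driver_sql("BEGIN")


Base.metadata.create_all(engine)

with Session(engine) as session, session.begin():
    session.add_all([Record(id="a"), Record(id="b")])
    session.flush()  # earlier work lives in the outer transaction

    try:
        # SAVEPOINT: everything inside rolls back together if it raises,
        # while the outer transaction (prior batches) stays intact.
        with session.begin_nested():
            session.execute(delete(Record).where(Record.id == "a"))
            raise RuntimeError("simulated batch failure")
    except RuntimeError:
        pass  # the delete was rolled back to the savepoint

    print(sorted(r.id for r in session.query(Record).all()))  # ['a', 'b']
```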
@@ -1,10 +1,13 @@
import datetime
import logging
import os
import random
import time
from collections.abc import Sequence
from typing import cast

from sqlalchemy import delete, select
import sqlalchemy as sa
from sqlalchemy import delete, select, tuple_
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session

@@ -193,11 +196,15 @@ class MessagesCleanService:
self._end_before,
)

max_batch_interval_ms = int(os.environ.get("SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL", 200))

while True:
stats["batches"] += 1
batch_start = time.monotonic()

# Step 1: Fetch a batch of messages using cursor
with Session(db.engine, expire_on_commit=False) as session:
fetch_messages_start = time.monotonic()
msg_stmt = (
select(Message.id, Message.app_id, Message.created_at)
.where(Message.created_at < self._end_before)
@@ -209,13 +216,13 @@ class MessagesCleanService:
msg_stmt = msg_stmt.where(Message.created_at >= self._start_from)

# Apply cursor condition: (created_at, id) > (last_created_at, last_message_id)
# This translates to:
# created_at > last_created_at OR (created_at = last_created_at AND id > last_message_id)
if _cursor:
# Continuing from previous batch
msg_stmt = msg_stmt.where(
(Message.created_at > _cursor[0])
| ((Message.created_at == _cursor[0]) & (Message.id > _cursor[1]))
tuple_(Message.created_at, Message.id)
> tuple_(
sa.literal(_cursor[0], type_=sa.DateTime()),
sa.literal(_cursor[1], type_=Message.id.type),
)
)

raw_messages = list(session.execute(msg_stmt).all())
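The cursor change above replaces the hand-written OR/AND predicate with a SQL row-value comparison built from tuple_. A standalone sketch (toy model; the PostgreSQL dialect is chosen only to show the emitted SQL, and row-value support varies by backend):

```python
import datetime

import sqlalchemy as sa
from sqlalchemy import select, tuple_
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Msg(Base):  # toy stand-in for the Message model
    __tablename__ = "msgs"
    id: Mapped[str] = mapped_column(sa.String, primary_key=True)
    created_at: Mapped[datetime.datetime] = mapped_column(sa.DateTime)


cursor = (datetime.datetime(2026, 1, 1), "00000000-0000-0000-0000-000000000000")

# (created_at, id) > (:created_at, :id) as a single composite predicate that
# matches the ORDER BY (created_at, id) keyset order.
stmt = (
    select(Msg.id)
    .where(
        tuple_(Msg.created_at, Msg.id)
        > tuple_(
            sa.literal(cursor[0], type_=sa.DateTime()),
            sa.literal(cursor[1], type_=Msg.id.type),
        )
    )
    .order_by(Msg.created_at, Msg.id)
)
print(stmt.compile(dialect=postgresql.dialect()))
```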
@@ -223,6 +230,12 @@ class MessagesCleanService:
SimpleMessage(id=msg_id, app_id=app_id, created_at=msg_created_at)
for msg_id, app_id, msg_created_at in raw_messages
]
logger.info(
"clean_messages (batch %s): fetched %s messages in %sms",
stats["batches"],
len(messages),
int((time.monotonic() - fetch_messages_start) * 1000),
)

# Track total messages fetched across all batches
stats["total_messages"] += len(messages)
@@ -241,8 +254,16 @@ class MessagesCleanService:
logger.info("clean_messages (batch %s): no app_ids found, skip", stats["batches"])
continue

fetch_apps_start = time.monotonic()
app_stmt = select(App.id, App.tenant_id).where(App.id.in_(app_ids))
apps = list(session.execute(app_stmt).all())
logger.info(
"clean_messages (batch %s): fetched %s apps for %s app_ids in %sms",
stats["batches"],
len(apps),
len(app_ids),
int((time.monotonic() - fetch_apps_start) * 1000),
)

if not apps:
logger.info("clean_messages (batch %s): no apps found, skip", stats["batches"])
@@ -252,7 +273,15 @@ class MessagesCleanService:
app_to_tenant: dict[str, str] = {app.id: app.tenant_id for app in apps}

# Step 3: Delegate to policy to determine which messages to delete
policy_start = time.monotonic()
message_ids_to_delete = self._policy.filter_message_ids(messages, app_to_tenant)
logger.info(
"clean_messages (batch %s): policy selected %s/%s messages in %sms",
stats["batches"],
len(message_ids_to_delete),
len(messages),
int((time.monotonic() - policy_start) * 1000),
)

if not message_ids_to_delete:
logger.info("clean_messages (batch %s): no messages to delete, skip", stats["batches"])
@@ -263,14 +292,20 @@ class MessagesCleanService:
# Step 4: Batch delete messages and their relations
if not self._dry_run:
with Session(db.engine, expire_on_commit=False) as session:
delete_relations_start = time.monotonic()
# Delete related records first
self._batch_delete_message_relations(session, message_ids_to_delete)
delete_relations_ms = int((time.monotonic() - delete_relations_start) * 1000)

# Delete messages
delete_messages_start = time.monotonic()
delete_stmt = delete(Message).where(Message.id.in_(message_ids_to_delete))
delete_result = cast(CursorResult, session.execute(delete_stmt))
messages_deleted = delete_result.rowcount
delete_messages_ms = int((time.monotonic() - delete_messages_start) * 1000)
commit_start = time.monotonic()
session.commit()
commit_ms = int((time.monotonic() - commit_start) * 1000)

stats["total_deleted"] += messages_deleted

@@ -280,6 +315,19 @@ class MessagesCleanService:
len(messages),
messages_deleted,
)
logger.info(
"clean_messages (batch %s): relations %sms, messages %sms, commit %sms, batch total %sms",
stats["batches"],
delete_relations_ms,
delete_messages_ms,
commit_ms,
int((time.monotonic() - batch_start) * 1000),
)

# Random sleep between batches to avoid overwhelming the database
sleep_ms = random.uniform(0, max_batch_interval_ms)  # noqa: S311
logger.info("clean_messages (batch %s): sleeping for %.2fms", stats["batches"], sleep_ms)
time.sleep(sleep_ms / 1000)
else:
# Log random sample of message IDs that would be deleted (up to 10)
sample_size = min(10, len(message_ids_to_delete))
@@ -1,5 +1,8 @@
import datetime
import logging
import os
import random
import time
from collections.abc import Iterable, Sequence

import click
@@ -72,7 +75,12 @@ class WorkflowRunCleanup:
batch_index = 0
last_seen: tuple[datetime.datetime, str] | None = None

max_batch_interval_ms = int(os.environ.get("SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL", 200))

while True:
batch_start = time.monotonic()

fetch_start = time.monotonic()
run_rows = self.workflow_run_repo.get_runs_batch_by_time_range(
start_from=self.window_start,
end_before=self.window_end,
@@ -80,12 +88,30 @@ class WorkflowRunCleanup:
batch_size=self.batch_size,
)
if not run_rows:
logger.info("workflow_run_cleanup (batch #%s): no more rows to process", batch_index + 1)
break

batch_index += 1
last_seen = (run_rows[-1].created_at, run_rows[-1].id)
logger.info(
"workflow_run_cleanup (batch #%s): fetched %s rows in %sms",
batch_index,
len(run_rows),
int((time.monotonic() - fetch_start) * 1000),
)

tenant_ids = {row.tenant_id for row in run_rows}

filter_start = time.monotonic()
free_tenants = self._filter_free_tenants(tenant_ids)
logger.info(
"workflow_run_cleanup (batch #%s): filtered %s free tenants from %s tenants in %sms",
batch_index,
len(free_tenants),
len(tenant_ids),
int((time.monotonic() - filter_start) * 1000),
)

free_runs = [row for row in run_rows if row.tenant_id in free_tenants]
paid_or_skipped = len(run_rows) - len(free_runs)

@@ -104,11 +130,17 @@ class WorkflowRunCleanup:
total_runs_targeted += len(free_runs)

if self.dry_run:
count_start = time.monotonic()
batch_counts = self.workflow_run_repo.count_runs_with_related(
free_runs,
count_node_executions=self._count_node_executions,
count_trigger_logs=self._count_trigger_logs,
)
logger.info(
"workflow_run_cleanup (batch #%s, dry_run): counted related records in %sms",
batch_index,
int((time.monotonic() - count_start) * 1000),
)
if related_totals is not None:
for key in related_totals:
related_totals[key] += batch_counts.get(key, 0)
@@ -120,14 +152,21 @@ class WorkflowRunCleanup:
fg="yellow",
)
)
logger.info(
"workflow_run_cleanup (batch #%s, dry_run): batch total %sms",
batch_index,
int((time.monotonic() - batch_start) * 1000),
)
continue

try:
delete_start = time.monotonic()
counts = self.workflow_run_repo.delete_runs_with_related(
free_runs,
delete_node_executions=self._delete_node_executions,
delete_trigger_logs=self._delete_trigger_logs,
)
delete_ms = int((time.monotonic() - delete_start) * 1000)
except Exception:
logger.exception("Failed to delete workflow runs batch ending at %s", last_seen[0])
raise
@@ -143,6 +182,17 @@ class WorkflowRunCleanup:
fg="green",
)
)
logger.info(
"workflow_run_cleanup (batch #%s): delete %sms, batch total %sms",
batch_index,
delete_ms,
int((time.monotonic() - batch_start) * 1000),
)

# Random sleep between batches to avoid overwhelming the database
sleep_ms = random.uniform(0, max_batch_interval_ms)  # noqa: S311
logger.info("workflow_run_cleanup (batch #%s): sleeping for %.2fms", batch_index, sleep_ms)
time.sleep(sleep_ms / 1000)

if self.dry_run:
if self.window_start:
@@ -1,92 +1,286 @@
import builtins
"""Tests for remote file upload API endpoints using Flask-RESTX."""

import contextlib
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import patch
from unittest.mock import Mock, patch

import httpx
import pytest
from flask import Flask
from flask.views import MethodView

from extensions import ext_fastopenapi

if not hasattr(builtins, "MethodView"):
builtins.MethodView = MethodView  # type: ignore[attr-defined]
from flask import Flask, g


@pytest.fixture
def app() -> Flask:
"""Create Flask app for testing."""
app = Flask(__name__)
app.config["TESTING"] = True
app.config["SECRET_KEY"] = "test-secret-key"
return app


def test_console_remote_files_fastopenapi_get_info(app: Flask):
ext_fastopenapi.init_app(app)
@pytest.fixture
def client(app):
"""Create test client with console blueprint registered."""
from controllers.console import bp

response = httpx.Response(
200,
request=httpx.Request("HEAD", "http://example.com/file.txt"),
headers={"Content-Type": "text/plain", "Content-Length": "10"},
)

with patch("controllers.console.remote_files.ssrf_proxy.head", return_value=response):
client = app.test_client()
encoded_url = "http%3A%2F%2Fexample.com%2Ffile.txt"
resp = client.get(f"/console/api/remote-files/{encoded_url}")

assert resp.status_code == 200
assert resp.get_json() == {"file_type": "text/plain", "file_length": 10}
app.register_blueprint(bp)
return app.test_client()


def test_console_remote_files_fastopenapi_upload(app: Flask):
ext_fastopenapi.init_app(app)
@pytest.fixture
def mock_account():
"""Create a mock account for testing."""
from models import Account

head_response = httpx.Response(
200,
request=httpx.Request("GET", "http://example.com/file.txt"),
content=b"hello",
)
file_info = SimpleNamespace(
extension="txt",
size=5,
filename="file.txt",
mimetype="text/plain",
)
uploaded = SimpleNamespace(
id="file-id",
name="file.txt",
size=5,
extension="txt",
mime_type="text/plain",
created_by="user-id",
created_at=datetime(2024, 1, 1),
)
account = Mock(spec=Account)
account.id = "test-account-id"
account.current_tenant_id = "test-tenant-id"
return account

with (
patch("controllers.console.remote_files.db", new=SimpleNamespace(engine=object())),
patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_response),
patch("controllers.console.remote_files.helpers.guess_file_info_from_response", return_value=file_info),
patch("controllers.console.remote_files.FileService.is_file_size_within_limit", return_value=True),
patch("controllers.console.remote_files.FileService.__init__", return_value=None),
patch("controllers.console.remote_files.current_account_with_tenant", return_value=(object(), "tenant-id")),
patch("controllers.console.remote_files.FileService.upload_file", return_value=uploaded),
patch("controllers.console.remote_files.file_helpers.get_signed_file_url", return_value="signed-url"),
):
client = app.test_client()
resp = client.post(
"/console/api/remote-files/upload",
json={"url": "http://example.com/file.txt"},

@pytest.fixture
def auth_ctx(app, mock_account):
"""Context manager to set auth/tenant context in flask.g for a request."""

@contextlib.contextmanager
def _ctx():
with app.test_request_context():
g._login_user = mock_account
g._current_tenant = mock_account.current_tenant_id
yield

return _ctx


class TestGetRemoteFileInfo:
"""Test GET /console/api/remote-files/<path:url> endpoint."""

def test_get_remote_file_info_success(self, app, client, mock_account):
"""Test successful retrieval of remote file info."""
response = httpx.Response(
200,
request=httpx.Request("HEAD", "http://example.com/file.txt"),
headers={"Content-Type": "text/plain", "Content-Length": "1024"},
)

assert resp.status_code == 201
assert resp.get_json() == {
"id": "file-id",
"name": "file.txt",
"size": 5,
"extension": "txt",
"url": "signed-url",
"mime_type": "text/plain",
"created_by": "user-id",
"created_at": int(uploaded.created_at.timestamp()),
}
with (
patch(
"controllers.console.remote_files.current_account_with_tenant",
return_value=(mock_account, "test-tenant-id"),
),
patch("controllers.console.remote_files.ssrf_proxy.head", return_value=response),
patch("libs.login.check_csrf_token", return_value=None),
):
with app.test_request_context():
g._login_user = mock_account
g._current_tenant = mock_account.current_tenant_id
encoded_url = "http%3A%2F%2Fexample.com%2Ffile.txt"
resp = client.get(f"/console/api/remote-files/{encoded_url}")

assert resp.status_code == 200
data = resp.get_json()
assert data["file_type"] == "text/plain"
assert data["file_length"] == 1024

def test_get_remote_file_info_fallback_to_get_on_head_failure(self, app, client, mock_account):
"""Test fallback to GET when HEAD returns non-200 status."""
head_response = httpx.Response(
404,
request=httpx.Request("HEAD", "http://example.com/file.pdf"),
)
get_response = httpx.Response(
200,
request=httpx.Request("GET", "http://example.com/file.pdf"),
headers={"Content-Type": "application/pdf", "Content-Length": "2048"},
)

with (
patch(
"controllers.console.remote_files.current_account_with_tenant",
return_value=(mock_account, "test-tenant-id"),
),
patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_response),
patch("controllers.console.remote_files.ssrf_proxy.get", return_value=get_response),
patch("libs.login.check_csrf_token", return_value=None),
):
with app.test_request_context():
g._login_user = mock_account
g._current_tenant = mock_account.current_tenant_id
encoded_url = "http%3A%2F%2Fexample.com%2Ffile.pdf"
resp = client.get(f"/console/api/remote-files/{encoded_url}")

assert resp.status_code == 200
data = resp.get_json()
assert data["file_type"] == "application/pdf"
assert data["file_length"] == 2048


class TestRemoteFileUpload:
"""Test POST /console/api/remote-files/upload endpoint."""

@pytest.mark.parametrize(
("head_status", "use_get"),
[
(200, False),  # HEAD succeeds
(405, True),  # HEAD fails -> fallback GET
],
)
def test_upload_remote_file_success_paths(self, client, mock_account, auth_ctx, head_status, use_get):
url = "http://example.com/file.pdf"
head_resp = httpx.Response(
head_status,
request=httpx.Request("HEAD", url),
headers={"Content-Type": "application/pdf", "Content-Length": "1024"},
)
get_resp = httpx.Response(
200,
request=httpx.Request("GET", url),
headers={"Content-Type": "application/pdf", "Content-Length": "1024"},
content=b"file content",
)

file_info = SimpleNamespace(
extension="pdf",
size=1024,
filename="file.pdf",
mimetype="application/pdf",
)
uploaded_file = SimpleNamespace(
id="uploaded-file-id",
name="file.pdf",
size=1024,
extension="pdf",
mime_type="application/pdf",
created_by="test-account-id",
created_at=datetime(2024, 1, 1, 12, 0, 0),
)

with (
patch(
"controllers.console.remote_files.current_account_with_tenant",
return_value=(mock_account, "test-tenant-id"),
),
patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_resp) as p_head,
patch("controllers.console.remote_files.ssrf_proxy.get", return_value=get_resp) as p_get,
patch(
"controllers.console.remote_files.helpers.guess_file_info_from_response",
return_value=file_info,
),
patch(
"controllers.console.remote_files.FileService.is_file_size_within_limit",
return_value=True,
),
patch("controllers.console.remote_files.db", spec=["engine"]),
patch("controllers.console.remote_files.FileService") as mock_file_service,
patch(
"controllers.console.remote_files.file_helpers.get_signed_file_url",
return_value="http://example.com/signed-url",
),
patch("libs.login.check_csrf_token", return_value=None),
):
mock_file_service.return_value.upload_file.return_value = uploaded_file

with auth_ctx():
resp = client.post(
"/console/api/remote-files/upload",
json={"url": url},
)

assert resp.status_code == 201
p_head.assert_called_once()
# GET is used either for fallback (HEAD fails) or to fetch content after HEAD succeeds
p_get.assert_called_once()
mock_file_service.return_value.upload_file.assert_called_once()

data = resp.get_json()
assert data["id"] == "uploaded-file-id"
assert data["name"] == "file.pdf"
assert data["size"] == 1024
assert data["extension"] == "pdf"
assert data["url"] == "http://example.com/signed-url"
assert data["mime_type"] == "application/pdf"
assert data["created_by"] == "test-account-id"

@pytest.mark.parametrize(
("size_ok", "raises", "expected_status", "expected_msg"),
[
# When size check fails in controller, API returns 413 with message "File size exceeded..."
(False, None, 413, "file size exceeded"),
# When service raises unsupported type, controller maps to 415 with message "File type not allowed."
(True, "unsupported", 415, "file type not allowed"),
],
)
def test_upload_remote_file_errors(
self, client, mock_account, auth_ctx, size_ok, raises, expected_status, expected_msg
):
url = "http://example.com/x.pdf"
head_resp = httpx.Response(
200,
request=httpx.Request("HEAD", url),
headers={"Content-Type": "application/pdf", "Content-Length": "9"},
)
file_info = SimpleNamespace(extension="pdf", size=9, filename="x.pdf", mimetype="application/pdf")

with (
patch(
"controllers.console.remote_files.current_account_with_tenant",
return_value=(mock_account, "test-tenant-id"),
),
patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_resp),
patch(
"controllers.console.remote_files.helpers.guess_file_info_from_response",
return_value=file_info,
),
patch(
"controllers.console.remote_files.FileService.is_file_size_within_limit",
return_value=size_ok,
),
patch("controllers.console.remote_files.db", spec=["engine"]),
patch("libs.login.check_csrf_token", return_value=None),
):
if raises == "unsupported":
from services.errors.file import UnsupportedFileTypeError

with patch("controllers.console.remote_files.FileService") as mock_file_service:
mock_file_service.return_value.upload_file.side_effect = UnsupportedFileTypeError("bad")
with auth_ctx():
resp = client.post(
"/console/api/remote-files/upload",
json={"url": url},
)
else:
with auth_ctx():
resp = client.post(
"/console/api/remote-files/upload",
json={"url": url},
)

assert resp.status_code == expected_status
data = resp.get_json()
msg = (data.get("error") or {}).get("message") or data.get("message", "")
assert expected_msg in msg.lower()

def test_upload_remote_file_fetch_failure(self, client, mock_account, auth_ctx):
"""Test upload when fetching of remote file fails."""
with (
patch(
"controllers.console.remote_files.current_account_with_tenant",
return_value=(mock_account, "test-tenant-id"),
),
patch(
"controllers.console.remote_files.ssrf_proxy.head",
side_effect=httpx.RequestError("Connection failed"),
),
patch("libs.login.check_csrf_token", return_value=None),
):
with auth_ctx():
resp = client.post(
"/console/api/remote-files/upload",
json={"url": "http://unreachable.com/file.pdf"},
)

assert resp.status_code == 400
data = resp.get_json()
msg = (data.get("error") or {}).get("message") or data.get("message", "")
assert "failed to fetch" in msg.lower()
@@ -496,6 +496,9 @@ class TestSchemaResolverClass:
avg_time_no_cache = sum(results1) / len(results1)

# Second run (with cache) - run multiple times
# Warm up cache first
resolve_dify_schema_refs(schema)

results2 = []
for _ in range(3):
start = time.perf_counter()
@@ -62,6 +62,9 @@ class FakeRepo:
        end_before: datetime.datetime,
        last_seen: tuple[datetime.datetime, str] | None,
        batch_size: int,
        run_types=None,
        tenant_ids=None,
        workflow_ids=None,
    ) -> list[FakeRun]:
        if self.call_idx >= len(self.batches):
            return []
api/uv.lock (generated file, 2 changed lines)
@@ -1366,7 +1366,7 @@ wheels = [

[[package]]
name = "dify-api"
version = "1.12.1"
version = "1.13.0"
source = { virtual = "." }
dependencies = [
    { name = "aliyun-log-python-sdk" },
@@ -1073,6 +1073,8 @@ WORKFLOW_LOG_CLEANUP_ENABLED=false
WORKFLOW_LOG_RETENTION_DAYS=30
# Batch size for workflow log cleanup operations (default: 100)
WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100
# Comma-separated list of workflow IDs to clean logs for
WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS=

# Aliyun SLS Logstore Configuration
# Aliyun Access Key ID
@@ -1523,6 +1525,7 @@ AMPLITUDE_API_KEY=
# Sandbox expired records clean configuration
SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL=200
SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30

@@ -21,7 +21,7 @@ services:

# API service
api:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -132,7 +132,7 @@ services:

# Frontend web application.
web:
image: langgenius/dify-web:1.12.1
image: langgenius/dify-web:1.13.0
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -470,6 +470,7 @@ x-shared-env: &shared-api-worker-env
WORKFLOW_LOG_CLEANUP_ENABLED: ${WORKFLOW_LOG_CLEANUP_ENABLED:-false}
WORKFLOW_LOG_RETENTION_DAYS: ${WORKFLOW_LOG_RETENTION_DAYS:-30}
WORKFLOW_LOG_CLEANUP_BATCH_SIZE: ${WORKFLOW_LOG_CLEANUP_BATCH_SIZE:-100}
WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS: ${WORKFLOW_LOG_CLEANUP_SPECIFIC_WORKFLOW_IDS:-}
ALIYUN_SLS_ACCESS_KEY_ID: ${ALIYUN_SLS_ACCESS_KEY_ID:-}
ALIYUN_SLS_ACCESS_KEY_SECRET: ${ALIYUN_SLS_ACCESS_KEY_SECRET:-}
ALIYUN_SLS_ENDPOINT: ${ALIYUN_SLS_ENDPOINT:-}
@@ -684,6 +685,7 @@ x-shared-env: &shared-api-worker-env
AMPLITUDE_API_KEY: ${AMPLITUDE_API_KEY:-}
SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD: ${SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD:-21}
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE: ${SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE:-1000}
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL: ${SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL:-200}
SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: ${SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS:-30}
PUBSUB_REDIS_URL: ${PUBSUB_REDIS_URL:-}
PUBSUB_REDIS_CHANNEL_TYPE: ${PUBSUB_REDIS_CHANNEL_TYPE:-pubsub}
@@ -714,7 +716,7 @@ services:

# API service
api:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -756,7 +758,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -795,7 +797,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.12.1
image: langgenius/dify-api:1.13.0
restart: always
environment:
# Use the shared environment variables.
@@ -825,7 +827,7 @@ services:

# Frontend web application.
web:
image: langgenius/dify-web:1.12.1
image: langgenius/dify-web:1.13.0
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -104,7 +104,7 @@ const MembersPage = () => {
<UpgradeBtn className="mr-2" loc="member-invite" />
)}
<div className="shrink-0">
<InviteButton disabled={!isCurrentWorkspaceManager || isMemberFull} onClick={() => setInviteModalVisible(true)} />
{isCurrentWorkspaceManager && <InviteButton disabled={isMemberFull} onClick={() => setInviteModalVisible(true)} />}
</div>
</div>
<div className="overflow-visible lg:overflow-visible">
@@ -92,8 +92,10 @@ vi.mock('@/service/workflow', () => ({
}))

const mockInvalidAllLastRun = vi.fn()
const mockInvalidateRunHistory = vi.fn()
vi.mock('@/service/use-workflow', () => ({
useInvalidAllLastRun: () => mockInvalidAllLastRun,
useInvalidateWorkflowRunHistory: () => mockInvalidateRunHistory,
}))

// Mock FlowType
@@ -472,6 +474,7 @@ describe('usePipelineRun', () => {
})

expect(onWorkflowStarted).toHaveBeenCalledWith({ task_id: 'task-1' })
expect(mockInvalidateRunHistory).toHaveBeenCalled()
})

it('should call onWorkflowFinished callback when provided', async () => {
@@ -493,6 +496,7 @@ describe('usePipelineRun', () => {
})

expect(onWorkflowFinished).toHaveBeenCalledWith({ status: 'succeeded' })
expect(mockInvalidateRunHistory).toHaveBeenCalled()
})

it('should call onError callback when provided', async () => {
@@ -514,6 +518,7 @@ describe('usePipelineRun', () => {
})

expect(onError).toHaveBeenCalledWith({ message: 'error' })
expect(mockInvalidateRunHistory).toHaveBeenCalled()
})

it('should call onNodeStarted callback when provided', async () => {
@@ -2299,7 +2299,7 @@
},
"app/components/base/markdown-blocks/code-block.tsx": {
"react-hooks-extra/no-direct-set-state-in-use-effect": {
"count": 10
"count": 7
},
"tailwindcss/enforce-consistent-class-order": {
"count": 1
@@ -1,7 +1,7 @@
{
"name": "dify-web",
"type": "module",
"version": "1.12.1",
"version": "1.13.0",
"private": true,
"packageManager": "pnpm@10.27.0+sha512.72d699da16b1179c14ba9e64dc71c9a40988cbdc65c264cb0e489db7de917f20dcf4d64d8723625f2969ba52d4b7e2a1170682d9ac2a5dcaeaab732b7e16f04a",
"imports": {