mirror of
https://github.com/langgenius/dify.git
synced 2025-12-22 15:27:32 +00:00
feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -354,6 +354,11 @@ def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int:
|
||||
"""
|
||||
Delete draft variables for an app in batches.
|
||||
|
||||
This function now handles cleanup of associated Offload data including:
|
||||
- WorkflowDraftVariableFile records
|
||||
- UploadFile records
|
||||
- Object storage files
|
||||
|
||||
Args:
|
||||
app_id: The ID of the app whose draft variables should be deleted
|
||||
batch_size: Number of records to delete per batch
|
||||
@@ -365,22 +370,31 @@ def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int:
|
||||
raise ValueError("batch_size must be positive")
|
||||
|
||||
total_deleted = 0
|
||||
total_files_deleted = 0
|
||||
|
||||
while True:
|
||||
with db.engine.begin() as conn:
|
||||
# Get a batch of draft variable IDs
|
||||
# Get a batch of draft variable IDs along with their file_ids
|
||||
query_sql = """
|
||||
SELECT id FROM workflow_draft_variables
|
||||
SELECT id, file_id FROM workflow_draft_variables
|
||||
WHERE app_id = :app_id
|
||||
LIMIT :batch_size
|
||||
"""
|
||||
result = conn.execute(sa.text(query_sql), {"app_id": app_id, "batch_size": batch_size})
|
||||
|
||||
draft_var_ids = [row[0] for row in result]
|
||||
if not draft_var_ids:
|
||||
rows = list(result)
|
||||
if not rows:
|
||||
break
|
||||
|
||||
# Delete the batch
|
||||
draft_var_ids = [row[0] for row in rows]
|
||||
file_ids = [row[1] for row in rows if row[1] is not None]
|
||||
|
||||
# Clean up associated Offload data first
|
||||
if file_ids:
|
||||
files_deleted = _delete_draft_variable_offload_data(conn, file_ids)
|
||||
total_files_deleted += files_deleted
|
||||
|
||||
# Delete the draft variables
|
||||
delete_sql = """
|
||||
DELETE FROM workflow_draft_variables
|
||||
WHERE id IN :ids
|
||||
@@ -391,11 +405,86 @@ def delete_draft_variables_batch(app_id: str, batch_size: int = 1000) -> int:
|
||||
|
||||
logger.info(click.style(f"Deleted {batch_deleted} draft variables (batch) for app {app_id}", fg="green"))
|
||||
|
||||
logger.info(click.style(f"Deleted {total_deleted} total draft variables for app {app_id}", fg="green"))
|
||||
logger.info(
|
||||
click.style(
|
||||
f"Deleted {total_deleted} total draft variables for app {app_id}. "
|
||||
f"Cleaned up {total_files_deleted} total associated files.",
|
||||
fg="green",
|
||||
)
|
||||
)
|
||||
return total_deleted
|
||||
|
||||
|
||||
def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: str):
|
||||
def _delete_draft_variable_offload_data(conn, file_ids: list[str]) -> int:
|
||||
"""
|
||||
Delete Offload data associated with WorkflowDraftVariable file_ids.
|
||||
|
||||
This function:
|
||||
1. Finds WorkflowDraftVariableFile records by file_ids
|
||||
2. Deletes associated files from object storage
|
||||
3. Deletes UploadFile records
|
||||
4. Deletes WorkflowDraftVariableFile records
|
||||
|
||||
Args:
|
||||
conn: Database connection
|
||||
file_ids: List of WorkflowDraftVariableFile IDs
|
||||
|
||||
Returns:
|
||||
Number of files cleaned up
|
||||
"""
|
||||
from extensions.ext_storage import storage
|
||||
|
||||
if not file_ids:
|
||||
return 0
|
||||
|
||||
files_deleted = 0
|
||||
|
||||
try:
|
||||
# Get WorkflowDraftVariableFile records and their associated UploadFile keys
|
||||
query_sql = """
|
||||
SELECT wdvf.id, uf.key, uf.id as upload_file_id
|
||||
FROM workflow_draft_variable_files wdvf
|
||||
JOIN upload_files uf ON wdvf.upload_file_id = uf.id
|
||||
WHERE wdvf.id IN :file_ids
|
||||
"""
|
||||
result = conn.execute(sa.text(query_sql), {"file_ids": tuple(file_ids)})
|
||||
file_records = list(result)
|
||||
|
||||
# Delete from object storage and collect upload file IDs
|
||||
upload_file_ids = []
|
||||
for _, storage_key, upload_file_id in file_records:
|
||||
try:
|
||||
storage.delete(storage_key)
|
||||
upload_file_ids.append(upload_file_id)
|
||||
files_deleted += 1
|
||||
except Exception:
|
||||
logging.exception("Failed to delete storage object %s", storage_key)
|
||||
# Continue with database cleanup even if storage deletion fails
|
||||
upload_file_ids.append(upload_file_id)
|
||||
|
||||
# Delete UploadFile records
|
||||
if upload_file_ids:
|
||||
delete_upload_files_sql = """
|
||||
DELETE FROM upload_files
|
||||
WHERE id IN :upload_file_ids
|
||||
"""
|
||||
conn.execute(sa.text(delete_upload_files_sql), {"upload_file_ids": tuple(upload_file_ids)})
|
||||
|
||||
# Delete WorkflowDraftVariableFile records
|
||||
delete_variable_files_sql = """
|
||||
DELETE FROM workflow_draft_variable_files
|
||||
WHERE id IN :file_ids
|
||||
"""
|
||||
conn.execute(sa.text(delete_variable_files_sql), {"file_ids": tuple(file_ids)})
|
||||
|
||||
except Exception:
|
||||
logging.exception("Error deleting draft variable offload data:")
|
||||
# Don't raise, as we want to continue with the main deletion process
|
||||
|
||||
return files_deleted
|
||||
|
||||
|
||||
def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: str) -> None:
|
||||
while True:
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(sa.text(query_sql), params)
|
||||
|
||||
Reference in New Issue
Block a user