Compare commits

...

35 Commits

Author SHA1 Message Date
Coding On Star
c49e0d38ed Merge branch 'main' into refactor/documents 2026-01-13 15:09:39 +08:00
CodingOnStar
b56777914d fix(web): enhance security for external links and improve pagination 2026-01-13 15:09:08 +08:00
呆萌闷油瓶
9be863fefa fix: missing content if assistant message with tool_calls (#30083)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-13 12:46:33 +08:00
Coding On Star
8f43629cd8 fix(amplitude): update sessionReplaySampleRate default value to 0.5 (#30880)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-13 12:26:50 +08:00
wangxiaolei
9ee71902c1 fix: fix formatNumber accuracy (#30877) 2026-01-13 11:51:15 +08:00
hsiong
a012c87445 fix: entrypoint.sh overrides NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS when TEXT_GENERATION_TIMEOUT_MS is unset (#30864) (#30865) 2026-01-13 10:12:51 +08:00
heyszt
450578d4c0 feat(ops): set root span kind for AliyunTrace to enable service-level metrics aggregation (#30728)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-13 10:12:00 +08:00
非法操作
837237aa6d fix: use node factory for single-step workflow nodes (#30859) 2026-01-13 10:11:18 +08:00
CodingOnStar
532073175a feat(web): refactor documents component structure and enhance functionality
- Extracted and modularized components for better organization: DocumentsHeader, EmptyElement, List, and Operations.
- Introduced a custom hook, useDocumentsPageState, to manage document page state including search, filter, and pagination.
- Updated the main Documents component to utilize the new structure and improve readability.
- Enhanced the user interface with new icons and improved button functionalities for document management.
2026-01-12 16:25:30 +08:00
QuantumGhost
b63dfbf654 fix(api): defer streaming response until referenced variables are updated (#30832)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-12 16:23:18 +08:00
非法操作
51ea87ab85 feat: clear free plan workflow run logs (#29494)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-01-12 15:57:40 +08:00
Stephen Zhou
00698e41b7 build: limit esbuild, glob, docker base version to avoid cve (#30848) 2026-01-12 15:33:20 +08:00
QuantumGhost
df938a4543 ci: add HITL test env deployment action (#30846) 2026-01-12 15:07:53 +08:00
yyh
9161936f41 refactor(web): extract isServer/isClient utility & upgrade Node.js to 22.12.0 (#30803)
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-01-12 12:57:43 +08:00
Lemonadeccc
f9a21b56ab feat: add block-no-verify hook for Claude Code (#30839) 2026-01-12 12:56:05 +08:00
Stephen Zhou
220e1df847 docs(web): add corepack recommendation (#30837)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-12 12:44:30 +08:00
dependabot[bot]
8cfdde594c chore(deps-dev): bump tos from 2.7.2 to 2.9.0 in /api (#30834)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-12 12:44:21 +08:00
dependabot[bot]
31a8fd810c chore(deps-dev): bump @storybook/react from 9.1.13 to 9.1.17 in /web (#30833)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-12 12:44:11 +08:00
yihong
9fad97ec9b fix: drop useless pyrefly in ci (#30826)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2026-01-12 09:45:49 +08:00
wangxiaolei
0c2729d9b3 fix: fix refresh token deadlock (#30828) 2026-01-12 09:35:31 +08:00
wangxiaolei
a2e03b811e fix: Broken import in .storybook/preview.tsx (#30812)
2026-01-10 19:49:23 +08:00
-LAN-
1e10bf525c refactor(models): Refine MessageAgentThought SQLAlchemy typing (#27749)
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-10 17:17:45 +09:00
Stephen Zhou
8b1af36d94 feat(web): migrate PWA to Serwist (#30808) 2026-01-10 17:16:18 +09:00
wangxiaolei
0711dd4159 feat: enhance start node object value check (#30732)
2026-01-09 16:13:17 +08:00
QuantumGhost
ae0a26f5b6 revert: "fix: fix assign value stand as default (#30651)" (#30717)
The original fix seems correct on its own. However, for chatflows with multiple answer nodes, the `message_replace` command only preserves the output of the last executed answer node.
2026-01-09 16:08:24 +08:00
Stephen Zhou
d4432ed80f refactor: marketplace state management (#30702)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 14:31:24 +08:00
lif
9d9f027246 fix(web): invalidate app list cache after deleting app from detail page (#30751)
Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-09 14:08:37 +08:00
wangxiaolei
77f097ce76 fix: fix enhance app mode check (#30758) 2026-01-09 14:07:40 +08:00
Maries
7843afc91c feat(workflows): add agent-dev deploy workflow (#30774) 2026-01-09 13:55:49 +08:00
Coding On Star
98df99b0ca feat(embedding-process): implement embedding process components and polling logic (#30622)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-09 10:21:27 +08:00
Coding On Star
9848823dcd feat: implement step two of dataset creation with comprehensive UI components and hooks (#30681)
Co-authored-by: CodingOnStar <hanxujiang@dify.ai>
2026-01-09 10:21:18 +08:00
github-actions[bot]
5ad2385799 chore(i18n): sync translations with en-US (#30750)
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
2026-01-08 22:53:04 +08:00
yyh
7774a1312e fix(ci): use repository_dispatch for i18n sync workflow (#30744) 2026-01-08 21:28:49 +08:00
MkDev11
91d44719f4 fix(web): resolve chat message loading race conditions and infinite loops (#30695)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-01-08 18:05:32 +08:00
xuwei95
b2cbeeae92 fix(web): restrict postMessage targetOrigin from wildcard to specific origins (#30690)
Co-authored-by: XW <wei.xu1@wiz.ai>
2026-01-08 17:23:27 +08:00
176 changed files with 10732 additions and 6352 deletions

View File

@@ -5,5 +5,18 @@
"typescript-lsp@claude-plugins-official": true,
"pyright-lsp@claude-plugins-official": true,
"ralph-loop@claude-plugins-official": true
},
"hooks": {
"PreToolUse": [
{
"matcher": "Bash",
"hooks": [
{
"type": "command",
"command": "npx -y block-no-verify@1.1.1"
}
]
}
]
}
}

View File

@@ -39,12 +39,6 @@ jobs:
- name: Install dependencies
run: uv sync --project api --dev
- name: Run pyrefly check
run: |
cd api
uv add --dev pyrefly
uv run pyrefly check || true
- name: Run dify config tests
run: uv run --project api dev/pytest/pytest_config_tests.py

View File

@@ -1,4 +1,4 @@
name: Deploy Trigger Dev
name: Deploy Agent Dev
permissions:
contents: read
@@ -7,7 +7,7 @@ on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "deploy/trigger-dev"
- "deploy/agent-dev"
types:
- completed
@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
github.event.workflow_run.head_branch == 'deploy/trigger-dev'
github.event.workflow_run.head_branch == 'deploy/agent-dev'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.TRIGGER_SSH_HOST }}
host: ${{ secrets.AGENT_DEV_SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |

.github/workflows/deploy-hitl.yml (vendored, new file, 29 lines)
View File

@@ -0,0 +1,29 @@
name: Deploy HITL
on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- "feat/hitl-frontend"
- "feat/hitl-backend"
types:
- completed
jobs:
deploy:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
(
github.event.workflow_run.head_branch == 'feat/hitl-frontend' ||
github.event.workflow_run.head_branch == 'feat/hitl-backend'
)
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.HITL_SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
${{ vars.SSH_SCRIPT || secrets.SSH_SCRIPT }}

View File

@@ -1,10 +1,12 @@
name: Translate i18n Files with Claude Code
# Note: claude-code-action doesn't support push events directly.
# Push events are handled by trigger-i18n-sync.yml which sends repository_dispatch.
# See: https://github.com/langgenius/dify/issues/30743
on:
push:
branches: [main]
paths:
- 'web/i18n/en-US/*.json'
repository_dispatch:
types: [i18n-sync]
workflow_dispatch:
inputs:
files:
@@ -87,26 +89,35 @@ jobs:
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
fi
else
# Push trigger - detect changed files from the push
BEFORE_SHA="${{ github.event.before }}"
# Handle edge case: first push or force push may have null/zero SHA
if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
# Fallback to comparing with parent commit
BEFORE_SHA="HEAD~1"
elif [ "${{ github.event_name }}" == "repository_dispatch" ]; then
# Triggered by push via trigger-i18n-sync.yml workflow
# Validate required payload fields
if [ -z "${{ github.event.client_payload.changed_files }}" ]; then
echo "Error: repository_dispatch payload missing required 'changed_files' field" >&2
exit 1
fi
changed=$(git diff --name-only "$BEFORE_SHA" ${{ github.sha }} -- 'web/i18n/en-US/*.json' 2>/dev/null | xargs -n1 basename 2>/dev/null | sed 's/.json$//' | tr '\n' ' ' || echo "")
echo "CHANGED_FILES=$changed" >> $GITHUB_OUTPUT
echo "CHANGED_FILES=${{ github.event.client_payload.changed_files }}" >> $GITHUB_OUTPUT
echo "TARGET_LANGS=" >> $GITHUB_OUTPUT
echo "SYNC_MODE=incremental" >> $GITHUB_OUTPUT
echo "SYNC_MODE=${{ github.event.client_payload.sync_mode || 'incremental' }}" >> $GITHUB_OUTPUT
# Generate detailed diff for the push
git diff "$BEFORE_SHA"..${{ github.sha }} -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
if [ -s /tmp/i18n-diff.txt ]; then
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
# Decode the base64-encoded diff from the trigger workflow
if [ -n "${{ github.event.client_payload.diff_base64 }}" ]; then
if ! echo "${{ github.event.client_payload.diff_base64 }}" | base64 -d > /tmp/i18n-diff.txt 2>&1; then
echo "Warning: Failed to decode base64 diff payload" >&2
echo "" > /tmp/i18n-diff.txt
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
elif [ -s /tmp/i18n-diff.txt ]; then
echo "DIFF_AVAILABLE=true" >> $GITHUB_OUTPUT
else
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
else
echo "" > /tmp/i18n-diff.txt
echo "DIFF_AVAILABLE=false" >> $GITHUB_OUTPUT
fi
else
echo "Unsupported event type: ${{ github.event_name }}"
exit 1
fi
# Truncate diff if too large (keep first 50KB)

.github/workflows/trigger-i18n-sync.yml (vendored, new file, 66 lines)
View File

@@ -0,0 +1,66 @@
name: Trigger i18n Sync on Push
# This workflow bridges the push event to repository_dispatch
# because claude-code-action doesn't support push events directly.
# See: https://github.com/langgenius/dify/issues/30743
on:
push:
branches: [main]
paths:
- 'web/i18n/en-US/*.json'
permissions:
contents: write
jobs:
trigger:
if: github.repository == 'langgenius/dify'
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Detect changed files and generate diff
id: detect
run: |
BEFORE_SHA="${{ github.event.before }}"
# Handle edge case: force push may have null/zero SHA
if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "0000000000000000000000000000000000000000" ]; then
BEFORE_SHA="HEAD~1"
fi
# Detect changed i18n files
changed=$(git diff --name-only "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' 2>/dev/null | xargs -n1 basename 2>/dev/null | sed 's/.json$//' | tr '\n' ' ' || echo "")
echo "changed_files=$changed" >> $GITHUB_OUTPUT
# Generate diff for context
git diff "$BEFORE_SHA" "${{ github.sha }}" -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || echo "" > /tmp/i18n-diff.txt
# Truncate if too large (keep first 50KB to match receiving workflow)
head -c 50000 /tmp/i18n-diff.txt > /tmp/i18n-diff-truncated.txt
mv /tmp/i18n-diff-truncated.txt /tmp/i18n-diff.txt
# Base64 encode the diff for safe JSON transport (portable, single-line)
diff_base64=$(base64 < /tmp/i18n-diff.txt | tr -d '\n')
echo "diff_base64=$diff_base64" >> $GITHUB_OUTPUT
if [ -n "$changed" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Detected changed files: $changed"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No i18n changes detected"
fi
- name: Trigger i18n sync workflow
if: steps.detect.outputs.has_changes == 'true'
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
event-type: i18n-sync
client-payload: '{"changed_files": "${{ steps.detect.outputs.changed_files }}", "diff_base64": "${{ steps.detect.outputs.diff_base64 }}", "sync_mode": "incremental", "trigger_sha": "${{ github.sha }}"}'
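
For clarity, a small Python sketch of the transport convention the two workflows share (illustrative only; the actual workflows use the shell `base64` utility as shown above): the trigger side collapses the truncated diff into a single-line base64 string so it survives the JSON `client_payload`, and the receiving side decodes it back or falls back to "no diff available".

```python
import base64

# Trigger side: the raw diff (a stand-in string here), truncated to 50 KB and
# base64-encoded into a single line so it can ride inside the
# repository_dispatch client_payload JSON.
raw_diff = 'diff --git a/web/i18n/en-US/app.json b/web/i18n/en-US/app.json\n+  "newKey": "New label"\n'
diff_base64 = base64.b64encode(raw_diff.encode("utf-8")[:50_000]).decode("ascii")

# Receiving side: decode the payload back into a diff; an empty or undecodable
# payload degrades to DIFF_AVAILABLE=false, mirroring the workflow's error handling.
try:
    decoded = base64.b64decode(diff_base64).decode("utf-8")
except Exception:
    decoded = ""
print("DIFF_AVAILABLE=true" if decoded else "DIFF_AVAILABLE=false")
```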

.nvmrc (1 line removed)
View File

@@ -1 +0,0 @@
22.11.0

View File

@@ -589,6 +589,7 @@ ENABLE_CLEAN_UNUSED_DATASETS_TASK=false
ENABLE_CREATE_TIDB_SERVERLESS_TASK=false
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK=false
ENABLE_CLEAN_MESSAGES=false
ENABLE_WORKFLOW_RUN_CLEANUP_TASK=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true

View File

@@ -1,4 +1,5 @@
import base64
import datetime
import json
import logging
import secrets
@@ -45,6 +46,7 @@ from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpi
from services.plugin.data_migration import PluginDataMigration
from services.plugin.plugin_migration import PluginMigration
from services.plugin.plugin_service import PluginService
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
from tasks.remove_app_and_related_data_task import delete_draft_variables_batch
logger = logging.getLogger(__name__)
@@ -852,6 +854,61 @@ def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[
click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green"))
@click.command("clean-workflow-runs", help="Clean expired workflow runs and related data for free tenants.")
@click.option("--days", default=30, show_default=True, help="Delete workflow runs created before N days ago.")
@click.option("--batch-size", default=200, show_default=True, help="Batch size for selecting workflow runs.")
@click.option(
"--start-from",
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
default=None,
help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.",
)
@click.option(
"--end-before",
type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
default=None,
help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.",
)
@click.option(
"--dry-run",
is_flag=True,
help="Preview cleanup results without deleting any workflow run data.",
)
def clean_workflow_runs(
days: int,
batch_size: int,
start_from: datetime.datetime | None,
end_before: datetime.datetime | None,
dry_run: bool,
):
"""
Clean workflow runs and related workflow data for free tenants.
"""
if (start_from is None) ^ (end_before is None):
raise click.UsageError("--start-from and --end-before must be provided together.")
start_time = datetime.datetime.now(datetime.UTC)
click.echo(click.style(f"Starting workflow run cleanup at {start_time.isoformat()}.", fg="white"))
WorkflowRunCleanup(
days=days,
batch_size=batch_size,
start_from=start_from,
end_before=end_before,
dry_run=dry_run,
).run()
end_time = datetime.datetime.now(datetime.UTC)
elapsed = end_time - start_time
click.echo(
click.style(
f"Workflow run cleanup completed. start={start_time.isoformat()} "
f"end={end_time.isoformat()} duration={elapsed}",
fg="green",
)
)
@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.")
@click.command("clear-orphaned-file-records", help="Clear orphaned file records.")
def clear_orphaned_file_records(force: bool):
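
Stepping back from the hunk above, a minimal sketch of driving the new `clean-workflow-runs` cleanup from Python instead of the CLI, assuming an application context is active so the default repository factory can reach the database; with `dry_run=True` it only counts the affected records:

```python
import datetime

from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup

# Explicit window: both bounds must be given together, mirroring the CLI check.
window_start = datetime.datetime(2025, 12, 1)
window_end = datetime.datetime(2026, 1, 1)

WorkflowRunCleanup(
    days=30,          # only used to derive the cutoff when end_before is omitted
    batch_size=200,
    start_from=window_start,
    end_before=window_end,
    dry_run=True,     # preview counts of runs and related records without deleting
).run()
```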

View File

@@ -1101,6 +1101,10 @@ class CeleryScheduleTasksConfig(BaseSettings):
description="Enable clean messages task",
default=False,
)
ENABLE_WORKFLOW_RUN_CLEANUP_TASK: bool = Field(
description="Enable scheduled workflow run cleanup task",
default=False,
)
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: bool = Field(
description="Enable mail clean document notify task",
default=False,

View File

@@ -1,6 +1,7 @@
import json
import logging
import uuid
from decimal import Decimal
from typing import Union, cast
from sqlalchemy import select
@@ -41,6 +42,7 @@ from core.tools.tool_manager import ToolManager
from core.tools.utils.dataset_retriever_tool import DatasetRetrieverTool
from extensions.ext_database import db
from factories import file_factory
from models.enums import CreatorUserRole
from models.model import Conversation, Message, MessageAgentThought, MessageFile
logger = logging.getLogger(__name__)
@@ -289,6 +291,7 @@ class BaseAgentRunner(AppRunner):
thought = MessageAgentThought(
message_id=message_id,
message_chain_id=None,
tool_process_data=None,
thought="",
tool=tool_name,
tool_labels_str="{}",
@@ -296,20 +299,20 @@ class BaseAgentRunner(AppRunner):
tool_input=tool_input,
message=message,
message_token=0,
message_unit_price=0,
message_price_unit=0,
message_unit_price=Decimal(0),
message_price_unit=Decimal("0.001"),
message_files=json.dumps(messages_ids) if messages_ids else "",
answer="",
observation="",
answer_token=0,
answer_unit_price=0,
answer_price_unit=0,
answer_unit_price=Decimal(0),
answer_price_unit=Decimal("0.001"),
tokens=0,
total_price=0,
total_price=Decimal(0),
position=self.agent_thought_count + 1,
currency="USD",
latency=0,
created_by_role="account",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=self.user_id,
)
@@ -342,7 +345,8 @@ class BaseAgentRunner(AppRunner):
raise ValueError("agent thought not found")
if thought:
agent_thought.thought += thought
existing_thought = agent_thought.thought or ""
agent_thought.thought = f"{existing_thought}{thought}"
if tool_name:
agent_thought.tool = tool_name
@@ -440,21 +444,30 @@ class BaseAgentRunner(AppRunner):
agent_thoughts: list[MessageAgentThought] = message.agent_thoughts
if agent_thoughts:
for agent_thought in agent_thoughts:
tools = agent_thought.tool
if tools:
tools = tools.split(";")
tool_names_raw = agent_thought.tool
if tool_names_raw:
tool_names = tool_names_raw.split(";")
tool_calls: list[AssistantPromptMessage.ToolCall] = []
tool_call_response: list[ToolPromptMessage] = []
try:
tool_inputs = json.loads(agent_thought.tool_input)
except Exception:
tool_inputs = {tool: {} for tool in tools}
try:
tool_responses = json.loads(agent_thought.observation)
except Exception:
tool_responses = dict.fromkeys(tools, agent_thought.observation)
tool_input_payload = agent_thought.tool_input
if tool_input_payload:
try:
tool_inputs = json.loads(tool_input_payload)
except Exception:
tool_inputs = {tool: {} for tool in tool_names}
else:
tool_inputs = {tool: {} for tool in tool_names}
for tool in tools:
observation_payload = agent_thought.observation
if observation_payload:
try:
tool_responses = json.loads(observation_payload)
except Exception:
tool_responses = dict.fromkeys(tool_names, observation_payload)
else:
tool_responses = dict.fromkeys(tool_names, observation_payload)
for tool in tool_names:
# generate a uuid for tool call
tool_call_id = str(uuid.uuid4())
tool_calls.append(
@@ -484,7 +497,7 @@ class BaseAgentRunner(AppRunner):
*tool_call_response,
]
)
if not tools:
if not tool_names_raw:
result.append(AssistantPromptMessage(content=agent_thought.thought))
else:
if message.answer:

View File

@@ -188,7 +188,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
),
)
assistant_message = AssistantPromptMessage(content="", tool_calls=[])
assistant_message = AssistantPromptMessage(content=response, tool_calls=[])
if tool_calls:
assistant_message.tool_calls = [
AssistantPromptMessage.ToolCall(
@@ -200,8 +200,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):
)
for tool_call in tool_calls
]
else:
assistant_message.content = response
self._current_thoughts.append(assistant_message)

View File

@@ -1,4 +1,3 @@
import json
from collections.abc import Sequence
from enum import StrEnum, auto
from typing import Any, Literal
@@ -121,7 +120,7 @@ class VariableEntity(BaseModel):
allowed_file_types: Sequence[FileType] | None = Field(default_factory=list)
allowed_file_extensions: Sequence[str] | None = Field(default_factory=list)
allowed_file_upload_methods: Sequence[FileTransferMethod] | None = Field(default_factory=list)
json_schema: str | None = Field(default=None)
json_schema: dict | None = Field(default=None)
@field_validator("description", mode="before")
@classmethod
@@ -135,17 +134,11 @@ class VariableEntity(BaseModel):
@field_validator("json_schema")
@classmethod
def validate_json_schema(cls, schema: str | None) -> str | None:
def validate_json_schema(cls, schema: dict | None) -> dict | None:
if schema is None:
return None
try:
json_schema = json.loads(schema)
except json.JSONDecodeError:
raise ValueError(f"invalid json_schema value {schema}")
try:
Draft7Validator.check_schema(json_schema)
Draft7Validator.check_schema(schema)
except SchemaError as e:
raise ValueError(f"Invalid JSON schema: {e.message}")
return schema

View File

@@ -26,7 +26,6 @@ class AdvancedChatAppConfigManager(BaseAppConfigManager):
@classmethod
def get_app_config(cls, app_model: App, workflow: Workflow) -> AdvancedChatAppConfig:
features_dict = workflow.features_dict
app_mode = AppMode.value_of(app_model.mode)
app_config = AdvancedChatAppConfig(
tenant_id=app_model.tenant_id,

View File

@@ -358,25 +358,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport):
if node_finish_resp:
yield node_finish_resp
# For ANSWER nodes, check if we need to send a message_replace event
# Only send if the final output differs from the accumulated task_state.answer
# This happens when variables were updated by variable_assigner during workflow execution
if event.node_type == NodeType.ANSWER and event.outputs:
final_answer = event.outputs.get("answer")
if final_answer is not None and final_answer != self._task_state.answer:
logger.info(
"ANSWER node final output '%s' differs from accumulated answer '%s', sending message_replace event",
final_answer,
self._task_state.answer,
)
# Update the task state answer
self._task_state.answer = str(final_answer)
# Send message_replace event to update the UI
yield self._message_cycle_manager.message_replace_to_stream_response(
answer=str(final_answer),
reason="variable_update",
)
def _handle_node_failed_events(
self,
event: Union[QueueNodeFailedEvent, QueueNodeExceptionEvent],

View File

@@ -1,4 +1,3 @@
import json
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Union, final
@@ -76,12 +75,24 @@ class BaseAppGenerator:
user_inputs = {**user_inputs, **files_inputs, **file_list_inputs}
# Check if all files are converted to File
if any(filter(lambda v: isinstance(v, dict), user_inputs.values())):
raise ValueError("Invalid input type")
if any(
filter(lambda v: isinstance(v, dict), filter(lambda item: isinstance(item, list), user_inputs.values()))
):
raise ValueError("Invalid input type")
invalid_dict_keys = [
k
for k, v in user_inputs.items()
if isinstance(v, dict)
and entity_dictionary[k].type not in {VariableEntityType.FILE, VariableEntityType.JSON_OBJECT}
]
if invalid_dict_keys:
raise ValueError(f"Invalid input type for {invalid_dict_keys}")
invalid_list_dict_keys = [
k
for k, v in user_inputs.items()
if isinstance(v, list)
and any(isinstance(item, dict) for item in v)
and entity_dictionary[k].type != VariableEntityType.FILE_LIST
]
if invalid_list_dict_keys:
raise ValueError(f"Invalid input type for {invalid_list_dict_keys}")
return user_inputs
@@ -178,12 +189,8 @@ class BaseAppGenerator:
elif value == 0:
value = False
case VariableEntityType.JSON_OBJECT:
if not isinstance(value, str):
raise ValueError(f"{variable_entity.variable} in input form must be a string")
try:
json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"{variable_entity.variable} in input form must be a valid JSON object")
if not isinstance(value, dict):
raise ValueError(f"{variable_entity.variable} in input form must be a dict")
case _:
raise AssertionError("this statement should be unreachable.")

View File

@@ -251,10 +251,7 @@ class AssistantPromptMessage(PromptMessage):
:return: True if prompt message is empty, False otherwise
"""
if not super().is_empty() and not self.tool_calls:
return False
return True
return super().is_empty() and not self.tool_calls
class SystemPromptMessage(PromptMessage):

View File

@@ -1,6 +1,7 @@
import logging
from collections.abc import Sequence
from opentelemetry.trace import SpanKind
from sqlalchemy.orm import sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
@@ -151,6 +152,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER,
)
self.trace_client.add_span(message_span)
@@ -456,6 +458,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER,
)
self.trace_client.add_span(message_span)
@@ -475,6 +478,7 @@ class AliyunDataTrace(BaseTraceInstance):
),
status=status,
links=trace_metadata.links,
span_kind=SpanKind.SERVER if message_span_id is None else SpanKind.INTERNAL,
)
self.trace_client.add_span(workflow_span)

View File

@@ -166,7 +166,7 @@ class SpanBuilder:
attributes=span_data.attributes,
events=span_data.events,
links=span_data.links,
kind=trace_api.SpanKind.INTERNAL,
kind=span_data.span_kind,
status=span_data.status,
start_time=span_data.start_time,
end_time=span_data.end_time,

View File

@@ -4,7 +4,7 @@ from typing import Any
from opentelemetry import trace as trace_api
from opentelemetry.sdk.trace import Event
from opentelemetry.trace import Status, StatusCode
from opentelemetry.trace import SpanKind, Status, StatusCode
from pydantic import BaseModel, Field
@@ -34,3 +34,4 @@ class SpanData(BaseModel):
status: Status = Field(default=Status(StatusCode.UNSET), description="The status of the span.")
start_time: int | None = Field(..., description="The start time of the span in nanoseconds.")
end_time: int | None = Field(..., description="The end time of the span in nanoseconds.")
span_kind: SpanKind = Field(default=SpanKind.INTERNAL, description="The OpenTelemetry SpanKind for this span.")

View File

@@ -211,6 +211,10 @@ class WorkflowExecutionStatus(StrEnum):
def is_ended(self) -> bool:
return self in _END_STATE
@classmethod
def ended_values(cls) -> list[str]:
return [status.value for status in _END_STATE]
_END_STATE = frozenset(
[

View File

@@ -1,4 +1,3 @@
import json
from typing import Any
from jsonschema import Draft7Validator, ValidationError
@@ -43,25 +42,22 @@ class StartNode(Node[StartNodeData]):
if value is None and variable.required:
raise ValueError(f"{key} is required in input form")
# If no value provided, skip further processing for this key
if not value:
continue
if not isinstance(value, dict):
raise ValueError(f"JSON object for '{key}' must be an object")
# Overwrite with normalized dict to ensure downstream consistency
node_inputs[key] = value
# If schema exists, then validate against it
schema = variable.json_schema
if not schema:
continue
if not value:
continue
try:
json_schema = json.loads(schema)
except json.JSONDecodeError as e:
raise ValueError(f"{schema} must be a valid JSON object")
try:
json_value = json.loads(value)
except json.JSONDecodeError as e:
raise ValueError(f"{value} must be a valid JSON object")
try:
Draft7Validator(json_schema).validate(json_value)
Draft7Validator(schema).validate(value)
except ValidationError as e:
raise ValueError(f"JSON object for '{key}' does not match schema: {e.message}")
node_inputs[key] = json_value
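
Because `json_schema` and the incoming value are now plain dicts rather than JSON strings, the validation step reduces to direct `jsonschema` calls. A small illustrative example of the check the start node now performs for a schema-bearing JSON object input (schema and value are made up):

```python
from jsonschema import Draft7Validator, ValidationError

# Hypothetical schema attached to a start-node variable of type json_object.
schema = {
    "type": "object",
    "properties": {"city": {"type": "string"}, "days": {"type": "integer"}},
    "required": ["city"],
}
value = {"city": "Tokyo", "days": 3}  # already a dict, no json.loads needed

Draft7Validator.check_schema(schema)  # same structural check as the VariableEntity validator
try:
    Draft7Validator(schema).validate(value)
except ValidationError as e:
    raise ValueError(f"JSON object does not match schema: {e.message}")
```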

View File

@@ -33,6 +33,15 @@ class VariableAssignerNode(Node[VariableAssignerData]):
graph_runtime_state=graph_runtime_state,
)
def blocks_variable_output(self, variable_selectors: set[tuple[str, ...]]) -> bool:
"""
Check if this Variable Assigner node blocks the output of specific variables.
Returns True if this node updates any of the requested conversation variables.
"""
assigned_selector = tuple(self.node_data.assigned_variable_selector)
return assigned_selector in variable_selectors
@classmethod
def version(cls) -> str:
return "1"

View File

@@ -19,6 +19,7 @@ from core.workflow.graph_engine.protocols.command_channel import CommandChannel
from core.workflow.graph_events import GraphEngineEvent, GraphNodeEventBase, GraphRunFailedEvent
from core.workflow.nodes import NodeType
from core.workflow.nodes.base.node import Node
from core.workflow.nodes.node_factory import DifyNodeFactory
from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
from core.workflow.runtime import GraphRuntimeState, VariablePool
from core.workflow.system_variable import SystemVariable
@@ -136,13 +137,11 @@ class WorkflowEntry:
:param user_inputs: user inputs
:return:
"""
node_config = workflow.get_node_config_by_id(node_id)
node_config = dict(workflow.get_node_config_by_id(node_id))
node_config_data = node_config.get("data", {})
# Get node class
# Get node type
node_type = NodeType(node_config_data.get("type"))
node_version = node_config_data.get("version", "1")
node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version]
# init graph init params and runtime state
graph_init_params = GraphInitParams(
@@ -158,12 +157,12 @@ class WorkflowEntry:
graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter())
# init workflow run state
node = node_cls(
id=str(uuid.uuid4()),
config=node_config,
node_factory = DifyNodeFactory(
graph_init_params=graph_init_params,
graph_runtime_state=graph_runtime_state,
)
node = node_factory.create_node(node_config)
node_cls = type(node)
try:
# variable selector to variable mapping

View File

@@ -163,6 +163,13 @@ def init_app(app: DifyApp) -> Celery:
"task": "schedule.clean_workflow_runlogs_precise.clean_workflow_runlogs_precise",
"schedule": crontab(minute="0", hour="2"),
}
if dify_config.ENABLE_WORKFLOW_RUN_CLEANUP_TASK:
# for saas only
imports.append("schedule.clean_workflow_runs_task")
beat_schedule["clean_workflow_runs_task"] = {
"task": "schedule.clean_workflow_runs_task.clean_workflow_runs_task",
"schedule": crontab(minute="0", hour="0"),
}
if dify_config.ENABLE_WORKFLOW_SCHEDULE_POLLER_TASK:
imports.append("schedule.workflow_schedule_task")
beat_schedule["workflow_schedule_task"] = {

View File

@@ -4,6 +4,7 @@ from dify_app import DifyApp
def init_app(app: DifyApp):
from commands import (
add_qdrant_index,
clean_workflow_runs,
cleanup_orphaned_draft_variables,
clear_free_plan_tenant_expired_logs,
clear_orphaned_file_records,
@@ -56,6 +57,7 @@ def init_app(app: DifyApp):
setup_datasource_oauth_client,
transform_datasource_credentials,
install_rag_pipeline_plugins,
clean_workflow_runs,
]
for cmd in cmds_to_register:
app.cli.add_command(cmd)

View File

@@ -0,0 +1,30 @@
"""add workflow_run_created_at_id_idx
Revision ID: 905527cc8fd3
Revises: 7df29de0f6be
Create Date: 2025-01-09 16:30:02.462084
"""
from alembic import op
import models as models
# revision identifiers, used by Alembic.
revision = '905527cc8fd3'
down_revision = '7df29de0f6be'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
batch_op.create_index('workflow_run_created_at_id_idx', ['created_at', 'id'], unique=False)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
batch_op.drop_index('workflow_run_created_at_id_idx')
# ### end Alembic commands ###

View File

@@ -1843,7 +1843,7 @@ class MessageChain(TypeBase):
)
class MessageAgentThought(Base):
class MessageAgentThought(TypeBase):
__tablename__ = "message_agent_thoughts"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="message_agent_thought_pkey"),
@@ -1851,34 +1851,42 @@ class MessageAgentThought(Base):
sa.Index("message_agent_thought_message_chain_id_idx", "message_chain_id"),
)
id = mapped_column(StringUUID, default=lambda: str(uuid4()))
message_id = mapped_column(StringUUID, nullable=False)
message_chain_id = mapped_column(StringUUID, nullable=True)
id: Mapped[str] = mapped_column(
StringUUID, insert_default=lambda: str(uuid4()), default_factory=lambda: str(uuid4()), init=False
)
message_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
position: Mapped[int] = mapped_column(sa.Integer, nullable=False)
thought = mapped_column(LongText, nullable=True)
tool = mapped_column(LongText, nullable=True)
tool_labels_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_meta_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_input = mapped_column(LongText, nullable=True)
observation = mapped_column(LongText, nullable=True)
created_by_role: Mapped[str] = mapped_column(String(255), nullable=False)
created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
message_chain_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None)
thought: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
tool: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
tool_labels_str: Mapped[str] = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_meta_str: Mapped[str] = mapped_column(LongText, nullable=False, default=sa.text("'{}'"))
tool_input: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
observation: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
# plugin_id = mapped_column(StringUUID, nullable=True) ## for future design
tool_process_data = mapped_column(LongText, nullable=True)
message = mapped_column(LongText, nullable=True)
message_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
message_unit_price = mapped_column(sa.Numeric, nullable=True)
message_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001"))
message_files = mapped_column(LongText, nullable=True)
answer = mapped_column(LongText, nullable=True)
answer_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
answer_unit_price = mapped_column(sa.Numeric, nullable=True)
answer_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001"))
tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
total_price = mapped_column(sa.Numeric, nullable=True)
currency = mapped_column(String(255), nullable=True)
latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True)
created_by_role = mapped_column(String(255), nullable=False)
created_by = mapped_column(StringUUID, nullable=False)
created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp())
tool_process_data: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
message: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
message_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
message_unit_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
message_price_unit: Mapped[Decimal] = mapped_column(
sa.Numeric(10, 7), nullable=False, default=Decimal("0.001"), server_default=sa.text("0.001")
)
message_files: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
answer: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None)
answer_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
answer_unit_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
answer_price_unit: Mapped[Decimal] = mapped_column(
sa.Numeric(10, 7), nullable=False, default=Decimal("0.001"), server_default=sa.text("0.001")
)
tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None)
total_price: Mapped[Decimal | None] = mapped_column(sa.Numeric, nullable=True, default=None)
currency: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None)
latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True, default=None)
created_at: Mapped[datetime] = mapped_column(
sa.DateTime, nullable=False, init=False, server_default=sa.func.current_timestamp()
)
@property
def files(self) -> list[Any]:

View File

@@ -597,6 +597,7 @@ class WorkflowRun(Base):
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="workflow_run_pkey"),
sa.Index("workflow_run_triggerd_from_idx", "tenant_id", "app_id", "triggered_from"),
sa.Index("workflow_run_created_at_id_idx", "created_at", "id"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()))

View File

@@ -189,7 +189,7 @@ storage = [
"opendal~=0.46.0",
"oss2==2.18.5",
"supabase~=2.18.1",
"tos~=2.7.1",
"tos~=2.9.0",
]
############################################################

View File

@@ -34,11 +34,14 @@ Example:
```
"""
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from typing import Protocol
from sqlalchemy.orm import Session
from core.workflow.entities.pause_reason import PauseReason
from core.workflow.enums import WorkflowType
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
@@ -253,6 +256,44 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
...
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
"""
...
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Delete workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons).
"""
...
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Count workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons) without deleting data.
"""
...
def create_workflow_pause(
self,
workflow_run_id: str,
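
The `last_seen` cursor in `get_runs_batch_by_time_range` implies keyset pagination on (`created_at`, `id`), backed by the new `workflow_run_created_at_id_idx` index. A hedged sketch of how a caller such as `WorkflowRunCleanup` might walk the window in batches (illustrative; the real loop lives in the cleanup service, which is only partially shown later in this diff):

```python
from collections.abc import Iterator, Sequence
from datetime import datetime


def iterate_ended_runs(repo, end_before: datetime, batch_size: int = 200) -> Iterator[Sequence]:
    """Yield batches of ended workflow runs, advancing a (created_at, id) cursor."""
    last_seen: tuple[datetime, str] | None = None
    while True:
        batch = repo.get_runs_batch_by_time_range(
            start_from=None,
            end_before=end_before,
            last_seen=last_seen,
            batch_size=batch_size,
        )
        if not batch:
            break
        yield batch
        last = batch[-1]
        last_seen = (last.created_at, last.id)  # strictly-after cursor for the next page
```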

View File

@@ -7,13 +7,18 @@ using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
from collections.abc import Sequence
from datetime import datetime
from typing import cast
from typing import TypedDict, cast
from sqlalchemy import asc, delete, desc, select
from sqlalchemy import asc, delete, desc, func, select, tuple_
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import (
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload,
WorkflowNodeExecutionTriggeredFrom,
)
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
@@ -44,6 +49,26 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
"""
self._session_maker = session_maker
@staticmethod
def _map_run_triggered_from_to_node_triggered_from(triggered_from: str) -> str:
"""
Map workflow run triggered_from values to workflow node execution triggered_from values.
"""
if triggered_from in {
WorkflowRunTriggeredFrom.APP_RUN.value,
WorkflowRunTriggeredFrom.DEBUGGING.value,
WorkflowRunTriggeredFrom.SCHEDULE.value,
WorkflowRunTriggeredFrom.PLUGIN.value,
WorkflowRunTriggeredFrom.WEBHOOK.value,
}:
return WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value
if triggered_from in {
WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN.value,
WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING.value,
}:
return WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN.value
return ""
def get_node_last_execution(
self,
tenant_id: str,
@@ -290,3 +315,119 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
result = cast(CursorResult, session.execute(stmt))
session.commit()
return result.rowcount
class RunContext(TypedDict):
run_id: str
tenant_id: str
app_id: str
workflow_id: str
triggered_from: str
@staticmethod
def delete_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Delete node executions (and offloads) for the given workflow runs using indexed columns.
Uses the composite index on (tenant_id, app_id, workflow_id, triggered_from, workflow_run_id)
by filtering on those columns with tuple IN.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
node_execution_ids = session.scalars(
select(WorkflowNodeExecutionModel.id).where(
tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
)
).all()
if not node_execution_ids:
return 0, 0
offloads_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionOffload).where(
WorkflowNodeExecutionOffload.node_execution_id.in_(node_execution_ids)
)
),
).rowcount
or 0
)
node_executions_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(node_execution_ids))
),
).rowcount
or 0
)
return node_executions_deleted, offloads_deleted
@staticmethod
def count_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Count node executions (and offloads) for the given workflow runs using indexed columns.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
tuple_filter = tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
node_executions_count = (
session.scalar(select(func.count()).select_from(WorkflowNodeExecutionModel).where(tuple_filter)) or 0
)
offloads_count = (
session.scalar(
select(func.count())
.select_from(WorkflowNodeExecutionOffload)
.join(
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload.node_execution_id == WorkflowNodeExecutionModel.id,
)
.where(tuple_filter)
)
or 0
)
return int(node_executions_count), int(offloads_count)
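
Note that the static `delete_by_runs`/`count_by_runs` helpers take `RunContext` dicts, while `delete_runs_with_related` hands its callback the `WorkflowRun` rows themselves, so a thin adapter is needed when composing the two repositories. A hedged sketch (attribute names on `WorkflowRun` are assumed from the `RunContext` fields):

```python
from collections.abc import Sequence

from sqlalchemy.orm import Session

from models.workflow import WorkflowRun
from repositories.sqlalchemy_api_workflow_node_execution_repository import (
    DifyAPISQLAlchemyWorkflowNodeExecutionRepository as NodeExecRepo,
)


def _delete_node_executions(session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
    # Adapt WorkflowRun rows into the RunContext dicts expected by delete_by_runs,
    # so the tuple IN filter can hit the composite node-execution index.
    contexts = [
        {
            "run_id": run.id,
            "tenant_id": run.tenant_id,
            "app_id": run.app_id,
            "workflow_id": run.workflow_id,
            "triggered_from": run.triggered_from,
        }
        for run in runs
    ]
    return NodeExecRepo.delete_by_runs(session, contexts)


# Then passed into the run repository, e.g.:
# workflow_run_repo.delete_runs_with_related(batch, delete_node_executions=_delete_node_executions)
```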

View File

@@ -21,7 +21,7 @@ Implementation Notes:
import logging
import uuid
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from decimal import Decimal
from typing import Any, cast
@@ -32,7 +32,7 @@ from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, selectinload, sessionmaker
from core.workflow.entities.pause_reason import HumanInputRequired, PauseReason, SchedulingPause
from core.workflow.enums import WorkflowExecutionStatus
from core.workflow.enums import WorkflowExecutionStatus, WorkflowType
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import convert_datetime_to_date
@@ -40,8 +40,14 @@ from libs.infinite_scroll_pagination import InfiniteScrollPagination
from libs.time_parser import get_time_threshold
from libs.uuid_utils import uuidv7
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowPause as WorkflowPauseModel
from models.workflow import WorkflowPauseReason, WorkflowRun
from models.workflow import (
WorkflowAppLog,
WorkflowPauseReason,
WorkflowRun,
)
from models.workflow import (
WorkflowPause as WorkflowPauseModel,
)
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.entities.workflow_pause import WorkflowPauseEntity
from repositories.types import (
@@ -314,6 +320,171 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
return total_deleted
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
Query scope:
- created_at in [start_from, end_before)
- type in run_types (when provided)
- status is an ended state
- optional tenant_id filter and cursor (last_seen) for pagination
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.created_at < end_before,
WorkflowRun.status.in_(WorkflowExecutionStatus.ended_values()),
)
.order_by(WorkflowRun.created_at.asc(), WorkflowRun.id.asc())
.limit(batch_size)
)
if run_types is not None:
if not run_types:
return []
stmt = stmt.where(WorkflowRun.type.in_(run_types))
if start_from:
stmt = stmt.where(WorkflowRun.created_at >= start_from)
if tenant_ids:
stmt = stmt.where(WorkflowRun.tenant_id.in_(tenant_ids))
if last_seen:
stmt = stmt.where(
or_(
WorkflowRun.created_at > last_seen[0],
and_(WorkflowRun.created_at == last_seen[0], WorkflowRun.id > last_seen[1]),
)
)
return session.scalars(stmt).all()
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if delete_node_executions:
node_executions_deleted, offloads_deleted = delete_node_executions(session, runs)
else:
node_executions_deleted, offloads_deleted = 0, 0
app_logs_result = session.execute(delete(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids)))
app_logs_deleted = cast(CursorResult, app_logs_result).rowcount or 0
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pause_reasons_deleted = 0
pauses_deleted = 0
if pause_ids:
pause_reasons_result = session.execute(
delete(WorkflowPauseReason).where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
pause_reasons_deleted = cast(CursorResult, pause_reasons_result).rowcount or 0
pauses_result = session.execute(delete(WorkflowPauseModel).where(WorkflowPauseModel.id.in_(pause_ids)))
pauses_deleted = cast(CursorResult, pauses_result).rowcount or 0
trigger_logs_deleted = delete_trigger_logs(session, run_ids) if delete_trigger_logs else 0
runs_result = session.execute(delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids)))
runs_deleted = cast(CursorResult, runs_result).rowcount or 0
session.commit()
return {
"runs": runs_deleted,
"node_executions": node_executions_deleted,
"offloads": offloads_deleted,
"app_logs": app_logs_deleted,
"trigger_logs": trigger_logs_deleted,
"pauses": pauses_deleted,
"pause_reasons": pause_reasons_deleted,
}
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if count_node_executions:
node_executions_count, offloads_count = count_node_executions(session, runs)
else:
node_executions_count, offloads_count = 0, 0
app_logs_count = (
session.scalar(
select(func.count()).select_from(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids))
)
or 0
)
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pauses_count = len(pause_ids)
pause_reasons_count = 0
if pause_ids:
pause_reasons_count = (
session.scalar(
select(func.count())
.select_from(WorkflowPauseReason)
.where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
or 0
)
trigger_logs_count = count_trigger_logs(session, run_ids) if count_trigger_logs else 0
return {
"runs": len(runs),
"node_executions": node_executions_count,
"offloads": offloads_count,
"app_logs": int(app_logs_count),
"trigger_logs": trigger_logs_count,
"pauses": pauses_count,
"pause_reasons": int(pause_reasons_count),
}
def create_workflow_pause(
self,
workflow_run_id: str,

View File

@@ -4,8 +4,10 @@ SQLAlchemy implementation of WorkflowTriggerLogRepository.
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from typing import cast
from sqlalchemy import and_, select
from sqlalchemy import and_, delete, func, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session
from models.enums import WorkflowTriggerStatus
@@ -84,3 +86,37 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
)
return list(self.session.scalars(query).all())
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows deleted.
"""
if not run_ids:
return 0
result = self.session.execute(delete(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids)))
return cast(CursorResult, result).rowcount or 0
def count_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Count trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows matched.
"""
if not run_ids:
return 0
count = self.session.scalar(
select(func.count()).select_from(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids))
)
return int(count or 0)
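A minimal usage sketch of the two new trigger-log helpers, assuming an active SQLAlchemy session bound to the Dify engine; the run IDs below are hypothetical placeholders, not values from this change.

# Hedged sketch: count, then delete, trigger logs for a set of workflow run IDs.
from sqlalchemy.orm import Session

from extensions.ext_database import db
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository

with Session(db.engine) as session:
    repo = SQLAlchemyWorkflowTriggerLogRepository(session)
    run_ids = ["run-1", "run-2"]                # hypothetical workflow run IDs
    matched = repo.count_by_run_ids(run_ids)    # rows that would be removed
    removed = repo.delete_by_run_ids(run_ids)   # bulk DELETE, returns rowcount (0 for empty input)
    session.commit()                            # the repository itself does not commit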

View File

@@ -109,3 +109,15 @@ class WorkflowTriggerLogRepository(Protocol):
A sequence of recent WorkflowTriggerLog instances
"""
...
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs for workflow run IDs.
Args:
run_ids: Workflow run IDs to delete
Returns:
Number of rows deleted
"""
...

View File

@@ -0,0 +1,43 @@
from datetime import UTC, datetime
import click
import app
from configs import dify_config
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
@app.celery.task(queue="retention")
def clean_workflow_runs_task() -> None:
"""
Scheduled cleanup for workflow runs and related records (sandbox tenants only).
"""
click.echo(
click.style(
(
"Scheduled workflow run cleanup starting: "
f"cutoff={dify_config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS} days, "
f"batch={dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE}"
),
fg="green",
)
)
start_time = datetime.now(UTC)
WorkflowRunCleanup(
days=dify_config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS,
batch_size=dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE,
start_from=None,
end_before=None,
).run()
end_time = datetime.now(UTC)
elapsed = end_time - start_time
click.echo(
click.style(
f"Scheduled workflow run cleanup finished. start={start_time.isoformat()} "
f"end={end_time.isoformat()} duration={elapsed}",
fg="green",
)
)
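A hedged sketch of how this retention task could be registered on a Celery beat schedule; the import path, schedule key, and run time are assumptions for illustration, not taken from this change.

# Illustrative only: put clean_workflow_runs_task on a daily beat schedule.
from celery.schedules import crontab

import app
from tasks.clean_workflow_runs_task import clean_workflow_runs_task  # hypothetical module path

app.celery.conf.beat_schedule = {
    **(app.celery.conf.beat_schedule or {}),
    "clean-workflow-runs-daily": {              # hypothetical schedule key
        "task": clean_workflow_runs_task.name,  # resolves to the registered task name
        "schedule": crontab(hour=3, minute=0),  # assumed off-peak time
        "options": {"queue": "retention"},      # matches the task's declared queue
    },
}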

View File

View File

@@ -0,0 +1,301 @@
import datetime
import logging
from collections.abc import Iterable, Sequence
import click
from sqlalchemy.orm import Session, sessionmaker
from configs import dify_config
from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.sqlalchemy_api_workflow_node_execution_repository import (
DifyAPISQLAlchemyWorkflowNodeExecutionRepository,
)
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
from services.billing_service import BillingService, SubscriptionPlan
logger = logging.getLogger(__name__)
class WorkflowRunCleanup:
def __init__(
self,
days: int,
batch_size: int,
start_from: datetime.datetime | None = None,
end_before: datetime.datetime | None = None,
workflow_run_repo: APIWorkflowRunRepository | None = None,
dry_run: bool = False,
):
if (start_from is None) ^ (end_before is None):
raise ValueError("start_from and end_before must be both set or both omitted.")
computed_cutoff = datetime.datetime.now() - datetime.timedelta(days=days)
self.window_start = start_from
self.window_end = end_before or computed_cutoff
if self.window_start and self.window_end <= self.window_start:
raise ValueError("end_before must be greater than start_from.")
if batch_size <= 0:
raise ValueError("batch_size must be greater than 0.")
self.batch_size = batch_size
self._cleanup_whitelist: set[str] | None = None
self.dry_run = dry_run
self.free_plan_grace_period_days = dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD
self.workflow_run_repo: APIWorkflowRunRepository
if workflow_run_repo:
self.workflow_run_repo = workflow_run_repo
else:
# Lazy import to avoid circular dependencies during module import
from repositories.factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
self.workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
def run(self) -> None:
click.echo(
click.style(
f"{'Inspecting' if self.dry_run else 'Cleaning'} workflow runs "
f"{'between ' + self.window_start.isoformat() + ' and ' if self.window_start else 'before '}"
f"{self.window_end.isoformat()} (batch={self.batch_size})",
fg="white",
)
)
if self.dry_run:
click.echo(click.style("Dry run mode enabled. No data will be deleted.", fg="yellow"))
total_runs_deleted = 0
total_runs_targeted = 0
related_totals = self._empty_related_counts() if self.dry_run else None
batch_index = 0
last_seen: tuple[datetime.datetime, str] | None = None
while True:
run_rows = self.workflow_run_repo.get_runs_batch_by_time_range(
start_from=self.window_start,
end_before=self.window_end,
last_seen=last_seen,
batch_size=self.batch_size,
)
if not run_rows:
break
batch_index += 1
last_seen = (run_rows[-1].created_at, run_rows[-1].id)
tenant_ids = {row.tenant_id for row in run_rows}
free_tenants = self._filter_free_tenants(tenant_ids)
free_runs = [row for row in run_rows if row.tenant_id in free_tenants]
paid_or_skipped = len(run_rows) - len(free_runs)
if not free_runs:
click.echo(
click.style(
f"[batch #{batch_index}] skipped (no sandbox runs in batch, {paid_or_skipped} paid/unknown)",
fg="yellow",
)
)
continue
total_runs_targeted += len(free_runs)
if self.dry_run:
batch_counts = self.workflow_run_repo.count_runs_with_related(
free_runs,
count_node_executions=self._count_node_executions,
count_trigger_logs=self._count_trigger_logs,
)
if related_totals is not None:
for key in related_totals:
related_totals[key] += batch_counts.get(key, 0)
sample_ids = ", ".join(run.id for run in free_runs[:5])
click.echo(
click.style(
f"[batch #{batch_index}] would delete {len(free_runs)} runs "
f"(sample ids: {sample_ids}) and skip {paid_or_skipped} paid/unknown",
fg="yellow",
)
)
continue
try:
counts = self.workflow_run_repo.delete_runs_with_related(
free_runs,
delete_node_executions=self._delete_node_executions,
delete_trigger_logs=self._delete_trigger_logs,
)
except Exception:
logger.exception("Failed to delete workflow runs batch ending at %s", last_seen[0])
raise
total_runs_deleted += counts["runs"]
click.echo(
click.style(
f"[batch #{batch_index}] deleted runs: {counts['runs']} "
f"(nodes {counts['node_executions']}, offloads {counts['offloads']}, "
f"app_logs {counts['app_logs']}, trigger_logs {counts['trigger_logs']}, "
f"pauses {counts['pauses']}, pause_reasons {counts['pause_reasons']}); "
f"skipped {paid_or_skipped} paid/unknown",
fg="green",
)
)
if self.dry_run:
if self.window_start:
summary_message = (
f"Dry run complete. Would delete {total_runs_targeted} workflow runs "
f"between {self.window_start.isoformat()} and {self.window_end.isoformat()}"
)
else:
summary_message = (
f"Dry run complete. Would delete {total_runs_targeted} workflow runs "
f"before {self.window_end.isoformat()}"
)
if related_totals is not None:
summary_message = f"{summary_message}; related records: {self._format_related_counts(related_totals)}"
summary_color = "yellow"
else:
if self.window_start:
summary_message = (
f"Cleanup complete. Deleted {total_runs_deleted} workflow runs "
f"between {self.window_start.isoformat()} and {self.window_end.isoformat()}"
)
else:
summary_message = (
f"Cleanup complete. Deleted {total_runs_deleted} workflow runs before {self.window_end.isoformat()}"
)
summary_color = "white"
click.echo(click.style(summary_message, fg=summary_color))
def _filter_free_tenants(self, tenant_ids: Iterable[str]) -> set[str]:
tenant_id_list = list(tenant_ids)
if not dify_config.BILLING_ENABLED:
return set(tenant_id_list)
if not tenant_id_list:
return set()
cleanup_whitelist = self._get_cleanup_whitelist()
try:
bulk_info = BillingService.get_plan_bulk_with_cache(tenant_id_list)
except Exception:
bulk_info = {}
logger.exception("Failed to fetch billing plans in bulk for tenants: %s", tenant_id_list)
eligible_free_tenants: set[str] = set()
for tenant_id in tenant_id_list:
if tenant_id in cleanup_whitelist:
continue
info = bulk_info.get(tenant_id)
if info is None:
logger.warning("Missing billing info for tenant %s in bulk resp; treating as non-free", tenant_id)
continue
if info.get("plan") != CloudPlan.SANDBOX:
continue
if self._is_within_grace_period(tenant_id, info):
continue
eligible_free_tenants.add(tenant_id)
return eligible_free_tenants
def _expiration_datetime(self, tenant_id: str, expiration_value: int) -> datetime.datetime | None:
if expiration_value < 0:
return None
try:
return datetime.datetime.fromtimestamp(expiration_value, datetime.UTC)
except (OverflowError, OSError, ValueError):
logger.exception("Failed to parse expiration timestamp for tenant %s", tenant_id)
return None
def _is_within_grace_period(self, tenant_id: str, info: SubscriptionPlan) -> bool:
if self.free_plan_grace_period_days <= 0:
return False
expiration_value = info.get("expiration_date", -1)
expiration_at = self._expiration_datetime(tenant_id, expiration_value)
if expiration_at is None:
return False
grace_deadline = expiration_at + datetime.timedelta(days=self.free_plan_grace_period_days)
return datetime.datetime.now(datetime.UTC) < grace_deadline
def _get_cleanup_whitelist(self) -> set[str]:
if self._cleanup_whitelist is not None:
return self._cleanup_whitelist
if not dify_config.BILLING_ENABLED:
self._cleanup_whitelist = set()
return self._cleanup_whitelist
try:
whitelist_ids = BillingService.get_expired_subscription_cleanup_whitelist()
except Exception:
logger.exception("Failed to fetch cleanup whitelist from billing service")
whitelist_ids = []
self._cleanup_whitelist = set(whitelist_ids)
return self._cleanup_whitelist
def _delete_trigger_logs(self, session: Session, run_ids: Sequence[str]) -> int:
trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(session)
return trigger_repo.delete_by_run_ids(run_ids)
def _count_trigger_logs(self, session: Session, run_ids: Sequence[str]) -> int:
trigger_repo = SQLAlchemyWorkflowTriggerLogRepository(session)
return trigger_repo.count_by_run_ids(run_ids)
@staticmethod
def _build_run_contexts(
runs: Sequence[WorkflowRun],
) -> list[DifyAPISQLAlchemyWorkflowNodeExecutionRepository.RunContext]:
return [
{
"run_id": run.id,
"tenant_id": run.tenant_id,
"app_id": run.app_id,
"workflow_id": run.workflow_id,
"triggered_from": run.triggered_from,
}
for run in runs
]
@staticmethod
def _empty_related_counts() -> dict[str, int]:
return {
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
@staticmethod
def _format_related_counts(counts: dict[str, int]) -> str:
return (
f"node_executions {counts['node_executions']}, "
f"offloads {counts['offloads']}, "
f"app_logs {counts['app_logs']}, "
f"trigger_logs {counts['trigger_logs']}, "
f"pauses {counts['pauses']}, "
f"pause_reasons {counts['pause_reasons']}"
)
def _count_node_executions(self, session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
run_contexts = self._build_run_contexts(runs)
return DifyAPISQLAlchemyWorkflowNodeExecutionRepository.count_by_runs(session, run_contexts)
def _delete_node_executions(self, session: Session, runs: Sequence[WorkflowRun]) -> tuple[int, int]:
run_contexts = self._build_run_contexts(runs)
return DifyAPISQLAlchemyWorkflowNodeExecutionRepository.delete_by_runs(session, run_contexts)
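A minimal invocation sketch for the cleanup service, assuming it runs inside the Flask app context so the default repository can bind to db.engine; the window bounds and batch size are illustrative.

# Hedged sketch: inspect a bounded window first, then clean everything past retention.
import datetime

from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup

window_start = datetime.datetime(2024, 1, 1)    # illustrative explicit window
window_end = datetime.datetime(2024, 2, 1)

# Dry run: logs per-batch "would delete" lines plus related-record totals, deletes nothing.
WorkflowRunCleanup(days=30, batch_size=100, start_from=window_start, end_before=window_end, dry_run=True).run()

# Real pass: everything older than the 30-day cutoff, in batches of 100.
WorkflowRunCleanup(days=30, batch_size=100).run()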

View File

@@ -0,0 +1,158 @@
app:
description: Validate that the v1 Variable Assigner blocks streaming until the conversation variable is updated.
icon: 🤖
icon_background: '#FFEAD5'
mode: advanced-chat
name: test_streaming_conversation_variables_v1_overwrite
use_icon_as_answer_icon: false
dependencies: []
kind: app
version: 0.5.0
workflow:
conversation_variables:
- description: ''
id: 6ddf2d7f-3d1b-4bb0-9a5e-9b0c87c7b5e6
name: conv_var
selector:
- conversation
- conv_var
value: default
value_type: string
environment_variables: []
features:
file_upload:
allowed_file_extensions:
- .JPG
- .JPEG
- .PNG
- .GIF
- .WEBP
- .SVG
allowed_file_types:
- image
allowed_file_upload_methods:
- local_file
- remote_url
enabled: false
fileUploadConfig:
audio_file_size_limit: 50
batch_count_limit: 5
file_size_limit: 15
image_file_size_limit: 10
video_file_size_limit: 100
workflow_file_upload_limit: 10
image:
enabled: false
number_limits: 3
transfer_methods:
- local_file
- remote_url
number_limits: 3
opening_statement: ''
retriever_resource:
enabled: true
sensitive_word_avoidance:
enabled: false
speech_to_text:
enabled: false
suggested_questions: []
suggested_questions_after_answer:
enabled: false
text_to_speech:
enabled: false
language: ''
voice: ''
graph:
edges:
- data:
isInIteration: false
isInLoop: false
sourceType: start
targetType: assigner
id: start-source-assigner-target
source: start
sourceHandle: source
target: assigner
targetHandle: target
type: custom
zIndex: 0
- data:
isInLoop: false
sourceType: assigner
targetType: answer
id: assigner-source-answer-target
source: assigner
sourceHandle: source
target: answer
targetHandle: target
type: custom
zIndex: 0
nodes:
- data:
desc: ''
selected: false
title: Start
type: start
variables: []
height: 54
id: start
position:
x: 30
y: 253
positionAbsolute:
x: 30
y: 253
selected: false
sourcePosition: right
targetPosition: left
type: custom
width: 244
- data:
answer: 'Current Value Of `conv_var` is:{{#conversation.conv_var#}}'
desc: ''
selected: false
title: Answer
type: answer
variables: []
height: 106
id: answer
position:
x: 638
y: 253
positionAbsolute:
x: 638
y: 253
selected: true
sourcePosition: right
targetPosition: left
type: custom
width: 244
- data:
assigned_variable_selector:
- conversation
- conv_var
desc: ''
input_variable_selector:
- sys
- query
selected: false
title: Variable Assigner
type: assigner
write_mode: over-write
height: 84
id: assigner
position:
x: 334
y: 253
positionAbsolute:
x: 334
y: 253
selected: false
sourcePosition: right
targetPosition: left
type: custom
width: 244
viewport:
x: 0
y: 0
zoom: 0.7

View File

@@ -230,7 +230,6 @@ class TestAgentService:
# Create first agent thought
thought1 = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -257,7 +256,6 @@ class TestAgentService:
# Create second agent thought
thought2 = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=2,
thought="Based on the analysis, I can provide a response",
@@ -545,7 +543,6 @@ class TestAgentService:
# Create agent thought with tool error
thought_with_error = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -759,7 +756,6 @@ class TestAgentService:
# Create agent thought with multiple tools
complex_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to use multiple tools to complete this task",
@@ -877,7 +873,6 @@ class TestAgentService:
# Create agent thought with files
thought_with_files = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to process some files",
@@ -957,7 +952,6 @@ class TestAgentService:
# Create agent thought with empty tool data
empty_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",
@@ -999,7 +993,6 @@ class TestAgentService:
# Create agent thought with malformed JSON
malformed_thought = MessageAgentThought(
id=fake.uuid4(),
message_id=message.id,
position=1,
thought="I need to analyze the user's request",

View File

@@ -1,390 +0,0 @@
"""
Tests for AdvancedChatAppGenerateTaskPipeline._handle_node_succeeded_event method,
specifically testing the ANSWER node message_replace logic.
"""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import MagicMock, Mock, patch
import pytest
from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity
from core.app.entities.queue_entities import QueueNodeSucceededEvent
from core.workflow.enums import NodeType
from models import EndUser
from models.model import AppMode
class TestAnswerNodeMessageReplace:
"""Test cases for ANSWER node message_replace event logic."""
@pytest.fixture
def mock_application_generate_entity(self):
"""Create a mock application generate entity."""
entity = Mock(spec=AdvancedChatAppGenerateEntity)
entity.task_id = "test-task-id"
entity.app_id = "test-app-id"
entity.workflow_run_id = "test-workflow-run-id"
# minimal app_config used by pipeline internals
entity.app_config = SimpleNamespace(
tenant_id="test-tenant-id",
app_id="test-app-id",
app_mode=AppMode.ADVANCED_CHAT,
app_model_config_dict={},
additional_features=None,
sensitive_word_avoidance=None,
)
entity.query = "test query"
entity.files = []
entity.extras = {}
entity.trace_manager = None
entity.inputs = {}
entity.invoke_from = "debugger"
return entity
@pytest.fixture
def mock_workflow(self):
"""Create a mock workflow."""
workflow = Mock()
workflow.id = "test-workflow-id"
workflow.features_dict = {}
return workflow
@pytest.fixture
def mock_queue_manager(self):
"""Create a mock queue manager."""
manager = Mock()
manager.listen.return_value = []
manager.graph_runtime_state = None
return manager
@pytest.fixture
def mock_conversation(self):
"""Create a mock conversation."""
conversation = Mock()
conversation.id = "test-conversation-id"
conversation.mode = "advanced_chat"
return conversation
@pytest.fixture
def mock_message(self):
"""Create a mock message."""
message = Mock()
message.id = "test-message-id"
message.query = "test query"
message.created_at = Mock()
message.created_at.timestamp.return_value = 1234567890
return message
@pytest.fixture
def mock_user(self):
"""Create a mock end user."""
user = MagicMock(spec=EndUser)
user.id = "test-user-id"
user.session_id = "test-session-id"
return user
@pytest.fixture
def mock_draft_var_saver_factory(self):
"""Create a mock draft variable saver factory."""
return Mock()
@pytest.fixture
def pipeline(
self,
mock_application_generate_entity,
mock_workflow,
mock_queue_manager,
mock_conversation,
mock_message,
mock_user,
mock_draft_var_saver_factory,
):
"""Create an AdvancedChatAppGenerateTaskPipeline instance with mocked dependencies."""
from core.app.apps.advanced_chat.generate_task_pipeline import AdvancedChatAppGenerateTaskPipeline
with patch("core.app.apps.advanced_chat.generate_task_pipeline.db"):
pipeline = AdvancedChatAppGenerateTaskPipeline(
application_generate_entity=mock_application_generate_entity,
workflow=mock_workflow,
queue_manager=mock_queue_manager,
conversation=mock_conversation,
message=mock_message,
user=mock_user,
stream=True,
dialogue_count=1,
draft_var_saver_factory=mock_draft_var_saver_factory,
)
# Initialize workflow run id to avoid validation errors
pipeline._workflow_run_id = "test-workflow-run-id"
# Mock the message cycle manager methods we need to track
pipeline._message_cycle_manager.message_replace_to_stream_response = Mock()
return pipeline
def test_answer_node_with_different_output_sends_message_replace(self, pipeline, mock_application_generate_entity):
"""
Test that when an ANSWER node's final output differs from accumulated answer,
a message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "initial answer"
# Create ANSWER node succeeded event with different final output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": "updated final answer"},
)
# Mock the workflow response converter to avoid extra processing
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
responses = list(pipeline._handle_node_succeeded_event(event))
# Assert
assert pipeline._task_state.answer == "updated final answer"
# Verify message_replace was called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_called_once_with(
answer="updated final answer", reason="variable_update"
)
def test_answer_node_with_same_output_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node's final output is the same as accumulated answer,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "same answer"
# Create ANSWER node succeeded event with same output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": "same answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "same answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_none_output_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node's output is None or missing 'answer' key,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with None output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": None},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_empty_outputs_does_not_send_message_replace(self, pipeline):
"""
Test that when an ANSWER node has empty outputs dict,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with empty outputs
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_no_answer_key_in_outputs(self, pipeline):
"""
Test that when an ANSWER node's outputs don't contain 'answer' key,
no message_replace event is sent.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event without 'answer' key in outputs
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"other_key": "some value"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_non_answer_node_does_not_send_message_replace(self, pipeline):
"""
Test that non-ANSWER nodes (e.g., LLM, END) don't trigger message_replace events.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Test with LLM node
llm_event = QueueNodeSucceededEvent(
node_execution_id="test-llm-execution-id",
node_id="test-llm-node",
node_type=NodeType.LLM,
start_at=datetime.now(),
outputs={"answer": "different answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(llm_event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_end_node_does_not_send_message_replace(self, pipeline):
"""
Test that END nodes don't trigger message_replace events even with 'answer' output.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "existing answer"
# Create END node succeeded event with answer output
event = QueueNodeSucceededEvent(
node_execution_id="test-end-execution-id",
node_id="test-end-node",
node_type=NodeType.END,
start_at=datetime.now(),
outputs={"answer": "different answer"},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should remain unchanged
assert pipeline._task_state.answer == "existing answer"
# Verify message_replace was NOT called
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_not_called()
def test_answer_node_with_numeric_output_converts_to_string(self, pipeline):
"""
Test that when an ANSWER node's final output is numeric,
it gets converted to string properly.
"""
# Arrange: Set initial accumulated answer
pipeline._task_state.answer = "text answer"
# Create ANSWER node succeeded event with numeric output
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={"answer": 12345},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: answer should be converted to string
assert pipeline._task_state.answer == "12345"
# Verify message_replace was called with string
pipeline._message_cycle_manager.message_replace_to_stream_response.assert_called_once_with(
answer="12345", reason="variable_update"
)
def test_answer_node_files_are_recorded(self, pipeline):
"""
Test that ANSWER nodes properly record files from outputs.
"""
# Arrange
pipeline._task_state.answer = "existing answer"
# Create ANSWER node succeeded event with files
event = QueueNodeSucceededEvent(
node_execution_id="test-node-execution-id",
node_id="test-answer-node",
node_type=NodeType.ANSWER,
start_at=datetime.now(),
outputs={
"answer": "same answer",
"files": [
{"type": "image", "transfer_method": "remote_url", "remote_url": "http://example.com/img.png"}
],
},
)
# Mock the workflow response converter
pipeline._workflow_response_converter.fetch_files_from_node_outputs = Mock(return_value=event.outputs["files"])
pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = Mock(return_value=None)
pipeline._save_output_for_event = Mock()
# Act
list(pipeline._handle_node_succeeded_event(event))
# Assert: files should be recorded
assert len(pipeline._recorded_files) == 1
assert pipeline._recorded_files[0] == event.outputs["files"][0]

View File

@@ -45,3 +45,33 @@ def test_streaming_conversation_variables():
runner = TableTestRunner()
result = runner.run_test_case(case)
assert result.success, f"Test failed: {result.error}"
def test_streaming_conversation_variables_v1_overwrite_waits_for_assignment():
fixture_name = "test_streaming_conversation_variables_v1_overwrite"
input_query = "overwrite-value"
case = WorkflowTestCase(
fixture_path=fixture_name,
use_auto_mock=False,
mock_config=MockConfigBuilder().build(),
query=input_query,
inputs={},
expected_outputs={"answer": f"Current Value Of `conv_var` is:{input_query}"},
)
runner = TableTestRunner()
result = runner.run_test_case(case)
assert result.success, f"Test failed: {result.error}"
events = result.events
conv_var_chunk_events = [
event
for event in events
if isinstance(event, NodeRunStreamChunkEvent) and tuple(event.selector) == ("conversation", "conv_var")
]
assert conv_var_chunk_events, "Expected conversation variable chunk events to be emitted"
assert all(event.chunk == input_query for event in conv_var_chunk_events), (
"Expected streamed conversation variable value to match the input query"
)

View File

@@ -58,6 +58,8 @@ def test_json_object_valid_schema():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -68,7 +70,7 @@ def test_json_object_valid_schema():
)
]
user_inputs = {"profile": json.dumps({"age": 20, "name": "Tom"})}
user_inputs = {"profile": {"age": 20, "name": "Tom"}}
node = make_start_node(user_inputs, variables)
result = node._run()
@@ -87,6 +89,8 @@ def test_json_object_invalid_json_string():
"required": ["age", "name"],
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -97,12 +101,12 @@ def test_json_object_invalid_json_string():
)
]
# Missing closing brace makes this invalid JSON
# Providing a string instead of an object should raise a type error
user_inputs = {"profile": '{"age": 20, "name": "Tom"'}
node = make_start_node(user_inputs, variables)
with pytest.raises(ValueError, match='{"age": 20, "name": "Tom" must be a valid JSON object'):
with pytest.raises(ValueError, match="JSON object for 'profile' must be an object"):
node._run()
@@ -118,6 +122,8 @@ def test_json_object_does_not_match_schema():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -129,7 +135,7 @@ def test_json_object_does_not_match_schema():
]
# age is a string, which violates the schema (expects number)
user_inputs = {"profile": json.dumps({"age": "twenty", "name": "Tom"})}
user_inputs = {"profile": {"age": "twenty", "name": "Tom"}}
node = make_start_node(user_inputs, variables)
@@ -149,6 +155,8 @@ def test_json_object_missing_required_schema_field():
}
)
schema = json.loads(schema)
variables = [
VariableEntity(
variable="profile",
@@ -160,7 +168,7 @@ def test_json_object_missing_required_schema_field():
]
# Missing required field "name"
user_inputs = {"profile": json.dumps({"age": 20})}
user_inputs = {"profile": {"age": 20}}
node = make_start_node(user_inputs, variables)

View File

@@ -2,13 +2,17 @@ from types import SimpleNamespace
import pytest
from configs import dify_config
from core.file.enums import FileType
from core.file.models import File, FileTransferMethod
from core.helper.code_executor.code_executor import CodeLanguage
from core.variables.variables import StringVariable
from core.workflow.constants import (
CONVERSATION_VARIABLE_NODE_ID,
ENVIRONMENT_VARIABLE_NODE_ID,
)
from core.workflow.nodes.code.code_node import CodeNode
from core.workflow.nodes.code.limits import CodeNodeLimits
from core.workflow.runtime import VariablePool
from core.workflow.system_variable import SystemVariable
from core.workflow.workflow_entry import WorkflowEntry
@@ -96,6 +100,58 @@ class TestWorkflowEntry:
assert output_var is not None
assert output_var.value == "system_user"
def test_single_step_run_injects_code_limits(self):
"""Ensure single-step CodeNode execution configures limits."""
# Arrange
node_id = "code_node"
node_data = {
"type": "code",
"title": "Code",
"desc": None,
"variables": [],
"code_language": CodeLanguage.PYTHON3,
"code": "def main():\n return {}",
"outputs": {},
}
node_config = {"id": node_id, "data": node_data}
class StubWorkflow:
def __init__(self):
self.tenant_id = "tenant"
self.app_id = "app"
self.id = "workflow"
self.graph_dict = {"nodes": [node_config], "edges": []}
def get_node_config_by_id(self, target_id: str):
assert target_id == node_id
return node_config
workflow = StubWorkflow()
variable_pool = VariablePool(system_variables=SystemVariable.empty(), user_inputs={})
expected_limits = CodeNodeLimits(
max_string_length=dify_config.CODE_MAX_STRING_LENGTH,
max_number=dify_config.CODE_MAX_NUMBER,
min_number=dify_config.CODE_MIN_NUMBER,
max_precision=dify_config.CODE_MAX_PRECISION,
max_depth=dify_config.CODE_MAX_DEPTH,
max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH,
max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH,
max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH,
)
# Act
node, _ = WorkflowEntry.single_step_run(
workflow=workflow,
node_id=node_id,
user_id="user",
user_inputs={},
variable_pool=variable_pool,
)
# Assert
assert isinstance(node, CodeNode)
assert node._limits == expected_limits
def test_mapping_user_inputs_to_variable_pool_with_env_variables(self):
"""Test mapping environment variables from user inputs to variable pool."""
# Initialize variable pool with environment variables

View File

@@ -4,6 +4,7 @@ from datetime import UTC, datetime
from unittest.mock import Mock, patch
import pytest
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session, sessionmaker
from core.workflow.enums import WorkflowExecutionStatus
@@ -104,6 +105,42 @@ class TestDifyAPISQLAlchemyWorkflowRunRepository:
return pause
class TestGetRunsBatchByTimeRange(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_get_runs_batch_by_time_range_filters_terminal_statuses(
self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock
):
scalar_result = Mock()
scalar_result.all.return_value = []
mock_session.scalars.return_value = scalar_result
repository.get_runs_batch_by_time_range(
start_from=None,
end_before=datetime(2024, 1, 1),
last_seen=None,
batch_size=50,
)
stmt = mock_session.scalars.call_args[0][0]
compiled_sql = str(
stmt.compile(
dialect=postgresql.dialect(),
compile_kwargs={"literal_binds": True},
)
)
assert "workflow_runs.status" in compiled_sql
for status in (
WorkflowExecutionStatus.SUCCEEDED,
WorkflowExecutionStatus.FAILED,
WorkflowExecutionStatus.STOPPED,
WorkflowExecutionStatus.PARTIAL_SUCCEEDED,
):
assert f"'{status.value}'" in compiled_sql
assert "'running'" not in compiled_sql
assert "'paused'" not in compiled_sql
class TestCreateWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
"""Test create_workflow_pause method."""
@@ -181,6 +218,61 @@ class TestCreateWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
)
class TestDeleteRunsWithRelated(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_uses_trigger_log_repository(self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock):
node_ids_result = Mock()
node_ids_result.all.return_value = []
pause_ids_result = Mock()
pause_ids_result.all.return_value = []
mock_session.scalars.side_effect = [node_ids_result, pause_ids_result]
# app_logs delete, runs delete
mock_session.execute.side_effect = [Mock(rowcount=0), Mock(rowcount=1)]
fake_trigger_repo = Mock()
fake_trigger_repo.delete_by_run_ids.return_value = 3
run = Mock(id="run-1", tenant_id="t1", app_id="a1", workflow_id="w1", triggered_from="tf")
counts = repository.delete_runs_with_related(
[run],
delete_node_executions=lambda session, runs: (2, 1),
delete_trigger_logs=lambda session, run_ids: fake_trigger_repo.delete_by_run_ids(run_ids),
)
fake_trigger_repo.delete_by_run_ids.assert_called_once_with(["run-1"])
assert counts["node_executions"] == 2
assert counts["offloads"] == 1
assert counts["trigger_logs"] == 3
assert counts["runs"] == 1
class TestCountRunsWithRelated(TestDifyAPISQLAlchemyWorkflowRunRepository):
def test_uses_trigger_log_repository(self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock):
pause_ids_result = Mock()
pause_ids_result.all.return_value = ["pause-1", "pause-2"]
mock_session.scalars.return_value = pause_ids_result
mock_session.scalar.side_effect = [5, 2]
fake_trigger_repo = Mock()
fake_trigger_repo.count_by_run_ids.return_value = 3
run = Mock(id="run-1", tenant_id="t1", app_id="a1", workflow_id="w1", triggered_from="tf")
counts = repository.count_runs_with_related(
[run],
count_node_executions=lambda session, runs: (2, 1),
count_trigger_logs=lambda session, run_ids: fake_trigger_repo.count_by_run_ids(run_ids),
)
fake_trigger_repo.count_by_run_ids.assert_called_once_with(["run-1"])
assert counts["node_executions"] == 2
assert counts["offloads"] == 1
assert counts["trigger_logs"] == 3
assert counts["app_logs"] == 5
assert counts["pauses"] == 2
assert counts["pause_reasons"] == 2
assert counts["runs"] == 1
class TestResumeWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository):
"""Test resume_workflow_pause method."""

View File

@@ -0,0 +1,31 @@
from unittest.mock import Mock
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
def test_delete_by_run_ids_executes_delete():
session = Mock(spec=Session)
session.execute.return_value = Mock(rowcount=2)
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
deleted = repo.delete_by_run_ids(["run-1", "run-2"])
stmt = session.execute.call_args[0][0]
compiled_sql = str(stmt.compile(dialect=postgresql.dialect(), compile_kwargs={"literal_binds": True}))
assert "workflow_trigger_logs" in compiled_sql
assert "'run-1'" in compiled_sql
assert "'run-2'" in compiled_sql
assert deleted == 2
def test_delete_by_run_ids_empty_short_circuits():
session = Mock(spec=Session)
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
deleted = repo.delete_by_run_ids([])
session.execute.assert_not_called()
assert deleted == 0

View File

@@ -0,0 +1,327 @@
import datetime
from typing import Any
import pytest
from services.billing_service import SubscriptionPlan
from services.retention.workflow_run import clear_free_plan_expired_workflow_run_logs as cleanup_module
from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
class FakeRun:
def __init__(
self,
run_id: str,
tenant_id: str,
created_at: datetime.datetime,
app_id: str = "app-1",
workflow_id: str = "wf-1",
triggered_from: str = "workflow-run",
) -> None:
self.id = run_id
self.tenant_id = tenant_id
self.app_id = app_id
self.workflow_id = workflow_id
self.triggered_from = triggered_from
self.created_at = created_at
class FakeRepo:
def __init__(
self,
batches: list[list[FakeRun]],
delete_result: dict[str, int] | None = None,
count_result: dict[str, int] | None = None,
) -> None:
self.batches = batches
self.call_idx = 0
self.deleted: list[list[str]] = []
self.counted: list[list[str]] = []
self.delete_result = delete_result or {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
self.count_result = count_result or {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
def get_runs_batch_by_time_range(
self,
start_from: datetime.datetime | None,
end_before: datetime.datetime,
last_seen: tuple[datetime.datetime, str] | None,
batch_size: int,
) -> list[FakeRun]:
if self.call_idx >= len(self.batches):
return []
batch = self.batches[self.call_idx]
self.call_idx += 1
return batch
def delete_runs_with_related(
self, runs: list[FakeRun], delete_node_executions=None, delete_trigger_logs=None
) -> dict[str, int]:
self.deleted.append([run.id for run in runs])
result = self.delete_result.copy()
result["runs"] = len(runs)
return result
def count_runs_with_related(
self, runs: list[FakeRun], count_node_executions=None, count_trigger_logs=None
) -> dict[str, int]:
self.counted.append([run.id for run in runs])
result = self.count_result.copy()
result["runs"] = len(runs)
return result
def plan_info(plan: str, expiration: int) -> SubscriptionPlan:
return SubscriptionPlan(plan=plan, expiration_date=expiration)
def create_cleanup(
monkeypatch: pytest.MonkeyPatch,
repo: FakeRepo,
*,
grace_period_days: int = 0,
whitelist: set[str] | None = None,
**kwargs: Any,
) -> WorkflowRunCleanup:
monkeypatch.setattr(
cleanup_module.dify_config,
"SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD",
grace_period_days,
)
monkeypatch.setattr(
cleanup_module.WorkflowRunCleanup,
"_get_cleanup_whitelist",
lambda self: whitelist or set(),
)
return WorkflowRunCleanup(workflow_run_repo=repo, **kwargs)
def test_filter_free_tenants_billing_disabled(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", False)
def fail_bulk(_: list[str]) -> dict[str, SubscriptionPlan]:
raise RuntimeError("should not call")
monkeypatch.setattr(cleanup_module.BillingService, "get_plan_bulk_with_cache", staticmethod(fail_bulk))
tenants = {"t1", "t2"}
free = cleanup._filter_free_tenants(tenants)
assert free == tenants
def test_filter_free_tenants_bulk_mixed(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
free = cleanup._filter_free_tenants({"t_free", "t_paid", "t_missing"})
assert free == {"t_free", "t_missing"}
def test_filter_free_tenants_respects_grace_period(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, grace_period_days=45)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
now = datetime.datetime.now(datetime.UTC)
within_grace_ts = int((now - datetime.timedelta(days=10)).timestamp())
outside_grace_ts = int((now - datetime.timedelta(days=90)).timestamp())
def fake_bulk(_: list[str]) -> dict[str, SubscriptionPlan]:
return {
"recently_downgraded": plan_info("sandbox", within_grace_ts),
"long_sandbox": plan_info("sandbox", outside_grace_ts),
}
monkeypatch.setattr(cleanup_module.BillingService, "get_plan_bulk_with_cache", staticmethod(fake_bulk))
free = cleanup._filter_free_tenants({"recently_downgraded", "long_sandbox"})
assert free == {"long_sandbox"}
def test_filter_free_tenants_skips_cleanup_whitelist(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(
monkeypatch,
repo=FakeRepo([]),
days=30,
batch_size=10,
whitelist={"tenant_whitelist"},
)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
tenants = {"tenant_whitelist", "tenant_regular"}
free = cleanup._filter_free_tenants(tenants)
assert free == {"tenant_regular"}
def test_filter_free_tenants_bulk_failure(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(lambda tenant_ids: (_ for _ in ()).throw(RuntimeError("boom"))),
)
free = cleanup._filter_free_tenants({"t1", "t2"})
assert free == set()
def test_run_deletes_only_free_tenants(monkeypatch: pytest.MonkeyPatch) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(
batches=[
[
FakeRun("run-free", "t_free", cutoff),
FakeRun("run-paid", "t_paid", cutoff),
]
]
)
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(
lambda tenant_ids: {
tenant_id: (plan_info("team", -1) if tenant_id == "t_paid" else plan_info("sandbox", -1))
for tenant_id in tenant_ids
}
),
)
cleanup.run()
assert repo.deleted == [["run-free"]]
def test_run_skips_when_no_free_tenants(monkeypatch: pytest.MonkeyPatch) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(batches=[[FakeRun("run-paid", "t_paid", cutoff)]])
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", True)
monkeypatch.setattr(
cleanup_module.BillingService,
"get_plan_bulk_with_cache",
staticmethod(lambda tenant_ids: {tenant_id: plan_info("team", 1893456000) for tenant_id in tenant_ids}),
)
cleanup.run()
assert repo.deleted == []
def test_run_exits_on_empty_batch(monkeypatch: pytest.MonkeyPatch) -> None:
cleanup = create_cleanup(monkeypatch, repo=FakeRepo([]), days=30, batch_size=10)
cleanup.run()
def test_run_dry_run_skips_deletions(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None:
cutoff = datetime.datetime.now()
repo = FakeRepo(
batches=[[FakeRun("run-free", "t_free", cutoff)]],
count_result={
"runs": 0,
"node_executions": 2,
"offloads": 1,
"app_logs": 3,
"trigger_logs": 4,
"pauses": 5,
"pause_reasons": 6,
},
)
cleanup = create_cleanup(monkeypatch, repo=repo, days=30, batch_size=10, dry_run=True)
monkeypatch.setattr(cleanup_module.dify_config, "BILLING_ENABLED", False)
cleanup.run()
assert repo.deleted == []
assert repo.counted == [["run-free"]]
captured = capsys.readouterr().out
assert "Dry run mode enabled" in captured
assert "would delete 1 runs" in captured
assert "related records" in captured
assert "node_executions 2" in captured
assert "offloads 1" in captured
assert "app_logs 3" in captured
assert "trigger_logs 4" in captured
assert "pauses 5" in captured
assert "pause_reasons 6" in captured
def test_between_sets_window_bounds(monkeypatch: pytest.MonkeyPatch) -> None:
start_from = datetime.datetime(2024, 5, 1, 0, 0, 0)
end_before = datetime.datetime(2024, 6, 1, 0, 0, 0)
cleanup = create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=start_from, end_before=end_before
)
assert cleanup.window_start == start_from
assert cleanup.window_end == end_before
def test_between_requires_both_boundaries(monkeypatch: pytest.MonkeyPatch) -> None:
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=datetime.datetime.now(), end_before=None
)
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=None, end_before=datetime.datetime.now()
)
def test_between_requires_end_after_start(monkeypatch: pytest.MonkeyPatch) -> None:
start_from = datetime.datetime(2024, 6, 1, 0, 0, 0)
end_before = datetime.datetime(2024, 5, 1, 0, 0, 0)
with pytest.raises(ValueError):
create_cleanup(
monkeypatch, repo=FakeRepo([]), days=30, batch_size=10, start_from=start_from, end_before=end_before
)

api/uv.lock generated
View File

@@ -1731,7 +1731,7 @@ storage = [
{ name = "opendal", specifier = "~=0.46.0" },
{ name = "oss2", specifier = "==2.18.5" },
{ name = "supabase", specifier = "~=2.18.1" },
{ name = "tos", specifier = "~=2.7.1" },
{ name = "tos", specifier = "~=2.9.0" },
]
tools = [
{ name = "cloudscraper", specifier = "~=1.2.71" },
@@ -6148,7 +6148,7 @@ wheels = [
[[package]]
name = "tos"
version = "2.7.2"
version = "2.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "crcmod" },
@@ -6156,8 +6156,9 @@ dependencies = [
{ name = "pytz" },
{ name = "requests" },
{ name = "six" },
{ name = "wrapt" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0c/01/f811af86f1f80d5f289be075c3b281e74bf3fe081cfbe5cfce44954d2c3a/tos-2.7.2.tar.gz", hash = "sha256:3c31257716785bca7b2cac51474ff32543cda94075a7b7aff70d769c15c7b7ed", size = 123407, upload-time = "2024-10-16T15:59:08.634Z" }
sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/13451226f564f88d9db2323e9b7eabcced792a0ad5ee1e333751a7634257/tos-2.9.0.tar.gz", hash = "sha256:861cfc348e770f099f911cb96b2c41774ada6c9c51b7a89d97e0c426074dd99e", size = 157071, upload-time = "2026-01-06T04:13:08.921Z" }
[[package]]
name = "tqdm"
@@ -7146,31 +7147,31 @@ wheels = [
[[package]]
name = "wrapt"
version = "1.17.3"
version = "1.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" }
sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972, upload-time = "2023-11-09T06:33:30.191Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/52/db/00e2a219213856074a213503fdac0511203dceefff26e1daa15250cc01a0/wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", size = 53482, upload-time = "2025-08-12T05:51:45.79Z" },
{ url = "https://files.pythonhosted.org/packages/5e/30/ca3c4a5eba478408572096fe9ce36e6e915994dd26a4e9e98b4f729c06d9/wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", size = 38674, upload-time = "2025-08-12T05:51:34.629Z" },
{ url = "https://files.pythonhosted.org/packages/31/25/3e8cc2c46b5329c5957cec959cb76a10718e1a513309c31399a4dad07eb3/wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", size = 38959, upload-time = "2025-08-12T05:51:56.074Z" },
{ url = "https://files.pythonhosted.org/packages/5d/8f/a32a99fc03e4b37e31b57cb9cefc65050ea08147a8ce12f288616b05ef54/wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", size = 82376, upload-time = "2025-08-12T05:52:32.134Z" },
{ url = "https://files.pythonhosted.org/packages/31/57/4930cb8d9d70d59c27ee1332a318c20291749b4fba31f113c2f8ac49a72e/wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", size = 83604, upload-time = "2025-08-12T05:52:11.663Z" },
{ url = "https://files.pythonhosted.org/packages/a8/f3/1afd48de81d63dd66e01b263a6fbb86e1b5053b419b9b33d13e1f6d0f7d0/wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", size = 82782, upload-time = "2025-08-12T05:52:12.626Z" },
{ url = "https://files.pythonhosted.org/packages/1e/d7/4ad5327612173b144998232f98a85bb24b60c352afb73bc48e3e0d2bdc4e/wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", size = 82076, upload-time = "2025-08-12T05:52:33.168Z" },
{ url = "https://files.pythonhosted.org/packages/bb/59/e0adfc831674a65694f18ea6dc821f9fcb9ec82c2ce7e3d73a88ba2e8718/wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", size = 36457, upload-time = "2025-08-12T05:53:03.936Z" },
{ url = "https://files.pythonhosted.org/packages/83/88/16b7231ba49861b6f75fc309b11012ede4d6b0a9c90969d9e0db8d991aeb/wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", size = 38745, upload-time = "2025-08-12T05:53:02.885Z" },
{ url = "https://files.pythonhosted.org/packages/9a/1e/c4d4f3398ec073012c51d1c8d87f715f56765444e1a4b11e5180577b7e6e/wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", size = 36806, upload-time = "2025-08-12T05:52:53.368Z" },
{ url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" },
{ url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" },
{ url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" },
{ url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" },
{ url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" },
{ url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" },
{ url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" },
{ url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" },
{ url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" },
{ url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" },
{ url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" },
{ url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313, upload-time = "2023-11-09T06:31:52.168Z" },
{ url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164, upload-time = "2023-11-09T06:31:53.522Z" },
{ url = "https://files.pythonhosted.org/packages/7f/a7/f1212ba098f3de0fd244e2de0f8791ad2539c03bef6c05a9fcb03e45b089/wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", size = 80890, upload-time = "2023-11-09T06:31:55.247Z" },
{ url = "https://files.pythonhosted.org/packages/b7/96/bb5e08b3d6db003c9ab219c487714c13a237ee7dcc572a555eaf1ce7dc82/wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", size = 73118, upload-time = "2023-11-09T06:31:57.023Z" },
{ url = "https://files.pythonhosted.org/packages/6e/52/2da48b35193e39ac53cfb141467d9f259851522d0e8c87153f0ba4205fb1/wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", size = 80746, upload-time = "2023-11-09T06:31:58.686Z" },
{ url = "https://files.pythonhosted.org/packages/11/fb/18ec40265ab81c0e82a934de04596b6ce972c27ba2592c8b53d5585e6bcd/wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", size = 85668, upload-time = "2023-11-09T06:31:59.992Z" },
{ url = "https://files.pythonhosted.org/packages/0f/ef/0ecb1fa23145560431b970418dce575cfaec555ab08617d82eb92afc7ccf/wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", size = 78556, upload-time = "2023-11-09T06:32:01.942Z" },
{ url = "https://files.pythonhosted.org/packages/25/62/cd284b2b747f175b5a96cbd8092b32e7369edab0644c45784871528eb852/wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", size = 85712, upload-time = "2023-11-09T06:32:03.686Z" },
{ url = "https://files.pythonhosted.org/packages/e5/a7/47b7ff74fbadf81b696872d5ba504966591a3468f1bc86bca2f407baef68/wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", size = 35327, upload-time = "2023-11-09T06:32:05.284Z" },
{ url = "https://files.pythonhosted.org/packages/cf/c3/0084351951d9579ae83a3d9e38c140371e4c6b038136909235079f2e6e78/wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", size = 37523, upload-time = "2023-11-09T06:32:07.17Z" },
{ url = "https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614, upload-time = "2023-11-09T06:32:08.859Z" },
{ url = "https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316, upload-time = "2023-11-09T06:32:10.719Z" },
{ url = "https://files.pythonhosted.org/packages/7e/79/5ff0a5c54bda5aec75b36453d06be4f83d5cd4932cc84b7cb2b52cee23e2/wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", size = 86322, upload-time = "2023-11-09T06:32:12.592Z" },
{ url = "https://files.pythonhosted.org/packages/c4/81/e799bf5d419f422d8712108837c1d9bf6ebe3cb2a81ad94413449543a923/wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", size = 79055, upload-time = "2023-11-09T06:32:14.394Z" },
{ url = "https://files.pythonhosted.org/packages/62/62/30ca2405de6a20448ee557ab2cd61ab9c5900be7cbd18a2639db595f0b98/wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", size = 87291, upload-time = "2023-11-09T06:32:16.201Z" },
{ url = "https://files.pythonhosted.org/packages/49/4e/5d2f6d7b57fc9956bf06e944eb00463551f7d52fc73ca35cfc4c2cdb7aed/wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", size = 90374, upload-time = "2023-11-09T06:32:18.052Z" },
{ url = "https://files.pythonhosted.org/packages/a6/9b/c2c21b44ff5b9bf14a83252a8b973fb84923764ff63db3e6dfc3895cf2e0/wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", size = 83896, upload-time = "2023-11-09T06:32:19.533Z" },
{ url = "https://files.pythonhosted.org/packages/14/26/93a9fa02c6f257df54d7570dfe8011995138118d11939a4ecd82cb849613/wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", size = 91738, upload-time = "2023-11-09T06:32:20.989Z" },
{ url = "https://files.pythonhosted.org/packages/a2/5b/4660897233eb2c8c4de3dc7cefed114c61bacb3c28327e64150dc44ee2f6/wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", size = 35568, upload-time = "2023-11-09T06:32:22.715Z" },
{ url = "https://files.pythonhosted.org/packages/5c/cc/8297f9658506b224aa4bd71906447dea6bb0ba629861a758c28f67428b91/wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", size = 37653, upload-time = "2023-11-09T06:32:24.533Z" },
{ url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362, upload-time = "2023-11-09T06:33:28.271Z" },
]


@@ -1478,6 +1478,7 @@ ENABLE_CLEAN_UNUSED_DATASETS_TASK=false
ENABLE_CREATE_TIDB_SERVERLESS_TASK=false
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK=false
ENABLE_CLEAN_MESSAGES=false
ENABLE_WORKFLOW_RUN_CLEANUP_TASK=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true


@@ -662,6 +662,7 @@ x-shared-env: &shared-api-worker-env
ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false}
ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK: ${ENABLE_UPDATE_TIDB_SERVERLESS_STATUS_TASK:-false}
ENABLE_CLEAN_MESSAGES: ${ENABLE_CLEAN_MESSAGES:-false}
ENABLE_WORKFLOW_RUN_CLEANUP_TASK: ${ENABLE_WORKFLOW_RUN_CLEANUP_TASK:-false}
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK: ${ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK:-false}
ENABLE_DATASETS_QUEUE_MONITOR: ${ENABLE_DATASETS_QUEUE_MONITOR:-false}
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK: ${ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK:-true}


@@ -31,6 +31,8 @@ NEXT_PUBLIC_UPLOAD_IMAGE_AS_ICON=false
# The timeout for text generation, in milliseconds
NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=60000
# Used by web/docker/entrypoint.sh to overwrite/export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS at container startup (Docker only)
TEXT_GENERATION_TIMEOUT_MS=60000
# CSP https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP
NEXT_PUBLIC_CSP_WHITELIST=

web/.nvmrc Normal file

@@ -0,0 +1 @@
22.21.1


@@ -1,8 +1,10 @@
import type { Preview } from '@storybook/react'
import type { Resource } from 'i18next'
import { withThemeByDataAttribute } from '@storybook/addon-themes'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
import { ToastProvider } from '../app/components/base/toast'
import I18N from '../app/components/i18n'
import { I18nClientProvider as I18N } from '../app/components/provider/i18n'
import commonEnUS from '../i18n/en-US/common.json'
import '../app/styles/globals.css'
import '../app/styles/markdown.scss'
@@ -16,6 +18,14 @@ const queryClient = new QueryClient({
},
})
const storyResources: Resource = {
'en-US': {
// Preload the most common namespace to avoid missing keys during initial render;
// other namespaces will be loaded on demand via resourcesToBackend.
common: commonEnUS as unknown as Record<string, unknown>,
},
}
export const decorators = [
withThemeByDataAttribute({
themes: {
@@ -28,7 +38,7 @@ export const decorators = [
(Story) => {
return (
<QueryClientProvider client={queryClient}>
<I18N locale="en-US">
<I18N locale="en-US" resource={storyResources}>
<ToastProvider>
<Story />
</ToastProvider>
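The `resource` prop above preloads the `common` namespace; the comment refers to i18next's `i18next-resources-to-backend` plugin for loading the remaining namespaces lazily. A rough sketch of that wiring, for orientation only (the real `I18nClientProvider` is not shown in this diff, and the import path for the locale files is an assumption):

```ts
import i18next from 'i18next'
import resourcesToBackend from 'i18next-resources-to-backend'
import { initReactI18next } from 'react-i18next'

// Namespaces passed in via `resource` (e.g. `common`) are available immediately;
// any other namespace is resolved through this dynamic import on first use.
i18next
  .use(resourcesToBackend((language: string, namespace: string) =>
    import(`../i18n/${language}/${namespace}.json`)))
  .use(initReactI18next)
  .init({
    lng: 'en-US',
    fallbackLng: 'en-US',
    partialBundledLanguages: true, // keep bundled resources and lazy-loaded ones side by side
  })
```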


@@ -1,5 +1,5 @@
# base image
FROM node:22-alpine3.21 AS base
FROM node:22.21.1-alpine3.23 AS base
LABEL maintainer="takatost@gmail.com"
# if you located in China, you can use aliyun mirror to speed up


@@ -11,6 +11,16 @@ Before starting the web frontend service, please make sure the following environ
- [Node.js](https://nodejs.org) >= v22.11.x
- [pnpm](https://pnpm.io) v10.x
> [!TIP]
> It is recommended to install and enable Corepack to manage package manager versions automatically:
>
> ```bash
> npm install -g corepack
> corepack enable
> ```
>
> Learn more: [Corepack](https://github.com/nodejs/corepack#readme)
First, install the dependencies:
```bash


@@ -2,11 +2,11 @@ import Marketplace from '@/app/components/plugins/marketplace'
import PluginPage from '@/app/components/plugins/plugin-page'
import PluginsPanel from '@/app/components/plugins/plugin-page/plugins-panel'
const PluginList = async () => {
const PluginList = () => {
return (
<PluginPage
plugins={<PluginsPanel />}
marketplace={<Marketplace pluginTypeSwitchClassName="top-[60px]" showSearchParams={false} />}
marketplace={<Marketplace pluginTypeSwitchClassName="top-[60px]" />}
/>
)
}


@@ -26,6 +26,7 @@ import { NEED_REFRESH_APP_LIST_KEY } from '@/config'
import { useAppContext } from '@/context/app-context'
import { useProviderContext } from '@/context/provider-context'
import { copyApp, deleteApp, exportAppConfig, updateAppInfo } from '@/service/apps'
import { useInvalidateAppList } from '@/service/use-apps'
import { fetchWorkflowDraft } from '@/service/workflow'
import { AppModeEnum } from '@/types/app'
import { getRedirection } from '@/utils/app-redirection'
@@ -66,6 +67,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
const { onPlanInfoChanged } = useProviderContext()
const appDetail = useAppStore(state => state.appDetail)
const setAppDetail = useAppStore(state => state.setAppDetail)
const invalidateAppList = useInvalidateAppList()
const [open, setOpen] = useState(openState)
const [showEditModal, setShowEditModal] = useState(false)
const [showDuplicateModal, setShowDuplicateModal] = useState(false)
@@ -191,6 +193,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
try {
await deleteApp(appDetail.id)
notify({ type: 'success', message: t('appDeleted', { ns: 'app' }) })
invalidateAppList()
onPlanInfoChanged()
setAppDetail()
replace('/apps')
@@ -202,7 +205,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx
})
}
setShowConfirmDelete(false)
}, [appDetail, notify, onPlanInfoChanged, replace, setAppDetail, t])
}, [appDetail, invalidateAppList, notify, onPlanInfoChanged, replace, setAppDetail, t])
const { isCurrentWorkspaceEditor } = useAppContext()
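The newly added `invalidateAppList()` call makes the deleted app disappear from the cached app list without a manual refresh. The hook itself lives in `@/service/use-apps` and is not part of this diff; a typical TanStack Query wrapper would look roughly like this (assumed sketch — the query key is a guess):

```ts
import { useQueryClient } from '@tanstack/react-query'

// Assumed shape of the hook imported from '@/service/use-apps'.
export const useInvalidateAppList = () => {
  const queryClient = useQueryClient()
  return () => {
    // Mark all cached app-list pages as stale so the next render refetches them.
    queryClient.invalidateQueries({ queryKey: ['appList'] })
  }
}
```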


@@ -83,7 +83,7 @@ const ConfigModal: FC<IConfigModalProps> = ({
if (!isJsonObject || !tempPayload.json_schema)
return ''
try {
return JSON.stringify(JSON.parse(tempPayload.json_schema), null, 2)
return tempPayload.json_schema
}
catch {
return ''


@@ -0,0 +1,228 @@
/**
* Tests for race condition prevention logic in chat message loading.
* These tests verify the core algorithms used in fetchData and loadMoreMessages
* to prevent race conditions, infinite loops, and stale state issues.
* See GitHub issue #30259 for context.
*/
// Test the race condition prevention logic in isolation
describe('Chat Message Loading Race Condition Prevention', () => {
beforeEach(() => {
vi.clearAllMocks()
vi.useFakeTimers()
})
afterEach(() => {
vi.useRealTimers()
})
describe('Request Deduplication', () => {
it('should deduplicate messages with same IDs when merging responses', async () => {
// Simulate the deduplication logic used in setAllChatItems
const existingItems = [
{ id: 'msg-1', isAnswer: false },
{ id: 'msg-2', isAnswer: true },
]
const newItems = [
{ id: 'msg-2', isAnswer: true }, // duplicate
{ id: 'msg-3', isAnswer: false }, // new
]
const existingIds = new Set(existingItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
const mergedItems = [...uniqueNewItems, ...existingItems]
expect(uniqueNewItems).toHaveLength(1)
expect(uniqueNewItems[0].id).toBe('msg-3')
expect(mergedItems).toHaveLength(3)
})
})
describe('Retry Counter Logic', () => {
const MAX_RETRY_COUNT = 3
it('should increment retry counter when no unique items found', () => {
const state = { retryCount: 0 }
const prevItemsLength = 5
// Simulate the retry logic from loadMoreMessages
const uniqueNewItemsLength = 0
if (uniqueNewItemsLength === 0) {
if (state.retryCount < MAX_RETRY_COUNT && prevItemsLength > 1) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
expect(state.retryCount).toBe(1)
})
it('should reset retry counter after MAX_RETRY_COUNT attempts', () => {
const state = { retryCount: MAX_RETRY_COUNT }
const prevItemsLength = 5
const uniqueNewItemsLength = 0
if (uniqueNewItemsLength === 0) {
if (state.retryCount < MAX_RETRY_COUNT && prevItemsLength > 1) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
expect(state.retryCount).toBe(0)
})
it('should reset retry counter when unique items are found', () => {
const state = { retryCount: 2 }
// Simulate finding unique items (length > 0)
const processRetry = (uniqueCount: number) => {
if (uniqueCount === 0) {
state.retryCount++
}
else {
state.retryCount = 0
}
}
processRetry(3) // Found 3 unique items
expect(state.retryCount).toBe(0)
})
})
describe('Throttling Logic', () => {
const SCROLL_DEBOUNCE_MS = 200
it('should throttle requests within debounce window', () => {
const state = { lastLoadTime: 0 }
const results: boolean[] = []
const tryRequest = (now: number): boolean => {
if (now - state.lastLoadTime >= SCROLL_DEBOUNCE_MS) {
state.lastLoadTime = now
return true
}
return false
}
// First request - should pass
results.push(tryRequest(1000))
// Second request within debounce - should be blocked
results.push(tryRequest(1100))
// Third request after debounce - should pass
results.push(tryRequest(1300))
expect(results).toEqual([true, false, true])
})
})
describe('AbortController Cancellation', () => {
it('should abort previous request when new request starts', () => {
const state: { controller: AbortController | null } = { controller: null }
const abortedSignals: boolean[] = []
// First request
const controller1 = new AbortController()
state.controller = controller1
// Second request - should abort first
if (state.controller) {
state.controller.abort()
abortedSignals.push(state.controller.signal.aborted)
}
const controller2 = new AbortController()
state.controller = controller2
expect(abortedSignals).toEqual([true])
expect(controller1.signal.aborted).toBe(true)
expect(controller2.signal.aborted).toBe(false)
})
})
describe('Stale Response Detection', () => {
it('should ignore responses from outdated requests', () => {
const state = { requestId: 0 }
const processedResponses: number[] = []
// Simulate concurrent requests - each gets its own captured ID
const request1Id = ++state.requestId
const request2Id = ++state.requestId
// Request 2 completes first (current requestId is 2)
if (request2Id === state.requestId) {
processedResponses.push(request2Id)
}
// Request 1 completes later (stale - requestId is still 2)
if (request1Id === state.requestId) {
processedResponses.push(request1Id)
}
expect(processedResponses).toEqual([2])
expect(processedResponses).not.toContain(1)
})
})
describe('Pagination Anchor Management', () => {
it('should track oldest answer ID for pagination', () => {
let oldestAnswerIdRef: string | undefined
const chatItems = [
{ id: 'question-1', isAnswer: false },
{ id: 'answer-1', isAnswer: true },
{ id: 'question-2', isAnswer: false },
{ id: 'answer-2', isAnswer: true },
]
// Update pagination anchor with oldest answer ID
const answerItems = chatItems.filter(item => item.isAnswer)
const oldestAnswer = answerItems[answerItems.length - 1]
if (oldestAnswer?.id) {
oldestAnswerIdRef = oldestAnswer.id
}
expect(oldestAnswerIdRef).toBe('answer-2')
})
it('should use pagination anchor in subsequent requests', () => {
const oldestAnswerIdRef = 'answer-123'
const params: { conversation_id: string, limit: number, first_id?: string } = {
conversation_id: 'conv-1',
limit: 10,
}
if (oldestAnswerIdRef) {
params.first_id = oldestAnswerIdRef
}
expect(params.first_id).toBe('answer-123')
})
})
})
describe('Functional State Update Pattern', () => {
it('should use functional update to avoid stale closures', () => {
// Simulate the functional update pattern used in setAllChatItems
let state = [{ id: '1' }, { id: '2' }]
const newItems = [{ id: '3' }, { id: '2' }] // id '2' is duplicate
// Functional update pattern
const updater = (prevItems: { id: string }[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
return [...uniqueNewItems, ...prevItems]
}
state = updater(state)
expect(state).toHaveLength(3)
expect(state.map(i => i.id)).toEqual(['3', '1', '2'])
})
})


@@ -209,7 +209,6 @@ type IDetailPanel = {
function DetailPanel({ detail, onFeedback }: IDetailPanel) {
const MIN_ITEMS_FOR_SCROLL_LOADING = 8
const SCROLL_THRESHOLD_PX = 50
const SCROLL_DEBOUNCE_MS = 200
const { userProfile: { timezone } } = useAppContext()
const { formatTime } = useTimestamp()
@@ -228,69 +227,103 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
const [hasMore, setHasMore] = useState(true)
const [varValues, setVarValues] = useState<Record<string, string>>({})
const isLoadingRef = useRef(false)
const abortControllerRef = useRef<AbortController | null>(null)
const requestIdRef = useRef(0)
const lastLoadTimeRef = useRef(0)
const retryCountRef = useRef(0)
const oldestAnswerIdRef = useRef<string | undefined>(undefined)
const MAX_RETRY_COUNT = 3
const [allChatItems, setAllChatItems] = useState<IChatItem[]>([])
const [chatItemTree, setChatItemTree] = useState<ChatItemInTree[]>([])
const [threadChatItems, setThreadChatItems] = useState<IChatItem[]>([])
const fetchData = useCallback(async () => {
if (isLoadingRef.current)
if (isLoadingRef.current || !hasMore)
return
// Cancel any in-flight request
if (abortControllerRef.current) {
abortControllerRef.current.abort()
}
const controller = new AbortController()
abortControllerRef.current = controller
const currentRequestId = ++requestIdRef.current
try {
isLoadingRef.current = true
if (!hasMore)
return
const params: ChatMessagesRequest = {
conversation_id: detail.id,
limit: 10,
}
// Use the oldest answer item ID for pagination
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswerItem = answerItems[answerItems.length - 1]
if (oldestAnswerItem?.id)
params.first_id = oldestAnswerItem.id
// Use ref for pagination anchor to avoid stale closure issues
if (oldestAnswerIdRef.current)
params.first_id = oldestAnswerIdRef.current
const messageRes = await fetchChatMessages({
url: `/apps/${appDetail?.id}/chat-messages`,
params,
})
// Ignore stale responses
if (currentRequestId !== requestIdRef.current || controller.signal.aborted)
return
if (messageRes.data.length > 0) {
const varValues = messageRes.data.at(-1)!.inputs
setVarValues(varValues)
}
setHasMore(messageRes.has_more)
const newAllChatItems = [
...getFormattedChatList(messageRes.data, detail.id, timezone!, t('dateTimeFormat', { ns: 'appLog' }) as string),
...allChatItems,
]
setAllChatItems(newAllChatItems)
const newItems = getFormattedChatList(messageRes.data, detail.id, timezone!, t('dateTimeFormat', { ns: 'appLog' }) as string)
let tree = buildChatItemTree(newAllChatItems)
if (messageRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
const lastMessageId = newAllChatItems.length > 0 ? newAllChatItems[newAllChatItems.length - 1].id : undefined
setThreadChatItems(getThreadMessages(tree, lastMessageId))
// Use functional update to avoid stale state issues
setAllChatItems((prevItems: IChatItem[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
return [...uniqueNewItems, ...prevItems]
})
}
catch (err) {
catch (err: unknown) {
if (err instanceof Error && err.name === 'AbortError')
return
console.error('fetchData execution failed:', err)
}
finally {
isLoadingRef.current = false
if (abortControllerRef.current === controller)
abortControllerRef.current = null
}
}, [allChatItems, detail.id, hasMore, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
}, [detail.id, hasMore, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
// Derive chatItemTree, threadChatItems, and oldestAnswerIdRef from allChatItems
useEffect(() => {
if (allChatItems.length === 0)
return
let tree = buildChatItemTree(allChatItems)
if (!hasMore && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
const lastMessageId = allChatItems.length > 0 ? allChatItems[allChatItems.length - 1].id : undefined
setThreadChatItems(getThreadMessages(tree, lastMessageId))
// Update pagination anchor ref with the oldest answer ID
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswer = answerItems[answerItems.length - 1]
if (oldestAnswer?.id)
oldestAnswerIdRef.current = oldestAnswer.id
}, [allChatItems, hasMore, detail?.model_config?.configs?.introduction])
const switchSibling = useCallback((siblingMessageId: string) => {
const newThreadChatItems = getThreadMessages(chatItemTree, siblingMessageId)
@@ -397,6 +430,12 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (isLoading || !hasMore || !appDetail?.id || !detail.id)
return
// Throttle using ref to persist across re-renders
const now = Date.now()
if (now - lastLoadTimeRef.current < SCROLL_DEBOUNCE_MS)
return
lastLoadTimeRef.current = now
setIsLoading(true)
try {
@@ -405,15 +444,9 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
limit: 10,
}
// Use the earliest response item as the first_id
const answerItems = allChatItems.filter(item => item.isAnswer)
const oldestAnswerItem = answerItems[answerItems.length - 1]
if (oldestAnswerItem?.id) {
params.first_id = oldestAnswerItem.id
}
else if (allChatItems.length > 0 && allChatItems[0]?.id) {
const firstId = allChatItems[0].id.replace('question-', '').replace('answer-', '')
params.first_id = firstId
// Use ref for pagination anchor to avoid stale closure issues
if (oldestAnswerIdRef.current) {
params.first_id = oldestAnswerIdRef.current
}
const messageRes = await fetchChatMessages({
@@ -423,6 +456,7 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (!messageRes.data || messageRes.data.length === 0) {
setHasMore(false)
retryCountRef.current = 0
return
}
@@ -440,91 +474,36 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
t('dateTimeFormat', { ns: 'appLog' }) as string,
)
// Check for duplicate messages
const existingIds = new Set(allChatItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
// Use functional update to get latest state and avoid stale closures
setAllChatItems((prevItems: IChatItem[]) => {
const existingIds = new Set(prevItems.map(item => item.id))
const uniqueNewItems = newItems.filter(item => !existingIds.has(item.id))
if (uniqueNewItems.length === 0) {
if (allChatItems.length > 1) {
const nextId = allChatItems[1].id.replace('question-', '').replace('answer-', '')
const retryParams = {
...params,
first_id: nextId,
// If no unique items and we haven't exceeded retry limit, signal retry needed
if (uniqueNewItems.length === 0) {
if (retryCountRef.current < MAX_RETRY_COUNT && prevItems.length > 1) {
retryCountRef.current++
return prevItems
}
const retryRes = await fetchChatMessages({
url: `/apps/${appDetail.id}/chat-messages`,
params: retryParams,
})
if (retryRes.data && retryRes.data.length > 0) {
const retryItems = getFormattedChatList(
retryRes.data,
detail.id,
timezone!,
t('dateTimeFormat', { ns: 'appLog' }) as string,
)
const retryUniqueItems = retryItems.filter(item => !existingIds.has(item.id))
if (retryUniqueItems.length > 0) {
const newAllChatItems = [
...retryUniqueItems,
...allChatItems,
]
setAllChatItems(newAllChatItems)
let tree = buildChatItemTree(newAllChatItems)
if (retryRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
setHasMore(retryRes.has_more)
setThreadChatItems(getThreadMessages(tree, newAllChatItems.at(-1)?.id))
return
}
else {
retryCountRef.current = 0
return prevItems
}
}
}
const newAllChatItems = [
...uniqueNewItems,
...allChatItems,
]
setAllChatItems(newAllChatItems)
let tree = buildChatItemTree(newAllChatItems)
if (messageRes.has_more === false && detail?.model_config?.configs?.introduction) {
tree = [{
id: 'introduction',
isAnswer: true,
isOpeningStatement: true,
content: detail?.model_config?.configs?.introduction ?? 'hello',
feedbackDisabled: true,
children: tree,
}]
}
setChatItemTree(tree)
setThreadChatItems(getThreadMessages(tree, newAllChatItems.at(-1)?.id))
retryCountRef.current = 0
return [...uniqueNewItems, ...prevItems]
})
}
catch (error) {
console.error(error)
setHasMore(false)
retryCountRef.current = 0
}
finally {
setIsLoading(false)
}
}, [allChatItems, detail.id, hasMore, isLoading, timezone, t, appDetail])
}, [detail.id, hasMore, isLoading, timezone, t, appDetail, detail?.model_config?.configs?.introduction])
useEffect(() => {
const scrollableDiv = document.getElementById('scrollableDiv')
@@ -556,24 +535,11 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
if (!scrollContainer)
return
let lastLoadTime = 0
const throttleDelay = 200
const handleScroll = () => {
const currentScrollTop = scrollContainer!.scrollTop
const scrollHeight = scrollContainer!.scrollHeight
const clientHeight = scrollContainer!.clientHeight
const isNearTop = currentScrollTop < 30
const distanceFromTop = currentScrollTop
const distanceFromBottom = scrollHeight - currentScrollTop - clientHeight
const now = Date.now()
const isNearTop = distanceFromTop < 30
// eslint-disable-next-line sonarjs/no-unused-vars
const _distanceFromBottom = distanceFromBottom < 30
if (isNearTop && hasMore && !isLoading && (now - lastLoadTime > throttleDelay)) {
lastLoadTime = now
if (isNearTop && hasMore && !isLoading) {
loadMoreMessages()
}
}
@@ -619,36 +585,6 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
return () => cancelAnimationFrame(raf)
}, [])
// Add scroll listener to ensure loading is triggered
useEffect(() => {
if (threadChatItems.length >= MIN_ITEMS_FOR_SCROLL_LOADING && hasMore) {
const scrollableDiv = document.getElementById('scrollableDiv')
if (scrollableDiv) {
let loadingTimeout: NodeJS.Timeout | null = null
const handleScroll = () => {
const { scrollTop } = scrollableDiv
// Trigger loading when scrolling near the top
if (scrollTop < SCROLL_THRESHOLD_PX && !isLoadingRef.current) {
if (loadingTimeout)
clearTimeout(loadingTimeout)
loadingTimeout = setTimeout(fetchData, SCROLL_DEBOUNCE_MS) // 200ms debounce
}
}
scrollableDiv.addEventListener('scroll', handleScroll)
return () => {
scrollableDiv.removeEventListener('scroll', handleScroll)
if (loadingTimeout)
clearTimeout(loadingTimeout)
}
}
}
}, [threadChatItems.length, hasMore, fetchData])
return (
<div ref={ref} className="flex h-full flex-col rounded-xl border-[0.5px] border-components-panel-border">
{/* Panel Header */}


@@ -10,6 +10,7 @@ const mockReplace = vi.fn()
const mockRouter = { replace: mockReplace }
vi.mock('next/navigation', () => ({
useRouter: () => mockRouter,
useSearchParams: () => new URLSearchParams(''),
}))
// Mock app context


@@ -12,6 +12,7 @@ import { useDebounceFn } from 'ahooks'
import dynamic from 'next/dynamic'
import {
useRouter,
useSearchParams,
} from 'next/navigation'
import { parseAsString, useQueryState } from 'nuqs'
import { useCallback, useEffect, useRef, useState } from 'react'
@@ -28,6 +29,7 @@ import { CheckModal } from '@/hooks/use-pay'
import { useInfiniteAppList } from '@/service/use-apps'
import { AppModeEnum } from '@/types/app'
import { cn } from '@/utils/classnames'
import { isServer } from '@/utils/client'
import AppCard from './app-card'
import { AppCardSkeleton } from './app-card-skeleton'
import Empty from './empty'
@@ -36,6 +38,16 @@ import useAppsQueryState from './hooks/use-apps-query-state'
import { useDSLDragDrop } from './hooks/use-dsl-drag-drop'
import NewAppCard from './new-app-card'
// Define valid tabs at module scope to avoid re-creation on each render and stale closures
const validTabs = new Set<string | AppModeEnum>([
'all',
AppModeEnum.WORKFLOW,
AppModeEnum.ADVANCED_CHAT,
AppModeEnum.CHAT,
AppModeEnum.AGENT_CHAT,
AppModeEnum.COMPLETION,
])
const TagManagementModal = dynamic(() => import('@/app/components/base/tag-management'), {
ssr: false,
})
@@ -47,12 +59,41 @@ const List = () => {
const { t } = useTranslation()
const { systemFeatures } = useGlobalPublicStore()
const router = useRouter()
const searchParams = useSearchParams()
const { isCurrentWorkspaceEditor, isCurrentWorkspaceDatasetOperator, isLoadingCurrentWorkspace } = useAppContext()
const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
const [activeTab, setActiveTab] = useQueryState(
'category',
parseAsString.withDefault('all').withOptions({ history: 'push' }),
)
// valid tabs for apps list; anything else should fall back to 'all'
// 1) Normalize legacy/incorrect query params like ?mode=discover -> ?category=all
useEffect(() => {
// avoid running on server
if (isServer)
return
const mode = searchParams.get('mode')
if (!mode)
return
const url = new URL(window.location.href)
url.searchParams.delete('mode')
if (validTabs.has(mode)) {
// migrate to category key
url.searchParams.set('category', mode)
}
else {
url.searchParams.set('category', 'all')
}
router.replace(url.pathname + url.search)
}, [router, searchParams])
// 2) If category has an invalid value (e.g., 'discover'), reset to 'all'
useEffect(() => {
if (!validTabs.has(activeTab))
setActiveTab('all')
}, [activeTab, setActiveTab])
const { query: { tagIDs = [], keywords = '', isCreatedByMe: queryIsCreatedByMe = false }, setQuery } = useAppsQueryState()
const [isCreatedByMe, setIsCreatedByMe] = useState(queryIsCreatedByMe)
const [tagFilterValue, setTagFilterValue] = useState<string[]>(tagIDs)
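Taken together, the two effects above first migrate a legacy `?mode=` parameter onto the `category` key and then reset any value that is not a known tab. Restated as a pure function for illustration (not part of the diff, same decision logic):

```ts
// Illustrative only: mirrors the normalization performed by the two effects above.
const normalizeCategoryParam = (search: URLSearchParams, tabs: Set<string>): string => {
  const mode = search.get('mode')
  // 1) A legacy ?mode=... value takes precedence and is migrated to the category key.
  if (mode)
    return tabs.has(mode) ? mode : 'all'
  // 2) An unknown ?category=... value (e.g. 'discover') falls back to 'all'.
  const category = search.get('category') ?? 'all'
  return tabs.has(category) ? category : 'all'
}

// normalizeCategoryParam(new URLSearchParams('mode=workflow'), new Set(['all', 'workflow']))  // -> 'workflow'
// normalizeCategoryParam(new URLSearchParams('mode=discover'), new Set(['all', 'workflow']))  // -> 'all'
```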


@@ -54,7 +54,7 @@ const pageNameEnrichmentPlugin = (): amplitude.Types.EnrichmentPlugin => {
}
const AmplitudeProvider: FC<IAmplitudeProps> = ({
sessionReplaySampleRate = 1,
sessionReplaySampleRate = 0.5,
}) => {
useEffect(() => {
// Only enable in Saas edition with valid API key

View File

@@ -37,7 +37,7 @@ export const getProcessedInputs = (inputs: Record<string, any>, inputsForm: Inpu
return
}
if (!inputValue)
if (inputValue == null)
return
if (item.type === InputVarType.singleFile) {
@@ -52,6 +52,20 @@ export const getProcessedInputs = (inputs: Record<string, any>, inputsForm: Inpu
else
processedInputs[item.variable] = getProcessedFiles(inputValue)
}
else if (item.type === InputVarType.jsonObject) {
// Prefer sending an object if the user entered valid JSON; otherwise keep the raw string.
try {
const v = typeof inputValue === 'string' ? JSON.parse(inputValue) : inputValue
if (v && typeof v === 'object' && !Array.isArray(v))
processedInputs[item.variable] = v
else
processedInputs[item.variable] = inputValue
}
catch {
// keep original string; backend will parse/validate
processedInputs[item.variable] = inputValue
}
}
})
return processedInputs
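To make the new `jsonObject` branch concrete, here is what it does to a couple of inputs (illustrative call; the form item is simplified to the fields the branch actually reads):

```ts
// A jsonObject value holding valid JSON text is parsed into an object before submission;
// anything that fails to parse (or is not a plain object) is passed through for the backend to validate.
const inputsForm = [{ variable: 'settings', type: InputVarType.jsonObject }] as any

getProcessedInputs({ settings: '{"temperature": 0.2}' }, inputsForm)
// -> { settings: { temperature: 0.2 } }

getProcessedInputs({ settings: '{not valid json' }, inputsForm)
// -> { settings: '{not valid json' }
```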


@@ -11,6 +11,7 @@ import DifyLogo from '@/app/components/base/logo/dify-logo'
import Tooltip from '@/app/components/base/tooltip'
import { useGlobalPublicStore } from '@/context/global-public-context'
import { cn } from '@/utils/classnames'
import { isClient } from '@/utils/client'
import {
useEmbeddedChatbotContext,
} from '../context'
@@ -40,7 +41,6 @@ const Header: FC<IHeaderProps> = ({
allInputsHidden,
} = useEmbeddedChatbotContext()
const isClient = typeof window !== 'undefined'
const isIframe = isClient ? window.self !== window.top : false
const [parentOrigin, setParentOrigin] = useState('')
const [showToggleExpandButton, setShowToggleExpandButton] = useState(false)
@@ -66,7 +66,9 @@ const Header: FC<IHeaderProps> = ({
const listener = (event: MessageEvent) => handleMessageReceived(event)
window.addEventListener('message', listener)
window.parent.postMessage({ type: 'dify-chatbot-iframe-ready' }, '*')
// Security: Use document.referrer to get parent origin
const targetOrigin = document.referrer ? new URL(document.referrer).origin : '*'
window.parent.postMessage({ type: 'dify-chatbot-iframe-ready' }, targetOrigin)
return () => window.removeEventListener('message', listener)
}, [isIframe, handleMessageReceived])
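Posting the ready message to `document.referrer`'s origin (instead of `'*'`) ensures only the page that actually embedded the chatbot receives it. The embedding page is not part of this diff, but a matching parent-side listener would be just as strict about origins — an illustrative sketch (the chatbot origin, iframe id, and follow-up message type are assumptions):

```ts
// Hypothetical parent-page counterpart to the handshake above.
const CHATBOT_ORIGIN = 'https://udify.app' // assumed origin of the embedded chatbot iframe

window.addEventListener('message', (event: MessageEvent) => {
  // Ignore messages that do not come from the chatbot iframe's origin.
  if (event.origin !== CHATBOT_ORIGIN)
    return
  if (event.data?.type === 'dify-chatbot-iframe-ready') {
    const iframe = document.getElementById('dify-chatbot-iframe') as HTMLIFrameElement | null
    // Reply with an explicit target origin as well, never '*'.
    iframe?.contentWindow?.postMessage({ type: 'dify-chatbot-config' }, CHATBOT_ORIGIN)
  }
})
```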

File diff suppressed because it is too large


@@ -1,47 +1,29 @@
import type { FC } from 'react'
import type {
DataSourceInfo,
FullDocumentDetail,
IndexingStatusResponse,
LegacyDataSourceInfo,
ProcessRuleResponse,
} from '@/models/datasets'
import type { FullDocumentDetail } from '@/models/datasets'
import type { RETRIEVE_METHOD } from '@/types/app'
import {
RiArrowRightLine,
RiCheckboxCircleFill,
RiErrorWarningFill,
RiLoader2Fill,
RiTerminalBoxLine,
} from '@remixicon/react'
import Image from 'next/image'
import Link from 'next/link'
import { useRouter } from 'next/navigation'
import * as React from 'react'
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Divider from '@/app/components/base/divider'
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
import NotionIcon from '@/app/components/base/notion-icon'
import Tooltip from '@/app/components/base/tooltip'
import PriorityLabel from '@/app/components/billing/priority-label'
import { Plan } from '@/app/components/billing/type'
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import { useProviderContext } from '@/context/provider-context'
import { useDatasetApiAccessUrl } from '@/hooks/use-api-access-url'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import { fetchIndexingStatusBatch as doFetchIndexingStatus } from '@/service/datasets'
import { useProcessRule } from '@/service/knowledge/use-dataset'
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
import { RETRIEVE_METHOD } from '@/types/app'
import { sleep } from '@/utils'
import { cn } from '@/utils/classnames'
import DocumentFileIcon from '../../common/document-file-icon'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
import IndexingProgressItem from './indexing-progress-item'
import RuleDetail from './rule-detail'
import UpgradeBanner from './upgrade-banner'
import { useIndexingStatusPolling } from './use-indexing-status-polling'
import { createDocumentLookup } from './utils'
type Props = {
type EmbeddingProcessProps = {
datasetId: string
batchId: string
documents?: FullDocumentDetail[]
@@ -49,333 +31,121 @@ type Props = {
retrievalMethod?: RETRIEVE_METHOD
}
const RuleDetail: FC<{
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: RETRIEVE_METHOD
}> = ({ sourceData, indexingType, retrievalMethod }) => {
// Status header component
const StatusHeader: FC<{ isEmbedding: boolean, isCompleted: boolean }> = ({
isEmbedding,
isCompleted,
}) => {
const { t } = useTranslation()
const segmentationRuleMap = {
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
}
const getRuleName = (key: string) => {
if (key === 'remove_extra_spaces')
return t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' })
if (key === 'remove_urls_emails')
return t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' })
if (key === 'remove_stopwords')
return t('stepTwo.removeStopwords', { ns: 'datasetCreation' })
}
const isNumber = (value: unknown) => {
return typeof value === 'number'
}
const getValue = useCallback((field: string) => {
let value: string | number | undefined = '-'
const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens)
? sourceData.rules.segmentation.max_tokens
: value
const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens)
? sourceData.rules.subchunk_segmentation.max_tokens
: value
switch (field) {
case 'mode':
value = !sourceData?.mode
? value
: sourceData.mode === ProcessMode.general
? (t('embedding.custom', { ns: 'datasetDocuments' }) as string)
: `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${sourceData?.rules?.parent_mode === 'paragraph'
? t('parentMode.paragraph', { ns: 'dataset' })
: t('parentMode.fullDoc', { ns: 'dataset' })}`
break
case 'segmentLength':
value = !sourceData?.mode
? value
: sourceData.mode === ProcessMode.general
? maxTokens
: `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
break
default:
value = !sourceData?.mode
? value
: sourceData?.rules?.pre_processing_rules?.filter(rule =>
rule.enabled).map(rule => getRuleName(rule.id)).join(',')
break
}
return value
}, [sourceData])
return (
<div className="flex flex-col gap-1">
{Object.keys(segmentationRuleMap).map((field) => {
return (
<FieldInfo
key={field}
label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
displayedValue={String(getValue(field))}
/>
)
})}
<FieldInfo
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
displayedValue={t(`stepTwo.${indexingType === IndexingType.ECONOMICAL ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' }) as string}
valueIcon={(
<Image
className="size-4"
src={
indexingType === IndexingType.ECONOMICAL
? indexMethodIcon.economical
: indexMethodIcon.high_quality
}
alt=""
/>
)}
/>
<FieldInfo
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
// displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
displayedValue={t(`retrieval.${indexingType === IndexingType.ECONOMICAL ? 'keyword_search' : retrievalMethod ?? 'semantic_search'}.title`, { ns: 'dataset' })}
valueIcon={(
<Image
className="size-4"
src={
retrievalMethod === RETRIEVE_METHOD.fullText
? retrievalIcon.fullText
: retrievalMethod === RETRIEVE_METHOD.hybrid
? retrievalIcon.hybrid
: retrievalIcon.vector
}
alt=""
/>
)}
/>
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
{isEmbedding && (
<>
<RiLoader2Fill className="size-4 animate-spin" />
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
</>
)}
{isCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
</div>
)
}
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
// Action buttons component
const ActionButtons: FC<{
apiReferenceUrl: string
onNavToDocuments: () => void
}> = ({ apiReferenceUrl, onNavToDocuments }) => {
const { t } = useTranslation()
return (
<div className="mt-6 flex items-center gap-x-2 py-2">
<Link href={apiReferenceUrl} target="_blank" rel="noopener noreferrer">
<Button className="w-fit gap-x-0.5 px-3">
<RiTerminalBoxLine className="size-4" />
<span className="px-0.5">Access the API</span>
</Button>
</Link>
<Button
className="w-fit gap-x-0.5 px-3"
variant="primary"
onClick={onNavToDocuments}
>
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
</Button>
</div>
)
}
const EmbeddingProcess: FC<EmbeddingProcessProps> = ({
datasetId,
batchId,
documents = [],
indexingType,
retrievalMethod,
}) => {
const { enableBilling, plan } = useProviderContext()
const getFirstDocument = documents[0]
const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])
const fetchIndexingStatus = async () => {
const status = await doFetchIndexingStatus({ datasetId, batchId })
setIndexingStatusDetail(status.data)
return status.data
}
const [isStopQuery, setIsStopQuery] = useState(false)
const isStopQueryRef = useRef(isStopQuery)
useEffect(() => {
isStopQueryRef.current = isStopQuery
}, [isStopQuery])
const stopQueryStatus = () => {
setIsStopQuery(true)
}
const startQueryStatus = async () => {
if (isStopQueryRef.current)
return
try {
const indexingStatusBatchDetail = await fetchIndexingStatus()
const isCompleted = indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail.indexing_status))
if (isCompleted) {
stopQueryStatus()
return
}
await sleep(2500)
await startQueryStatus()
}
catch {
await sleep(2500)
await startQueryStatus()
}
}
useEffect(() => {
setIsStopQuery(false)
startQueryStatus()
return () => {
stopQueryStatus()
}
}, [])
// get rule
const { data: ruleDetail } = useProcessRule(getFirstDocument?.id)
const router = useRouter()
const invalidDocumentList = useInvalidDocumentList()
const navToDocumentList = () => {
const apiReferenceUrl = useDatasetApiAccessUrl()
// Polling hook for indexing status
const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({
datasetId,
batchId,
})
// Get process rule for the first document
const firstDocumentId = documents[0]?.id
const { data: ruleDetail } = useProcessRule(firstDocumentId)
// Document lookup utilities - memoized for performance
const documentLookup = useMemo(
() => createDocumentLookup(documents),
[documents],
)
const handleNavToDocuments = () => {
invalidDocumentList()
router.push(`/datasets/${datasetId}/documents`)
}
const apiReferenceUrl = useDatasetApiAccessUrl()
const isEmbedding = useMemo(() => {
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
}, [indexingStatusBatchDetail])
const isEmbeddingCompleted = useMemo(() => {
return indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status || ''))
}, [indexingStatusBatchDetail])
const getSourceName = (id: string) => {
const doc = documents.find(document => document.id === id)
return doc?.name
}
const getFileType = (name?: string) => name?.split('.').pop() || 'txt'
const getSourcePercent = (detail: IndexingStatusResponse) => {
const completedCount = detail.completed_segments || 0
const totalCount = detail.total_segments || 0
if (totalCount === 0)
return 0
const percent = Math.round(completedCount * 100 / totalCount)
return percent > 100 ? 100 : percent
}
const getSourceType = (id: string) => {
const doc = documents.find(document => document.id === id)
return doc?.data_source_type as DataSourceType
}
const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
}
const getIcon = (id: string) => {
const doc = documents.find(document => document.id === id)
const info = doc?.data_source_info
if (info && isLegacyDataSourceInfo(info))
return info.notion_page_icon
return undefined
}
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
const showUpgradeBanner = enableBilling && plan.type !== Plan.team
return (
<>
<div className="flex flex-col gap-y-3">
<div className="system-md-semibold-uppercase flex items-center gap-x-1 text-text-secondary">
{isEmbedding && (
<>
<RiLoader2Fill className="size-4 animate-spin" />
<span>{t('embedding.processing', { ns: 'datasetDocuments' })}</span>
</>
)}
{isEmbeddingCompleted && t('embedding.completed', { ns: 'datasetDocuments' })}
</div>
{
enableBilling && plan.type !== Plan.team && (
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
<ZapFast className="h-4 w-4 text-[#FB6514]" />
</div>
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
</div>
<UpgradeBtn loc="knowledge-speed-up" />
</div>
)
}
<StatusHeader isEmbedding={isEmbedding} isCompleted={isEmbeddingCompleted} />
{showUpgradeBanner && <UpgradeBanner />}
<div className="flex flex-col gap-0.5 pb-2">
{indexingStatusBatchDetail.map(indexingStatusDetail => (
<div
key={indexingStatusDetail.id}
className={cn(
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
)}
>
{isSourceEmbedding(indexingStatusDetail) && (
<div
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
style={{ width: `${getSourcePercent(indexingStatusDetail)}%` }}
/>
)}
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
<DocumentFileIcon
size="sm"
className="shrink-0"
name={getSourceName(indexingStatusDetail.id)}
extension={getFileType(getSourceName(indexingStatusDetail.id))}
/>
)}
{getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && (
<NotionIcon
className="shrink-0"
type="page"
src={getIcon(indexingStatusDetail.id)}
/>
)}
<div className="flex w-0 grow items-center gap-1" title={getSourceName(indexingStatusDetail.id)}>
<div className="system-xs-medium truncate text-text-secondary">
{getSourceName(indexingStatusDetail.id)}
</div>
{
enableBilling && (
<PriorityLabel className="ml-0" />
)
}
</div>
{isSourceEmbedding(indexingStatusDetail) && (
<div className="shrink-0 text-xs text-text-secondary">{`${getSourcePercent(indexingStatusDetail)}%`}</div>
)}
{indexingStatusDetail.indexing_status === 'error' && (
<Tooltip
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
offset={4}
popupContent={indexingStatusDetail.error}
>
<span>
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
</span>
</Tooltip>
)}
{indexingStatusDetail.indexing_status === 'completed' && (
<RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
)}
</div>
</div>
{statusList.map(detail => (
<IndexingProgressItem
key={detail.id}
detail={detail}
name={documentLookup.getName(detail.id)}
sourceType={documentLookup.getSourceType(detail.id)}
notionIcon={documentLookup.getNotionIcon(detail.id)}
enableBilling={enableBilling}
/>
))}
</div>
<Divider type="horizontal" className="my-0 bg-divider-subtle" />
<RuleDetail
sourceData={ruleDetail}
indexingType={indexingType}
retrievalMethod={retrievalMethod}
/>
</div>
<div className="mt-6 flex items-center gap-x-2 py-2">
<Link
href={apiReferenceUrl}
target="_blank"
rel="noopener noreferrer"
>
<Button
className="w-fit gap-x-0.5 px-3"
>
<RiTerminalBoxLine className="size-4" />
<span className="px-0.5">Access the API</span>
</Button>
</Link>
<Button
className="w-fit gap-x-0.5 px-3"
variant="primary"
onClick={navToDocumentList}
>
<span className="px-0.5">{t('stepThree.navTo', { ns: 'datasetCreation' })}</span>
<RiArrowRightLine className="size-4 stroke-current stroke-1" />
</Button>
</div>
<ActionButtons
apiReferenceUrl={apiReferenceUrl}
onNavToDocuments={handleNavToDocuments}
/>
</>
)
}


@@ -0,0 +1,120 @@
import type { FC } from 'react'
import type { IndexingStatusResponse } from '@/models/datasets'
import {
RiCheckboxCircleFill,
RiErrorWarningFill,
} from '@remixicon/react'
import NotionIcon from '@/app/components/base/notion-icon'
import Tooltip from '@/app/components/base/tooltip'
import PriorityLabel from '@/app/components/billing/priority-label'
import { DataSourceType } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import DocumentFileIcon from '../../common/document-file-icon'
import { getFileType, getSourcePercent, isSourceEmbedding } from './utils'
type IndexingProgressItemProps = {
detail: IndexingStatusResponse
name?: string
sourceType?: DataSourceType
notionIcon?: string
enableBilling?: boolean
}
// Status icon component for completed/error states
const StatusIcon: FC<{ status: string, error?: string }> = ({ status, error }) => {
if (status === 'completed')
return <RiCheckboxCircleFill className="size-4 shrink-0 text-text-success" />
if (status === 'error') {
return (
<Tooltip
popupClassName="px-4 py-[14px] max-w-60 body-xs-regular text-text-secondary border-[0.5px] border-components-panel-border rounded-xl"
offset={4}
popupContent={error}
>
<span>
<RiErrorWarningFill className="size-4 shrink-0 text-text-destructive" />
</span>
</Tooltip>
)
}
return null
}
// Source type icon component
const SourceTypeIcon: FC<{
sourceType?: DataSourceType
name?: string
notionIcon?: string
}> = ({ sourceType, name, notionIcon }) => {
if (sourceType === DataSourceType.FILE) {
return (
<DocumentFileIcon
size="sm"
className="shrink-0"
name={name}
extension={getFileType(name)}
/>
)
}
if (sourceType === DataSourceType.NOTION) {
return (
<NotionIcon
className="shrink-0"
type="page"
src={notionIcon}
/>
)
}
return null
}
const IndexingProgressItem: FC<IndexingProgressItemProps> = ({
detail,
name,
sourceType,
notionIcon,
enableBilling,
}) => {
const isEmbedding = isSourceEmbedding(detail)
const percent = getSourcePercent(detail)
const isError = detail.indexing_status === 'error'
return (
<div
className={cn(
'relative h-[26px] overflow-hidden rounded-md bg-components-progress-bar-bg',
isError && 'bg-state-destructive-hover-alt',
)}
>
{isEmbedding && (
<div
className="absolute left-0 top-0 h-full min-w-0.5 border-r-[2px] border-r-components-progress-bar-progress-highlight bg-components-progress-bar-progress"
style={{ width: `${percent}%` }}
/>
)}
<div className="z-[1] flex h-full items-center gap-1 pl-[6px] pr-2">
<SourceTypeIcon
sourceType={sourceType}
name={name}
notionIcon={notionIcon}
/>
<div className="flex w-0 grow items-center gap-1" title={name}>
<div className="system-xs-medium truncate text-text-secondary">
{name}
</div>
{enableBilling && <PriorityLabel className="ml-0" />}
</div>
{isEmbedding && (
<div className="shrink-0 text-xs text-text-secondary">{`${percent}%`}</div>
)}
<StatusIcon status={detail.indexing_status} error={detail.error} />
</div>
</div>
)
}
export default IndexingProgressItem


@@ -0,0 +1,133 @@
import type { FC } from 'react'
import type { ProcessRuleResponse } from '@/models/datasets'
import Image from 'next/image'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import { ProcessMode } from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { indexMethodIcon, retrievalIcon } from '../icons'
import { IndexingType } from '../step-two'
type RuleDetailProps = {
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: RETRIEVE_METHOD
}
// Lookup table for pre-processing rule names
const PRE_PROCESSING_RULE_KEYS = {
remove_extra_spaces: 'stepTwo.removeExtraSpaces',
remove_urls_emails: 'stepTwo.removeUrlEmails',
remove_stopwords: 'stepTwo.removeStopwords',
} as const
// Lookup table for retrieval method icons
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
[RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
[RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
[RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
[RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
[RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
}
const isNumber = (value: unknown): value is number => typeof value === 'number'
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const segmentationRuleLabels = {
mode: t('embedding.mode', { ns: 'datasetDocuments' }),
segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }),
textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }),
}
const getRuleName = useCallback((key: string): string | undefined => {
const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
return translationKey ? t(translationKey, { ns: 'datasetCreation' }) : undefined
}, [t])
const getModeValue = useCallback((): string => {
if (!sourceData?.mode)
return '-'
if (sourceData.mode === ProcessMode.general)
return t('embedding.custom', { ns: 'datasetDocuments' })
const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
? t('parentMode.paragraph', { ns: 'dataset' })
: t('parentMode.fullDoc', { ns: 'dataset' })
return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
}, [sourceData, t])
const getSegmentLengthValue = useCallback((): string | number => {
if (!sourceData?.mode)
return '-'
const maxTokens = isNumber(sourceData.rules?.segmentation?.max_tokens)
? sourceData.rules.segmentation.max_tokens
: '-'
if (sourceData.mode === ProcessMode.general)
return maxTokens
const childMaxTokens = isNumber(sourceData.rules?.subchunk_segmentation?.max_tokens)
? sourceData.rules.subchunk_segmentation.max_tokens
: '-'
return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
}, [sourceData, t])
const getTextCleaningValue = useCallback((): string => {
if (!sourceData?.mode)
return '-'
const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) || []
const ruleNames = enabledRules
.map((rule) => {
const name = getRuleName(rule.id)
return typeof name === 'string' ? name : ''
})
.filter(name => name)
return ruleNames.length > 0 ? ruleNames.join(',') : '-'
}, [sourceData, getRuleName])
const fieldValueGetters: Record<string, () => string | number> = {
mode: getModeValue,
segmentLength: getSegmentLengthValue,
textCleaning: getTextCleaningValue,
}
const isEconomical = indexingType === IndexingType.ECONOMICAL
const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })
const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
const retrievalIconSrc = RETRIEVAL_ICON_MAP[retrievalMethod as keyof typeof RETRIEVAL_ICON_MAP] ?? retrievalIcon.vector
return (
<div className="flex flex-col gap-1">
{Object.keys(segmentationRuleLabels).map(field => (
<FieldInfo
key={field}
label={segmentationRuleLabels[field as keyof typeof segmentationRuleLabels]}
displayedValue={String(fieldValueGetters[field]())}
/>
))}
<FieldInfo
label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
displayedValue={indexModeLabel}
valueIcon={<Image className="size-4" src={indexMethodIconSrc} alt="" />}
/>
<FieldInfo
label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
displayedValue={retrievalLabel}
valueIcon={<Image className="size-4" src={retrievalIconSrc} alt="" />}
/>
</div>
)
}
export default RuleDetail


@@ -0,0 +1,22 @@
import type { FC } from 'react'
import { useTranslation } from 'react-i18next'
import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
import UpgradeBtn from '@/app/components/billing/upgrade-btn'
const UpgradeBanner: FC = () => {
const { t } = useTranslation()
return (
<div className="flex h-14 items-center rounded-xl border-[0.5px] border-black/5 bg-white p-3 shadow-md">
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-lg bg-[#FFF6ED]">
<ZapFast className="h-4 w-4 text-[#FB6514]" />
</div>
<div className="mx-3 grow text-[13px] font-medium text-gray-700">
{t('plansCommon.documentProcessingPriorityUpgrade', { ns: 'billing' })}
</div>
<UpgradeBtn loc="knowledge-speed-up" />
</div>
)
}
export default UpgradeBanner


@@ -0,0 +1,90 @@
import type { IndexingStatusResponse } from '@/models/datasets'
import { useEffect, useRef, useState } from 'react'
import { fetchIndexingStatusBatch } from '@/service/datasets'
const POLLING_INTERVAL = 2500
const COMPLETED_STATUSES = ['completed', 'error', 'paused'] as const
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
type IndexingStatusPollingParams = {
datasetId: string
batchId: string
}
type IndexingStatusPollingResult = {
statusList: IndexingStatusResponse[]
isEmbedding: boolean
isEmbeddingCompleted: boolean
}
const isStatusCompleted = (status: string): boolean =>
COMPLETED_STATUSES.includes(status as typeof COMPLETED_STATUSES[number])
const isAllCompleted = (statusList: IndexingStatusResponse[]): boolean =>
statusList.every(item => isStatusCompleted(item.indexing_status))
/**
* Custom hook for polling indexing status with automatic stop on completion.
* Handles the polling lifecycle and provides derived states for UI rendering.
*/
export const useIndexingStatusPolling = ({
datasetId,
batchId,
}: IndexingStatusPollingParams): IndexingStatusPollingResult => {
const [statusList, setStatusList] = useState<IndexingStatusResponse[]>([])
const isStopPollingRef = useRef(false)
useEffect(() => {
// Reset polling state on mount
isStopPollingRef.current = false
let timeoutId: ReturnType<typeof setTimeout> | null = null
const fetchStatus = async (): Promise<IndexingStatusResponse[]> => {
const response = await fetchIndexingStatusBatch({ datasetId, batchId })
setStatusList(response.data)
return response.data
}
const poll = async (): Promise<void> => {
if (isStopPollingRef.current)
return
try {
const data = await fetchStatus()
if (isAllCompleted(data)) {
isStopPollingRef.current = true
return
}
}
catch {
// Continue polling on error
}
if (!isStopPollingRef.current) {
timeoutId = setTimeout(() => {
poll()
}, POLLING_INTERVAL)
}
}
poll()
return () => {
isStopPollingRef.current = true
if (timeoutId)
clearTimeout(timeoutId)
}
}, [datasetId, batchId])
const isEmbedding = statusList.some(item =>
EMBEDDING_STATUSES.includes(item?.indexing_status as typeof EMBEDDING_STATUSES[number]),
)
const isEmbeddingCompleted = statusList.length > 0 && isAllCompleted(statusList)
return {
statusList,
isEmbedding,
isEmbeddingCompleted,
}
}
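A hedged usage sketch for the hook above; the import path, the component name, and the rendered copy are illustrative assumptions rather than part of this change:

// Hypothetical consumer of useIndexingStatusPolling (import path assumed).
import type { FC } from 'react'
import { useIndexingStatusPolling } from './use-indexing-status-polling'

const EmbeddingProgress: FC<{ datasetId: string, batchId: string }> = ({ datasetId, batchId }) => {
  const { statusList, isEmbedding, isEmbeddingCompleted } = useIndexingStatusPolling({ datasetId, batchId })
  // Polling stops automatically once every document reports completed/error/paused.
  if (isEmbeddingCompleted)
    return <div>{statusList.length} documents processed</div>
  return <div>{isEmbedding ? 'Embedding in progress' : 'Waiting for indexing status'}</div>
}

export default EmbeddingProgress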

View File

@@ -0,0 +1,64 @@
import type {
DataSourceInfo,
DataSourceType,
FullDocumentDetail,
IndexingStatusResponse,
LegacyDataSourceInfo,
} from '@/models/datasets'
const EMBEDDING_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'] as const
/**
* Type guard for legacy data source info with upload_file property
*/
export const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => {
return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object'
}
/**
* Check if a status indicates the source is being embedded
*/
export const isSourceEmbedding = (detail: IndexingStatusResponse): boolean =>
EMBEDDING_STATUSES.includes(detail.indexing_status as typeof EMBEDDING_STATUSES[number])
/**
* Calculate the progress percentage for a document
*/
export const getSourcePercent = (detail: IndexingStatusResponse): number => {
const completedCount = detail.completed_segments || 0
const totalCount = detail.total_segments || 0
if (totalCount === 0)
return 0
const percent = Math.round(completedCount * 100 / totalCount)
return Math.min(percent, 100)
}
/**
* Get file extension from filename, defaults to 'txt'
*/
export const getFileType = (name?: string): string =>
name?.split('.').pop() || 'txt'
/**
* Document lookup utilities - provides document info by ID from a list
*/
export const createDocumentLookup = (documents: FullDocumentDetail[]) => {
const documentMap = new Map(documents.map(doc => [doc.id, doc]))
return {
getDocument: (id: string) => documentMap.get(id),
getName: (id: string) => documentMap.get(id)?.name,
getSourceType: (id: string) => documentMap.get(id)?.data_source_type as DataSourceType | undefined,
getNotionIcon: (id: string) => {
const info = documentMap.get(id)?.data_source_info
if (info && isLegacyDataSourceInfo(info))
return info.notion_page_icon
return undefined
},
}
}
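As a rough sketch of how these helpers compose, the snippet below joins status entries with document metadata for display; the relative import path and the assumption that IndexingStatusResponse.id carries the document id are illustrative only:

import type { FullDocumentDetail, IndexingStatusResponse } from '@/models/datasets'
// The relative import path below is an assumption.
import { createDocumentLookup, getSourcePercent, isSourceEmbedding } from './utils'

// Build display rows for an embedding-progress list.
export const buildProgressRows = (
  documents: FullDocumentDetail[],
  statusList: IndexingStatusResponse[],
) => {
  const lookup = createDocumentLookup(documents)
  return statusList.map(detail => ({
    name: lookup.getName(detail.id) ?? 'Unknown document', // assumes detail.id is the document id
    percent: getSourcePercent(detail), // 0-100, capped at 100
    embedding: isSourceEmbedding(detail),
  }))
}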

View File

@@ -0,0 +1,199 @@
'use client'
import type { FC } from 'react'
import type { PreProcessingRule } from '@/models/datasets'
import {
RiAlertFill,
RiSearchEyeLine,
} from '@remixicon/react'
import Image from 'next/image'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import Tooltip from '@/app/components/base/tooltip'
import { IS_CE_EDITION } from '@/config'
import { ChunkingMode } from '@/models/datasets'
import SettingCog from '../../assets/setting-gear-mod.svg'
import s from '../index.module.css'
import LanguageSelect from '../language-select'
import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
import { OptionCard } from './option-card'
type TextLabelProps = {
children: React.ReactNode
}
const TextLabel: FC<TextLabelProps> = ({ children }) => {
return <label className="system-sm-semibold text-text-secondary">{children}</label>
}
type GeneralChunkingOptionsProps = {
// State
segmentIdentifier: string
maxChunkLength: number
overlap: number
rules: PreProcessingRule[]
currentDocForm: ChunkingMode
docLanguage: string
// Flags
isActive: boolean
isInUpload: boolean
isNotUploadInEmptyDataset: boolean
hasCurrentDatasetDocForm: boolean
// Actions
onSegmentIdentifierChange: (value: string) => void
onMaxChunkLengthChange: (value: number) => void
onOverlapChange: (value: number) => void
onRuleToggle: (id: string) => void
onDocFormChange: (form: ChunkingMode) => void
onDocLanguageChange: (lang: string) => void
onPreview: () => void
onReset: () => void
// Locale
locale: string
}
export const GeneralChunkingOptions: FC<GeneralChunkingOptionsProps> = ({
segmentIdentifier,
maxChunkLength,
overlap,
rules,
currentDocForm,
docLanguage,
isActive,
isInUpload,
isNotUploadInEmptyDataset,
hasCurrentDatasetDocForm,
onSegmentIdentifierChange,
onMaxChunkLengthChange,
onOverlapChange,
onRuleToggle,
onDocFormChange,
onDocLanguageChange,
onPreview,
onReset,
locale,
}) => {
const { t } = useTranslation()
const getRuleName = (key: string): string => {
const ruleNameMap: Record<string, string> = {
remove_extra_spaces: t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' }),
remove_urls_emails: t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' }),
remove_stopwords: t('stepTwo.removeStopwords', { ns: 'datasetCreation' }),
}
return ruleNameMap[key] ?? key
}
return (
<OptionCard
className="mb-2 bg-background-section"
title={t('stepTwo.general', { ns: 'datasetCreation' })}
icon={<Image width={20} height={20} src={SettingCog} alt={t('stepTwo.general', { ns: 'datasetCreation' })} />}
activeHeaderClassName="bg-dataset-option-card-blue-gradient"
description={t('stepTwo.generalTip', { ns: 'datasetCreation' })}
isActive={isActive}
onSwitched={() => onDocFormChange(ChunkingMode.text)}
actions={(
<>
<Button variant="secondary-accent" onClick={onPreview}>
<RiSearchEyeLine className="mr-0.5 h-4 w-4" />
{t('stepTwo.previewChunk', { ns: 'datasetCreation' })}
</Button>
<Button variant="ghost" onClick={onReset}>
{t('stepTwo.reset', { ns: 'datasetCreation' })}
</Button>
</>
)}
noHighlight={isInUpload && isNotUploadInEmptyDataset}
>
<div className="flex flex-col gap-y-4">
<div className="flex gap-3">
<DelimiterInput
value={segmentIdentifier}
onChange={e => onSegmentIdentifierChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={maxChunkLength}
onChange={onMaxChunkLengthChange}
/>
<OverlapInput
unit="characters"
value={overlap}
min={1}
onChange={onOverlapChange}
/>
</div>
<div className="flex w-full flex-col">
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.rules', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1">
{rules.map(rule => (
<div
key={rule.id}
className={s.ruleItem}
onClick={() => onRuleToggle(rule.id)}
>
<Checkbox checked={rule.enabled} />
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{getRuleName(rule.id)}
</label>
</div>
))}
{IS_CE_EDITION && (
<>
<Divider type="horizontal" className="my-4 bg-divider-subtle" />
<div className="flex items-center py-0.5">
<div
className="flex items-center"
onClick={() => {
if (hasCurrentDatasetDocForm)
return
if (currentDocForm === ChunkingMode.qa)
onDocFormChange(ChunkingMode.text)
else
onDocFormChange(ChunkingMode.qa)
}}
>
<Checkbox
checked={currentDocForm === ChunkingMode.qa}
disabled={hasCurrentDatasetDocForm}
/>
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{t('stepTwo.useQALanguage', { ns: 'datasetCreation' })}
</label>
</div>
<LanguageSelect
currentLanguage={docLanguage || locale}
onSelect={onDocLanguageChange}
disabled={currentDocForm !== ChunkingMode.qa}
/>
<Tooltip popupContent={t('stepTwo.QATip', { ns: 'datasetCreation' })} />
</div>
{currentDocForm === ChunkingMode.qa && (
<div
style={{
background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
}}
className="mt-2 flex h-10 items-center gap-2 rounded-xl border border-components-panel-border px-3 text-xs shadow-xs backdrop-blur-[5px]"
>
<RiAlertFill className="size-4 text-text-warning-secondary" />
<span className="system-xs-medium text-text-primary">
{t('stepTwo.QATip', { ns: 'datasetCreation' })}
</span>
</div>
)}
</>
)}
</div>
</div>
</div>
</OptionCard>
)
}

View File

@@ -0,0 +1,5 @@
export { GeneralChunkingOptions } from './general-chunking-options'
export { IndexingModeSection } from './indexing-mode-section'
export { ParentChildOptions } from './parent-child-options'
export { PreviewPanel } from './preview-panel'
export { StepTwoFooter } from './step-two-footer'

View File

@@ -0,0 +1,253 @@
'use client'
import type { FC } from 'react'
import type { DefaultModel, Model } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { RetrievalConfig } from '@/types/app'
import Image from 'next/image'
import Link from 'next/link'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import Button from '@/app/components/base/button'
import CustomDialog from '@/app/components/base/dialog'
import Divider from '@/app/components/base/divider'
import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
import Tooltip from '@/app/components/base/tooltip'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
import { useDocLink } from '@/context/i18n'
import { ChunkingMode } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import { indexMethodIcon } from '../../icons'
import { IndexingType } from '../hooks'
import s from '../index.module.css'
import { OptionCard } from './option-card'
type IndexingModeSectionProps = {
// State
indexType: IndexingType
hasSetIndexType: boolean
docForm: ChunkingMode
embeddingModel: DefaultModel
embeddingModelList?: Model[]
retrievalConfig: RetrievalConfig
showMultiModalTip: boolean
// Flags
isModelAndRetrievalConfigDisabled: boolean
datasetId?: string
// Modal state
isQAConfirmDialogOpen: boolean
// Actions
onIndexTypeChange: (type: IndexingType) => void
onEmbeddingModelChange: (model: DefaultModel) => void
onRetrievalConfigChange: (config: RetrievalConfig) => void
onQAConfirmDialogClose: () => void
onQAConfirmDialogConfirm: () => void
}
export const IndexingModeSection: FC<IndexingModeSectionProps> = ({
indexType,
hasSetIndexType,
docForm,
embeddingModel,
embeddingModelList,
retrievalConfig,
showMultiModalTip,
isModelAndRetrievalConfigDisabled,
datasetId,
isQAConfirmDialogOpen,
onIndexTypeChange,
onEmbeddingModelChange,
onRetrievalConfigChange,
onQAConfirmDialogClose,
onQAConfirmDialogConfirm,
}) => {
const { t } = useTranslation()
const docLink = useDocLink()
const getIndexingTechnique = () => indexType
return (
<>
{/* Index Mode */}
<div className="system-md-semibold mb-1 text-text-secondary">
{t('stepTwo.indexMode', { ns: 'datasetCreation' })}
</div>
<div className="flex items-center gap-2">
{/* Qualified option */}
{(!hasSetIndexType || (hasSetIndexType && indexType === IndexingType.QUALIFIED)) && (
<OptionCard
className="flex-1 self-stretch"
title={(
<div className="flex items-center">
{t('stepTwo.qualified', { ns: 'datasetCreation' })}
<Badge
className={cn(
'ml-1 h-[18px]',
(!hasSetIndexType && indexType === IndexingType.QUALIFIED)
? 'border-text-accent-secondary text-text-accent-secondary'
: '',
)}
uppercase
>
{t('stepTwo.recommend', { ns: 'datasetCreation' })}
</Badge>
<span className="ml-auto">
{!hasSetIndexType && <span className={cn(s.radio)} />}
</span>
</div>
)}
description={t('stepTwo.qualifiedTip', { ns: 'datasetCreation' })}
icon={<Image src={indexMethodIcon.high_quality} alt="" />}
isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED}
disabled={hasSetIndexType}
onSwitched={() => onIndexTypeChange(IndexingType.QUALIFIED)}
/>
)}
{/* Economical option */}
{(!hasSetIndexType || (hasSetIndexType && indexType === IndexingType.ECONOMICAL)) && (
<>
<CustomDialog show={isQAConfirmDialogOpen} onClose={onQAConfirmDialogClose} className="w-[432px]">
<header className="mb-4 pt-6">
<h2 className="text-lg font-semibold text-text-primary">
{t('stepTwo.qaSwitchHighQualityTipTitle', { ns: 'datasetCreation' })}
</h2>
<p className="mt-2 text-sm font-normal text-text-secondary">
{t('stepTwo.qaSwitchHighQualityTipContent', { ns: 'datasetCreation' })}
</p>
</header>
<div className="flex gap-2 pb-6">
<Button className="ml-auto" onClick={onQAConfirmDialogClose}>
{t('stepTwo.cancel', { ns: 'datasetCreation' })}
</Button>
<Button variant="primary" onClick={onQAConfirmDialogConfirm}>
{t('stepTwo.switch', { ns: 'datasetCreation' })}
</Button>
</div>
</CustomDialog>
<Tooltip
popupContent={(
<div className="rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg">
{docForm === ChunkingMode.qa
? t('stepTwo.notAvailableForQA', { ns: 'datasetCreation' })
: t('stepTwo.notAvailableForParentChild', { ns: 'datasetCreation' })}
</div>
)}
noDecoration
position="top"
asChild={false}
triggerClassName="flex-1 self-stretch"
>
<OptionCard
className="h-full"
title={t('stepTwo.economical', { ns: 'datasetCreation' })}
description={t('stepTwo.economicalTip', { ns: 'datasetCreation' })}
icon={<Image src={indexMethodIcon.economical} alt="" />}
isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
disabled={hasSetIndexType || docForm !== ChunkingMode.text}
onSwitched={() => onIndexTypeChange(IndexingType.ECONOMICAL)}
/>
</Tooltip>
</>
)}
</div>
{/* High quality tip */}
{!hasSetIndexType && indexType === IndexingType.QUALIFIED && (
<div className="mt-2 flex h-10 items-center gap-x-0.5 overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-xs backdrop-blur-[5px]">
<div className="absolute bottom-0 left-0 right-0 top-0 bg-dataset-warning-message-bg opacity-40"></div>
<div className="p-1">
<AlertTriangle className="size-4 text-text-warning-secondary" />
</div>
<span className="system-xs-medium text-text-primary">
{t('stepTwo.highQualityTip', { ns: 'datasetCreation' })}
</span>
</div>
)}
{/* Economical index setting tip */}
{hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
<div className="system-xs-medium mt-2 text-text-tertiary">
{t('stepTwo.indexSettingTip', { ns: 'datasetCreation' })}
<Link className="text-text-accent" href={`/datasets/${datasetId}/settings`}>
{t('stepTwo.datasetSettingLink', { ns: 'datasetCreation' })}
</Link>
</div>
)}
{/* Embedding model */}
{indexType === IndexingType.QUALIFIED && (
<div className="mt-5">
<div className={cn('system-md-semibold mb-1 text-text-secondary', datasetId && 'flex items-center justify-between')}>
{t('form.embeddingModel', { ns: 'datasetSettings' })}
</div>
<ModelSelector
readonly={isModelAndRetrievalConfigDisabled}
triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''}
defaultModel={embeddingModel}
modelList={embeddingModelList ?? []}
onSelect={onEmbeddingModelChange}
/>
{isModelAndRetrievalConfigDisabled && (
<div className="system-xs-medium mt-2 text-text-tertiary">
{t('stepTwo.indexSettingTip', { ns: 'datasetCreation' })}
<Link className="text-text-accent" href={`/datasets/${datasetId}/settings`}>
{t('stepTwo.datasetSettingLink', { ns: 'datasetCreation' })}
</Link>
</div>
)}
</div>
)}
<Divider className="my-5" />
{/* Retrieval Method Config */}
<div>
{!isModelAndRetrievalConfigDisabled
? (
<div className="mb-1">
<div className="system-md-semibold mb-0.5 text-text-secondary">
{t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
</div>
<div className="body-xs-regular text-text-tertiary">
<a
target="_blank"
rel="noopener noreferrer"
href={docLink('/guides/knowledge-base/create-knowledge-and-upload-documents')}
className="text-text-accent"
>
{t('form.retrievalSetting.learnMore', { ns: 'datasetSettings' })}
</a>
{t('form.retrievalSetting.longDescription', { ns: 'datasetSettings' })}
</div>
</div>
)
: (
<div className={cn('system-md-semibold mb-0.5 text-text-secondary', 'flex items-center justify-between')}>
<div>{t('form.retrievalSetting.title', { ns: 'datasetSettings' })}</div>
</div>
)}
<div>
{getIndexingTechnique() === IndexingType.QUALIFIED
? (
<RetrievalMethodConfig
disabled={isModelAndRetrievalConfigDisabled}
value={retrievalConfig}
onChange={onRetrievalConfigChange}
showMultiModalTip={showMultiModalTip}
/>
)
: (
<EconomicalRetrievalMethodConfig
disabled={isModelAndRetrievalConfigDisabled}
value={retrievalConfig}
onChange={onRetrievalConfigChange}
/>
)}
</div>
</div>
</>
)
}

View File

@@ -0,0 +1,191 @@
'use client'
import type { FC } from 'react'
import type { ParentChildConfig } from '../hooks'
import type { ParentMode, PreProcessingRule } from '@/models/datasets'
import { RiSearchEyeLine } from '@remixicon/react'
import Image from 'next/image'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Checkbox from '@/app/components/base/checkbox'
import Divider from '@/app/components/base/divider'
import { ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
import RadioCard from '@/app/components/base/radio-card'
import { ChunkingMode } from '@/models/datasets'
import FileList from '../../assets/file-list-3-fill.svg'
import Note from '../../assets/note-mod.svg'
import BlueEffect from '../../assets/option-card-effect-blue.svg'
import s from '../index.module.css'
import { DelimiterInput, MaxLengthInput } from './inputs'
import { OptionCard } from './option-card'
type TextLabelProps = {
children: React.ReactNode
}
const TextLabel: FC<TextLabelProps> = ({ children }) => {
return <label className="system-sm-semibold text-text-secondary">{children}</label>
}
type ParentChildOptionsProps = {
// State
parentChildConfig: ParentChildConfig
rules: PreProcessingRule[]
currentDocForm: ChunkingMode
// Flags
isActive: boolean
isInUpload: boolean
isNotUploadInEmptyDataset: boolean
// Actions
onDocFormChange: (form: ChunkingMode) => void
onChunkForContextChange: (mode: ParentMode) => void
onParentDelimiterChange: (value: string) => void
onParentMaxLengthChange: (value: number) => void
onChildDelimiterChange: (value: string) => void
onChildMaxLengthChange: (value: number) => void
onRuleToggle: (id: string) => void
onPreview: () => void
onReset: () => void
}
export const ParentChildOptions: FC<ParentChildOptionsProps> = ({
parentChildConfig,
rules,
currentDocForm: _currentDocForm,
isActive,
isInUpload,
isNotUploadInEmptyDataset,
onDocFormChange,
onChunkForContextChange,
onParentDelimiterChange,
onParentMaxLengthChange,
onChildDelimiterChange,
onChildMaxLengthChange,
onRuleToggle,
onPreview,
onReset,
}) => {
const { t } = useTranslation()
const getRuleName = (key: string): string => {
const ruleNameMap: Record<string, string> = {
remove_extra_spaces: t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' }),
remove_urls_emails: t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' }),
remove_stopwords: t('stepTwo.removeStopwords', { ns: 'datasetCreation' }),
}
return ruleNameMap[key] ?? key
}
return (
<OptionCard
title={t('stepTwo.parentChild', { ns: 'datasetCreation' })}
icon={<ParentChildChunk className="h-[20px] w-[20px]" />}
effectImg={BlueEffect.src}
className="text-util-colors-blue-light-blue-light-500"
activeHeaderClassName="bg-dataset-option-card-blue-gradient"
description={t('stepTwo.parentChildTip', { ns: 'datasetCreation' })}
isActive={isActive}
onSwitched={() => onDocFormChange(ChunkingMode.parentChild)}
actions={(
<>
<Button variant="secondary-accent" onClick={onPreview}>
<RiSearchEyeLine className="mr-0.5 h-4 w-4" />
{t('stepTwo.previewChunk', { ns: 'datasetCreation' })}
</Button>
<Button variant="ghost" onClick={onReset}>
{t('stepTwo.reset', { ns: 'datasetCreation' })}
</Button>
</>
)}
noHighlight={isInUpload && isNotUploadInEmptyDataset}
>
<div className="flex flex-col gap-4">
{/* Parent chunk for context */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.parentChunkForContext', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<RadioCard
className="mt-1"
icon={<Image src={Note} alt="" />}
title={t('stepTwo.paragraph', { ns: 'datasetCreation' })}
description={t('stepTwo.paragraphTip', { ns: 'datasetCreation' })}
isChosen={parentChildConfig.chunkForContext === 'paragraph'}
onChosen={() => onChunkForContextChange('paragraph')}
chosenConfig={(
<div className="flex gap-3">
<DelimiterInput
value={parentChildConfig.parent.delimiter}
tooltip={t('stepTwo.parentChildDelimiterTip', { ns: 'datasetCreation' })!}
onChange={e => onParentDelimiterChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={parentChildConfig.parent.maxLength}
onChange={onParentMaxLengthChange}
/>
</div>
)}
/>
<RadioCard
className="mt-2"
icon={<Image src={FileList} alt="" />}
title={t('stepTwo.fullDoc', { ns: 'datasetCreation' })}
description={t('stepTwo.fullDocTip', { ns: 'datasetCreation' })}
onChosen={() => onChunkForContextChange('full-doc')}
isChosen={parentChildConfig.chunkForContext === 'full-doc'}
/>
</div>
{/* Child chunk for retrieval */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.childChunkForRetrieval', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1 flex gap-3">
<DelimiterInput
value={parentChildConfig.child.delimiter}
tooltip={t('stepTwo.parentChildChunkDelimiterTip', { ns: 'datasetCreation' })!}
onChange={e => onChildDelimiterChange(e.target.value)}
/>
<MaxLengthInput
unit="characters"
value={parentChildConfig.child.maxLength}
onChange={onChildMaxLengthChange}
/>
</div>
</div>
{/* Rules */}
<div>
<div className="flex items-center gap-x-2">
<div className="inline-flex shrink-0">
<TextLabel>{t('stepTwo.rules', { ns: 'datasetCreation' })}</TextLabel>
</div>
<Divider className="grow" bgStyle="gradient" />
</div>
<div className="mt-1">
{rules.map(rule => (
<div
key={rule.id}
className={s.ruleItem}
onClick={() => onRuleToggle(rule.id)}
>
<Checkbox checked={rule.enabled} />
<label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
{getRuleName(rule.id)}
</label>
</div>
))}
</div>
</div>
</div>
</OptionCard>
)
}

View File

@@ -0,0 +1,171 @@
'use client'
import type { FC } from 'react'
import type { ParentChildConfig } from '../hooks'
import type { DataSourceType, FileIndexingEstimateResponse } from '@/models/datasets'
import { RiSearchEyeLine } from '@remixicon/react'
import { noop } from 'es-toolkit/function'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import FloatRightContainer from '@/app/components/base/float-right-container'
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import { FULL_DOC_PREVIEW_LENGTH } from '@/config'
import { ChunkingMode } from '@/models/datasets'
import { cn } from '@/utils/classnames'
import { ChunkContainer, QAPreview } from '../../../chunk'
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
import { FormattedText } from '../../../formatted-text/formatted'
import PreviewContainer from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
type PreviewPanelProps = {
// State
isMobile: boolean
dataSourceType: DataSourceType
currentDocForm: ChunkingMode
estimate?: FileIndexingEstimateResponse
parentChildConfig: ParentChildConfig
isSetting?: boolean
// Picker
pickerFiles: Array<{ id: string, name: string, extension: string }>
pickerValue: { id: string, name: string, extension: string }
// Mutation state
isIdle: boolean
isPending: boolean
// Actions
onPickerChange: (selected: { id: string, name: string }) => void
}
export const PreviewPanel: FC<PreviewPanelProps> = ({
isMobile,
dataSourceType: _dataSourceType,
currentDocForm,
estimate,
parentChildConfig,
isSetting,
pickerFiles,
pickerValue,
isIdle,
isPending,
onPickerChange,
}) => {
const { t } = useTranslation()
return (
<FloatRightContainer isMobile={isMobile} isOpen={true} onClose={noop} footer={null}>
<PreviewContainer
header={(
<PreviewHeader title={t('stepTwo.preview', { ns: 'datasetCreation' })}>
<div className="flex items-center gap-1">
<PreviewDocumentPicker
files={pickerFiles as Array<Required<{ id: string, name: string, extension: string }>>}
onChange={onPickerChange}
value={isSetting ? pickerFiles[0] : pickerValue}
/>
{currentDocForm !== ChunkingMode.qa && (
<Badge
text={t('stepTwo.previewChunkCount', {
ns: 'datasetCreation',
count: estimate?.total_segments || 0,
}) as string}
/>
)}
</div>
</PreviewHeader>
)}
className={cn('relative flex h-full w-1/2 shrink-0 p-4 pr-0', isMobile && 'w-full max-w-[524px]')}
mainClassName="space-y-6"
>
{/* QA Preview */}
{currentDocForm === ChunkingMode.qa && estimate?.qa_preview && (
estimate.qa_preview.map((item, index) => (
<ChunkContainer
key={item.question}
label={`Chunk-${index + 1}`}
characterCount={item.question.length + item.answer.length}
>
<QAPreview qa={item} />
</ChunkContainer>
))
)}
{/* Text Preview */}
{currentDocForm === ChunkingMode.text && estimate?.preview && (
estimate.preview.map((item, index) => (
<ChunkContainer
key={item.content}
label={`Chunk-${index + 1}`}
characterCount={item.content.length}
>
{item.content}
</ChunkContainer>
))
)}
{/* Parent-Child Preview */}
{currentDocForm === ChunkingMode.parentChild && estimate?.preview && (
estimate.preview.map((item, index) => {
const indexForLabel = index + 1
const childChunks = parentChildConfig.chunkForContext === 'full-doc'
? item.child_chunks.slice(0, FULL_DOC_PREVIEW_LENGTH)
: item.child_chunks
return (
<ChunkContainer
key={item.content}
label={`Chunk-${indexForLabel}`}
characterCount={item.content.length}
>
<FormattedText>
{childChunks.map((child, childIndex) => {
const childIndexForLabel = childIndex + 1
return (
<PreviewSlice
key={`C-${childIndexForLabel}-${child}`}
label={`C-${childIndexForLabel}`}
text={child}
tooltip={`Child-chunk-${childIndexForLabel} · ${child.length} Characters`}
labelInnerClassName="text-[10px] font-semibold align-bottom leading-7"
dividerClassName="leading-7"
/>
)
})}
</FormattedText>
</ChunkContainer>
)
})
)}
{/* Idle State */}
{isIdle && (
<div className="flex h-full w-full items-center justify-center">
<div className="flex flex-col items-center justify-center gap-3">
<RiSearchEyeLine className="size-10 text-text-empty-state-icon" />
<p className="text-sm text-text-tertiary">
{t('stepTwo.previewChunkTip', { ns: 'datasetCreation' })}
</p>
</div>
</div>
)}
{/* Loading State */}
{isPending && (
<div className="space-y-6">
{Array.from({ length: 10 }, (_, i) => (
<SkeletonContainer key={i}>
<SkeletonRow>
<SkeletonRectangle className="w-20" />
<SkeletonPoint />
<SkeletonRectangle className="w-24" />
</SkeletonRow>
<SkeletonRectangle className="w-full" />
<SkeletonRectangle className="w-full" />
<SkeletonRectangle className="w-[422px]" />
</SkeletonContainer>
))}
</div>
)}
</PreviewContainer>
</FloatRightContainer>
)
}

View File

@@ -0,0 +1,58 @@
'use client'
import type { FC } from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
type StepTwoFooterProps = {
isSetting?: boolean
isCreating: boolean
onPrevious: () => void
onCreate: () => void
onCancel?: () => void
}
export const StepTwoFooter: FC<StepTwoFooterProps> = ({
isSetting,
isCreating,
onPrevious,
onCreate,
onCancel,
}) => {
const { t } = useTranslation()
if (!isSetting) {
return (
<div className="mt-8 flex items-center py-2">
<Button onClick={onPrevious}>
<RiArrowLeftLine className="mr-1 h-4 w-4" />
{t('stepTwo.previousStep', { ns: 'datasetCreation' })}
</Button>
<Button
className="ml-auto"
loading={isCreating}
variant="primary"
onClick={onCreate}
>
{t('stepTwo.nextStep', { ns: 'datasetCreation' })}
</Button>
</div>
)
}
return (
<div className="mt-8 flex items-center py-2">
<Button
loading={isCreating}
variant="primary"
onClick={onCreate}
>
{t('stepTwo.save', { ns: 'datasetCreation' })}
</Button>
<Button className="ml-2" onClick={onCancel}>
{t('stepTwo.cancel', { ns: 'datasetCreation' })}
</Button>
</div>
)
}

View File

@@ -0,0 +1,14 @@
export { useDocumentCreation } from './use-document-creation'
export type { DocumentCreation, ValidationParams } from './use-document-creation'
export { IndexingType, useIndexingConfig } from './use-indexing-config'
export type { IndexingConfig } from './use-indexing-config'
export { useIndexingEstimate } from './use-indexing-estimate'
export type { IndexingEstimate } from './use-indexing-estimate'
export { usePreviewState } from './use-preview-state'
export type { PreviewState } from './use-preview-state'
export { DEFAULT_MAXIMUM_CHUNK_LENGTH, DEFAULT_OVERLAP, DEFAULT_SEGMENT_IDENTIFIER, defaultParentChildConfig, MAXIMUM_CHUNK_TOKEN_LENGTH, useSegmentationState } from './use-segmentation-state'
export type { ParentChildConfig, SegmentationState } from './use-segmentation-state'

View File

@@ -0,0 +1,279 @@
import type { DefaultModel, Model } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { NotionPage } from '@/models/common'
import type {
ChunkingMode,
CrawlOptions,
CrawlResultItem,
CreateDocumentReq,
createDocumentResponse,
CustomFile,
FullDocumentDetail,
ProcessRule,
} from '@/models/datasets'
import type { RetrievalConfig, RETRIEVE_METHOD } from '@/types/app'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { trackEvent } from '@/app/components/base/amplitude'
import Toast from '@/app/components/base/toast'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { DataSourceProvider } from '@/models/common'
import {
DataSourceType,
} from '@/models/datasets'
import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument } from '@/service/knowledge/use-create-dataset'
import { useInvalidDatasetList } from '@/service/knowledge/use-dataset'
import { IndexingType } from './use-indexing-config'
import { MAXIMUM_CHUNK_TOKEN_LENGTH } from './use-segmentation-state'
export type UseDocumentCreationOptions = {
datasetId?: string
isSetting?: boolean
documentDetail?: FullDocumentDetail
dataSourceType: DataSourceType
files: CustomFile[]
notionPages: NotionPage[]
notionCredentialId: string
websitePages: CrawlResultItem[]
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
// Callbacks
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateResultCache?: (res: createDocumentResponse) => void
updateRetrievalMethodCache?: (method: RETRIEVE_METHOD | '') => void
onSave?: () => void
mutateDatasetRes?: () => void
}
export type ValidationParams = {
segmentationType: string
maxChunkLength: number
limitMaxChunkLength: number
overlap: number
indexType: IndexingType
embeddingModel: DefaultModel
rerankModelList: Model[]
retrievalConfig: RetrievalConfig
}
export const useDocumentCreation = (options: UseDocumentCreationOptions) => {
const { t } = useTranslation()
const {
datasetId,
isSetting,
documentDetail,
dataSourceType,
files,
notionPages,
notionCredentialId,
websitePages,
crawlOptions,
websiteCrawlProvider = DataSourceProvider.jinaReader,
websiteCrawlJobId = '',
onStepChange,
updateIndexingTypeCache,
updateResultCache,
updateRetrievalMethodCache,
onSave,
mutateDatasetRes,
} = options
const createFirstDocumentMutation = useCreateFirstDocument()
const createDocumentMutation = useCreateDocument(datasetId!)
const invalidDatasetList = useInvalidDatasetList()
const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending
// Validate creation params
const validateParams = useCallback((params: ValidationParams): boolean => {
const {
segmentationType,
maxChunkLength,
limitMaxChunkLength,
overlap,
indexType,
embeddingModel,
rerankModelList,
retrievalConfig,
} = params
if (segmentationType === 'general' && overlap > maxChunkLength) {
Toast.notify({ type: 'error', message: t('stepTwo.overlapCheck', { ns: 'datasetCreation' }) })
return false
}
if (segmentationType === 'general' && maxChunkLength > limitMaxChunkLength) {
Toast.notify({
type: 'error',
message: t('stepTwo.maxLengthCheck', { ns: 'datasetCreation', limit: limitMaxChunkLength }),
})
return false
}
if (!isSetting) {
if (indexType === IndexingType.QUALIFIED && (!embeddingModel.model || !embeddingModel.provider)) {
Toast.notify({
type: 'error',
message: t('datasetConfig.embeddingModelRequired', { ns: 'appDebug' }),
})
return false
}
if (!isReRankModelSelected({
rerankModelList,
retrievalConfig,
indexMethod: indexType,
})) {
Toast.notify({ type: 'error', message: t('datasetConfig.rerankModelRequired', { ns: 'appDebug' }) })
return false
}
}
return true
}, [t, isSetting])
// Build creation params
const buildCreationParams = useCallback((
currentDocForm: ChunkingMode,
docLanguage: string,
processRule: ProcessRule,
retrievalConfig: RetrievalConfig,
embeddingModel: DefaultModel,
indexingTechnique: string,
): CreateDocumentReq | null => {
if (isSetting) {
return {
original_document_id: documentDetail?.id,
doc_form: currentDocForm,
doc_language: docLanguage,
process_rule: processRule,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
indexing_technique: indexingTechnique,
} as CreateDocumentReq
}
const params: CreateDocumentReq = {
data_source: {
type: dataSourceType,
info_list: {
data_source_type: dataSourceType,
},
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: currentDocForm,
doc_language: docLanguage,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
} as CreateDocumentReq
// Add data source specific info
if (dataSourceType === DataSourceType.FILE) {
params.data_source!.info_list.file_info_list = {
file_ids: files.map(file => file.id || '').filter(Boolean),
}
}
if (dataSourceType === DataSourceType.NOTION)
params.data_source!.info_list.notion_info_list = getNotionInfo(notionPages, notionCredentialId)
if (dataSourceType === DataSourceType.WEB) {
params.data_source!.info_list.website_info_list = getWebsiteInfo({
websiteCrawlProvider,
websiteCrawlJobId,
websitePages,
crawlOptions,
})
}
return params
}, [
isSetting,
documentDetail,
dataSourceType,
files,
notionPages,
notionCredentialId,
websitePages,
websiteCrawlProvider,
websiteCrawlJobId,
crawlOptions,
])
// Execute creation
const executeCreation = useCallback(async (
params: CreateDocumentReq,
indexType: IndexingType,
retrievalConfig: RetrievalConfig,
) => {
if (!datasetId) {
await createFirstDocumentMutation.mutateAsync(params, {
onSuccess(data) {
updateIndexingTypeCache?.(indexType)
updateResultCache?.(data)
updateRetrievalMethodCache?.(retrievalConfig.search_method as RETRIEVE_METHOD)
},
})
}
else {
await createDocumentMutation.mutateAsync(params, {
onSuccess(data) {
updateIndexingTypeCache?.(indexType)
updateResultCache?.(data)
updateRetrievalMethodCache?.(retrievalConfig.search_method as RETRIEVE_METHOD)
},
})
}
mutateDatasetRes?.()
invalidDatasetList()
trackEvent('create_datasets', {
data_source_type: dataSourceType,
indexing_technique: indexType,
})
onStepChange?.(+1)
if (isSetting)
onSave?.()
}, [
datasetId,
createFirstDocumentMutation,
createDocumentMutation,
updateIndexingTypeCache,
updateResultCache,
updateRetrievalMethodCache,
mutateDatasetRes,
invalidDatasetList,
dataSourceType,
onStepChange,
isSetting,
onSave,
])
// Validate preview params
const validatePreviewParams = useCallback((maxChunkLength: number): boolean => {
if (maxChunkLength > MAXIMUM_CHUNK_TOKEN_LENGTH) {
Toast.notify({
type: 'error',
message: t('stepTwo.maxLengthCheck', { ns: 'datasetCreation', limit: MAXIMUM_CHUNK_TOKEN_LENGTH }),
})
return false
}
return true
}, [t])
return {
isCreating,
validateParams,
buildCreationParams,
executeCreation,
validatePreviewParams,
}
}
export type DocumentCreation = ReturnType<typeof useDocumentCreation>
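A compressed sketch of the intended call order (validate, then build, then execute); every identifier not returned by this hook is a placeholder supplied by the sibling hooks in this change:

// Hypothetical submit handler inside the step-two component.
const handleCreate = async () => {
  const valid = creation.validateParams({
    segmentationType,
    maxChunkLength,
    limitMaxChunkLength,
    overlap,
    indexType,
    embeddingModel,
    rerankModelList,
    retrievalConfig,
  })
  if (!valid)
    return
  const params = creation.buildCreationParams(
    currentDocForm,
    docLanguage,
    getProcessRule(currentDocForm),
    retrievalConfig,
    embeddingModel,
    getIndexingTechnique(),
  )
  if (params)
    await creation.executeCreation(params, indexType, retrievalConfig)
}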

View File

@@ -0,0 +1,143 @@
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { RetrievalConfig } from '@/types/app'
import { useEffect, useMemo, useState } from 'react'
import { checkShowMultiModalTip } from '@/app/components/datasets/settings/utils'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { RETRIEVE_METHOD } from '@/types/app'
export enum IndexingType {
QUALIFIED = 'high_quality',
ECONOMICAL = 'economy',
}
const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: false,
reranking_model: {
reranking_provider_name: '',
reranking_model_name: '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
}
export type UseIndexingConfigOptions = {
initialIndexType?: IndexingType
initialEmbeddingModel?: DefaultModel
initialRetrievalConfig?: RetrievalConfig
isAPIKeySet: boolean
hasSetIndexType: boolean
}
export const useIndexingConfig = (options: UseIndexingConfigOptions) => {
const {
initialIndexType,
initialEmbeddingModel,
initialRetrievalConfig,
isAPIKeySet,
hasSetIndexType,
} = options
// Rerank model
const {
modelList: rerankModelList,
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelValid,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
// Embedding model list
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
// Index type state
const [indexType, setIndexType] = useState<IndexingType>(() => {
if (initialIndexType)
return initialIndexType
return isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL
})
// Embedding model state
const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
initialEmbeddingModel ?? {
provider: defaultEmbeddingModel?.provider.provider || '',
model: defaultEmbeddingModel?.model || '',
},
)
// Retrieval config state
const [retrievalConfig, setRetrievalConfig] = useState<RetrievalConfig>(
initialRetrievalConfig ?? DEFAULT_RETRIEVAL_CONFIG,
)
// Sync retrieval config with rerank model when available
useEffect(() => {
if (initialRetrievalConfig)
return
setRetrievalConfig({
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: !!isRerankDefaultModelValid,
reranking_model: {
reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '',
reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
})
}, [rerankDefaultModel, isRerankDefaultModelValid, initialRetrievalConfig])
// Sync index type with props
useEffect(() => {
if (initialIndexType)
setIndexType(initialIndexType)
else
setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
}, [isAPIKeySet, initialIndexType])
// Show multimodal tip
const showMultiModalTip = useMemo(() => {
return checkShowMultiModalTip({
embeddingModel,
rerankingEnable: retrievalConfig.reranking_enable,
rerankModel: {
rerankingProviderName: retrievalConfig.reranking_model.reranking_provider_name,
rerankingModelName: retrievalConfig.reranking_model.reranking_model_name,
},
indexMethod: indexType,
embeddingModelList,
rerankModelList,
})
}, [embeddingModel, retrievalConfig, indexType, embeddingModelList, rerankModelList])
// Get effective indexing technique
const getIndexingTechnique = () => initialIndexType || indexType
return {
// Index type
indexType,
setIndexType,
hasSetIndexType,
getIndexingTechnique,
// Embedding model
embeddingModel,
setEmbeddingModel,
embeddingModelList,
defaultEmbeddingModel,
// Retrieval config
retrievalConfig,
setRetrievalConfig,
rerankModelList,
rerankDefaultModel,
isRerankDefaultModelValid,
// Computed
showMultiModalTip,
}
}
export type IndexingConfig = ReturnType<typeof useIndexingConfig>
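For orientation, a brief sketch of how this hook is expected to back IndexingModeSection; the surrounding values are assumptions taken from the step-two props:

// Hypothetical usage inside the step-two component.
const indexing = useIndexingConfig({
  initialIndexType: indexingType, // from StepTwoProps
  isAPIKeySet,
  hasSetIndexType: !!indexingType,
})
// indexing.indexType / embeddingModel / retrievalConfig feed IndexingModeSection,
// and indexing.showMultiModalTip toggles the multimodal retrieval warning.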

View File

@@ -0,0 +1,123 @@
import type { IndexingType } from './use-indexing-config'
import type { NotionPage } from '@/models/common'
import type { ChunkingMode, CrawlOptions, CrawlResultItem, CustomFile, ProcessRule } from '@/models/datasets'
import { useCallback } from 'react'
import { DataSourceProvider } from '@/models/common'
import { DataSourceType } from '@/models/datasets'
import {
useFetchFileIndexingEstimateForFile,
useFetchFileIndexingEstimateForNotion,
useFetchFileIndexingEstimateForWeb,
} from '@/service/knowledge/use-create-dataset'
export type UseIndexingEstimateOptions = {
dataSourceType: DataSourceType
datasetId?: string
// Document settings
currentDocForm: ChunkingMode
docLanguage: string
// File data source
files: CustomFile[]
previewFileName?: string
// Notion data source
previewNotionPage: NotionPage
notionCredentialId: string
// Website data source
previewWebsitePage: CrawlResultItem
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
// Processing
indexingTechnique: IndexingType
processRule: ProcessRule
}
export const useIndexingEstimate = (options: UseIndexingEstimateOptions) => {
const {
dataSourceType,
datasetId,
currentDocForm,
docLanguage,
files,
previewFileName,
previewNotionPage,
notionCredentialId,
previewWebsitePage,
crawlOptions,
websiteCrawlProvider,
websiteCrawlJobId,
indexingTechnique,
processRule,
} = options
// File indexing estimate
const fileQuery = useFetchFileIndexingEstimateForFile({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.FILE,
files: previewFileName
? [files.find(file => file.name === previewFileName)!]
: files,
indexingTechnique,
processRule,
dataset_id: datasetId!,
})
// Notion indexing estimate
const notionQuery = useFetchFileIndexingEstimateForNotion({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.NOTION,
notionPages: [previewNotionPage],
indexingTechnique,
processRule,
dataset_id: datasetId || '',
credential_id: notionCredentialId,
})
// Website indexing estimate
const websiteQuery = useFetchFileIndexingEstimateForWeb({
docForm: currentDocForm,
docLanguage,
dataSourceType: DataSourceType.WEB,
websitePages: [previewWebsitePage],
crawlOptions,
websiteCrawlProvider: websiteCrawlProvider ?? DataSourceProvider.jinaReader,
websiteCrawlJobId: websiteCrawlJobId ?? '',
indexingTechnique,
processRule,
dataset_id: datasetId || '',
})
// Get current mutation based on data source type
const getCurrentMutation = useCallback(() => {
if (dataSourceType === DataSourceType.FILE)
return fileQuery
if (dataSourceType === DataSourceType.NOTION)
return notionQuery
return websiteQuery
}, [dataSourceType, fileQuery, notionQuery, websiteQuery])
const currentMutation = getCurrentMutation()
// Trigger estimate fetch
const fetchEstimate = useCallback(() => {
if (dataSourceType === DataSourceType.FILE)
fileQuery.mutate()
else if (dataSourceType === DataSourceType.NOTION)
notionQuery.mutate()
else
websiteQuery.mutate()
}, [dataSourceType, fileQuery, notionQuery, websiteQuery])
return {
currentMutation,
estimate: currentMutation.data,
isIdle: currentMutation.isIdle,
isPending: currentMutation.isPending,
fetchEstimate,
reset: currentMutation.reset,
}
}
export type IndexingEstimate = ReturnType<typeof useIndexingEstimate>
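A hedged sketch of driving the chunk preview: assemble the options from the other hooks, validate the chunk length, then trigger the estimate. The option values shown are placeholders from sibling hooks, not asserted by this file:

// Hypothetical wiring inside the step-two component.
const indexingEstimate = useIndexingEstimate({
  dataSourceType,
  datasetId,
  currentDocForm,
  docLanguage,
  files,
  previewFileName: previewFile?.name,
  previewNotionPage,
  notionCredentialId,
  previewWebsitePage,
  crawlOptions,
  websiteCrawlProvider,
  websiteCrawlJobId,
  indexingTechnique: getIndexingTechnique(),
  processRule: getProcessRule(currentDocForm),
})

const handlePreviewChunks = () => {
  if (!creation.validatePreviewParams(maxChunkLength))
    return
  indexingEstimate.fetchEstimate()
}
// estimate / isIdle / isPending then map directly onto PreviewPanel's props of the same names.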

View File

@@ -0,0 +1,127 @@
import type { NotionPage } from '@/models/common'
import type { CrawlResultItem, CustomFile, DocumentItem, FullDocumentDetail } from '@/models/datasets'
import { useCallback, useState } from 'react'
import { DataSourceType } from '@/models/datasets'
export type UsePreviewStateOptions = {
dataSourceType: DataSourceType
files: CustomFile[]
notionPages: NotionPage[]
websitePages: CrawlResultItem[]
documentDetail?: FullDocumentDetail
datasetId?: string
}
export const usePreviewState = (options: UsePreviewStateOptions) => {
const {
dataSourceType,
files,
notionPages,
websitePages,
documentDetail,
datasetId,
} = options
// File preview state
const [previewFile, setPreviewFile] = useState<DocumentItem>(
(datasetId && documentDetail)
? documentDetail.file
: files[0],
)
// Notion page preview state
const [previewNotionPage, setPreviewNotionPage] = useState<NotionPage>(
(datasetId && documentDetail)
? documentDetail.notion_page
: notionPages[0],
)
// Website page preview state
const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(
(datasetId && documentDetail)
? documentDetail.website_page
: websitePages[0],
)
// Get preview items for document picker based on data source type
const getPreviewPickerItems = useCallback(() => {
if (dataSourceType === DataSourceType.FILE) {
return files as Array<Required<CustomFile>>
}
if (dataSourceType === DataSourceType.NOTION) {
return notionPages.map(page => ({
id: page.page_id,
name: page.page_name,
extension: 'md',
}))
}
if (dataSourceType === DataSourceType.WEB) {
return websitePages.map(page => ({
id: page.source_url,
name: page.title,
extension: 'md',
}))
}
return []
}, [dataSourceType, files, notionPages, websitePages])
// Get current preview value for picker
const getPreviewPickerValue = useCallback(() => {
if (dataSourceType === DataSourceType.FILE) {
return previewFile as Required<CustomFile>
}
if (dataSourceType === DataSourceType.NOTION) {
return {
id: previewNotionPage?.page_id || '',
name: previewNotionPage?.page_name || '',
extension: 'md',
}
}
if (dataSourceType === DataSourceType.WEB) {
return {
id: previewWebsitePage?.source_url || '',
name: previewWebsitePage?.title || '',
extension: 'md',
}
}
return { id: '', name: '', extension: '' }
}, [dataSourceType, previewFile, previewNotionPage, previewWebsitePage])
// Handle preview change
const handlePreviewChange = useCallback((selected: { id: string, name: string }) => {
if (dataSourceType === DataSourceType.FILE) {
setPreviewFile(selected as DocumentItem)
}
else if (dataSourceType === DataSourceType.NOTION) {
const selectedPage = notionPages.find(page => page.page_id === selected.id)
if (selectedPage)
setPreviewNotionPage(selectedPage)
}
else if (dataSourceType === DataSourceType.WEB) {
const selectedPage = websitePages.find(page => page.source_url === selected.id)
if (selectedPage)
setPreviewWebsitePage(selectedPage)
}
}, [dataSourceType, notionPages, websitePages])
return {
// File preview
previewFile,
setPreviewFile,
// Notion preview
previewNotionPage,
setPreviewNotionPage,
// Website preview
previewWebsitePage,
setPreviewWebsitePage,
// Picker helpers
getPreviewPickerItems,
getPreviewPickerValue,
handlePreviewChange,
}
}
export type PreviewState = ReturnType<typeof usePreviewState>
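A short sketch of plugging the picker helpers into PreviewPanel; prop names follow the PreviewPanel file in this change, and resetting the estimate on selection change is an assumption about intended behaviour:

// Hypothetical wiring inside the step-two component.
const preview = usePreviewState({ dataSourceType, files, notionPages, websitePages, documentDetail, datasetId })

// PreviewPanel wiring:
//   pickerFiles    = preview.getPreviewPickerItems()
//   pickerValue    = preview.getPreviewPickerValue()
//   onPickerChange = (selected) => { preview.handlePreviewChange(selected); indexingEstimate.reset() }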

View File

@@ -0,0 +1,222 @@
import type { ParentMode, PreProcessingRule, ProcessRule, Rules } from '@/models/datasets'
import { useCallback, useState } from 'react'
import { ChunkingMode, ProcessMode } from '@/models/datasets'
import escape from './escape'
import unescape from './unescape'
// Constants
export const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
export const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
export const DEFAULT_OVERLAP = 50
export const MAXIMUM_CHUNK_TOKEN_LENGTH = Number.parseInt(
globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000',
10,
)
export type ParentChildConfig = {
chunkForContext: ParentMode
parent: {
delimiter: string
maxLength: number
}
child: {
delimiter: string
maxLength: number
}
}
export const defaultParentChildConfig: ParentChildConfig = {
chunkForContext: 'paragraph',
parent: {
delimiter: '\\n\\n',
maxLength: 1024,
},
child: {
delimiter: '\\n',
maxLength: 512,
},
}
export type UseSegmentationStateOptions = {
initialSegmentationType?: ProcessMode
}
export const useSegmentationState = (options: UseSegmentationStateOptions = {}) => {
const { initialSegmentationType } = options
// Segmentation type (general or parent-child)
const [segmentationType, setSegmentationType] = useState<ProcessMode>(
initialSegmentationType ?? ProcessMode.general,
)
// General chunking settings
const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH)
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(MAXIMUM_CHUNK_TOKEN_LENGTH)
const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
// Pre-processing rules
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
// Parent-child config
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
// Escaped segment identifier setter
const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
if (value) {
doSetSegmentIdentifier(escape(value))
}
else {
doSetSegmentIdentifier(canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)
}
}, [])
// Rule toggle handler
const toggleRule = useCallback((id: string) => {
setRules(prev => prev.map(rule =>
rule.id === id ? { ...rule, enabled: !rule.enabled } : rule,
))
}, [])
// Reset to defaults
const resetToDefaults = useCallback(() => {
if (defaultConfig) {
setSegmentIdentifier(defaultConfig.segmentation.separator)
setMaxChunkLength(defaultConfig.segmentation.max_tokens)
setOverlap(defaultConfig.segmentation.chunk_overlap!)
setRules(defaultConfig.pre_processing_rules)
}
setParentChildConfig(defaultParentChildConfig)
}, [defaultConfig, setSegmentIdentifier])
// Apply config from document detail
const applyConfigFromRules = useCallback((rulesConfig: Rules, isHierarchical: boolean) => {
const separator = rulesConfig.segmentation.separator
const max = rulesConfig.segmentation.max_tokens
const chunkOverlap = rulesConfig.segmentation.chunk_overlap
setSegmentIdentifier(separator)
setMaxChunkLength(max)
setOverlap(chunkOverlap!)
setRules(rulesConfig.pre_processing_rules)
setDefaultConfig(rulesConfig)
if (isHierarchical) {
setParentChildConfig({
chunkForContext: rulesConfig.parent_mode || 'paragraph',
parent: {
delimiter: escape(rulesConfig.segmentation.separator),
maxLength: rulesConfig.segmentation.max_tokens,
},
child: {
delimiter: escape(rulesConfig.subchunk_segmentation!.separator),
maxLength: rulesConfig.subchunk_segmentation!.max_tokens,
},
})
}
}, [setSegmentIdentifier])
// Get process rule for API
const getProcessRule = useCallback((docForm: ChunkingMode): ProcessRule => {
if (docForm === ChunkingMode.parentChild) {
return {
rules: {
pre_processing_rules: rules,
segmentation: {
separator: unescape(parentChildConfig.parent.delimiter),
max_tokens: parentChildConfig.parent.maxLength,
},
parent_mode: parentChildConfig.chunkForContext,
subchunk_segmentation: {
separator: unescape(parentChildConfig.child.delimiter),
max_tokens: parentChildConfig.child.maxLength,
},
},
mode: 'hierarchical',
} as ProcessRule
}
return {
rules: {
pre_processing_rules: rules,
segmentation: {
separator: unescape(segmentIdentifier),
max_tokens: maxChunkLength,
chunk_overlap: overlap,
},
},
mode: segmentationType,
} as ProcessRule
}, [rules, parentChildConfig, segmentIdentifier, maxChunkLength, overlap, segmentationType])
// Update parent config field
const updateParentConfig = useCallback((field: 'delimiter' | 'maxLength', value: string | number) => {
setParentChildConfig((prev) => {
let newValue: string | number
if (field === 'delimiter')
newValue = value ? escape(value as string) : ''
else
newValue = value
return {
...prev,
parent: { ...prev.parent, [field]: newValue },
}
})
}, [])
// Update child config field
const updateChildConfig = useCallback((field: 'delimiter' | 'maxLength', value: string | number) => {
setParentChildConfig((prev) => {
let newValue: string | number
if (field === 'delimiter')
newValue = value ? escape(value as string) : ''
else
newValue = value
return {
...prev,
child: { ...prev.child, [field]: newValue },
}
})
}, [])
// Set chunk for context mode
const setChunkForContext = useCallback((mode: ParentMode) => {
setParentChildConfig(prev => ({ ...prev, chunkForContext: mode }))
}, [])
return {
// General chunking state
segmentationType,
setSegmentationType,
segmentIdentifier,
setSegmentIdentifier,
maxChunkLength,
setMaxChunkLength,
limitMaxChunkLength,
setLimitMaxChunkLength,
overlap,
setOverlap,
// Rules
rules,
setRules,
defaultConfig,
setDefaultConfig,
toggleRule,
// Parent-child config
parentChildConfig,
setParentChildConfig,
updateParentConfig,
updateChildConfig,
setChunkForContext,
// Actions
resetToDefaults,
applyConfigFromRules,
getProcessRule,
}
}
export type SegmentationState = ReturnType<typeof useSegmentationState>
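To make the escape/unescape round trip concrete, a hedged example of the process rule produced with the defaults above; the commented shape mirrors the constants in this file rather than asserting the exact API payload:

// Hypothetical usage inside the step-two component.
const segmentation = useSegmentationState()
const processRule = segmentation.getProcessRule(ChunkingMode.text)
// With the defaults above, roughly:
// {
//   rules: {
//     pre_processing_rules: [],
//     segmentation: { separator: '\n\n' (real newlines after unescape), max_tokens: 1024, chunk_overlap: 50 },
//   },
//   mode: ProcessMode.general,
// }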

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,28 @@
import type { IndexingType } from './hooks'
import type { DataSourceProvider, NotionPage } from '@/models/common'
import type { CrawlOptions, CrawlResultItem, createDocumentResponse, CustomFile, DataSourceType, FullDocumentDetail } from '@/models/datasets'
import type { RETRIEVE_METHOD } from '@/types/app'
export type StepTwoProps = {
isSetting?: boolean
documentDetail?: FullDocumentDetail
isAPIKeySet: boolean
onSetting: () => void
datasetId?: string
indexingType?: IndexingType
retrievalMethod?: string
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
notionCredentialId: string
websitePages?: CrawlResultItem[]
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
websiteCrawlJobId?: string
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateRetrievalMethodCache?: (method: RETRIEVE_METHOD | '') => void
updateResultCache?: (res: createDocumentResponse) => void
onSave?: () => void
onCancel?: () => void
}

View File

@@ -0,0 +1,201 @@
'use client'
import type { FC } from 'react'
import type { Item } from '@/app/components/base/select'
import type { BuiltInMetadataItem, MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
import type { SortType } from '@/service/datasets'
import { PlusIcon } from '@heroicons/react/24/solid'
import { RiDraftLine, RiExternalLinkLine } from '@remixicon/react'
import { useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Chip from '@/app/components/base/chip'
import Input from '@/app/components/base/input'
import Sort from '@/app/components/base/sort'
import AutoDisabledDocument from '@/app/components/datasets/common/document-status-with-action/auto-disabled-document'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import StatusWithAction from '@/app/components/datasets/common/document-status-with-action/status-with-action'
import DatasetMetadataDrawer from '@/app/components/datasets/metadata/metadata-dataset/dataset-metadata-drawer'
import { useDocLink } from '@/context/i18n'
import { DataSourceType } from '@/models/datasets'
import { useIndexStatus } from '../status-item/hooks'
type DocumentsHeaderProps = {
// Dataset info
datasetId: string
dataSourceType?: DataSourceType
embeddingAvailable: boolean
isFreePlan: boolean
// Filter & sort
statusFilterValue: string
sortValue: SortType
inputValue: string
onStatusFilterChange: (value: string) => void
onStatusFilterClear: () => void
onSortChange: (value: string) => void
onInputChange: (value: string) => void
// Metadata modal
isShowEditMetadataModal: boolean
showEditMetadataModal: () => void
hideEditMetadataModal: () => void
datasetMetaData?: MetadataItemWithValueLength[]
builtInMetaData?: BuiltInMetadataItem[]
builtInEnabled: boolean
onAddMetaData: (payload: BuiltInMetadataItem) => Promise<void>
onRenameMetaData: (payload: MetadataItemWithValueLength) => Promise<void>
onDeleteMetaData: (metaDataId: string) => Promise<void>
onBuiltInEnabledChange: (enabled: boolean) => void
// Actions
onAddDocument: () => void
}
const DocumentsHeader: FC<DocumentsHeaderProps> = ({
datasetId,
dataSourceType,
embeddingAvailable,
isFreePlan,
statusFilterValue,
sortValue,
inputValue,
onStatusFilterChange,
onStatusFilterClear,
onSortChange,
onInputChange,
isShowEditMetadataModal,
showEditMetadataModal,
hideEditMetadataModal,
datasetMetaData,
builtInMetaData,
builtInEnabled,
onAddMetaData,
onRenameMetaData,
onDeleteMetaData,
onBuiltInEnabledChange,
onAddDocument,
}) => {
const { t } = useTranslation()
const docLink = useDocLink()
const DOC_INDEX_STATUS_MAP = useIndexStatus()
const isDataSourceNotion = dataSourceType === DataSourceType.NOTION
const isDataSourceWeb = dataSourceType === DataSourceType.WEB
const statusFilterItems: Item[] = useMemo(() => [
{ value: 'all', name: t('list.index.all', { ns: 'datasetDocuments' }) as string },
{ value: 'queuing', name: DOC_INDEX_STATUS_MAP.queuing.text },
{ value: 'indexing', name: DOC_INDEX_STATUS_MAP.indexing.text },
{ value: 'paused', name: DOC_INDEX_STATUS_MAP.paused.text },
{ value: 'error', name: DOC_INDEX_STATUS_MAP.error.text },
{ value: 'available', name: DOC_INDEX_STATUS_MAP.available.text },
{ value: 'enabled', name: DOC_INDEX_STATUS_MAP.enabled.text },
{ value: 'disabled', name: DOC_INDEX_STATUS_MAP.disabled.text },
{ value: 'archived', name: DOC_INDEX_STATUS_MAP.archived.text },
], [DOC_INDEX_STATUS_MAP, t])
const sortItems: Item[] = useMemo(() => [
{ value: 'created_at', name: t('list.sort.uploadTime', { ns: 'datasetDocuments' }) as string },
{ value: 'hit_count', name: t('list.sort.hitCount', { ns: 'datasetDocuments' }) as string },
], [t])
// Determine add button text based on data source type
const addButtonText = useMemo(() => {
if (isDataSourceNotion)
return t('list.addPages', { ns: 'datasetDocuments' })
if (isDataSourceWeb)
return t('list.addUrl', { ns: 'datasetDocuments' })
return t('list.addFile', { ns: 'datasetDocuments' })
}, [isDataSourceNotion, isDataSourceWeb, t])
return (
<>
{/* Title section */}
<div className="flex flex-col justify-center gap-1 px-6 pt-4">
<h1 className="text-base font-semibold text-text-primary">
{t('list.title', { ns: 'datasetDocuments' })}
</h1>
<div className="flex items-center space-x-0.5 text-sm font-normal text-text-tertiary">
<span>{t('list.desc', { ns: 'datasetDocuments' })}</span>
<a
className="flex items-center text-text-accent"
target="_blank"
rel="noopener noreferrer"
href={docLink('/guides/knowledge-base/integrate-knowledge-within-application')}
>
<span>{t('list.learnMore', { ns: 'datasetDocuments' })}</span>
<RiExternalLinkLine className="h-3 w-3" />
</a>
</div>
</div>
{/* Toolbar section */}
<div className="flex flex-wrap items-center justify-between px-6 pt-4">
{/* Left: Filters */}
<div className="flex items-center gap-2">
<Chip
className="w-[160px]"
showLeftIcon={false}
value={statusFilterValue}
items={statusFilterItems}
onSelect={item => onStatusFilterChange(item?.value ? String(item.value) : '')}
onClear={onStatusFilterClear}
/>
<Input
showLeftIcon
showClearIcon
wrapperClassName="!w-[200px]"
value={inputValue}
onChange={e => onInputChange(e.target.value)}
onClear={() => onInputChange('')}
/>
<div className="h-3.5 w-px bg-divider-regular"></div>
<Sort
order={sortValue.startsWith('-') ? '-' : ''}
value={sortValue.replace('-', '')}
items={sortItems}
onSelect={value => onSortChange(String(value))}
/>
</div>
{/* Right: Actions */}
<div className="flex !h-8 items-center justify-center gap-2">
{!isFreePlan && <AutoDisabledDocument datasetId={datasetId} />}
<IndexFailed datasetId={datasetId} />
{!embeddingAvailable && (
<StatusWithAction
type="warning"
description={t('embeddingModelNotAvailable', { ns: 'dataset' })}
/>
)}
{embeddingAvailable && (
<Button variant="secondary" className="shrink-0" onClick={showEditMetadataModal}>
<RiDraftLine className="mr-1 size-4" />
{t('metadata.metadata', { ns: 'dataset' })}
</Button>
)}
{isShowEditMetadataModal && (
<DatasetMetadataDrawer
userMetadata={datasetMetaData ?? []}
onClose={hideEditMetadataModal}
onAdd={onAddMetaData}
onRename={onRenameMetaData}
onRemove={onDeleteMetaData}
builtInMetadata={builtInMetaData ?? []}
isBuiltInEnabled={builtInEnabled}
onIsBuiltInEnabledChange={onBuiltInEnabledChange}
/>
)}
{embeddingAvailable && (
<Button variant="primary" onClick={onAddDocument} className="shrink-0">
<PlusIcon className="mr-2 h-4 w-4 stroke-current" />
{addButtonText}
</Button>
)}
</div>
</div>
</>
)
}
export default DocumentsHeader
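
A rough wiring sketch for the header, assuming the parent page drives it with the useDocumentsPageState hook that appears later in this diff; the import paths are assumptions and the metadata-related handlers are stubbed placeholders:

// Illustrative only: wiring DocumentsHeader to the page-state hook.
import DocumentsHeader from './components/documents-header' // assumed path
import { useDocumentsPageState } from './hooks/use-documents-page-state' // assumed path

const DocumentsPage = ({ datasetId }: { datasetId: string }) => {
  const pageState = useDocumentsPageState()
  return (
    <DocumentsHeader
      datasetId={datasetId}
      embeddingAvailable
      isFreePlan={false}
      statusFilterValue={pageState.statusFilterValue}
      sortValue={pageState.sortValue}
      inputValue={pageState.inputValue}
      onStatusFilterChange={pageState.handleStatusFilterChange}
      onStatusFilterClear={pageState.handleStatusFilterClear}
      onSortChange={pageState.handleSortChange}
      onInputChange={pageState.handleInputChange}
      isShowEditMetadataModal={false}
      showEditMetadataModal={() => {}}
      hideEditMetadataModal={() => {}}
      builtInEnabled={false}
      onAddMetaData={async () => {}}
      onRenameMetaData={async () => {}}
      onDeleteMetaData={async () => {}}
      onBuiltInEnabledChange={() => {}}
      onAddDocument={() => {}}
    />
  )
}

export default DocumentsPage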


@@ -0,0 +1,41 @@
'use client'
import type { FC } from 'react'
import { PlusIcon } from '@heroicons/react/24/solid'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import s from '../style.module.css'
import { FolderPlusIcon, NotionIcon, ThreeDotsIcon } from './icons'
type EmptyElementProps = {
canAdd: boolean
onClick: () => void
type?: 'upload' | 'sync'
}
const EmptyElement: FC<EmptyElementProps> = ({ canAdd = true, onClick, type = 'upload' }) => {
const { t } = useTranslation()
return (
<div className={s.emptyWrapper}>
<div className={s.emptyElement}>
<div className={s.emptySymbolIconWrapper}>
{type === 'upload' ? <FolderPlusIcon /> : <NotionIcon />}
</div>
<span className={s.emptyTitle}>
{t('list.empty.title', { ns: 'datasetDocuments' })}
<ThreeDotsIcon className="relative -left-1.5 -top-3 inline" />
</span>
<div className={s.emptyTip}>
{t(`list.empty.${type}.tip`, { ns: 'datasetDocuments' })}
</div>
{type === 'upload' && canAdd && (
<Button onClick={onClick} className={s.addFileBtn} variant="secondary-accent">
<PlusIcon className={s.plusIcon} />
{t('list.addFile', { ns: 'datasetDocuments' })}
</Button>
)}
</div>
</div>
)
}
export default EmptyElement
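
For reference, a hedged sketch of the typical call site: render the normal content when the dataset has documents and fall back to EmptyElement otherwise (the wrapper name, props, and import path other than EmptyElement's own props are illustrative):

// Illustrative only: falling back to EmptyElement when there are no documents.
import type { ReactNode } from 'react'
import EmptyElement from './components/empty-element' // assumed path

type DocumentsBodyProps = {
  total: number
  canAdd: boolean
  isDataSourceNotion: boolean
  onAddDocument: () => void
  children: ReactNode
}

const DocumentsBody = ({ total, canAdd, isDataSourceNotion, onAddDocument, children }: DocumentsBodyProps) => {
  if (total === 0) {
    return (
      <EmptyElement
        canAdd={canAdd}
        type={isDataSourceNotion ? 'sync' : 'upload'}
        onClick={onAddDocument}
      />
    )
  }
  return <>{children}</>
}

export default DocumentsBody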


@@ -0,0 +1,34 @@
import type * as React from 'react'
export const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<path d="M10.8332 5.83333L9.90355 3.9741C9.63601 3.439 9.50222 3.17144 9.30265 2.97597C9.12615 2.80311 8.91344 2.67164 8.6799 2.59109C8.41581 2.5 8.11668 2.5 7.51841 2.5H4.33317C3.39975 2.5 2.93304 2.5 2.57652 2.68166C2.26292 2.84144 2.00795 3.09641 1.84816 3.41002C1.6665 3.76654 1.6665 4.23325 1.6665 5.16667V5.83333M1.6665 5.83333H14.3332C15.7333 5.83333 16.4334 5.83333 16.9681 6.10582C17.4386 6.3455 17.821 6.72795 18.0607 7.19836C18.3332 7.73314 18.3332 8.4332 18.3332 9.83333V13.5C18.3332 14.9001 18.3332 15.6002 18.0607 16.135C17.821 16.6054 17.4386 16.9878 16.9681 17.2275C16.4334 17.5 15.7333 17.5 14.3332 17.5H5.6665C4.26637 17.5 3.56631 17.5 3.03153 17.2275C2.56112 16.9878 2.17867 16.6054 1.93899 16.135C1.6665 15.6002 1.6665 14.9001 1.6665 13.5V5.83333ZM9.99984 14.1667V9.16667M7.49984 11.6667H12.4998" stroke="#667085" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
</svg>
)
}
export const ThreeDotsIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<path d="M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103" stroke="#374151" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" />
</svg>
)
}
export const NotionIcon = ({ className }: React.SVGProps<SVGElement>) => {
return (
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<g clipPath="url(#clip0_2164_11263)">
<path fillRule="evenodd" clipRule="evenodd" d="M3.5725 18.2611L1.4229 15.5832C0.905706 14.9389 0.625 14.1466 0.625 13.3312V3.63437C0.625 2.4129 1.60224 1.39936 2.86295 1.31328L12.8326 0.632614C13.5569 0.583164 14.2768 0.775682 14.8717 1.17794L18.3745 3.5462C19.0015 3.97012 19.375 4.66312 19.375 5.40266V16.427C19.375 17.6223 18.4141 18.6121 17.1798 18.688L6.11458 19.3692C5.12958 19.4298 4.17749 19.0148 3.5725 18.2611Z" fill="white" />
<path d="M7.03006 8.48669V8.35974C7.03006 8.03794 7.28779 7.77104 7.61997 7.74886L10.0396 7.58733L13.3857 12.5147V8.19009L12.5244 8.07528V8.01498C12.5244 7.68939 12.788 7.42074 13.1244 7.4035L15.326 7.29073V7.60755C15.326 7.75628 15.2154 7.88349 15.0638 7.90913L14.534 7.99874V15.0023L13.8691 15.231C13.3136 15.422 12.6952 15.2175 12.3772 14.7377L9.12879 9.83574V14.5144L10.1287 14.7057L10.1147 14.7985C10.0711 15.089 9.82028 15.3087 9.51687 15.3222L7.03006 15.4329C6.99718 15.1205 7.23132 14.841 7.55431 14.807L7.88143 14.7727V8.53453L7.03006 8.48669Z" fill="black" />
<path fillRule="evenodd" clipRule="evenodd" d="M12.9218 1.85424L2.95217 2.53491C2.35499 2.57568 1.89209 3.05578 1.89209 3.63437V13.3312C1.89209 13.8748 2.07923 14.403 2.42402 14.8325L4.57362 17.5104C4.92117 17.9434 5.46812 18.1818 6.03397 18.147L17.0991 17.4658C17.6663 17.4309 18.1078 16.9762 18.1078 16.427V5.40266C18.1078 5.06287 17.9362 4.74447 17.6481 4.54969L14.1453 2.18143C13.7883 1.94008 13.3564 1.82457 12.9218 1.85424ZM3.44654 3.78562C3.30788 3.68296 3.37387 3.46909 3.54806 3.4566L12.9889 2.77944C13.2897 2.75787 13.5886 2.8407 13.8318 3.01305L15.7261 4.35508C15.798 4.40603 15.7642 4.51602 15.6752 4.52086L5.67742 5.0646C5.37485 5.08106 5.0762 4.99217 4.83563 4.81406L3.44654 3.78562ZM5.20848 6.76919C5.20848 6.4444 5.47088 6.1761 5.80642 6.15783L16.3769 5.58216C16.7039 5.56435 16.9792 5.81583 16.9792 6.13239V15.6783C16.9792 16.0025 16.7177 16.2705 16.3829 16.2896L5.8793 16.8872C5.51537 16.9079 5.20848 16.6283 5.20848 16.2759V6.76919Z" fill="black" />
</g>
<defs>
<clipPath id="clip0_2164_11263">
<rect width="20" height="20" fill="white" />
</clipPath>
</defs>
</svg>
)
}


@@ -16,13 +16,16 @@ import * as React from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Checkbox from '@/app/components/base/checkbox'
import FileTypeIcon from '@/app/components/base/file-uploader/file-type-icon'
import NotionIcon from '@/app/components/base/notion-icon'
import Pagination from '@/app/components/base/pagination'
import Toast from '@/app/components/base/toast'
import Tooltip from '@/app/components/base/tooltip'
import ChunkingModeLabel from '@/app/components/datasets/common/chunking-mode-label'
import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter'
import { extensionToFileType } from '@/app/components/datasets/hit-testing/utils/extension-to-file-type'
import EditMetadataBatchModal from '@/app/components/datasets/metadata/edit-metadata-batch/modal'
import useBatchEditDocumentMetadata from '@/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata'
import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
import useTimestamp from '@/hooks/use-timestamp'
import { ChunkingMode, DataSourceType, DocumentActionType } from '@/models/datasets'
@@ -31,14 +34,11 @@ import { useDocumentArchive, useDocumentBatchRetryIndex, useDocumentDelete, useD
import { asyncRunSafe } from '@/utils'
import { cn } from '@/utils/classnames'
import { formatNumber } from '@/utils/format'
import FileTypeIcon from '../../base/file-uploader/file-type-icon'
import ChunkingModeLabel from '../common/chunking-mode-label'
import useBatchEditDocumentMetadata from '../metadata/hooks/use-batch-edit-document-metadata'
import BatchAction from './detail/completed/common/batch-action'
import BatchAction from '../detail/completed/common/batch-action'
import StatusItem from '../status-item'
import s from '../style.module.css'
import Operations from './operations'
import RenameModal from './rename-modal'
import StatusItem from './status-item'
import s from './style.module.css'
export const renderTdValue = (value: string | number | null, isEmptyStyle = false) => {
return (


@@ -1,4 +1,4 @@
import type { OperationName } from './types'
import type { OperationName } from '../types'
import type { CommonResponse } from '@/models/common'
import {
RiArchive2Line,
@@ -17,6 +17,12 @@ import * as React from 'react'
import { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import Confirm from '@/app/components/base/confirm'
import Divider from '@/app/components/base/divider'
import CustomPopover from '@/app/components/base/popover'
import Switch from '@/app/components/base/switch'
import { ToastContext } from '@/app/components/base/toast'
import Tooltip from '@/app/components/base/tooltip'
import { DataSourceType, DocumentActionType } from '@/models/datasets'
import {
useDocumentArchive,
@@ -31,14 +37,8 @@ import {
} from '@/service/knowledge/use-document'
import { asyncRunSafe } from '@/utils'
import { cn } from '@/utils/classnames'
import Confirm from '../../base/confirm'
import Divider from '../../base/divider'
import CustomPopover from '../../base/popover'
import Switch from '../../base/switch'
import { ToastContext } from '../../base/toast'
import Tooltip from '../../base/tooltip'
import s from '../style.module.css'
import RenameModal from './rename-modal'
import s from './style.module.css'
type OperationsProps = {
embeddingAvailable: boolean


@@ -7,8 +7,8 @@ import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Modal from '@/app/components/base/modal'
import Toast from '@/app/components/base/toast'
import { renameDocumentName } from '@/service/datasets'
import Toast from '../../base/toast'
type Props = {
datasetId: string


@@ -18,7 +18,7 @@ import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '
import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
import { useInvalid } from '@/service/use-base'
import { cn } from '@/utils/classnames'
import Operations from '../operations'
import Operations from '../components/operations'
import StatusItem from '../status-item'
import BatchModal from './batch-modal'
import Completed from './completed'


@@ -0,0 +1,197 @@
import type { DocumentListResponse } from '@/models/datasets'
import type { SortType } from '@/service/datasets'
import { useDebounce, useDebounceFn } from 'ahooks'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { normalizeStatusForQuery, sanitizeStatusValue } from '../status-filter'
import useDocumentListQueryState from './use-document-list-query-state'
/**
* Custom hook to manage documents page state including:
* - Search state (input value, debounced search value)
* - Filter state (status filter, sort value)
* - Pagination state (current page, limit)
* - Selection state (selected document ids)
* - Polling state (timer control for auto-refresh)
*/
export function useDocumentsPageState() {
const { query, updateQuery } = useDocumentListQueryState()
// Search state
const [inputValue, setInputValue] = useState<string>('')
const [searchValue, setSearchValue] = useState<string>('')
const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })
// Filter & sort state
const [statusFilterValue, setStatusFilterValue] = useState<string>(() => sanitizeStatusValue(query.status))
const [sortValue, setSortValue] = useState<SortType>(query.sort)
const normalizedStatusFilterValue = useMemo(
() => normalizeStatusForQuery(statusFilterValue),
[statusFilterValue],
)
// Pagination state
const [currPage, setCurrPage] = useState<number>(query.page - 1)
const [limit, setLimit] = useState<number>(query.limit)
// Selection state
const [selectedIds, setSelectedIds] = useState<string[]>([])
// Polling state
const [timerCanRun, setTimerCanRun] = useState(true)
// Initialize search value from URL on mount
useEffect(() => {
if (query.keyword) {
setInputValue(query.keyword)
setSearchValue(query.keyword)
}
}, []) // Only run on mount
// Sync local state with URL query changes
useEffect(() => {
setCurrPage(query.page - 1)
setLimit(query.limit)
if (query.keyword !== searchValue) {
setInputValue(query.keyword)
setSearchValue(query.keyword)
}
setStatusFilterValue((prev) => {
const nextValue = sanitizeStatusValue(query.status)
return prev === nextValue ? prev : nextValue
})
setSortValue(query.sort)
}, [query])
// Update URL when search changes
useEffect(() => {
if (debouncedSearchValue !== query.keyword) {
setCurrPage(0)
updateQuery({ keyword: debouncedSearchValue, page: 1 })
}
}, [debouncedSearchValue, query.keyword, updateQuery])
// Clear selection when search changes
useEffect(() => {
if (searchValue !== query.keyword)
setSelectedIds([])
}, [searchValue, query.keyword])
// Clear selection when status filter changes
useEffect(() => {
setSelectedIds([])
}, [normalizedStatusFilterValue])
// Page change handler
const handlePageChange = useCallback((newPage: number) => {
setCurrPage(newPage)
updateQuery({ page: newPage + 1 })
}, [updateQuery])
// Limit change handler
const handleLimitChange = useCallback((newLimit: number) => {
setLimit(newLimit)
setCurrPage(0)
updateQuery({ limit: newLimit, page: 1 })
}, [updateQuery])
// Debounced search handler
const { run: handleSearch } = useDebounceFn(() => {
setSearchValue(inputValue)
}, { wait: 500 })
// Input change handler
const handleInputChange = useCallback((value: string) => {
setInputValue(value)
handleSearch()
}, [handleSearch])
// Status filter change handler
const handleStatusFilterChange = useCallback((value: string) => {
const selectedValue = sanitizeStatusValue(value)
setStatusFilterValue(selectedValue)
setCurrPage(0)
updateQuery({ status: selectedValue, page: 1 })
}, [updateQuery])
// Status filter clear handler
const handleStatusFilterClear = useCallback(() => {
if (statusFilterValue === 'all')
return
setStatusFilterValue('all')
setCurrPage(0)
updateQuery({ status: 'all', page: 1 })
}, [statusFilterValue, updateQuery])
// Sort change handler
const handleSortChange = useCallback((value: string) => {
const next = value as SortType
if (next === sortValue)
return
setSortValue(next)
setCurrPage(0)
updateQuery({ sort: next, page: 1 })
}, [sortValue, updateQuery])
// Update polling state based on documents response
const updatePollingState = useCallback((documentsRes: DocumentListResponse | undefined) => {
if (!documentsRes?.data)
return
let completedNum = 0
documentsRes.data.forEach((documentItem) => {
const { indexing_status } = documentItem
const isEmbedded = indexing_status === 'completed' || indexing_status === 'paused' || indexing_status === 'error'
if (isEmbedded)
completedNum++
})
const hasIncompleteDocuments = completedNum !== documentsRes.data.length
const transientStatuses = ['queuing', 'indexing', 'paused']
const shouldForcePolling = normalizedStatusFilterValue === 'all'
? false
: transientStatuses.includes(normalizedStatusFilterValue)
setTimerCanRun(shouldForcePolling || hasIncompleteDocuments)
}, [normalizedStatusFilterValue])
// Adjust page when total pages change
const adjustPageForTotal = useCallback((documentsRes: DocumentListResponse | undefined) => {
if (!documentsRes)
return
const totalPages = Math.ceil(documentsRes.total / limit)
if (currPage > 0 && currPage + 1 > totalPages)
handlePageChange(totalPages > 0 ? totalPages - 1 : 0)
}, [limit, currPage, handlePageChange])
return {
// Search state
inputValue,
searchValue,
debouncedSearchValue,
handleInputChange,
// Filter & sort state
statusFilterValue,
sortValue,
normalizedStatusFilterValue,
handleStatusFilterChange,
handleStatusFilterClear,
handleSortChange,
// Pagination state
currPage,
limit,
handlePageChange,
handleLimitChange,
// Selection state
selectedIds,
setSelectedIds,
// Polling state
timerCanRun,
updatePollingState,
adjustPageForTotal,
}
}
export default useDocumentsPageState
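
A short sketch of how the polling helpers above are intended to be fed, assuming the caller already holds the hook result and the fetched DocumentListResponse; the wrapper hook name and import path are illustrative:

// Illustrative only: pushing the fetched list back into the polling helpers.
import type { DocumentListResponse } from '@/models/datasets'
import { useEffect } from 'react'
import type { useDocumentsPageState } from './use-documents-page-state' // assumed path

type PageState = ReturnType<typeof useDocumentsPageState>

export const useDocumentsPolling = (
  pageState: PageState,
  documentsRes: DocumentListResponse | undefined,
) => {
  const { updatePollingState, adjustPageForTotal } = pageState

  useEffect(() => {
    // Re-evaluate whether auto-refresh should keep running and clamp the
    // current page if the total shrank (e.g. after deleting documents).
    updatePollingState(documentsRes)
    adjustPageForTotal(documentsRes)
  }, [documentsRes, updatePollingState, adjustPageForTotal])

  // pageState.timerCanRun can then gate the list query's refetch interval.
}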

Some files were not shown because too many files have changed in this diff