feat: sandbox retention basic settings (#29842)
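
In short: this commit introduces a `retention` Celery queue and wires it through the dev tooling (devcontainer aliases, the VS Code launch template, the API README, the worker entrypoints), adds a `SandboxExpiredRecordsCleanConfig` settings group with three `SANDBOX_EXPIRED_RECORDS_*` knobs surfaced in both env examples and docker-compose, and, in the same entrypoint change, adds a one-shot `job` mode for running Flask CLI commands.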

@@ -6,7 +6,7 @@ cd web && pnpm install
 pipx install uv
 
 echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc
-echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor\"" >> ~/.bashrc
+echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention\"" >> ~/.bashrc
 echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc
 echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc
 echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc
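
The only functional change above is the new `retention` queue appended to the `start-worker` alias's `-Q` list. A worker dedicated to retention work could subscribe to just that queue — a minimal sketch reusing the alias's own command pattern:

```bash
cd "$WORKSPACE_ROOT/api" && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q retention
```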

.vscode/launch.json.template (vendored)
@@ -37,7 +37,7 @@
                "-c",
                "1",
                "-Q",
-               "dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor",
+               "dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention",
                "--loglevel",
                "INFO"
            ],

@@ -690,3 +690,8 @@ ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
 ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
 # Maximum number of concurrent annotation import tasks per tenant
 ANNOTATION_IMPORT_MAX_CONCURRENT=5
+
+# Sandbox expired records clean configuration
+SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
+SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
+SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30
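
These three knobs govern the cleanup introduced by this commit: a 21-day grace period after subscription expiration before any cleaning starts, deletes batched at 1000 records, and a 30-day retention window for expired `workflow_run` and message records (the `Field` descriptions in the config class below spell this out). A hypothetical sketch of how the knobs interact — the cleanup task itself is not part of this hunk, and the function names here are illustrative:

```python
from datetime import datetime, timedelta, timezone

GRACEFUL_PERIOD_DAYS = 21  # SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD
RETENTION_DAYS = 30        # SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS


def cleaning_started(subscription_expired_at: datetime, now: datetime) -> bool:
    # Cleaning only begins once the graceful period has elapsed.
    return now >= subscription_expired_at + timedelta(days=GRACEFUL_PERIOD_DAYS)


def retention_cutoff(now: datetime) -> datetime:
    # Records older than this are eligible for deletion, processed in batches
    # of SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE (1000 by default).
    return now - timedelta(days=RETENTION_DAYS)


now = datetime.now(timezone.utc)
print(retention_cutoff(now))  # 30 days before now
```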

@@ -84,7 +84,7 @@
 1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service.
 
    ```bash
-   uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor
+   uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention
    ```
 
 Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service:
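
The worker command above now consumes `retention` alongside the existing queues. Periodic retention jobs still need the beat scheduler to enqueue them; mirroring the `beat` mode in the entrypoint later in this diff, it can be started in another terminal:

```bash
uv run celery -A app.celery beat --loglevel INFO
```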

@@ -1270,6 +1270,21 @@ class TenantIsolatedTaskQueueConfig(BaseSettings):
     )
 
 
+class SandboxExpiredRecordsCleanConfig(BaseSettings):
+    SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD: NonNegativeInt = Field(
+        description="Graceful period in days for sandbox records clean after subscription expiration",
+        default=21,
+    )
+    SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE: PositiveInt = Field(
+        description="Maximum number of records to process in each batch",
+        default=1000,
+    )
+    SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: PositiveInt = Field(
+        description="Retention days for sandbox expired workflow_run records and message records",
+        default=30,
+    )
+
+
 class FeatureConfig(
     # place the configs in alphabet order
     AppExecutionConfig,
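
Because these are pydantic `BaseSettings` fields, each value falls back to its default and can be overridden via an environment variable of the same name, with `NonNegativeInt`/`PositiveInt` rejecting out-of-range values. A self-contained sketch of that behavior, assuming `pydantic` and `pydantic-settings` v2 are installed:

```python
import os

from pydantic import Field, NonNegativeInt, PositiveInt
from pydantic_settings import BaseSettings


class SandboxExpiredRecordsCleanConfig(BaseSettings):
    SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD: NonNegativeInt = Field(default=21)
    SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE: PositiveInt = Field(default=1000)
    SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: PositiveInt = Field(default=30)


os.environ["SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS"] = "7"
config = SandboxExpiredRecordsCleanConfig()
print(config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS)   # 7 — env var overrides the default
print(config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE)  # 1000 — default applies

# A value like SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=0 would raise a
# ValidationError, since PositiveInt requires a value > 0.
```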

@@ -1295,6 +1310,7 @@ class FeatureConfig(
     PositionConfig,
     RagEtlConfig,
     RepositoryConfig,
+    SandboxExpiredRecordsCleanConfig,
     SecurityConfig,
     TenantIsolatedTaskQueueConfig,
     ToolConfig,
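
`FeatureConfig` aggregates its settings groups through multiple inheritance, so registering `SandboxExpiredRecordsCleanConfig` here (in alphabetical order, per the comment) is what exposes the new fields on the application-wide config object. A toy illustration of the pattern — these class names are invented for the example, not Dify's:

```python
from pydantic_settings import BaseSettings


class StorageConfig(BaseSettings):
    STORAGE_TYPE: str = "local"


class RetentionConfig(BaseSettings):
    RETENTION_DAYS: int = 30


class AppConfig(StorageConfig, RetentionConfig):
    # Fields from every mixin are merged into the combined settings model.
    pass


cfg = AppConfig()
print(cfg.STORAGE_TYPE, cfg.RETENTION_DAYS)  # local 30
```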

@@ -34,10 +34,10 @@ if [[ "${MODE}" == "worker" ]]; then
   if [[ -z "${CELERY_QUEUES}" ]]; then
     if [[ "${EDITION}" == "CLOUD" ]]; then
       # Cloud edition: separate queues for dataset and trigger tasks
-      DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
+      DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
     else
       # Community edition (SELF_HOSTED): dataset, pipeline and workflow have separate queues
-      DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
+      DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
     fi
   else
     DEFAULT_QUEUES="${CELERY_QUEUES}"
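
Since `CELERY_QUEUES` short-circuits the edition defaults, a deployment can still pin a worker to an explicit queue list; for example (image name and tag are placeholders, not something this diff defines):

```bash
docker run -e MODE=worker -e CELERY_QUEUES=retention,mail dify-api:latest
```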

@@ -69,6 +69,53 @@ if [[ "${MODE}" == "worker" ]]; then
 
 elif [[ "${MODE}" == "beat" ]]; then
   exec celery -A app.celery beat --loglevel ${LOG_LEVEL:-INFO}
+
+elif [[ "${MODE}" == "job" ]]; then
+  # Job mode: Run a one-time Flask command and exit
+  # Pass Flask command and arguments via container args
+  # Example K8s usage:
+  #   args:
+  #     - create-tenant
+  #     - --email
+  #     - admin@example.com
+  #
+  # Example Docker usage:
+  #   docker run -e MODE=job dify-api:latest create-tenant --email admin@example.com
+
+  if [[ $# -eq 0 ]]; then
+    echo "Error: No command specified for job mode."
+    echo ""
+    echo "Usage examples:"
+    echo "  Kubernetes:"
+    echo "    args: [create-tenant, --email, admin@example.com]"
+    echo ""
+    echo "  Docker:"
+    echo "    docker run -e MODE=job dify-api create-tenant --email admin@example.com"
+    echo ""
+    echo "Available commands:"
+    echo "  create-tenant, reset-password, reset-email, upgrade-db,"
+    echo "  vdb-migrate, install-plugins, and more..."
+    echo ""
+    echo "Run 'flask --help' to see all available commands."
+    exit 1
+  fi
+
+  echo "Running Flask job command: flask $*"
+
+  # Temporarily disable exit on error to capture exit code
+  set +e
+  flask "$@"
+  JOB_EXIT_CODE=$?
+  set -e
+
+  if [[ ${JOB_EXIT_CODE} -eq 0 ]]; then
+    echo "Job completed successfully."
+  else
+    echo "Job failed with exit code ${JOB_EXIT_CODE}."
+  fi
+
+  exit ${JOB_EXIT_CODE}
+
 else
   if [[ "${DEBUG}" == "true" ]]; then
     exec flask run --host=${DIFY_BIND_ADDRESS:-0.0.0.0} --port=${DIFY_PORT:-5001} --debug
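
The new `job` mode runs a single Flask CLI command and exits with that command's status code, which makes it suitable for Kubernetes `Job` resources and other one-shot containers. Reusing the invocation from the script's own usage text (`--rm` added here to discard the finished container):

```bash
docker run --rm -e MODE=job dify-api:latest create-tenant --email admin@example.com
echo $?  # mirrors the flask command's exit code, so orchestrators can detect failure
```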

@@ -37,6 +37,7 @@ show_help() {
   echo "  pipeline                      - Standard pipeline tasks"
   echo "  triggered_workflow_dispatcher - Trigger dispatcher tasks"
   echo "  trigger_refresh_executor      - Trigger refresh tasks"
+  echo "  retention                     - Retention tasks"
 }
 
 # Parse command line arguments

@@ -105,10 +106,10 @@ if [[ -z "${QUEUES}" ]]; then
   # Configure queues based on edition
   if [[ "${EDITION}" == "CLOUD" ]]; then
     # Cloud edition: separate queues for dataset and trigger tasks
-    QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
+    QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
   else
     # Community edition (SELF_HOSTED): dataset and workflow have separate queues
-    QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
+    QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
   fi
 
   echo "No queues specified, using edition-based defaults: ${QUEUES}"

@@ -1479,4 +1479,9 @@ ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
 ANNOTATION_IMPORT_MAX_CONCURRENT=5
 
 # The API key of amplitude
 AMPLITUDE_API_KEY=
+
+# Sandbox expired records clean configuration
+SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
+SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
+SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30

@@ -663,6 +663,9 @@ x-shared-env: &shared-api-worker-env
   ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: ${ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:-20}
   ANNOTATION_IMPORT_MAX_CONCURRENT: ${ANNOTATION_IMPORT_MAX_CONCURRENT:-5}
   AMPLITUDE_API_KEY: ${AMPLITUDE_API_KEY:-}
+  SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD: ${SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD:-21}
+  SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE: ${SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE:-1000}
+  SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: ${SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS:-30}
 
 services:
   # Init container to fix permissions
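
Each of the three variables reaches the containers through docker-compose's `${VAR:-default}` substitution, so they can be overridden from `docker/.env` without editing the compose file — a sketch, assuming the stock `docker/docker-compose.yaml` layout:

```bash
# Shorten the retention window to 14 days, then apply it to the stack.
cd docker
echo "SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=14" >> .env
docker compose up -d
```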