mirror of
https://github.com/langgenius/dify.git
synced 2026-02-25 18:55:08 +00:00
Merge remote-tracking branch 'origin/main' into feat/trigger
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import ssl
|
||||
from datetime import timedelta
|
||||
from typing import Any, Optional
|
||||
from typing import Any
|
||||
|
||||
import pytz
|
||||
from celery import Celery, Task
|
||||
@@ -10,7 +10,7 @@ from configs import dify_config
|
||||
from dify_app import DifyApp
|
||||
|
||||
|
||||
def _get_celery_ssl_options() -> Optional[dict[str, Any]]:
|
||||
def _get_celery_ssl_options() -> dict[str, Any] | None:
|
||||
"""Get SSL configuration for Celery broker/backend connections."""
|
||||
# Use REDIS_USE_SSL for consistency with the main Redis client
|
||||
# Only apply SSL if we're using Redis as broker/backend
|
||||
@@ -143,9 +143,7 @@ def init_app(app: DifyApp) -> Celery:
|
||||
imports.append("schedule.queue_monitor_task")
|
||||
beat_schedule["datasets-queue-monitor"] = {
|
||||
"task": "schedule.queue_monitor_task.queue_monitor_task",
|
||||
"schedule": timedelta(
|
||||
minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30
|
||||
),
|
||||
"schedule": timedelta(minutes=dify_config.QUEUE_MONITOR_INTERVAL or 30),
|
||||
}
|
||||
if dify_config.ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK and dify_config.MARKETPLACE_ENABLED:
|
||||
imports.append("schedule.check_upgradable_plugin_task")
|
||||
|
||||
@@ -13,14 +13,18 @@ def init_app(app: DifyApp):
|
||||
extract_unique_plugins,
|
||||
fix_app_site_missing,
|
||||
install_plugins,
|
||||
install_rag_pipeline_plugins,
|
||||
migrate_data_for_plugin,
|
||||
migrate_oss,
|
||||
old_metadata_migration,
|
||||
remove_orphaned_files_on_storage,
|
||||
reset_email,
|
||||
reset_encrypt_key_pair,
|
||||
reset_password,
|
||||
setup_datasource_oauth_client,
|
||||
setup_system_tool_oauth_client,
|
||||
setup_system_trigger_oauth_client,
|
||||
transform_datasource_credentials,
|
||||
upgrade_db,
|
||||
vdb_migrate,
|
||||
)
|
||||
@@ -46,6 +50,10 @@ def init_app(app: DifyApp):
|
||||
setup_system_tool_oauth_client,
|
||||
setup_system_trigger_oauth_client,
|
||||
cleanup_orphaned_draft_variables,
|
||||
migrate_oss,
|
||||
setup_datasource_oauth_client,
|
||||
transform_datasource_credentials,
|
||||
install_rag_pipeline_plugins,
|
||||
]
|
||||
for cmd in cmds_to_register:
|
||||
app.cli.add_command(cmd)
|
||||
|
||||
@@ -5,7 +5,7 @@ from sqlalchemy import event
|
||||
from sqlalchemy.pool import Pool
|
||||
|
||||
from dify_app import DifyApp
|
||||
from models import db
|
||||
from models.engine import db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
|
||||
_GEVENT_COMPATIBILITY_SETUP: bool = False
|
||||
|
||||
|
||||
def _safe_rollback(connection) -> None:
|
||||
def _safe_rollback(connection):
|
||||
"""Safely rollback database connection.
|
||||
|
||||
Args:
|
||||
@@ -25,7 +25,7 @@ def _safe_rollback(connection) -> None:
|
||||
logger.exception("Failed to rollback connection")
|
||||
|
||||
|
||||
def _setup_gevent_compatibility() -> None:
|
||||
def _setup_gevent_compatibility():
|
||||
global _GEVENT_COMPATIBILITY_SETUP # pylint: disable=global-statement
|
||||
|
||||
# Avoid duplicate registration
|
||||
@@ -33,7 +33,7 @@ def _setup_gevent_compatibility() -> None:
|
||||
return
|
||||
|
||||
@event.listens_for(Pool, "reset")
|
||||
def _safe_reset(dbapi_connection, connection_record, reset_state) -> None: # pylint: disable=unused-argument
|
||||
def _safe_reset(dbapi_connection, connection_record, reset_state): # pylint: disable=unused-argument
|
||||
if reset_state.terminate_only:
|
||||
return
|
||||
|
||||
|
||||
@@ -86,9 +86,7 @@ def load_user_from_request(request_from_flask_login):
|
||||
if not app_mcp_server:
|
||||
raise NotFound("App MCP server not found.")
|
||||
end_user = (
|
||||
db.session.query(EndUser)
|
||||
.where(EndUser.external_user_id == app_mcp_server.id, EndUser.type == "mcp")
|
||||
.first()
|
||||
db.session.query(EndUser).where(EndUser.session_id == app_mcp_server.id, EndUser.type == "mcp").first()
|
||||
)
|
||||
if not end_user:
|
||||
raise NotFound("End user not found.")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from flask import Flask
|
||||
|
||||
@@ -68,7 +67,7 @@ class Mail:
|
||||
case _:
|
||||
raise ValueError(f"Unsupported mail type {mail_type}")
|
||||
|
||||
def send(self, to: str, subject: str, html: str, from_: Optional[str] = None):
|
||||
def send(self, to: str, subject: str, html: str, from_: str | None = None):
|
||||
if not self._client:
|
||||
raise ValueError("Mail client is not initialized")
|
||||
|
||||
|
||||
@@ -3,6 +3,6 @@ from flask_orjson import OrjsonProvider
|
||||
from dify_app import DifyApp
|
||||
|
||||
|
||||
def init_app(app: DifyApp) -> None:
|
||||
def init_app(app: DifyApp):
|
||||
"""Initialize Flask-Orjson extension for faster JSON serialization"""
|
||||
app.json = OrjsonProvider(app)
|
||||
|
||||
@@ -103,7 +103,7 @@ def init_app(app: DifyApp):
|
||||
def shutdown_tracer():
|
||||
provider = trace.get_tracer_provider()
|
||||
if hasattr(provider, "force_flush"):
|
||||
provider.force_flush()
|
||||
provider.force_flush() # ty: ignore [call-non-callable]
|
||||
|
||||
class ExceptionLoggingHandler(logging.Handler):
|
||||
"""Custom logging handler that creates spans for logging.exception() calls"""
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
import ssl
|
||||
from collections.abc import Callable
|
||||
from datetime import timedelta
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Union
|
||||
|
||||
import redis
|
||||
from redis import RedisError
|
||||
@@ -246,7 +246,7 @@ def init_app(app: DifyApp):
|
||||
app.extensions["redis"] = redis_client
|
||||
|
||||
|
||||
def redis_fallback(default_return: Optional[Any] = None):
|
||||
def redis_fallback(default_return: Any | None = None):
|
||||
"""
|
||||
decorator to handle Redis operation exceptions and return a default value when Redis is unavailable.
|
||||
|
||||
@@ -260,7 +260,8 @@ def redis_fallback(default_return: Optional[Any] = None):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except RedisError as e:
|
||||
logger.warning("Redis operation failed in %s: %s", func.__name__, str(e), exc_info=True)
|
||||
func_name = getattr(func, "__name__", "Unknown")
|
||||
logger.warning("Redis operation failed in %s: %s", func_name, str(e), exc_info=True)
|
||||
return default_return
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -15,7 +15,7 @@ def init_app(app: DifyApp):
|
||||
|
||||
def before_send(event, hint):
|
||||
if "exc_info" in hint:
|
||||
exc_type, exc_value, tb = hint["exc_info"]
|
||||
_, exc_value, _ = hint["exc_info"]
|
||||
if parse_error.defaultErrorResponse in str(exc_value):
|
||||
return None
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from collections.abc import Generator
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
|
||||
from azure.identity import ChainedTokenCredential, DefaultAzureCredential
|
||||
from azure.storage.blob import AccountSasPermissions, BlobServiceClient, ResourceTypes, generate_account_sas
|
||||
@@ -21,7 +20,7 @@ class AzureBlobStorage(BaseStorage):
|
||||
self.account_name = dify_config.AZURE_BLOB_ACCOUNT_NAME
|
||||
self.account_key = dify_config.AZURE_BLOB_ACCOUNT_KEY
|
||||
|
||||
self.credential: Optional[ChainedTokenCredential] = None
|
||||
self.credential: ChainedTokenCredential | None = None
|
||||
if self.account_key == "managedidentity":
|
||||
self.credential = DefaultAzureCredential()
|
||||
else:
|
||||
|
||||
@@ -10,7 +10,6 @@ import tempfile
|
||||
from collections.abc import Generator
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import clickzetta # type: ignore[import]
|
||||
from pydantic import BaseModel, model_validator
|
||||
@@ -33,14 +32,14 @@ class ClickZettaVolumeConfig(BaseModel):
|
||||
vcluster: str = "default_ap"
|
||||
schema_name: str = "dify"
|
||||
volume_type: str = "table" # table|user|external
|
||||
volume_name: Optional[str] = None # For external volumes
|
||||
volume_name: str | None = None # For external volumes
|
||||
table_prefix: str = "dataset_" # Prefix for table volume names
|
||||
dify_prefix: str = "dify_km" # Directory prefix for User Volume
|
||||
permission_check: bool = True # Enable/disable permission checking
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validate_config(cls, values: dict) -> dict:
|
||||
def validate_config(cls, values: dict):
|
||||
"""Validate the configuration values.
|
||||
|
||||
This method will first try to use CLICKZETTA_VOLUME_* environment variables,
|
||||
@@ -87,7 +86,7 @@ class ClickZettaVolumeConfig(BaseModel):
|
||||
values.setdefault("volume_name", os.getenv("CLICKZETTA_VOLUME_NAME"))
|
||||
values.setdefault("table_prefix", os.getenv("CLICKZETTA_VOLUME_TABLE_PREFIX", "dataset_"))
|
||||
values.setdefault("dify_prefix", os.getenv("CLICKZETTA_VOLUME_DIFY_PREFIX", "dify_km"))
|
||||
# 暂时禁用权限检查功能,直接设置为false
|
||||
# Temporarily disable permission check feature, set directly to false
|
||||
values.setdefault("permission_check", False)
|
||||
|
||||
# Validate required fields
|
||||
@@ -139,7 +138,7 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
schema=self._config.schema_name,
|
||||
)
|
||||
logger.debug("ClickZetta connection established")
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to connect to ClickZetta")
|
||||
raise
|
||||
|
||||
@@ -150,11 +149,11 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
self._connection, self._config.volume_type, self._config.volume_name
|
||||
)
|
||||
logger.debug("Permission manager initialized")
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to initialize permission manager")
|
||||
raise
|
||||
|
||||
def _get_volume_path(self, filename: str, dataset_id: Optional[str] = None) -> str:
|
||||
def _get_volume_path(self, filename: str, dataset_id: str | None = None) -> str:
|
||||
"""Get the appropriate volume path based on volume type."""
|
||||
if self._config.volume_type == "user":
|
||||
# Add dify prefix for User Volume to organize files
|
||||
@@ -179,7 +178,7 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
else:
|
||||
raise ValueError(f"Unsupported volume type: {self._config.volume_type}")
|
||||
|
||||
def _get_volume_sql_prefix(self, dataset_id: Optional[str] = None) -> str:
|
||||
def _get_volume_sql_prefix(self, dataset_id: str | None = None) -> str:
|
||||
"""Get SQL prefix for volume operations."""
|
||||
if self._config.volume_type == "user":
|
||||
return "USER VOLUME"
|
||||
@@ -213,11 +212,11 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
if fetch:
|
||||
return cursor.fetchall()
|
||||
return None
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("SQL execution failed: %s", sql)
|
||||
raise
|
||||
|
||||
def _ensure_table_volume_exists(self, dataset_id: str) -> None:
|
||||
def _ensure_table_volume_exists(self, dataset_id: str):
|
||||
"""Ensure table volume exists for the given dataset_id."""
|
||||
if self._config.volume_type != "table" or not dataset_id:
|
||||
return
|
||||
@@ -252,7 +251,7 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
# Don't raise exception, let the operation continue
|
||||
# The table might exist but not be visible due to permissions
|
||||
|
||||
def save(self, filename: str, data: bytes) -> None:
|
||||
def save(self, filename: str, data: bytes):
|
||||
"""Save data to ClickZetta Volume.
|
||||
|
||||
Args:
|
||||
@@ -349,7 +348,7 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
|
||||
# Find the downloaded file (may be in subdirectories)
|
||||
downloaded_file = None
|
||||
for root, dirs, files in os.walk(temp_dir):
|
||||
for root, _, files in os.walk(temp_dir):
|
||||
for file in files:
|
||||
if file == filename or file == os.path.basename(filename):
|
||||
downloaded_file = Path(root) / file
|
||||
@@ -524,6 +523,6 @@ class ClickZettaVolumeStorage(BaseStorage):
|
||||
logger.debug("Scanned %d items in path %s", len(result), path)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Error scanning path %s", path)
|
||||
return []
|
||||
|
||||
@@ -1,31 +1,33 @@
|
||||
"""ClickZetta Volume文件生命周期管理
|
||||
"""ClickZetta Volume file lifecycle management
|
||||
|
||||
该模块提供文件版本控制、自动清理、备份和恢复等生命周期管理功能。
|
||||
支持知识库文件的完整生命周期管理。
|
||||
This module provides file lifecycle management features including version control,
|
||||
automatic cleanup, backup and restore.
|
||||
Supports complete lifecycle management for knowledge base files.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
from enum import StrEnum, auto
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileStatus(Enum):
|
||||
"""文件状态枚举"""
|
||||
class FileStatus(StrEnum):
|
||||
"""File status enumeration"""
|
||||
|
||||
ACTIVE = "active" # 活跃状态
|
||||
ARCHIVED = "archived" # 已归档
|
||||
DELETED = "deleted" # 已删除(软删除)
|
||||
BACKUP = "backup" # 备份文件
|
||||
ACTIVE = auto() # Active status
|
||||
ARCHIVED = auto() # Archived
|
||||
DELETED = auto() # Deleted (soft delete)
|
||||
BACKUP = auto() # Backup file
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileMetadata:
|
||||
"""文件元数据"""
|
||||
"""File metadata"""
|
||||
|
||||
filename: str
|
||||
size: int | None
|
||||
@@ -33,12 +35,12 @@ class FileMetadata:
|
||||
modified_at: datetime
|
||||
version: int | None
|
||||
status: FileStatus
|
||||
checksum: Optional[str] = None
|
||||
tags: Optional[dict[str, str]] = None
|
||||
parent_version: Optional[int] = None
|
||||
checksum: str | None = None
|
||||
tags: dict[str, str] | None = None
|
||||
parent_version: int | None = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""转换为字典格式"""
|
||||
def to_dict(self):
|
||||
"""Convert to dictionary format"""
|
||||
data = asdict(self)
|
||||
data["created_at"] = self.created_at.isoformat()
|
||||
data["modified_at"] = self.modified_at.isoformat()
|
||||
@@ -47,7 +49,7 @@ class FileMetadata:
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "FileMetadata":
|
||||
"""从字典创建实例"""
|
||||
"""Create instance from dictionary"""
|
||||
data = data.copy()
|
||||
data["created_at"] = datetime.fromisoformat(data["created_at"])
|
||||
data["modified_at"] = datetime.fromisoformat(data["modified_at"])
|
||||
@@ -56,14 +58,14 @@ class FileMetadata:
|
||||
|
||||
|
||||
class FileLifecycleManager:
|
||||
"""文件生命周期管理器"""
|
||||
"""File lifecycle manager"""
|
||||
|
||||
def __init__(self, storage, dataset_id: Optional[str] = None):
|
||||
"""初始化生命周期管理器
|
||||
def __init__(self, storage, dataset_id: str | None = None):
|
||||
"""Initialize lifecycle manager
|
||||
|
||||
Args:
|
||||
storage: ClickZetta Volume存储实例
|
||||
dataset_id: 数据集ID(用于Table Volume)
|
||||
storage: ClickZetta Volume storage instance
|
||||
dataset_id: Dataset ID (for Table Volume)
|
||||
"""
|
||||
self._storage = storage
|
||||
self._dataset_id = dataset_id
|
||||
@@ -72,21 +74,21 @@ class FileLifecycleManager:
|
||||
self._backup_prefix = ".backups/"
|
||||
self._deleted_prefix = ".deleted/"
|
||||
|
||||
# 获取权限管理器(如果存在)
|
||||
self._permission_manager: Optional[Any] = getattr(storage, "_permission_manager", None)
|
||||
# Get permission manager (if exists)
|
||||
self._permission_manager: Any | None = getattr(storage, "_permission_manager", None)
|
||||
|
||||
def save_with_lifecycle(self, filename: str, data: bytes, tags: Optional[dict[str, str]] = None) -> FileMetadata:
|
||||
"""保存文件并管理生命周期
|
||||
def save_with_lifecycle(self, filename: str, data: bytes, tags: dict[str, str] | None = None) -> FileMetadata:
|
||||
"""Save file and manage lifecycle
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
data: 文件内容
|
||||
tags: 文件标签
|
||||
filename: File name
|
||||
data: File content
|
||||
tags: File tags
|
||||
|
||||
Returns:
|
||||
文件元数据
|
||||
File metadata
|
||||
"""
|
||||
# 权限检查
|
||||
# Permission check
|
||||
if not self._check_permission(filename, "save"):
|
||||
from .volume_permissions import VolumePermissionError
|
||||
|
||||
@@ -98,28 +100,28 @@ class FileLifecycleManager:
|
||||
)
|
||||
|
||||
try:
|
||||
# 1. 检查是否存在旧版本
|
||||
# 1. Check if old version exists
|
||||
metadata_dict = self._load_metadata()
|
||||
current_metadata = metadata_dict.get(filename)
|
||||
|
||||
# 2. 如果存在旧版本,创建版本备份
|
||||
# 2. If old version exists, create version backup
|
||||
if current_metadata:
|
||||
self._create_version_backup(filename, current_metadata)
|
||||
|
||||
# 3. 计算文件信息
|
||||
# 3. Calculate file information
|
||||
now = datetime.now()
|
||||
checksum = self._calculate_checksum(data)
|
||||
new_version = (current_metadata["version"] + 1) if current_metadata else 1
|
||||
|
||||
# 4. 保存新文件
|
||||
# 4. Save new file
|
||||
self._storage.save(filename, data)
|
||||
|
||||
# 5. 创建元数据
|
||||
# 5. Create metadata
|
||||
created_at = now
|
||||
parent_version = None
|
||||
|
||||
if current_metadata:
|
||||
# 如果created_at是字符串,转换为datetime
|
||||
# If created_at is string, convert to datetime
|
||||
if isinstance(current_metadata["created_at"], str):
|
||||
created_at = datetime.fromisoformat(current_metadata["created_at"])
|
||||
else:
|
||||
@@ -138,125 +140,125 @@ class FileLifecycleManager:
|
||||
parent_version=parent_version,
|
||||
)
|
||||
|
||||
# 6. 更新元数据
|
||||
# 6. Update metadata
|
||||
metadata_dict[filename] = file_metadata.to_dict()
|
||||
self._save_metadata(metadata_dict)
|
||||
|
||||
logger.info("File %s saved with lifecycle management, version %s", filename, new_version)
|
||||
return file_metadata
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to save file with lifecycle")
|
||||
raise
|
||||
|
||||
def get_file_metadata(self, filename: str) -> Optional[FileMetadata]:
|
||||
"""获取文件元数据
|
||||
def get_file_metadata(self, filename: str) -> FileMetadata | None:
|
||||
"""Get file metadata
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
filename: File name
|
||||
|
||||
Returns:
|
||||
文件元数据,如果不存在返回None
|
||||
File metadata, returns None if not exists
|
||||
"""
|
||||
try:
|
||||
metadata_dict = self._load_metadata()
|
||||
if filename in metadata_dict:
|
||||
return FileMetadata.from_dict(metadata_dict[filename])
|
||||
return None
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to get file metadata for %s", filename)
|
||||
return None
|
||||
|
||||
def list_file_versions(self, filename: str) -> list[FileMetadata]:
|
||||
"""列出文件的所有版本
|
||||
"""List all versions of a file
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
filename: File name
|
||||
|
||||
Returns:
|
||||
文件版本列表,按版本号排序
|
||||
File version list, sorted by version number
|
||||
"""
|
||||
try:
|
||||
versions = []
|
||||
|
||||
# 获取当前版本
|
||||
# Get current version
|
||||
current_metadata = self.get_file_metadata(filename)
|
||||
if current_metadata:
|
||||
versions.append(current_metadata)
|
||||
|
||||
# 获取历史版本
|
||||
# Get historical versions
|
||||
try:
|
||||
version_files = self._storage.scan(self._dataset_id or "", files=True)
|
||||
for file_path in version_files:
|
||||
if file_path.startswith(f"{self._version_prefix}{filename}.v"):
|
||||
# 解析版本号
|
||||
# Parse version number
|
||||
version_str = file_path.split(".v")[-1].split(".")[0]
|
||||
try:
|
||||
version_num = int(version_str)
|
||||
# 这里简化处理,实际应该从版本文件中读取元数据
|
||||
# 暂时创建基本的元数据信息
|
||||
_ = int(version_str)
|
||||
# Simplified processing here, should actually read metadata from version file
|
||||
# Temporarily create basic metadata information
|
||||
except ValueError:
|
||||
continue
|
||||
except:
|
||||
# 如果无法扫描版本文件,只返回当前版本
|
||||
# If cannot scan version files, only return current version
|
||||
pass
|
||||
|
||||
return sorted(versions, key=lambda x: x.version or 0, reverse=True)
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to list file versions for %s", filename)
|
||||
return []
|
||||
|
||||
def restore_version(self, filename: str, version: int) -> bool:
|
||||
"""恢复文件到指定版本
|
||||
"""Restore file to specified version
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
version: 要恢复的版本号
|
||||
filename: File name
|
||||
version: Version number to restore
|
||||
|
||||
Returns:
|
||||
恢复是否成功
|
||||
Whether restore succeeded
|
||||
"""
|
||||
try:
|
||||
version_filename = f"{self._version_prefix}{filename}.v{version}"
|
||||
|
||||
# 检查版本文件是否存在
|
||||
# Check if version file exists
|
||||
if not self._storage.exists(version_filename):
|
||||
logger.warning("Version %s of %s not found", version, filename)
|
||||
return False
|
||||
|
||||
# 读取版本文件内容
|
||||
# Read version file content
|
||||
version_data = self._storage.load_once(version_filename)
|
||||
|
||||
# 保存当前版本为备份
|
||||
# Save current version as backup
|
||||
current_metadata = self.get_file_metadata(filename)
|
||||
if current_metadata:
|
||||
self._create_version_backup(filename, current_metadata.to_dict())
|
||||
|
||||
# 恢复文件
|
||||
# Restore file
|
||||
self.save_with_lifecycle(filename, version_data, {"restored_from": str(version)})
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to restore %s to version %s", filename, version)
|
||||
return False
|
||||
|
||||
def archive_file(self, filename: str) -> bool:
|
||||
"""归档文件
|
||||
"""Archive file
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
filename: File name
|
||||
|
||||
Returns:
|
||||
归档是否成功
|
||||
Whether archive succeeded
|
||||
"""
|
||||
# 权限检查
|
||||
# Permission check
|
||||
if not self._check_permission(filename, "archive"):
|
||||
logger.warning("Permission denied for archive operation on file: %s", filename)
|
||||
return False
|
||||
|
||||
try:
|
||||
# 更新文件状态为归档
|
||||
# Update file status to archived
|
||||
metadata_dict = self._load_metadata()
|
||||
if filename not in metadata_dict:
|
||||
logger.warning("File %s not found in metadata", filename)
|
||||
@@ -270,41 +272,41 @@ class FileLifecycleManager:
|
||||
logger.info("File %s archived successfully", filename)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to archive file %s", filename)
|
||||
return False
|
||||
|
||||
def soft_delete_file(self, filename: str) -> bool:
|
||||
"""软删除文件(移动到删除目录)
|
||||
"""Soft delete file (move to deleted directory)
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
filename: File name
|
||||
|
||||
Returns:
|
||||
删除是否成功
|
||||
Whether delete succeeded
|
||||
"""
|
||||
# 权限检查
|
||||
# Permission check
|
||||
if not self._check_permission(filename, "delete"):
|
||||
logger.warning("Permission denied for soft delete operation on file: %s", filename)
|
||||
return False
|
||||
|
||||
try:
|
||||
# 检查文件是否存在
|
||||
# Check if file exists
|
||||
if not self._storage.exists(filename):
|
||||
logger.warning("File %s not found", filename)
|
||||
return False
|
||||
|
||||
# 读取文件内容
|
||||
# Read file content
|
||||
file_data = self._storage.load_once(filename)
|
||||
|
||||
# 移动到删除目录
|
||||
# Move to deleted directory
|
||||
deleted_filename = f"{self._deleted_prefix}{filename}.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
self._storage.save(deleted_filename, file_data)
|
||||
|
||||
# 删除原文件
|
||||
# Delete original file
|
||||
self._storage.delete(filename)
|
||||
|
||||
# 更新元数据
|
||||
# Update metadata
|
||||
metadata_dict = self._load_metadata()
|
||||
if filename in metadata_dict:
|
||||
metadata_dict[filename]["status"] = FileStatus.DELETED.value
|
||||
@@ -314,32 +316,32 @@ class FileLifecycleManager:
|
||||
logger.info("File %s soft deleted successfully", filename)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to soft delete file %s", filename)
|
||||
return False
|
||||
|
||||
def cleanup_old_versions(self, max_versions: int = 5, max_age_days: int = 30) -> int:
|
||||
"""清理旧版本文件
|
||||
"""Cleanup old version files
|
||||
|
||||
Args:
|
||||
max_versions: 保留的最大版本数
|
||||
max_age_days: 版本文件的最大保留天数
|
||||
max_versions: Maximum number of versions to keep
|
||||
max_age_days: Maximum retention days for version files
|
||||
|
||||
Returns:
|
||||
清理的文件数量
|
||||
Number of files cleaned
|
||||
"""
|
||||
try:
|
||||
cleaned_count = 0
|
||||
|
||||
# 获取所有版本文件
|
||||
# Get all version files
|
||||
try:
|
||||
all_files = self._storage.scan(self._dataset_id or "", files=True)
|
||||
version_files = [f for f in all_files if f.startswith(self._version_prefix)]
|
||||
|
||||
# 按文件分组
|
||||
# Group by file
|
||||
file_versions: dict[str, list[tuple[int, str]]] = {}
|
||||
for version_file in version_files:
|
||||
# 解析文件名和版本
|
||||
# Parse filename and version
|
||||
parts = version_file[len(self._version_prefix) :].split(".v")
|
||||
if len(parts) >= 2:
|
||||
base_filename = parts[0]
|
||||
@@ -352,12 +354,12 @@ class FileLifecycleManager:
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# 清理每个文件的旧版本
|
||||
# Cleanup old versions for each file
|
||||
for base_filename, versions in file_versions.items():
|
||||
# 按版本号排序
|
||||
versions.sort(key=lambda x: x[0], reverse=True)
|
||||
# Sort by version number
|
||||
versions.sort(key=operator.itemgetter(0), reverse=True)
|
||||
|
||||
# 保留最新的max_versions个版本,删除其余的
|
||||
# Keep the newest max_versions versions, delete the rest
|
||||
if len(versions) > max_versions:
|
||||
to_delete = versions[max_versions:]
|
||||
for version_num, version_file in to_delete:
|
||||
@@ -372,15 +374,15 @@ class FileLifecycleManager:
|
||||
|
||||
return cleaned_count
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to cleanup old versions")
|
||||
return 0
|
||||
|
||||
def get_storage_statistics(self) -> dict[str, Any]:
|
||||
"""获取存储统计信息
|
||||
"""Get storage statistics
|
||||
|
||||
Returns:
|
||||
存储统计字典
|
||||
Storage statistics dictionary
|
||||
"""
|
||||
try:
|
||||
metadata_dict = self._load_metadata()
|
||||
@@ -402,7 +404,7 @@ class FileLifecycleManager:
|
||||
for filename, metadata in metadata_dict.items():
|
||||
file_meta = FileMetadata.from_dict(metadata)
|
||||
|
||||
# 统计文件状态
|
||||
# Count file status
|
||||
if file_meta.status == FileStatus.ACTIVE:
|
||||
stats["active_files"] = (stats["active_files"] or 0) + 1
|
||||
elif file_meta.status == FileStatus.ARCHIVED:
|
||||
@@ -410,13 +412,13 @@ class FileLifecycleManager:
|
||||
elif file_meta.status == FileStatus.DELETED:
|
||||
stats["deleted_files"] = (stats["deleted_files"] or 0) + 1
|
||||
|
||||
# 统计大小
|
||||
# Count size
|
||||
stats["total_size"] = (stats["total_size"] or 0) + (file_meta.size or 0)
|
||||
|
||||
# 统计版本
|
||||
# Count versions
|
||||
stats["versions_count"] = (stats["versions_count"] or 0) + (file_meta.version or 0)
|
||||
|
||||
# 找出最新和最旧的文件
|
||||
# Find newest and oldest files
|
||||
if oldest_date is None or file_meta.created_at < oldest_date:
|
||||
oldest_date = file_meta.created_at
|
||||
stats["oldest_file"] = filename
|
||||
@@ -427,17 +429,17 @@ class FileLifecycleManager:
|
||||
|
||||
return stats
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to get storage statistics")
|
||||
return {}
|
||||
|
||||
def _create_version_backup(self, filename: str, metadata: dict):
|
||||
"""创建版本备份"""
|
||||
"""Create version backup"""
|
||||
try:
|
||||
# 读取当前文件内容
|
||||
# Read current file content
|
||||
current_data = self._storage.load_once(filename)
|
||||
|
||||
# 保存为版本文件
|
||||
# Save as version file
|
||||
version_filename = f"{self._version_prefix}{filename}.v{metadata['version']}"
|
||||
self._storage.save(version_filename, current_data)
|
||||
|
||||
@@ -447,7 +449,7 @@ class FileLifecycleManager:
|
||||
logger.warning("Failed to create version backup for %s: %s", filename, e)
|
||||
|
||||
def _load_metadata(self) -> dict[str, Any]:
|
||||
"""加载元数据文件"""
|
||||
"""Load metadata file"""
|
||||
try:
|
||||
if self._storage.exists(self._metadata_file):
|
||||
metadata_content = self._storage.load_once(self._metadata_file)
|
||||
@@ -460,55 +462,55 @@ class FileLifecycleManager:
|
||||
return {}
|
||||
|
||||
def _save_metadata(self, metadata_dict: dict):
|
||||
"""保存元数据文件"""
|
||||
"""Save metadata file"""
|
||||
try:
|
||||
metadata_content = json.dumps(metadata_dict, indent=2, ensure_ascii=False)
|
||||
self._storage.save(self._metadata_file, metadata_content.encode("utf-8"))
|
||||
logger.debug("Metadata saved successfully")
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to save metadata")
|
||||
raise
|
||||
|
||||
def _calculate_checksum(self, data: bytes) -> str:
|
||||
"""计算文件校验和"""
|
||||
"""Calculate file checksum"""
|
||||
import hashlib
|
||||
|
||||
return hashlib.md5(data).hexdigest()
|
||||
|
||||
def _check_permission(self, filename: str, operation: str) -> bool:
|
||||
"""检查文件操作权限
|
||||
"""Check file operation permission
|
||||
|
||||
Args:
|
||||
filename: 文件名
|
||||
operation: 操作类型
|
||||
filename: File name
|
||||
operation: Operation type
|
||||
|
||||
Returns:
|
||||
True if permission granted, False otherwise
|
||||
"""
|
||||
# 如果没有权限管理器,默认允许
|
||||
# If no permission manager, allow by default
|
||||
if not self._permission_manager:
|
||||
return True
|
||||
|
||||
try:
|
||||
# 根据操作类型映射到权限
|
||||
# Map operation type to permission
|
||||
operation_mapping = {
|
||||
"save": "save",
|
||||
"load": "load_once",
|
||||
"delete": "delete",
|
||||
"archive": "delete", # 归档需要删除权限
|
||||
"restore": "save", # 恢复需要写权限
|
||||
"cleanup": "delete", # 清理需要删除权限
|
||||
"archive": "delete", # Archive requires delete permission
|
||||
"restore": "save", # Restore requires write permission
|
||||
"cleanup": "delete", # Cleanup requires delete permission
|
||||
"read": "load_once",
|
||||
"write": "save",
|
||||
}
|
||||
|
||||
mapped_operation = operation_mapping.get(operation, operation)
|
||||
|
||||
# 检查权限
|
||||
# Check permission
|
||||
result = self._permission_manager.validate_operation(mapped_operation, self._dataset_id)
|
||||
return bool(result)
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Permission check failed for %s operation %s", filename, operation)
|
||||
# 安全默认:权限检查失败时拒绝访问
|
||||
# Safe default: deny access when permission check fails
|
||||
return False
|
||||
|
||||
@@ -1,40 +1,39 @@
|
||||
"""ClickZetta Volume权限管理机制
|
||||
"""ClickZetta Volume permission management mechanism
|
||||
|
||||
该模块提供Volume权限检查、验证和管理功能。
|
||||
根据ClickZetta的权限模型,不同Volume类型有不同的权限要求。
|
||||
This module provides Volume permission checking, validation and management features.
|
||||
According to ClickZetta's permission model, different Volume types have different permission requirements.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from enum import StrEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VolumePermission(Enum):
|
||||
"""Volume权限类型枚举"""
|
||||
class VolumePermission(StrEnum):
|
||||
"""Volume permission type enumeration"""
|
||||
|
||||
READ = "SELECT" # 对应ClickZetta的SELECT权限
|
||||
WRITE = "INSERT,UPDATE,DELETE" # 对应ClickZetta的写权限
|
||||
LIST = "SELECT" # 列出文件需要SELECT权限
|
||||
DELETE = "INSERT,UPDATE,DELETE" # 删除文件需要写权限
|
||||
USAGE = "USAGE" # External Volume需要的基本权限
|
||||
READ = "SELECT" # Corresponds to ClickZetta's SELECT permission
|
||||
WRITE = "INSERT,UPDATE,DELETE" # Corresponds to ClickZetta's write permissions
|
||||
LIST = "SELECT" # Listing files requires SELECT permission
|
||||
DELETE = "INSERT,UPDATE,DELETE" # Deleting files requires write permissions
|
||||
USAGE = "USAGE" # Basic permission required for External Volume
|
||||
|
||||
|
||||
class VolumePermissionManager:
|
||||
"""Volume权限管理器"""
|
||||
"""Volume permission manager"""
|
||||
|
||||
def __init__(self, connection_or_config, volume_type: str | None = None, volume_name: Optional[str] = None):
|
||||
"""初始化权限管理器
|
||||
def __init__(self, connection_or_config, volume_type: str | None = None, volume_name: str | None = None):
|
||||
"""Initialize permission manager
|
||||
|
||||
Args:
|
||||
connection_or_config: ClickZetta连接对象或配置字典
|
||||
volume_type: Volume类型 (user|table|external)
|
||||
volume_name: Volume名称 (用于external volume)
|
||||
connection_or_config: ClickZetta connection object or configuration dictionary
|
||||
volume_type: Volume type (user|table|external)
|
||||
volume_name: Volume name (for external volume)
|
||||
"""
|
||||
# 支持两种初始化方式:连接对象或配置字典
|
||||
# Support two initialization methods: connection object or configuration dictionary
|
||||
if isinstance(connection_or_config, dict):
|
||||
# 从配置字典创建连接
|
||||
# Create connection from configuration dictionary
|
||||
import clickzetta # type: ignore[import-untyped]
|
||||
|
||||
config = connection_or_config
|
||||
@@ -50,7 +49,7 @@ class VolumePermissionManager:
|
||||
self._volume_type = config.get("volume_type", volume_type)
|
||||
self._volume_name = config.get("volume_name", volume_name)
|
||||
else:
|
||||
# 直接使用连接对象
|
||||
# Use connection object directly
|
||||
self._connection = connection_or_config
|
||||
self._volume_type = volume_type
|
||||
self._volume_name = volume_name
|
||||
@@ -61,14 +60,14 @@ class VolumePermissionManager:
|
||||
raise ValueError("volume_type is required")
|
||||
|
||||
self._permission_cache: dict[str, set[str]] = {}
|
||||
self._current_username = None # 将从连接中获取当前用户名
|
||||
self._current_username = None # Will get current username from connection
|
||||
|
||||
def check_permission(self, operation: VolumePermission, dataset_id: Optional[str] = None) -> bool:
|
||||
"""检查用户是否有执行特定操作的权限
|
||||
def check_permission(self, operation: VolumePermission, dataset_id: str | None = None) -> bool:
|
||||
"""Check if user has permission to perform specific operation
|
||||
|
||||
Args:
|
||||
operation: 要执行的操作类型
|
||||
dataset_id: 数据集ID (用于table volume)
|
||||
operation: Type of operation to perform
|
||||
dataset_id: Dataset ID (for table volume)
|
||||
|
||||
Returns:
|
||||
True if user has permission, False otherwise
|
||||
@@ -84,25 +83,25 @@ class VolumePermissionManager:
|
||||
logger.warning("Unknown volume type: %s", self._volume_type)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Permission check failed")
|
||||
return False
|
||||
|
||||
def _check_user_volume_permission(self, operation: VolumePermission) -> bool:
|
||||
"""检查User Volume权限
|
||||
"""Check User Volume permission
|
||||
|
||||
User Volume权限规则:
|
||||
- 用户对自己的User Volume有全部权限
|
||||
- 只要用户能够连接到ClickZetta,就默认具有User Volume的基本权限
|
||||
- 更注重连接身份验证,而不是复杂的权限检查
|
||||
User Volume permission rules:
|
||||
- User has full permissions on their own User Volume
|
||||
- As long as user can connect to ClickZetta, they have basic User Volume permissions by default
|
||||
- Focus more on connection authentication rather than complex permission checking
|
||||
"""
|
||||
try:
|
||||
# 获取当前用户名
|
||||
# Get current username
|
||||
current_user = self._get_current_username()
|
||||
|
||||
# 检查基本连接状态
|
||||
# Check basic connection status
|
||||
with self._connection.cursor() as cursor:
|
||||
# 简单的连接测试,如果能执行查询说明用户有基本权限
|
||||
# Simple connection test, if query can be executed user has basic permissions
|
||||
cursor.execute("SELECT 1")
|
||||
result = cursor.fetchone()
|
||||
|
||||
@@ -119,19 +118,20 @@ class VolumePermissionManager:
|
||||
)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("User Volume permission check failed")
|
||||
# 对于User Volume,如果权限检查失败,可能是配置问题,给出更友好的错误提示
|
||||
# For User Volume, if permission check fails, it might be a configuration issue,
|
||||
# provide friendlier error message
|
||||
logger.info("User Volume permission check failed, but permission checking is disabled in this version")
|
||||
return False
|
||||
|
||||
def _check_table_volume_permission(self, operation: VolumePermission, dataset_id: Optional[str]) -> bool:
|
||||
"""检查Table Volume权限
|
||||
def _check_table_volume_permission(self, operation: VolumePermission, dataset_id: str | None) -> bool:
|
||||
"""Check Table Volume permission
|
||||
|
||||
Table Volume权限规则:
|
||||
- Table Volume权限继承对应表的权限
|
||||
- SELECT权限 -> 可以READ/LIST文件
|
||||
- INSERT,UPDATE,DELETE权限 -> 可以WRITE/DELETE文件
|
||||
Table Volume permission rules:
|
||||
- Table Volume permissions inherit from corresponding table permissions
|
||||
- SELECT permission -> can READ/LIST files
|
||||
- INSERT,UPDATE,DELETE permissions -> can WRITE/DELETE files
|
||||
"""
|
||||
if not dataset_id:
|
||||
logger.warning("dataset_id is required for table volume permission check")
|
||||
@@ -140,11 +140,11 @@ class VolumePermissionManager:
|
||||
table_name = f"dataset_{dataset_id}" if not dataset_id.startswith("dataset_") else dataset_id
|
||||
|
||||
try:
|
||||
# 检查表权限
|
||||
# Check table permissions
|
||||
permissions = self._get_table_permissions(table_name)
|
||||
required_permissions = set(operation.value.split(","))
|
||||
|
||||
# 检查是否有所需的所有权限
|
||||
# Check if has all required permissions
|
||||
has_permission = required_permissions.issubset(permissions)
|
||||
|
||||
logger.debug(
|
||||
@@ -158,27 +158,27 @@ class VolumePermissionManager:
|
||||
|
||||
return has_permission
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Table volume permission check failed for %s", table_name)
|
||||
return False
|
||||
|
||||
def _check_external_volume_permission(self, operation: VolumePermission) -> bool:
|
||||
"""检查External Volume权限
|
||||
"""Check External Volume permission
|
||||
|
||||
External Volume权限规则:
|
||||
- 尝试获取对External Volume的权限
|
||||
- 如果权限检查失败,进行备选验证
|
||||
- 对于开发环境,提供更宽松的权限检查
|
||||
External Volume permission rules:
|
||||
- Try to get permissions for External Volume
|
||||
- If permission check fails, perform fallback verification
|
||||
- For development environment, provide more lenient permission checking
|
||||
"""
|
||||
if not self._volume_name:
|
||||
logger.warning("volume_name is required for external volume permission check")
|
||||
return False
|
||||
|
||||
try:
|
||||
# 检查External Volume权限
|
||||
# Check External Volume permissions
|
||||
permissions = self._get_external_volume_permissions(self._volume_name)
|
||||
|
||||
# External Volume权限映射:根据操作类型确定所需权限
|
||||
# External Volume permission mapping: determine required permissions based on operation type
|
||||
required_permissions = set()
|
||||
|
||||
if operation in [VolumePermission.READ, VolumePermission.LIST]:
|
||||
@@ -186,7 +186,7 @@ class VolumePermissionManager:
|
||||
elif operation in [VolumePermission.WRITE, VolumePermission.DELETE]:
|
||||
required_permissions.add("write")
|
||||
|
||||
# 检查是否有所需的所有权限
|
||||
# Check if has all required permissions
|
||||
has_permission = required_permissions.issubset(permissions)
|
||||
|
||||
logger.debug(
|
||||
@@ -198,11 +198,11 @@ class VolumePermissionManager:
|
||||
has_permission,
|
||||
)
|
||||
|
||||
# 如果权限检查失败,尝试备选验证
|
||||
# If permission check fails, try fallback verification
|
||||
if not has_permission:
|
||||
logger.info("Direct permission check failed for %s, trying fallback verification", self._volume_name)
|
||||
|
||||
# 备选验证:尝试列出Volume来验证基本访问权限
|
||||
# Fallback verification: try listing Volume to verify basic access permissions
|
||||
try:
|
||||
with self._connection.cursor() as cursor:
|
||||
cursor.execute("SHOW VOLUMES")
|
||||
@@ -216,19 +216,19 @@ class VolumePermissionManager:
|
||||
|
||||
return has_permission
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("External volume permission check failed for %s", self._volume_name)
|
||||
logger.info("External Volume permission check failed, but permission checking is disabled in this version")
|
||||
return False
|
||||
|
||||
def _get_table_permissions(self, table_name: str) -> set[str]:
|
||||
"""获取用户对指定表的权限
|
||||
"""Get user permissions for specified table
|
||||
|
||||
Args:
|
||||
table_name: 表名
|
||||
table_name: Table name
|
||||
|
||||
Returns:
|
||||
用户对该表的权限集合
|
||||
Set of user permissions for this table
|
||||
"""
|
||||
cache_key = f"table:{table_name}"
|
||||
|
||||
@@ -239,18 +239,18 @@ class VolumePermissionManager:
|
||||
|
||||
try:
|
||||
with self._connection.cursor() as cursor:
|
||||
# 使用正确的ClickZetta语法检查当前用户权限
|
||||
# Use correct ClickZetta syntax to check current user permissions
|
||||
cursor.execute("SHOW GRANTS")
|
||||
grants = cursor.fetchall()
|
||||
|
||||
# 解析权限结果,查找对该表的权限
|
||||
# Parse permission results, find permissions for this table
|
||||
for grant in grants:
|
||||
if len(grant) >= 3: # 典型格式: (privilege, object_type, object_name, ...)
|
||||
if len(grant) >= 3: # Typical format: (privilege, object_type, object_name, ...)
|
||||
privilege = grant[0].upper()
|
||||
object_type = grant[1].upper() if len(grant) > 1 else ""
|
||||
object_name = grant[2] if len(grant) > 2 else ""
|
||||
|
||||
# 检查是否是对该表的权限
|
||||
# Check if it's permission for this table
|
||||
if (
|
||||
object_type == "TABLE"
|
||||
and object_name == table_name
|
||||
@@ -263,7 +263,7 @@ class VolumePermissionManager:
|
||||
else:
|
||||
permissions.add(privilege)
|
||||
|
||||
# 如果没有找到明确的权限,尝试执行一个简单的查询来验证权限
|
||||
# If no explicit permissions found, try executing a simple query to verify permissions
|
||||
if not permissions:
|
||||
try:
|
||||
cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1")
|
||||
@@ -273,15 +273,15 @@ class VolumePermissionManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Could not check table permissions for %s: %s", table_name, e)
|
||||
# 安全默认:权限检查失败时拒绝访问
|
||||
# Safe default: deny access when permission check fails
|
||||
pass
|
||||
|
||||
# 缓存权限信息
|
||||
# Cache permission information
|
||||
self._permission_cache[cache_key] = permissions
|
||||
return permissions
|
||||
|
||||
def _get_current_username(self) -> str:
|
||||
"""获取当前用户名"""
|
||||
"""Get current username"""
|
||||
if self._current_username:
|
||||
return self._current_username
|
||||
|
||||
@@ -292,13 +292,13 @@ class VolumePermissionManager:
|
||||
if result:
|
||||
self._current_username = result[0]
|
||||
return str(self._current_username)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Failed to get current username")
|
||||
|
||||
return "unknown"
|
||||
|
||||
def _get_user_permissions(self, username: str) -> set[str]:
|
||||
"""获取用户的基本权限集合"""
|
||||
"""Get user's basic permission set"""
|
||||
cache_key = f"user_permissions:{username}"
|
||||
|
||||
if cache_key in self._permission_cache:
|
||||
@@ -308,17 +308,17 @@ class VolumePermissionManager:
|
||||
|
||||
try:
|
||||
with self._connection.cursor() as cursor:
|
||||
# 使用正确的ClickZetta语法检查当前用户权限
|
||||
# Use correct ClickZetta syntax to check current user permissions
|
||||
cursor.execute("SHOW GRANTS")
|
||||
grants = cursor.fetchall()
|
||||
|
||||
# 解析权限结果,查找用户的基本权限
|
||||
# Parse permission results, find user's basic permissions
|
||||
for grant in grants:
|
||||
if len(grant) >= 3: # 典型格式: (privilege, object_type, object_name, ...)
|
||||
if len(grant) >= 3: # Typical format: (privilege, object_type, object_name, ...)
|
||||
privilege = grant[0].upper()
|
||||
object_type = grant[1].upper() if len(grant) > 1 else ""
|
||||
_ = grant[1].upper() if len(grant) > 1 else ""
|
||||
|
||||
# 收集所有相关权限
|
||||
# Collect all relevant permissions
|
||||
if privilege in ["SELECT", "INSERT", "UPDATE", "DELETE", "ALL"]:
|
||||
if privilege == "ALL":
|
||||
permissions.update(["SELECT", "INSERT", "UPDATE", "DELETE"])
|
||||
@@ -327,21 +327,21 @@ class VolumePermissionManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Could not check user permissions for %s: %s", username, e)
|
||||
# 安全默认:权限检查失败时拒绝访问
|
||||
# Safe default: deny access when permission check fails
|
||||
pass
|
||||
|
||||
# 缓存权限信息
|
||||
# Cache permission information
|
||||
self._permission_cache[cache_key] = permissions
|
||||
return permissions
|
||||
|
||||
def _get_external_volume_permissions(self, volume_name: str) -> set[str]:
|
||||
"""获取用户对指定External Volume的权限
|
||||
"""Get user permissions for specified External Volume
|
||||
|
||||
Args:
|
||||
volume_name: External Volume名称
|
||||
volume_name: External Volume name
|
||||
|
||||
Returns:
|
||||
用户对该Volume的权限集合
|
||||
Set of user permissions for this Volume
|
||||
"""
|
||||
cache_key = f"external_volume:{volume_name}"
|
||||
|
||||
@@ -352,15 +352,15 @@ class VolumePermissionManager:
|
||||
|
||||
try:
|
||||
with self._connection.cursor() as cursor:
|
||||
# 使用正确的ClickZetta语法检查Volume权限
|
||||
# Use correct ClickZetta syntax to check Volume permissions
|
||||
logger.info("Checking permissions for volume: %s", volume_name)
|
||||
cursor.execute(f"SHOW GRANTS ON VOLUME {volume_name}")
|
||||
grants = cursor.fetchall()
|
||||
|
||||
logger.info("Raw grants result for %s: %s", volume_name, grants)
|
||||
|
||||
# 解析权限结果
|
||||
# 格式: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
|
||||
# Parse permission results
|
||||
# Format: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
|
||||
# grantee_name, grantor_name, grant_option, granted_time)
|
||||
for grant in grants:
|
||||
logger.info("Processing grant: %s", grant)
|
||||
@@ -378,7 +378,7 @@ class VolumePermissionManager:
|
||||
object_name,
|
||||
)
|
||||
|
||||
# 检查是否是对该Volume的权限或者是层级权限
|
||||
# Check if it's permission for this Volume or hierarchical permission
|
||||
if (
|
||||
granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)
|
||||
) or (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
|
||||
@@ -399,14 +399,14 @@ class VolumePermissionManager:
|
||||
|
||||
logger.info("Final permissions for %s: %s", volume_name, permissions)
|
||||
|
||||
# 如果没有找到明确的权限,尝试查看Volume列表来验证基本权限
|
||||
# If no explicit permissions found, try viewing Volume list to verify basic permissions
|
||||
if not permissions:
|
||||
try:
|
||||
cursor.execute("SHOW VOLUMES")
|
||||
volumes = cursor.fetchall()
|
||||
for volume in volumes:
|
||||
if len(volume) > 0 and volume[0] == volume_name:
|
||||
permissions.add("read") # 至少有读权限
|
||||
permissions.add("read") # At least has read permission
|
||||
logger.debug("Volume %s found in SHOW VOLUMES, assuming read permission", volume_name)
|
||||
break
|
||||
except Exception:
|
||||
@@ -414,7 +414,7 @@ class VolumePermissionManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Could not check external volume permissions for %s: %s", volume_name, e)
|
||||
# 在权限检查失败时,尝试基本的Volume访问验证
|
||||
# When permission check fails, try basic Volume access verification
|
||||
try:
|
||||
with self._connection.cursor() as cursor:
|
||||
cursor.execute("SHOW VOLUMES")
|
||||
@@ -423,30 +423,30 @@ class VolumePermissionManager:
|
||||
if len(volume) > 0 and volume[0] == volume_name:
|
||||
logger.info("Basic volume access verified for %s", volume_name)
|
||||
permissions.add("read")
|
||||
permissions.add("write") # 假设有写权限
|
||||
permissions.add("write") # Assume has write permission
|
||||
break
|
||||
except Exception as basic_e:
|
||||
logger.warning("Basic volume access check failed for %s: %s", volume_name, basic_e)
|
||||
# 最后的备选方案:假设有基本权限
|
||||
# Last fallback: assume basic permissions
|
||||
permissions.add("read")
|
||||
|
||||
# 缓存权限信息
|
||||
# Cache permission information
|
||||
self._permission_cache[cache_key] = permissions
|
||||
return permissions
|
||||
|
||||
def clear_permission_cache(self):
|
||||
"""清空权限缓存"""
|
||||
"""Clear permission cache"""
|
||||
self._permission_cache.clear()
|
||||
logger.debug("Permission cache cleared")
|
||||
|
||||
def get_permission_summary(self, dataset_id: Optional[str] = None) -> dict[str, bool]:
|
||||
"""获取权限摘要
|
||||
def get_permission_summary(self, dataset_id: str | None = None) -> dict[str, bool]:
|
||||
"""Get permission summary
|
||||
|
||||
Args:
|
||||
dataset_id: 数据集ID (用于table volume)
|
||||
dataset_id: Dataset ID (for table volume)
|
||||
|
||||
Returns:
|
||||
权限摘要字典
|
||||
Permission summary dictionary
|
||||
"""
|
||||
summary = {}
|
||||
|
||||
@@ -456,43 +456,43 @@ class VolumePermissionManager:
|
||||
return summary
|
||||
|
||||
def check_inherited_permission(self, file_path: str, operation: VolumePermission) -> bool:
|
||||
"""检查文件路径的权限继承
|
||||
"""Check permission inheritance for file path
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
operation: 要执行的操作
|
||||
file_path: File path
|
||||
operation: Operation to perform
|
||||
|
||||
Returns:
|
||||
True if user has permission, False otherwise
|
||||
"""
|
||||
try:
|
||||
# 解析文件路径
|
||||
# Parse file path
|
||||
path_parts = file_path.strip("/").split("/")
|
||||
|
||||
if not path_parts:
|
||||
logger.warning("Invalid file path for permission inheritance check")
|
||||
return False
|
||||
|
||||
# 对于Table Volume,第一层是dataset_id
|
||||
# For Table Volume, first layer is dataset_id
|
||||
if self._volume_type == "table":
|
||||
if len(path_parts) < 1:
|
||||
return False
|
||||
|
||||
dataset_id = path_parts[0]
|
||||
|
||||
# 检查对dataset的权限
|
||||
# Check permissions for dataset
|
||||
has_dataset_permission = self.check_permission(operation, dataset_id)
|
||||
|
||||
if not has_dataset_permission:
|
||||
logger.debug("Permission denied for dataset %s", dataset_id)
|
||||
return False
|
||||
|
||||
# 检查路径遍历攻击
|
||||
# Check path traversal attack
|
||||
if self._contains_path_traversal(file_path):
|
||||
logger.warning("Path traversal attack detected: %s", file_path)
|
||||
return False
|
||||
|
||||
# 检查是否访问敏感目录
|
||||
# Check if accessing sensitive directory
|
||||
if self._is_sensitive_path(file_path):
|
||||
logger.warning("Access to sensitive path denied: %s", file_path)
|
||||
return False
|
||||
@@ -501,33 +501,33 @@ class VolumePermissionManager:
|
||||
return True
|
||||
|
||||
elif self._volume_type == "user":
|
||||
# User Volume的权限继承
|
||||
# User Volume permission inheritance
|
||||
current_user = self._get_current_username()
|
||||
|
||||
# 检查是否试图访问其他用户的目录
|
||||
# Check if attempting to access other user's directory
|
||||
if len(path_parts) > 1 and path_parts[0] != current_user:
|
||||
logger.warning("User %s attempted to access %s's directory", current_user, path_parts[0])
|
||||
return False
|
||||
|
||||
# 检查基本权限
|
||||
# Check basic permissions
|
||||
return self.check_permission(operation)
|
||||
|
||||
elif self._volume_type == "external":
|
||||
# External Volume的权限继承
|
||||
# 检查对External Volume的权限
|
||||
# External Volume permission inheritance
|
||||
# Check permissions for External Volume
|
||||
return self.check_permission(operation)
|
||||
|
||||
else:
|
||||
logger.warning("Unknown volume type for permission inheritance: %s", self._volume_type)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Permission inheritance check failed")
|
||||
return False
|
||||
|
||||
def _contains_path_traversal(self, file_path: str) -> bool:
|
||||
"""检查路径是否包含路径遍历攻击"""
|
||||
# 检查常见的路径遍历模式
|
||||
"""Check if path contains path traversal attack"""
|
||||
# Check common path traversal patterns
|
||||
traversal_patterns = [
|
||||
"../",
|
||||
"..\\",
|
||||
@@ -547,18 +547,18 @@ class VolumePermissionManager:
|
||||
if pattern in file_path_lower:
|
||||
return True
|
||||
|
||||
# 检查绝对路径
|
||||
# Check absolute path
|
||||
if file_path.startswith("/") or file_path.startswith("\\"):
|
||||
return True
|
||||
|
||||
# 检查Windows驱动器路径
|
||||
# Check Windows drive path
|
||||
if len(file_path) >= 2 and file_path[1] == ":":
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _is_sensitive_path(self, file_path: str) -> bool:
|
||||
"""检查路径是否为敏感路径"""
|
||||
"""Check if path is sensitive path"""
|
||||
sensitive_patterns = [
|
||||
"passwd",
|
||||
"shadow",
|
||||
@@ -581,12 +581,12 @@ class VolumePermissionManager:
|
||||
|
||||
return any(pattern in file_path_lower for pattern in sensitive_patterns)
|
||||
|
||||
def validate_operation(self, operation: str, dataset_id: Optional[str] = None) -> bool:
|
||||
"""验证操作权限
|
||||
def validate_operation(self, operation: str, dataset_id: str | None = None) -> bool:
|
||||
"""Validate operation permission
|
||||
|
||||
Args:
|
||||
operation: 操作名称 (save|load|exists|delete|scan)
|
||||
dataset_id: 数据集ID
|
||||
operation: Operation name (save|load|exists|delete|scan)
|
||||
dataset_id: Dataset ID
|
||||
|
||||
Returns:
|
||||
True if operation is allowed, False otherwise
|
||||
@@ -611,27 +611,25 @@ class VolumePermissionManager:
|
||||
|
||||
|
||||
class VolumePermissionError(Exception):
|
||||
"""Volume权限错误异常"""
|
||||
"""Volume permission error exception"""
|
||||
|
||||
def __init__(self, message: str, operation: str, volume_type: str, dataset_id: Optional[str] = None):
|
||||
def __init__(self, message: str, operation: str, volume_type: str, dataset_id: str | None = None):
|
||||
self.operation = operation
|
||||
self.volume_type = volume_type
|
||||
self.dataset_id = dataset_id
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
def check_volume_permission(
|
||||
permission_manager: VolumePermissionManager, operation: str, dataset_id: Optional[str] = None
|
||||
) -> None:
|
||||
"""权限检查装饰器函数
|
||||
def check_volume_permission(permission_manager: VolumePermissionManager, operation: str, dataset_id: str | None = None):
|
||||
"""Permission check decorator function
|
||||
|
||||
Args:
|
||||
permission_manager: 权限管理器
|
||||
operation: 操作名称
|
||||
dataset_id: 数据集ID
|
||||
permission_manager: Permission manager
|
||||
operation: Operation name
|
||||
dataset_id: Dataset ID
|
||||
|
||||
Raises:
|
||||
VolumePermissionError: 如果没有权限
|
||||
VolumePermissionError: If no permission
|
||||
"""
|
||||
if not permission_manager.validate_operation(operation, dataset_id):
|
||||
error_message = f"Permission denied for operation '{operation}' on {permission_manager._volume_type} volume"
|
||||
|
||||
@@ -3,8 +3,9 @@ import os
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
import opendal # type: ignore[import]
|
||||
from dotenv import dotenv_values
|
||||
from opendal import Operator
|
||||
from opendal.layers import RetryLayer
|
||||
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
|
||||
@@ -34,13 +35,12 @@ class OpenDALStorage(BaseStorage):
|
||||
root = kwargs.get("root", "storage")
|
||||
Path(root).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.op = opendal.Operator(scheme=scheme, **kwargs) # type: ignore
|
||||
retry_layer = RetryLayer(max_times=3, factor=2.0, jitter=True)
|
||||
self.op = Operator(scheme=scheme, **kwargs).layer(retry_layer)
|
||||
logger.debug("opendal operator created with scheme %s", scheme)
|
||||
retry_layer = opendal.layers.RetryLayer(max_times=3, factor=2.0, jitter=True)
|
||||
self.op = self.op.layer(retry_layer)
|
||||
logger.debug("added retry layer to opendal operator")
|
||||
|
||||
def save(self, filename: str, data: bytes) -> None:
|
||||
def save(self, filename: str, data: bytes):
|
||||
self.op.write(path=filename, bs=data)
|
||||
logger.debug("file %s saved", filename)
|
||||
|
||||
@@ -57,22 +57,24 @@ class OpenDALStorage(BaseStorage):
|
||||
raise FileNotFoundError("File not found")
|
||||
|
||||
batch_size = 4096
|
||||
file = self.op.open(path=filename, mode="rb")
|
||||
while chunk := file.read(batch_size):
|
||||
yield chunk
|
||||
with self.op.open(
|
||||
path=filename,
|
||||
mode="rb",
|
||||
chunck=batch_size,
|
||||
) as file:
|
||||
while chunk := file.read(batch_size):
|
||||
yield chunk
|
||||
logger.debug("file %s loaded as stream", filename)
|
||||
|
||||
def download(self, filename: str, target_filepath: str):
|
||||
if not self.exists(filename):
|
||||
raise FileNotFoundError("File not found")
|
||||
|
||||
with Path(target_filepath).open("wb") as f:
|
||||
f.write(self.op.read(path=filename))
|
||||
Path(target_filepath).write_bytes(self.op.read(path=filename))
|
||||
logger.debug("file %s downloaded to %s", filename, target_filepath)
|
||||
|
||||
def exists(self, filename: str) -> bool:
|
||||
res: bool = self.op.exists(path=filename)
|
||||
return res
|
||||
return self.op.exists(path=filename)
|
||||
|
||||
def delete(self, filename: str):
|
||||
if self.exists(filename):
|
||||
@@ -85,7 +87,7 @@ class OpenDALStorage(BaseStorage):
|
||||
if not self.exists(path):
|
||||
raise FileNotFoundError("Path not found")
|
||||
|
||||
all_files = self.op.scan(path=path)
|
||||
all_files = self.op.list(path=path)
|
||||
if files and directories:
|
||||
logger.debug("files and directories on %s scanned", path)
|
||||
return [f.path for f in all_files]
|
||||
|
||||
Reference in New Issue
Block a user