Compare commits

...

12 Commits

Author SHA1 Message Date
zhangx1n
44c356258f chore: bump wandb v0.23.1 -> v0.25.1, weave v0.52.17 -> v0.52.25 2026-03-24 17:51:55 +08:00
zhangx1n
44fb3cd2af chore: bump setuptools to 82.0.1 2026-03-24 17:30:15 +08:00
zhangx1n
2bf6728951 chore(api): remove ClickZetta vector/storage integration and drop clickzetta/pyarrow dependencies 2026-03-24 15:58:13 +08:00
zhangx1n
fcfa11a71a bump pyasn1 v0.6.2 -> v0.6.3
bump ujson v5.9.0 -> v5.12.0
2026-03-24 14:47:00 +08:00
FFXN
1730f900c1 fix: Add dataset_id filters to the hit_count's subqueries (#33757)
Some checks failed
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled
2026-03-20 10:21:45 +08:00
QuantumGhost
12178e7aec fix(api): add trigger_info to WorkflowNodeExecutionMetadataKey (#33753)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-03-19 17:58:45 +08:00
Yansong Zhang
afe23a029b remove hit count 2026-03-19 16:26:28 +08:00
QuantumGhost
c8560bacb3 chore: bump version to 1.13.2 (#33681) 2026-03-18 21:54:17 +08:00
wangxiaolei
0f1b8bf5f9 fix: fix max_retries is hardcode (#33619) 2026-03-18 20:25:02 +08:00
QuantumGhost
652211ad96 fix(api): Preserving the content transform logic in fetch_prompt_messages (#33666)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-03-18 20:20:17 +08:00
wangxiaolei
c049249bc1 feat: remove weaviate client __del__ method (#33593)
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-03-18 20:20:10 +08:00
wangxiaolei
138083dfc8 fix(api): make CreatorUserRole accept both end-user and end_user (#33638)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-03-18 20:19:57 +08:00
41 changed files with 2916 additions and 6436 deletions

View File

@@ -608,7 +608,7 @@ def migrate_oss(
click.style(
"Target STORAGE_TYPE must be a cloud OSS (not 'local' or 'opendal').\n"
"Please set STORAGE_TYPE to one of: s3, aliyun-oss, azure-blob, google-storage, tencent-cos, \n"
"volcengine-tos, supabase, oci-storage, huawei-obs, baidu-obs, clickzetta-volume.",
"volcengine-tos, supabase, oci-storage, huawei-obs, baidu-obs.",
fg="red",
)
)

View File

@@ -11,7 +11,6 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
@@ -23,7 +22,6 @@ from .vdb.alibabacloud_mysql_config import AlibabaCloudMySQLConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.hologres_config import HologresConfig
@@ -58,7 +56,6 @@ class StorageConfig(BaseSettings):
"aliyun-oss",
"azure-blob",
"baidu-obs",
"clickzetta-volume",
"google-storage",
"huawei-obs",
"oci-storage",
@@ -69,7 +66,7 @@ class StorageConfig(BaseSettings):
] = Field(
description="Type of storage to use."
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)
@@ -334,7 +331,6 @@ class MiddlewareConfig(
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
ClickZettaVolumeStorageConfig,
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,
@@ -347,7 +343,6 @@ class MiddlewareConfig(
VectorStoreConfig,
AnalyticdbConfig,
ChromaConfig,
ClickzettaConfig,
HologresConfig,
HuaweiCloudConfig,
IrisVectorConfig,

View File

@@ -1,4 +1,4 @@
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, field_validator
from pydantic_settings import BaseSettings
@@ -116,3 +116,13 @@ class RedisConfig(BaseSettings):
description="Maximum connections in the Redis connection pool (unset for library default)",
default=None,
)
@field_validator("REDIS_MAX_CONNECTIONS", mode="before")
@classmethod
def _empty_string_to_none_for_max_conns(cls, v):
    """Coerce a missing or blank REDIS_MAX_CONNECTIONS env value to None ("unset")."""
    # A bare `REDIS_MAX_CONNECTIONS=` line in .env arrives as "" — treat it
    # the same as an absent variable so pydantic keeps the library default.
    is_blank_string = isinstance(v, str) and not v.strip()
    if v is None or is_blank_string:
        return None
    return v

View File

@@ -1,63 +0,0 @@
"""ClickZetta Volume Storage Configuration"""
from pydantic import Field
from pydantic_settings import BaseSettings
class ClickZettaVolumeStorageConfig(BaseSettings):
    """Configuration for ClickZetta Volume storage.

    Values are populated from the environment by pydantic-settings; fields
    without a default must be supplied when this storage backend is selected.
    """

    # Credentials — no defaults.
    CLICKZETTA_VOLUME_USERNAME: str | None = Field(
        description="Username for ClickZetta Volume authentication",
        default=None,
    )
    CLICKZETTA_VOLUME_PASSWORD: str | None = Field(
        description="Password for ClickZetta Volume authentication",
        default=None,
    )

    # Connection coordinates for the ClickZetta deployment.
    CLICKZETTA_VOLUME_INSTANCE: str | None = Field(
        description="ClickZetta instance identifier",
        default=None,
    )
    CLICKZETTA_VOLUME_SERVICE: str = Field(
        description="ClickZetta service endpoint",
        default="api.clickzetta.com",
    )
    CLICKZETTA_VOLUME_WORKSPACE: str = Field(
        description="ClickZetta workspace name",
        default="quick_start",
    )
    CLICKZETTA_VOLUME_VCLUSTER: str = Field(
        description="ClickZetta virtual cluster name",
        default="default_ap",
    )
    CLICKZETTA_VOLUME_SCHEMA: str = Field(
        description="ClickZetta schema name",
        default="dify",
    )

    # Volume selection: "table" scopes files per dataset table, "user" stores
    # under the account's user volume, "external" targets a named volume.
    CLICKZETTA_VOLUME_TYPE: str = Field(
        description="ClickZetta volume type (table|user|external)",
        default="user",
    )
    # Only consulted when CLICKZETTA_VOLUME_TYPE == "external".
    CLICKZETTA_VOLUME_NAME: str | None = Field(
        description="ClickZetta volume name for external volumes",
        default=None,
    )
    # Only consulted when CLICKZETTA_VOLUME_TYPE == "table".
    CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
        description="Prefix for ClickZetta volume table names",
        default="dataset_",
    )
    # Only consulted when CLICKZETTA_VOLUME_TYPE == "user".
    CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
        description="Directory prefix for User Volume to organize Dify files",
        default="dify_km",
    )

View File

@@ -1,68 +0,0 @@
from pydantic import Field
from pydantic_settings import BaseSettings
class ClickzettaConfig(BaseSettings):
    """
    Clickzetta Lakehouse vector database configuration
    """

    # Authentication — no defaults; required to reach the Lakehouse.
    CLICKZETTA_USERNAME: str | None = Field(
        description="Username for authenticating with Clickzetta Lakehouse",
        default=None,
    )
    CLICKZETTA_PASSWORD: str | None = Field(
        description="Password for authenticating with Clickzetta Lakehouse",
        default=None,
    )
    CLICKZETTA_INSTANCE: str | None = Field(
        description="Clickzetta Lakehouse instance ID",
        default=None,
    )

    # Placement coordinates within the Clickzetta deployment.
    CLICKZETTA_SERVICE: str | None = Field(
        description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
        default="api.clickzetta.com",
    )
    CLICKZETTA_WORKSPACE: str | None = Field(
        description="Clickzetta workspace name",
        default="default",
    )
    CLICKZETTA_VCLUSTER: str | None = Field(
        description="Clickzetta virtual cluster name",
        default="default_ap",
    )
    CLICKZETTA_SCHEMA: str | None = Field(
        description="Database schema name in Clickzetta",
        default="public",
    )

    # Write batching for bulk vector inserts.
    CLICKZETTA_BATCH_SIZE: int | None = Field(
        description="Batch size for bulk insert operations",
        default=100,
    )

    # Full-text search tuning (inverted index + analyzer).
    CLICKZETTA_ENABLE_INVERTED_INDEX: bool | None = Field(
        description="Enable inverted index for full-text search capabilities",
        default=True,
    )
    CLICKZETTA_ANALYZER_TYPE: str | None = Field(
        description="Analyzer type for full-text search: keyword, english, chinese, unicode",
        default="chinese",
    )
    CLICKZETTA_ANALYZER_MODE: str | None = Field(
        description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
        default="smart",
    )

    # Vector similarity metric used by the vector index.
    CLICKZETTA_VECTOR_DISTANCE_FUNCTION: str | None = Field(
        description="Distance function for vector similarity: l2_distance or cosine_distance",
        default="cosine_distance",
    )

View File

@@ -259,7 +259,6 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool
VectorType.HUAWEI_CLOUD,
VectorType.TENCENT,
VectorType.MATRIXONE,
VectorType.CLICKZETTA,
VectorType.BAIDU,
VectorType.ALIBABACLOUD_MYSQL,
VectorType.IRIS,

View File

@@ -297,6 +297,7 @@ class DatasetDocumentListApi(Resource):
if sort == "hit_count":
sub_query = (
sa.select(DocumentSegment.document_id, sa.func.sum(DocumentSegment.hit_count).label("total_hit_count"))
.where(DocumentSegment.dataset_id == str(dataset_id))
.group_by(DocumentSegment.document_id)
.subquery()
)

View File

@@ -1,201 +0,0 @@
# Clickzetta Vector Database Integration
This module provides integration with Clickzetta Lakehouse as a vector database for Dify.
## Features
- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
- **Vector Search**: Efficient similarity search using HNSW algorithm
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
- **Hybrid Search**: Combine vector similarity and full-text search for better results
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments
## Configuration
### Required Environment Variables
All seven configuration parameters are required:
```bash
# Authentication
CLICKZETTA_USERNAME=your_username
CLICKZETTA_PASSWORD=your_password
# Instance configuration
CLICKZETTA_INSTANCE=your_instance_id
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=your_workspace
CLICKZETTA_VCLUSTER=your_vcluster
CLICKZETTA_SCHEMA=your_schema
```
### Optional Configuration
```bash
# Batch processing
CLICKZETTA_BATCH_SIZE=100
# Full-text search configuration
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese # Options: keyword, english, chinese, unicode
CLICKZETTA_ANALYZER_MODE=smart # Options: max_word, smart
# Vector search configuration
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance # Options: l2_distance, cosine_distance
```
## Usage
### 1. Set Clickzetta as the Vector Store
In your Dify configuration, set:
```bash
VECTOR_STORE=clickzetta
```
### 2. Table Structure
Clickzetta will automatically create tables with the following structure:
```sql
CREATE TABLE <collection_name> (
id STRING NOT NULL,
content STRING NOT NULL,
metadata JSON,
vector VECTOR(FLOAT, <dimension>) NOT NULL,
PRIMARY KEY (id)
);
-- Vector index for similarity search
CREATE VECTOR INDEX idx_<collection_name>_vec
ON TABLE <schema>.<collection_name>(vector)
PROPERTIES (
"distance.function" = "cosine_distance",
"scalar.type" = "f32"
);
-- Inverted index for full-text search (if enabled)
CREATE INVERTED INDEX idx_<collection_name>_text
ON <schema>.<collection_name>(content)
PROPERTIES (
"analyzer" = "chinese",
"mode" = "smart"
);
```
## Full-Text Search Capabilities
Clickzetta supports advanced full-text search with multiple analyzers:
### Analyzer Types
1. **keyword**: No tokenization, treats the entire string as a single token
- Best for: Exact matching, IDs, codes
1. **english**: Designed for English text
- Features: Recognizes ASCII letters and numbers, converts to lowercase
- Best for: English content
1. **chinese**: Chinese text tokenizer
- Features: Recognizes Chinese and English characters, removes punctuation
- Best for: Chinese or mixed Chinese-English content
1. **unicode**: Multi-language tokenizer based on Unicode
- Features: Recognizes text boundaries in multiple languages
- Best for: Multi-language content
### Analyzer Modes
- **max_word**: Fine-grained tokenization (more tokens)
- **smart**: Intelligent tokenization (balanced)
### Full-Text Search Functions
- `MATCH_ALL(column, query)`: All terms must be present
- `MATCH_ANY(column, query)`: At least one term must be present
- `MATCH_PHRASE(column, query)`: Exact phrase matching
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
## Performance Optimization
### Vector Search
1. **Adjust exploration factor** for accuracy vs speed trade-off:
```sql
SET cz.vector.index.search.ef=64;
```
1. **Use appropriate distance functions**:
- `cosine_distance`: Best for normalized embeddings (e.g., from language models)
- `l2_distance`: Best for raw feature vectors
### Full-Text Search
1. **Choose the right analyzer**:
- Use `keyword` for exact matching
- Use language-specific analyzers for better tokenization
1. **Combine with vector search**:
- Pre-filter with full-text search for better performance
- Use hybrid search for improved relevance
## Troubleshooting
### Connection Issues
1. Verify all 7 required configuration parameters are set
1. Check network connectivity to Clickzetta service
1. Ensure the user has proper permissions on the schema
### Search Performance
1. Verify vector index exists:
```sql
SHOW INDEX FROM <schema>.<table_name>;
```
1. Check if vector index is being used:
```sql
EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
```
Look for `vector_index_search_type` in the execution plan.
### Full-Text Search Not Working
1. Verify inverted index is created
1. Check analyzer configuration matches your content language
1. Use `TOKENIZE()` function to test tokenization:
```sql
SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
```
## Limitations
1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
1. Full-text search relevance scores are not provided by Clickzetta
1. Inverted index creation may fail for very large existing tables (continue without error)
1. Index naming constraints:
- Index names must be unique within a schema
- Only one vector index can be created per column
- The implementation uses timestamps to ensure unique index names
1. A column can only have one vector index at a time
## References
- [Clickzetta Vector Search Documentation](https://yunqi.tech/documents/vector-search)
- [Clickzetta Inverted Index Documentation](https://yunqi.tech/documents/inverted-index)
- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference)

View File

@@ -1 +0,0 @@
# Clickzetta Vector Database Integration for Dify

File diff suppressed because it is too large Load Diff

View File

@@ -183,10 +183,6 @@ class Vector:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory
return MatrixoneVectorFactory
case VectorType.CLICKZETTA:
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory
return ClickzettaVectorFactory
case VectorType.IRIS:
from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory

View File

@@ -32,6 +32,5 @@ class VectorType(StrEnum):
TABLESTORE = "tablestore"
HUAWEI_CLOUD = "huawei_cloud"
MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta"
IRIS = "iris"
HOLOGRES = "hologres"

View File

@@ -5,6 +5,7 @@ This module provides integration with Weaviate vector database for storing and r
document embeddings used in retrieval-augmented generation workflows.
"""
import atexit
import datetime
import json
import logging
@@ -37,6 +38,32 @@ _weaviate_client: weaviate.WeaviateClient | None = None
_weaviate_client_lock = threading.Lock()
def _shutdown_weaviate_client() -> None:
    """
    Best-effort shutdown hook that closes the module-level Weaviate client.

    Registered with atexit so HTTP/gRPC resources are released when the
    Python interpreter exits.
    """
    global _weaviate_client

    # Swap the shared reference out under the lock, then close outside it.
    with _weaviate_client_lock:
        client, _weaviate_client = _weaviate_client, None

    if client is None:
        return

    try:
        client.close()
    except Exception:
        # Best-effort cleanup; log at debug level and ignore errors.
        logger.debug("Failed to close Weaviate client during shutdown", exc_info=True)


# Register the shutdown hook once per process.
atexit.register(_shutdown_weaviate_client)
class WeaviateConfig(BaseModel):
"""
Configuration model for Weaviate connection settings.
@@ -85,18 +112,6 @@ class WeaviateVector(BaseVector):
self._client = self._init_client(config)
self._attributes = attributes
def __del__(self):
"""
Destructor to properly close the Weaviate client connection.
Prevents connection leaks and resource warnings.
"""
if hasattr(self, "_client") and self._client is not None:
try:
self._client.close()
except Exception as e:
# Ignore errors during cleanup as object is being destroyed
logger.warning("Error closing Weaviate client %s", e, exc_info=True)
def _init_client(self, config: WeaviateConfig) -> weaviate.WeaviateClient:
"""
Initializes and returns a connected Weaviate client.

View File

@@ -3,7 +3,6 @@ from typing import Final
TRIGGER_WEBHOOK_NODE_TYPE: Final[str] = "trigger-webhook"
TRIGGER_SCHEDULE_NODE_TYPE: Final[str] = "trigger-schedule"
TRIGGER_PLUGIN_NODE_TYPE: Final[str] = "trigger-plugin"
TRIGGER_INFO_METADATA_KEY: Final[str] = "trigger_info"
TRIGGER_NODE_TYPES: Final[frozenset[str]] = frozenset(
{

View File

@@ -1,7 +1,7 @@
from collections.abc import Mapping
from typing import Any, cast
from typing import Any
from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE
from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE
from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID
from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from dify_graph.enums import NodeExecutionType, WorkflowNodeExecutionMetadataKey
@@ -47,7 +47,7 @@ class TriggerEventNode(Node[TriggerEventNodeData]):
# Get trigger data passed when workflow was triggered
metadata: dict[WorkflowNodeExecutionMetadataKey, Any] = {
cast(WorkflowNodeExecutionMetadataKey, TRIGGER_INFO_METADATA_KEY): {
WorkflowNodeExecutionMetadataKey.TRIGGER_INFO: {
"provider_id": self.node_data.provider_id,
"event_name": self.node_data.event_name,
"plugin_unique_identifier": self.node_data.plugin_unique_identifier,

View File

@@ -245,6 +245,9 @@ _END_STATE = frozenset(
class WorkflowNodeExecutionMetadataKey(StrEnum):
"""
Node Run Metadata Key.
Values in this enum are persisted as execution metadata and must stay in sync
with every node that writes `NodeRunResult.metadata`.
"""
TOTAL_TOKENS = "total_tokens"
@@ -266,6 +269,7 @@ class WorkflowNodeExecutionMetadataKey(StrEnum):
ERROR_STRATEGY = "error_strategy" # node in continue on error mode return the field
LOOP_VARIABLE_MAP = "loop_variable_map" # single loop variable output
DATASOURCE_INFO = "datasource_info"
TRIGGER_INFO = "trigger_info"
COMPLETED_REASON = "completed_reason" # completed reason for loop node

View File

@@ -101,7 +101,6 @@ class HttpRequestNode(Node[HttpRequestNodeData]):
timeout=self._get_request_timeout(self.node_data),
variable_pool=self.graph_runtime_state.variable_pool,
http_request_config=self._http_request_config,
max_retries=0,
ssl_verify=self.node_data.ssl_verify,
http_client=self._http_client,
file_manager=self._file_manager,

View File

@@ -256,9 +256,13 @@ def fetch_prompt_messages(
):
continue
prompt_message_content.append(content_item)
if prompt_message_content:
if not prompt_message_content:
continue
if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
prompt_message.content = prompt_message_content[0].data
else:
prompt_message.content = prompt_message_content
filtered_prompt_messages.append(prompt_message)
filtered_prompt_messages.append(prompt_message)
elif not prompt_message.is_empty():
filtered_prompt_messages.append(prompt_message)

View File

@@ -69,19 +69,6 @@ class Storage:
from extensions.storage.supabase_storage import SupabaseStorage
return SupabaseStorage
case StorageType.CLICKZETTA_VOLUME:
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
ClickZettaVolumeConfig,
ClickZettaVolumeStorage,
)
def create_clickzetta_volume_storage():
# ClickZettaVolumeConfig will automatically read from environment variables
# and fallback to CLICKZETTA_* config if CLICKZETTA_VOLUME_* is not set
volume_config = ClickZettaVolumeConfig()
return ClickZettaVolumeStorage(volume_config)
return create_clickzetta_volume_storage
case _:
raise ValueError(f"unsupported storage type {storage_type}")

View File

@@ -1,5 +0,0 @@
"""ClickZetta Volume storage implementation."""
from .clickzetta_volume_storage import ClickZettaVolumeStorage
__all__ = ["ClickZettaVolumeStorage"]

View File

@@ -1,527 +0,0 @@
"""ClickZetta Volume Storage Implementation
This module provides storage backend using ClickZetta Volume functionality.
Supports Table Volume, User Volume, and External Volume types.
"""
import logging
import os
import tempfile
from collections.abc import Generator
from io import BytesIO
from pathlib import Path
import clickzetta
from pydantic import BaseModel, model_validator
from extensions.storage.base_storage import BaseStorage
from .volume_permissions import VolumePermissionManager, check_volume_permission
logger = logging.getLogger(__name__)
class ClickZettaVolumeConfig(BaseModel):
    """Configuration for ClickZetta Volume storage.

    Values may come from constructor kwargs or from the environment
    (CLICKZETTA_VOLUME_* with fallback to CLICKZETTA_*); resolution happens
    in ``validate_config`` before field validation.
    """

    username: str = ""
    password: str = ""
    instance: str = ""
    service: str = "api.clickzetta.com"
    workspace: str = "quick_start"
    vcluster: str = "default_ap"
    schema_name: str = "dify"
    volume_type: str = "table"  # table|user|external
    volume_name: str | None = None  # For external volumes
    table_prefix: str = "dataset_"  # Prefix for table volume names
    dify_prefix: str = "dify_km"  # Directory prefix for User Volume
    permission_check: bool = True  # Enable/disable permission checking

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict):
        """Validate the configuration values.

        This method will first try to use CLICKZETTA_VOLUME_* environment variables,
        then fall back to CLICKZETTA_* environment variables (for vector DB config).

        Raises:
            ValueError: if required credentials are missing or volume_type /
                volume_name are inconsistent.
        """

        # Helper function to get environment variable with fallback.
        # Precedence: explicit kwarg in *values* > CLICKZETTA_VOLUME_* env
        # > CLICKZETTA_* env > supplied default.
        def get_env_with_fallback(volume_key: str, fallback_key: str, default: str | None = None) -> str:
            # First try CLICKZETTA_VOLUME_* specific config.
            # NOTE(review): the prefix strip maps e.g. "CLICKZETTA_VOLUME_USERNAME"
            # to the field name "username" for the kwarg lookup.
            volume_value = values.get(volume_key.lower().replace("clickzetta_volume_", ""))
            if volume_value:
                return str(volume_value)
            # Then try environment variables
            volume_env = os.getenv(volume_key)
            if volume_env:
                return volume_env
            # Fall back to existing CLICKZETTA_* config
            fallback_env = os.getenv(fallback_key)
            if fallback_env:
                return fallback_env
            return default or ""

        # Apply environment variables with fallback to existing CLICKZETTA_* config
        values.setdefault("username", get_env_with_fallback("CLICKZETTA_VOLUME_USERNAME", "CLICKZETTA_USERNAME"))
        values.setdefault("password", get_env_with_fallback("CLICKZETTA_VOLUME_PASSWORD", "CLICKZETTA_PASSWORD"))
        values.setdefault("instance", get_env_with_fallback("CLICKZETTA_VOLUME_INSTANCE", "CLICKZETTA_INSTANCE"))
        values.setdefault(
            "service", get_env_with_fallback("CLICKZETTA_VOLUME_SERVICE", "CLICKZETTA_SERVICE", "api.clickzetta.com")
        )
        values.setdefault(
            "workspace", get_env_with_fallback("CLICKZETTA_VOLUME_WORKSPACE", "CLICKZETTA_WORKSPACE", "quick_start")
        )
        values.setdefault(
            "vcluster", get_env_with_fallback("CLICKZETTA_VOLUME_VCLUSTER", "CLICKZETTA_VCLUSTER", "default_ap")
        )
        values.setdefault("schema_name", get_env_with_fallback("CLICKZETTA_VOLUME_SCHEMA", "CLICKZETTA_SCHEMA", "dify"))

        # Volume-specific configurations (no fallback to vector DB config)
        values.setdefault("volume_type", os.getenv("CLICKZETTA_VOLUME_TYPE", "table"))
        values.setdefault("volume_name", os.getenv("CLICKZETTA_VOLUME_NAME"))
        values.setdefault("table_prefix", os.getenv("CLICKZETTA_VOLUME_TABLE_PREFIX", "dataset_"))
        values.setdefault("dify_prefix", os.getenv("CLICKZETTA_VOLUME_DIFY_PREFIX", "dify_km"))

        # Temporarily disable permission check feature, set directly to false
        # (overrides the field's declared default of True unless explicitly passed).
        values.setdefault("permission_check", False)

        # Validate required fields
        if not values.get("username"):
            raise ValueError("CLICKZETTA_VOLUME_USERNAME or CLICKZETTA_USERNAME is required")
        if not values.get("password"):
            raise ValueError("CLICKZETTA_VOLUME_PASSWORD or CLICKZETTA_PASSWORD is required")
        if not values.get("instance"):
            raise ValueError("CLICKZETTA_VOLUME_INSTANCE or CLICKZETTA_INSTANCE is required")

        # Validate volume type
        volume_type = values["volume_type"]
        if volume_type not in ["table", "user", "external"]:
            raise ValueError("CLICKZETTA_VOLUME_TYPE must be one of: table, user, external")

        if volume_type == "external" and not values.get("volume_name"):
            raise ValueError("CLICKZETTA_VOLUME_NAME is required for external volume type")

        return values
class ClickZettaVolumeStorage(BaseStorage):
"""ClickZetta Volume storage implementation."""
def __init__(self, config: ClickZettaVolumeConfig):
    """Initialize ClickZetta Volume storage.

    Args:
        config: ClickZetta Volume configuration

    Raises:
        Exception: propagated from connection or permission-manager setup.
    """
    self._config = config
    # Collaborators start unset; the two _init_* calls below populate them
    # and raise on failure, so a constructed instance is always usable.
    self._connection = None
    self._permission_manager: VolumePermissionManager | None = None
    self._init_connection()
    self._init_permission_manager()
    logger.info("ClickZetta Volume storage initialized with type: %s", config.volume_type)
def _init_connection(self):
    """Initialize ClickZetta connection.

    Opens a single connection from the configured credentials; it is reused
    by every subsequent SQL call on this instance.
    """
    try:
        self._connection = clickzetta.connect(
            username=self._config.username,
            password=self._config.password,
            instance=self._config.instance,
            service=self._config.service,
            workspace=self._config.workspace,
            vcluster=self._config.vcluster,
            schema=self._config.schema_name,
        )
        logger.debug("ClickZetta connection established")
    except Exception:
        # A failed connection makes the backend unusable — log and re-raise.
        logger.exception("Failed to connect to ClickZetta")
        raise
def _init_permission_manager(self):
    """Initialize permission manager.

    Binds a VolumePermissionManager to the live connection and the
    configured volume type/name; failures are logged and re-raised.
    """
    try:
        self._permission_manager = VolumePermissionManager(
            self._connection, self._config.volume_type, self._config.volume_name
        )
        logger.debug("Permission manager initialized")
    except Exception:
        logger.exception("Failed to initialize permission manager")
        raise
def _get_volume_path(self, filename: str, dataset_id: str | None = None) -> str:
    """Get the appropriate volume path based on volume type.

    NOTE(review): several return values below contain the literal string
    "(unknown)" — this looks like a garbled f-string interpolation
    (presumably ``{filename}``) introduced when this text was extracted;
    confirm against the original source before relying on these paths.
    """
    if self._config.volume_type == "user":
        # Add dify prefix for User Volume to organize files
        return f"{self._config.dify_prefix}/(unknown)"
    elif self._config.volume_type == "table":
        # Check if this should use User Volume (special directories)
        if dataset_id in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
            # Use User Volume with dify prefix for special directories
            return f"{self._config.dify_prefix}/(unknown)"
        if dataset_id:
            return f"{self._config.table_prefix}{dataset_id}/(unknown)"
        else:
            # Extract dataset_id from filename if not provided
            # Format: dataset_id/filename
            if "/" in filename:
                return filename
            else:
                raise ValueError("dataset_id is required for table volume or filename must include dataset_id/")
    elif self._config.volume_type == "external":
        # External volumes address files by their given path directly.
        return filename
    else:
        raise ValueError(f"Unsupported volume type: {self._config.volume_type}")
def _get_volume_sql_prefix(self, dataset_id: str | None = None) -> str:
    """Build the SQL volume prefix (e.g. "USER VOLUME", "TABLE VOLUME x") for the configured volume type."""
    special_dirs = ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]
    volume_type = self._config.volume_type

    if volume_type == "user":
        return "USER VOLUME"

    if volume_type == "table":
        # Dify's generic file paths ("upload_files/tenant_id/uuid.ext",
        # "tools/tenant_id/uuid.ext", ...) go to USER VOLUME for better
        # compatibility; only dataset-specific paths get a TABLE VOLUME.
        if dataset_id in special_dirs:
            return "USER VOLUME"
        table_name = f"{self._config.table_prefix}{dataset_id}" if dataset_id else "default_dataset"
        return f"TABLE VOLUME {table_name}"

    if volume_type == "external":
        return f"VOLUME {self._config.volume_name}"

    raise ValueError(f"Unsupported volume type: {self._config.volume_type}")
def _execute_sql(self, sql: str, fetch: bool = False):
    """Run *sql* on the ClickZetta connection; return all rows when *fetch* is True, else None."""
    try:
        conn = self._connection
        if conn is None:
            raise RuntimeError("Connection not initialized")
        with conn.cursor() as cursor:
            cursor.execute(sql)
            result = cursor.fetchall() if fetch else None
        return result
    except Exception:
        # Log the offending statement, then let the caller decide how to react.
        logger.exception("SQL execution failed: %s", sql)
        raise
def _ensure_table_volume_exists(self, dataset_id: str):
    """Ensure table volume exists for the given dataset_id.

    Best-effort: creation failures are logged but never raised, because the
    table may already exist without being visible to the current user.
    """
    if self._config.volume_type != "table" or not dataset_id:
        return

    # Skip for upload_files and other special directories that use USER VOLUME
    if dataset_id in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
        return

    table_name = f"{self._config.table_prefix}{dataset_id}"
    try:
        # Check if table exists
        check_sql = f"SHOW TABLES LIKE '{table_name}'"
        result = self._execute_sql(check_sql, fetch=True)
        if not result:
            # Create table with volume
            create_sql = f"""
            CREATE TABLE {table_name} (
                id INT PRIMARY KEY AUTO_INCREMENT,
                filename VARCHAR(255) NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                INDEX idx_filename (filename)
            ) WITH VOLUME
            """
            self._execute_sql(create_sql)
            logger.info("Created table volume: %s", table_name)
    except Exception as e:
        logger.warning("Failed to create table volume %s: %s", table_name, e)
        # Don't raise exception, let the operation continue
        # The table might exist but not be visible due to permissions
def save(self, filename: str, data: bytes):
    """Save data to ClickZetta Volume.

    Args:
        filename: File path in volume
        data: File content as bytes
    """
    # Extract dataset_id from filename if present
    dataset_id = None
    if "/" in filename and self._config.volume_type == "table":
        parts = filename.split("/", 1)
        if parts[0].startswith(self._config.table_prefix):
            # Strip the table prefix so dataset_id is the bare id.
            dataset_id = parts[0][len(self._config.table_prefix) :]
            filename = parts[1]
        else:
            dataset_id = parts[0]
            filename = parts[1]

    # Ensure table volume exists (for table volumes)
    if dataset_id:
        self._ensure_table_volume_exists(dataset_id)

    # Check permissions (if enabled)
    if self._config.permission_check:
        # Skip permission check for special directories that use USER VOLUME
        if dataset_id not in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
            if self._permission_manager is not None:
                check_volume_permission(self._permission_manager, "save", dataset_id)

    # Write data to temporary file.
    # delete=False because the path is handed to the SQL PUT command below;
    # the file is removed in the finally block.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(data)
        temp_file_path = temp_file.name

    try:
        # Upload to volume
        volume_prefix = self._get_volume_sql_prefix(dataset_id)
        # Get the actual volume path (may include dify_km prefix)
        volume_path = self._get_volume_path(filename, dataset_id)
        # For User Volume, use the full path with dify_km prefix
        if volume_prefix == "USER VOLUME":
            sql = f"PUT '{temp_file_path}' TO {volume_prefix} FILE '{volume_path}'"
        else:
            # NOTE(review): "(unknown)" below looks like a garbled f-string
            # interpolation (presumably the target file name) introduced by
            # extraction — confirm against the original source.
            sql = f"PUT '{temp_file_path}' TO {volume_prefix} FILE '(unknown)'"
        self._execute_sql(sql)
        logger.debug("File %s saved to ClickZetta Volume at path %s", filename, volume_path)
    finally:
        # Clean up temporary file
        Path(temp_file_path).unlink(missing_ok=True)
def load_once(self, filename: str) -> bytes:
    """Load file content from ClickZetta Volume.

    Args:
        filename: File path in volume (see ``save`` for dataset-prefix handling).

    Returns:
        File content as bytes.

    Raises:
        FileNotFoundError: If the file is not found after the GET completes.
    """
    # Extract dataset_id from filename if present (table volumes only).
    dataset_id = None
    if "/" in filename and self._config.volume_type == "table":
        parts = filename.split("/", 1)
        if parts[0].startswith(self._config.table_prefix):
            dataset_id = parts[0][len(self._config.table_prefix) :]
        else:
            dataset_id = parts[0]
        filename = parts[1]
    # Check permissions (if enabled)
    if self._config.permission_check:
        # Skip permission check for special directories that use USER VOLUME
        if dataset_id not in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
            if self._permission_manager is not None:
                check_volume_permission(self._permission_manager, "load_once", dataset_id)
    # Download into a temporary directory, then read the file back.
    with tempfile.TemporaryDirectory() as temp_dir:
        volume_prefix = self._get_volume_sql_prefix(dataset_id)
        # Get the actual volume path (may include dify_km prefix)
        volume_path = self._get_volume_path(filename, dataset_id)
        if volume_prefix == "USER VOLUME":
            sql = f"GET {volume_prefix} FILE '{volume_path}' TO '{temp_dir}'"
        else:
            sql = f"GET {volume_prefix} FILE '{filename}' TO '{temp_dir}'"
        self._execute_sql(sql)
        # The driver may recreate sub-directories, so search recursively.
        downloaded_file = None
        for root, _, files in os.walk(temp_dir):
            for file in files:
                if file == filename or file == os.path.basename(filename):
                    downloaded_file = Path(root) / file
                    break
            if downloaded_file:
                break
        if not downloaded_file or not downloaded_file.exists():
            raise FileNotFoundError(f"Downloaded file not found: {filename}")
        content = downloaded_file.read_bytes()
        logger.debug("File %s loaded from ClickZetta Volume", filename)
        return content
def load_stream(self, filename: str) -> Generator:
    """Stream a file's content from ClickZetta Volume in fixed-size chunks.

    Args:
        filename: File path in volume.

    Yields:
        Successive chunks of the file content (up to 4096 bytes each).
    """
    chunk_size = 4096
    buffer = BytesIO(self.load_once(filename))
    while True:
        piece = buffer.read(chunk_size)
        if not piece:
            break
        yield piece
    logger.debug("File %s loaded as stream from ClickZetta Volume", filename)
def download(self, filename: str, target_filepath: str):
    """Download a file from ClickZetta Volume to a local path.

    Args:
        filename: File path in volume.
        target_filepath: Local filesystem path to write the content to.
    """
    file_bytes = self.load_once(filename)
    target = Path(target_filepath)
    target.write_bytes(file_bytes)
    logger.debug("File %s downloaded from ClickZetta Volume to %s", filename, target_filepath)
def exists(self, filename: str) -> bool:
    """Check if a file exists in ClickZetta Volume.

    Args:
        filename: File path in volume.

    Returns:
        True if the file exists; False otherwise (including on errors).
    """
    try:
        # Extract dataset_id from filename if present (table volumes only).
        dataset_id = None
        if "/" in filename and self._config.volume_type == "table":
            parts = filename.split("/", 1)
            if parts[0].startswith(self._config.table_prefix):
                dataset_id = parts[0][len(self._config.table_prefix) :]
            else:
                dataset_id = parts[0]
            filename = parts[1]
        volume_prefix = self._get_volume_sql_prefix(dataset_id)
        # Get the actual volume path (may include dify_km prefix)
        volume_path = self._get_volume_path(filename, dataset_id)
        # NOTE(review): the path is interpolated into a regex unescaped;
        # names containing regex metacharacters may mis-match — confirm.
        if volume_prefix == "USER VOLUME":
            sql = f"LIST {volume_prefix} REGEXP = '^{volume_path}$'"
        else:
            sql = f"LIST {volume_prefix} REGEXP = '^{filename}$'"
        rows = self._execute_sql(sql, fetch=True)
        exists = len(rows) > 0 if rows else False
        logger.debug("File %s exists check: %s", filename, exists)
        return exists
    except Exception as e:
        logger.warning("Error checking file existence for %s: %s", filename, e)
        return False
def delete(self, filename: str):
    """Delete a file from ClickZetta Volume.

    No-op (with a debug log) when the file does not exist.

    Args:
        filename: File path in volume.
    """
    if not self.exists(filename):
        logger.debug("File %s not found, skip delete", filename)
        return
    # Extract dataset_id from filename if present (table volumes only).
    dataset_id = None
    if "/" in filename and self._config.volume_type == "table":
        parts = filename.split("/", 1)
        if parts[0].startswith(self._config.table_prefix):
            dataset_id = parts[0][len(self._config.table_prefix) :]
        else:
            dataset_id = parts[0]
        filename = parts[1]
    volume_prefix = self._get_volume_sql_prefix(dataset_id)
    # Get the actual volume path (may include dify_km prefix)
    volume_path = self._get_volume_path(filename, dataset_id)
    if volume_prefix == "USER VOLUME":
        # User Volume addresses files by the full dify-prefixed path.
        sql = f"REMOVE {volume_prefix} FILE '{volume_path}'"
    else:
        sql = f"REMOVE {volume_prefix} FILE '{filename}'"
    self._execute_sql(sql)
    logger.debug("File %s deleted from ClickZetta Volume", filename)
def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]:
    """Scan files and directories in ClickZetta Volume.

    Args:
        path: Path to scan (treated as the dataset_id for table volumes).
        files: Include files in the results.
        directories: Include directories in the results.

    Returns:
        List of file/directory paths; empty list on error.
    """
    try:
        dataset_id = None
        if self._config.volume_type == "table":
            # For table volumes, the path argument names the dataset.
            dataset_id = path
            path = ""  # Scan from the root of the table volume.
        volume_prefix = self._get_volume_sql_prefix(dataset_id)
        if volume_prefix == "USER VOLUME":
            # User Volume entries live under the configured dify prefix.
            subdir = f"{self._config.dify_prefix}/{path}" if path else self._config.dify_prefix
            sql = f"LIST {volume_prefix} SUBDIRECTORY '{subdir}'"
        elif path:
            sql = f"LIST {volume_prefix} SUBDIRECTORY '{path}'"
        else:
            sql = f"LIST {volume_prefix}"
        rows = self._execute_sql(sql, fetch=True) or []
        dify_prefix_with_slash = f"{self._config.dify_prefix}/"
        result = []
        for row in rows:
            entry = row[0]  # relative_path column
            # Strip the dify prefix from User Volume results.
            if volume_prefix == "USER VOLUME" and entry.startswith(dify_prefix_with_slash):
                entry = entry[len(dify_prefix_with_slash) :]
            is_dir = entry.endswith("/")
            if (files and not is_dir) or (directories and is_dir):
                result.append(entry)
        logger.debug("Scanned %d items in path %s", len(result), path)
        return result
    except Exception:
        logger.exception("Error scanning path %s", path)
        return []

View File

@@ -1,518 +0,0 @@
"""ClickZetta Volume file lifecycle management
This module provides file lifecycle management features including version control,
automatic cleanup, backup and restore.
Supports complete lifecycle management for knowledge base files.
"""
from __future__ import annotations
import json
import logging
import operator
from dataclasses import asdict, dataclass
from datetime import datetime
from enum import StrEnum, auto
from typing import Any
logger = logging.getLogger(__name__)
class FileStatus(StrEnum):
"""File status enumeration"""
ACTIVE = auto() # Active status
ARCHIVED = auto() # Archived
DELETED = auto() # Deleted (soft delete)
BACKUP = auto() # Backup file
@dataclass
class FileMetadata:
    """Metadata record tracked for each lifecycle-managed file."""

    filename: str                       # Logical file name
    size: int | None                    # Size in bytes, if known
    created_at: datetime                # First-save timestamp
    modified_at: datetime               # Last-modification timestamp
    version: int | None                 # Monotonic version number
    status: FileStatus                  # Current lifecycle state
    checksum: str | None = None         # Content checksum, if computed
    tags: dict[str, str] | None = None  # Free-form labels
    parent_version: int | None = None   # Version this one was derived from

    def to_dict(self):
        """Serialize to a JSON-compatible dictionary."""
        payload = asdict(self)
        payload["created_at"] = self.created_at.isoformat()
        payload["modified_at"] = self.modified_at.isoformat()
        payload["status"] = self.status.value
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> FileMetadata:
        """Rebuild an instance from a dictionary produced by ``to_dict``."""
        payload = dict(data)
        payload["created_at"] = datetime.fromisoformat(payload["created_at"])
        payload["modified_at"] = datetime.fromisoformat(payload["modified_at"])
        payload["status"] = FileStatus(payload["status"])
        return cls(**payload)
class FileLifecycleManager:
    """File lifecycle manager for ClickZetta Volume storage.

    Adds version control, archiving, soft delete, cleanup of old versions,
    and storage statistics on top of a volume storage backend. Bookkeeping
    (metadata, versions, backups, deleted files) lives in hidden paths
    inside the same volume.
    """

    def __init__(self, storage, dataset_id: str | None = None):
        """Initialize lifecycle manager.

        Args:
            storage: ClickZetta Volume storage instance.
            dataset_id: Dataset ID (for Table Volume).
        """
        self._storage = storage
        self._dataset_id = dataset_id
        # Hidden bookkeeping locations inside the volume.
        self._metadata_file = ".dify_file_metadata.json"
        self._version_prefix = ".versions/"
        self._backup_prefix = ".backups/"
        self._deleted_prefix = ".deleted/"
        # Reuse the storage backend's permission manager when it has one.
        self._permission_manager: Any | None = getattr(storage, "_permission_manager", None)

    def save_with_lifecycle(self, filename: str, data: bytes, tags: dict[str, str] | None = None) -> FileMetadata:
        """Save a file and record lifecycle metadata.

        Args:
            filename: File name.
            data: File content.
            tags: Optional file tags.

        Returns:
            Metadata describing the newly saved version.

        Raises:
            VolumePermissionError: If the permission check denies the save.
        """
        # Permission check
        if not self._check_permission(filename, "save"):
            from .volume_permissions import VolumePermissionError

            raise VolumePermissionError(
                f"Permission denied for lifecycle save operation on file: {filename}",
                operation="save",
                # _config may be an object rather than a dict, so use getattr.
                volume_type=getattr(getattr(self._storage, "_config", None), "volume_type", "unknown"),
                dataset_id=self._dataset_id,
            )
        try:
            # 1. Look up any existing version of this file.
            metadata_dict = self._load_metadata()
            current_metadata = metadata_dict.get(filename)
            # 2. Back up the current content before overwriting.
            if current_metadata:
                self._create_version_backup(filename, current_metadata)
            # 3. Compute new version info.
            now = datetime.now()
            checksum = self._calculate_checksum(data)
            new_version = (current_metadata["version"] + 1) if current_metadata else 1
            # 4. Save new file content.
            self._storage.save(filename, data)
            # 5. Build the metadata record, preserving the original creation time.
            created_at = now
            parent_version = None
            if current_metadata:
                # created_at may round-trip through JSON as an ISO string.
                if isinstance(current_metadata["created_at"], str):
                    created_at = datetime.fromisoformat(current_metadata["created_at"])
                else:
                    created_at = current_metadata["created_at"]
                parent_version = current_metadata["version"]
            file_metadata = FileMetadata(
                filename=filename,
                size=len(data),
                created_at=created_at,
                modified_at=now,
                version=new_version,
                status=FileStatus.ACTIVE,
                checksum=checksum,
                tags=tags or {},
                parent_version=parent_version,
            )
            # 6. Persist updated metadata.
            metadata_dict[filename] = file_metadata.to_dict()
            self._save_metadata(metadata_dict)
            logger.info("File %s saved with lifecycle management, version %s", filename, new_version)
            return file_metadata
        except Exception:
            logger.exception("Failed to save file with lifecycle")
            raise

    def get_file_metadata(self, filename: str) -> FileMetadata | None:
        """Get file metadata.

        Args:
            filename: File name.

        Returns:
            File metadata, or None if absent or unreadable.
        """
        try:
            metadata_dict = self._load_metadata()
            if filename in metadata_dict:
                return FileMetadata.from_dict(metadata_dict[filename])
            return None
        except Exception:
            logger.exception("Failed to get file metadata for %s", filename)
            return None

    def list_file_versions(self, filename: str) -> list[FileMetadata]:
        """List all known versions of a file, newest first.

        Args:
            filename: File name.

        Returns:
            File version list sorted by version number (descending).
        """
        try:
            versions = []
            # Current version comes from the metadata file.
            current_metadata = self.get_file_metadata(filename)
            if current_metadata:
                versions.append(current_metadata)
            # Historical versions are stored as .versions/<name>.v<N> files.
            try:
                version_files = self._storage.scan(self._dataset_id or "", files=True)
                for file_path in version_files:
                    if file_path.startswith(f"{self._version_prefix}{filename}.v"):
                        # Parse version number from the suffix.
                        version_str = file_path.split(".v")[-1].split(".")[0]
                        try:
                            _ = int(version_str)
                            # Simplified: full metadata for historical versions
                            # would have to be read from the version file itself.
                        except ValueError:
                            continue
            except Exception:
                # If version files cannot be scanned, return only the current one.
                logger.exception("Failed to scan version files for %s", filename)
            return sorted(versions, key=lambda x: x.version or 0, reverse=True)
        except Exception:
            logger.exception("Failed to list file versions for %s", filename)
            return []

    def restore_version(self, filename: str, version: int) -> bool:
        """Restore a file to the specified version.

        Args:
            filename: File name.
            version: Version number to restore.

        Returns:
            Whether the restore succeeded.
        """
        try:
            version_filename = f"{self._version_prefix}{filename}.v{version}"
            # Check if version file exists
            if not self._storage.exists(version_filename):
                logger.warning("Version %s of %s not found", version, filename)
                return False
            # Read version file content
            version_data = self._storage.load_once(version_filename)
            # Save current version as backup before overwriting.
            current_metadata = self.get_file_metadata(filename)
            if current_metadata:
                self._create_version_backup(filename, current_metadata.to_dict())
            # Restore file (records a new version tagged with the origin).
            self.save_with_lifecycle(filename, version_data, {"restored_from": str(version)})
            return True
        except Exception:
            logger.exception("Failed to restore %s to version %s", filename, version)
            return False

    def archive_file(self, filename: str) -> bool:
        """Archive a file (metadata-only status change).

        Args:
            filename: File name.

        Returns:
            Whether the archive succeeded.
        """
        # Permission check
        if not self._check_permission(filename, "archive"):
            logger.warning("Permission denied for archive operation on file: %s", filename)
            return False
        try:
            metadata_dict = self._load_metadata()
            if filename not in metadata_dict:
                logger.warning("File %s not found in metadata", filename)
                return False
            metadata_dict[filename]["status"] = FileStatus.ARCHIVED
            metadata_dict[filename]["modified_at"] = datetime.now().isoformat()
            self._save_metadata(metadata_dict)
            logger.info("File %s archived successfully", filename)
            return True
        except Exception:
            logger.exception("Failed to archive file %s", filename)
            return False

    def soft_delete_file(self, filename: str) -> bool:
        """Soft delete a file (move it under the deleted prefix).

        Args:
            filename: File name.

        Returns:
            Whether the delete succeeded.
        """
        # Permission check
        if not self._check_permission(filename, "delete"):
            logger.warning("Permission denied for soft delete operation on file: %s", filename)
            return False
        try:
            if not self._storage.exists(filename):
                logger.warning("File %s not found", filename)
                return False
            # Copy content into the deleted area with a timestamp suffix.
            file_data = self._storage.load_once(filename)
            deleted_filename = f"{self._deleted_prefix}{filename}.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            self._storage.save(deleted_filename, file_data)
            # Delete original file
            self._storage.delete(filename)
            # Mark as deleted in metadata (kept for recovery/audit).
            metadata_dict = self._load_metadata()
            if filename in metadata_dict:
                metadata_dict[filename]["status"] = FileStatus.DELETED
                metadata_dict[filename]["modified_at"] = datetime.now().isoformat()
                self._save_metadata(metadata_dict)
            logger.info("File %s soft deleted successfully", filename)
            return True
        except Exception:
            logger.exception("Failed to soft delete file %s", filename)
            return False

    def cleanup_old_versions(self, max_versions: int = 5, max_age_days: int = 30) -> int:
        """Clean up old version files.

        Args:
            max_versions: Maximum number of versions to keep per file.
            max_age_days: Maximum retention days for version files.
                NOTE(review): currently unused by the implementation — only
                the version-count limit is enforced.

        Returns:
            Number of files cleaned.
        """
        try:
            cleaned_count = 0
            try:
                all_files = self._storage.scan(self._dataset_id or "", files=True)
                version_files = [f for f in all_files if f.startswith(self._version_prefix)]
                # Group version files by their base filename.
                file_versions: dict[str, list[tuple[int, str]]] = {}
                for version_file in version_files:
                    # Names look like .versions/<base>.v<N>
                    parts = version_file[len(self._version_prefix) :].split(".v")
                    if len(parts) >= 2:
                        base_filename = parts[0]
                        version_part = parts[1].split(".")[0]
                        try:
                            version_num = int(version_part)
                            if base_filename not in file_versions:
                                file_versions[base_filename] = []
                            file_versions[base_filename].append((version_num, version_file))
                        except ValueError:
                            continue
                # Keep the newest max_versions per file; delete the rest.
                for base_filename, versions in file_versions.items():
                    versions.sort(key=operator.itemgetter(0), reverse=True)
                    if len(versions) > max_versions:
                        to_delete = versions[max_versions:]
                        for version_num, version_file in to_delete:
                            self._storage.delete(version_file)
                            cleaned_count += 1
                            logger.debug("Cleaned old version: %s", version_file)
                logger.info("Cleaned %d old version files", cleaned_count)
            except Exception as e:
                logger.warning("Could not scan for version files: %s", e)
            return cleaned_count
        except Exception:
            logger.exception("Failed to cleanup old versions")
            return 0

    def get_storage_statistics(self) -> dict[str, Any]:
        """Get storage statistics.

        Returns:
            Dictionary with counts, total size, version count and the
            oldest/newest file names; empty dict on error.
        """
        try:
            metadata_dict = self._load_metadata()
            stats: dict[str, Any] = {
                "total_files": len(metadata_dict),
                "active_files": 0,
                "archived_files": 0,
                "deleted_files": 0,
                "total_size": 0,
                "versions_count": 0,
                "oldest_file": None,
                "newest_file": None,
            }
            oldest_date = None
            newest_date = None
            for filename, metadata in metadata_dict.items():
                file_meta = FileMetadata.from_dict(metadata)
                # Count file status
                if file_meta.status == FileStatus.ACTIVE:
                    stats["active_files"] = (stats["active_files"] or 0) + 1
                elif file_meta.status == FileStatus.ARCHIVED:
                    stats["archived_files"] = (stats["archived_files"] or 0) + 1
                elif file_meta.status == FileStatus.DELETED:
                    stats["deleted_files"] = (stats["deleted_files"] or 0) + 1
                # Accumulate size and version counts.
                stats["total_size"] = (stats["total_size"] or 0) + (file_meta.size or 0)
                stats["versions_count"] = (stats["versions_count"] or 0) + (file_meta.version or 0)
                # Track oldest creation and newest modification.
                if oldest_date is None or file_meta.created_at < oldest_date:
                    oldest_date = file_meta.created_at
                    stats["oldest_file"] = filename
                if newest_date is None or file_meta.modified_at > newest_date:
                    newest_date = file_meta.modified_at
                    stats["newest_file"] = filename
            return stats
        except Exception:
            logger.exception("Failed to get storage statistics")
            return {}

    def _create_version_backup(self, filename: str, metadata: dict):
        """Copy the current content of *filename* into the versions area."""
        try:
            current_data = self._storage.load_once(filename)
            version_filename = f"{self._version_prefix}{filename}.v{metadata['version']}"
            self._storage.save(version_filename, current_data)
            logger.debug("Created version backup: %s", version_filename)
        except Exception as e:
            # Best-effort: a failed backup must not block the save itself.
            logger.warning("Failed to create version backup for %s: %s", filename, e)

    def _load_metadata(self) -> dict[str, Any]:
        """Load the metadata file; returns {} when missing or unreadable."""
        try:
            if self._storage.exists(self._metadata_file):
                metadata_content = self._storage.load_once(self._metadata_file)
                result = json.loads(metadata_content.decode("utf-8"))
                return dict(result) if result else {}
            else:
                return {}
        except Exception as e:
            logger.warning("Failed to load metadata: %s", e)
            return {}

    def _save_metadata(self, metadata_dict: dict):
        """Persist the metadata dictionary as pretty-printed JSON."""
        try:
            metadata_content = json.dumps(metadata_dict, indent=2, ensure_ascii=False)
            self._storage.save(self._metadata_file, metadata_content.encode("utf-8"))
            logger.debug("Metadata saved successfully")
        except Exception:
            logger.exception("Failed to save metadata")
            raise

    def _calculate_checksum(self, data: bytes) -> str:
        """Return an MD5 hex digest of *data* (integrity check, not security)."""
        import hashlib

        return hashlib.md5(data).hexdigest()

    def _check_permission(self, filename: str, operation: str) -> bool:
        """Check file operation permission.

        Args:
            filename: File name (used for logging only).
            operation: Operation type.

        Returns:
            True if permitted (or no permission manager is configured).
        """
        # If no permission manager, allow by default
        if not self._permission_manager:
            return True
        try:
            # Map lifecycle operation names to storage permission names.
            operation_mapping = {
                "save": "save",
                "load": "load_once",
                "delete": "delete",
                "archive": "delete",  # Archive requires delete permission
                "restore": "save",  # Restore requires write permission
                "cleanup": "delete",  # Cleanup requires delete permission
                "read": "load_once",
                "write": "save",
            }
            mapped_operation = operation_mapping.get(operation, operation)
            result = self._permission_manager.validate_operation(mapped_operation, self._dataset_id)
            return bool(result)
        except Exception:
            logger.exception("Permission check failed for %s operation %s", filename, operation)
            # Safe default: deny access when permission check fails
            return False

View File

@@ -1,649 +0,0 @@
"""ClickZetta Volume permission management mechanism
This module provides Volume permission checking, validation and management features.
According to ClickZetta's permission model, different Volume types have different permission requirements.
"""
import logging
from enum import StrEnum
logger = logging.getLogger(__name__)
class VolumePermission(StrEnum):
"""Volume permission type enumeration"""
READ = "SELECT" # Corresponds to ClickZetta's SELECT permission
WRITE = "INSERT,UPDATE,DELETE" # Corresponds to ClickZetta's write permissions
LIST = "SELECT" # Listing files requires SELECT permission
DELETE = "INSERT,UPDATE,DELETE" # Deleting files requires write permissions
USAGE = "USAGE" # Basic permission required for External Volume
class VolumePermissionManager:
"""Volume permission manager"""
def __init__(self, connection_or_config, volume_type: str | None = None, volume_name: str | None = None):
    """Initialize permission manager.

    Args:
        connection_or_config: ClickZetta connection object or configuration dictionary.
        volume_type: Volume type (user|table|external). When a config dict
            carries its own ``volume_type``, the dict value wins.
        volume_name: Volume name (for external volume). When a config dict
            carries its own ``volume_name``, the dict value wins.

    Raises:
        ValueError: If no usable connection results, or volume_type is missing.
    """
    # Two initialization modes: a config dict (a new connection is created
    # from it) or an already-established connection object.
    if isinstance(connection_or_config, dict):
        # Create connection from configuration dictionary
        import clickzetta

        config = connection_or_config
        self._connection = clickzetta.connect(
            username=config.get("username"),
            password=config.get("password"),
            instance=config.get("instance"),
            service=config.get("service"),
            workspace=config.get("workspace"),
            vcluster=config.get("vcluster"),
            # Either key may carry the schema name depending on caller.
            schema=config.get("schema") or config.get("database"),
        )
        self._volume_type = config.get("volume_type", volume_type)
        self._volume_name = config.get("volume_name", volume_name)
    else:
        # Use connection object directly
        self._connection = connection_or_config
        self._volume_type = volume_type
        self._volume_name = volume_name
    if not self._connection:
        raise ValueError("Valid connection or config is required")
    if not self._volume_type:
        raise ValueError("volume_type is required")
    # Cache of computed permission sets, keyed by object identifier.
    self._permission_cache: dict[str, set[str]] = {}
    self._current_username = None  # Lazily fetched from the connection
def check_permission(self, operation: VolumePermission, dataset_id: str | None = None) -> bool:
    """Check whether the current user may perform the given operation.

    Args:
        operation: Type of operation to perform.
        dataset_id: Dataset ID (required for table volumes).

    Returns:
        True if permitted, False otherwise (and on any error).
    """
    try:
        volume_type = self._volume_type
        if volume_type == "user":
            return self._check_user_volume_permission(operation)
        if volume_type == "table":
            return self._check_table_volume_permission(operation, dataset_id)
        if volume_type == "external":
            return self._check_external_volume_permission(operation)
        logger.warning("Unknown volume type: %s", volume_type)
        return False
    except Exception:
        logger.exception("Permission check failed")
        return False
def _check_user_volume_permission(self, operation: VolumePermission) -> bool:
    """Check User Volume permission.

    User Volume permission rules:
    - A user has full permissions on their own User Volume.
    - Any user who can connect to ClickZetta has basic User Volume access,
      so this check verifies connection health rather than fine-grained
      privileges.

    Args:
        operation: Requested operation (used for logging only; it does not
            gate the outcome).

    Returns:
        True when a trivial query succeeds over the connection, else False.
    """
    try:
        # Get current username (for log messages only).
        current_user = self._get_current_username()
        # A successful trivial query is treated as sufficient proof of
        # basic User Volume access.
        with self._connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            result = cursor.fetchone()
            if result:
                logger.debug(
                    "User Volume permission check for %s, operation %s: granted (basic connection verified)",
                    current_user,
                    operation.name,
                )
                return True
            else:
                logger.warning(
                    "User Volume permission check failed: cannot verify basic connection for %s", current_user
                )
                return False
    except Exception:
        logger.exception("User Volume permission check failed")
        # A failure here is most likely a configuration issue; emit a
        # friendlier informational message before denying access.
        logger.info("User Volume permission check failed, but permission checking is disabled in this version")
        return False
def _check_table_volume_permission(self, operation: VolumePermission, dataset_id: str | None) -> bool:
"""Check Table Volume permission
Table Volume permission rules:
- Table Volume permissions inherit from corresponding table permissions
- SELECT permission -> can READ/LIST files
- INSERT,UPDATE,DELETE permissions -> can WRITE/DELETE files
"""
if not dataset_id:
logger.warning("dataset_id is required for table volume permission check")
return False
table_name = f"dataset_{dataset_id}" if not dataset_id.startswith("dataset_") else dataset_id
try:
# Check table permissions
permissions = self._get_table_permissions(table_name)
required_permissions = set(operation.value.split(","))
# Check if has all required permissions
has_permission = required_permissions.issubset(permissions)
logger.debug(
"Table Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
table_name,
operation.name,
required_permissions,
permissions,
has_permission,
)
return has_permission
except Exception:
logger.exception("Table volume permission check failed for %s", table_name)
return False
def _check_external_volume_permission(self, operation: VolumePermission) -> bool:
    """Check External Volume permission.

    External Volume permission rules:
    - First consult the granted privileges for the named volume.
    - If that denies access, fall back to verifying the volume is at least
      visible via ``SHOW VOLUMES`` (lenient check for development setups).

    Args:
        operation: Requested operation; READ/LIST need "read", WRITE/DELETE
            need "write".

    Returns:
        True when the required privilege is granted or the fallback
        visibility check succeeds; False otherwise.
    """
    if not self._volume_name:
        logger.warning("volume_name is required for external volume permission check")
        return False
    try:
        # Check External Volume permissions
        permissions = self._get_external_volume_permissions(self._volume_name)
        # Map the operation onto the coarse read/write privilege model.
        required_permissions = set()
        if operation in [VolumePermission.READ, VolumePermission.LIST]:
            required_permissions.add("read")
        elif operation in [VolumePermission.WRITE, VolumePermission.DELETE]:
            required_permissions.add("write")
        # Check if has all required permissions
        has_permission = required_permissions.issubset(permissions)
        logger.debug(
            "External Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
            self._volume_name,
            operation.name,
            required_permissions,
            permissions,
            has_permission,
        )
        # If permission check fails, try fallback verification
        if not has_permission:
            logger.info("Direct permission check failed for %s, trying fallback verification", self._volume_name)
            # Fallback: the volume appearing in SHOW VOLUMES is taken as
            # evidence of basic access.
            try:
                with self._connection.cursor() as cursor:
                    cursor.execute("SHOW VOLUMES")
                    volumes = cursor.fetchall()
                    for volume in volumes:
                        if len(volume) > 0 and volume[0] == self._volume_name:
                            logger.info("Fallback verification successful for %s", self._volume_name)
                            return True
            except Exception as fallback_e:
                logger.warning("Fallback verification failed for %s: %s", self._volume_name, fallback_e)
        return has_permission
    except Exception:
        logger.exception("External volume permission check failed for %s", self._volume_name)
        logger.info("External Volume permission check failed, but permission checking is disabled in this version")
        return False
def _get_table_permissions(self, table_name: str) -> set[str]:
    """Get the current user's permissions for the specified table.

    Results are cached per table for the lifetime of this manager.

    Args:
        table_name: Table name.

    Returns:
        Set of privilege names ("SELECT", "INSERT", "UPDATE", "DELETE")
        granted on this table; empty set when none can be determined.
    """
    cache_key = f"table:{table_name}"
    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]
    permissions = set()
    try:
        with self._connection.cursor() as cursor:
            # Use correct ClickZetta syntax to check current user permissions
            cursor.execute("SHOW GRANTS")
            grants = cursor.fetchall()
            # Parse permission results, find permissions for this table
            for grant in grants:
                if len(grant) >= 3:  # Typical format: (privilege, object_type, object_name, ...)
                    privilege = grant[0].upper()
                    object_type = grant[1].upper() if len(grant) > 1 else ""
                    object_name = grant[2] if len(grant) > 2 else ""
                    # Matches either a direct TABLE grant on this table, or a
                    # SCHEMA grant whose name appears within the table name.
                    # (Python precedence groups this as
                    # (TABLE and exact-match) or (SCHEMA and substring).)
                    if (
                        object_type == "TABLE"
                        and object_name == table_name
                        or object_type == "SCHEMA"
                        and object_name in table_name
                    ):
                        if privilege in ["SELECT", "INSERT", "UPDATE", "DELETE", "ALL"]:
                            if privilege == "ALL":
                                permissions.update(["SELECT", "INSERT", "UPDATE", "DELETE"])
                            else:
                                permissions.add(privilege)
            # If no explicit grants were found, probe with a cheap query to
            # detect implicit SELECT permission.
            if not permissions:
                try:
                    cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1")
                    permissions.add("SELECT")
                except Exception:
                    logger.debug("Cannot query table %s, no SELECT permission", table_name)
    except Exception as e:
        logger.warning("Could not check table permissions for %s: %s", table_name, e)
        # Safe default: deny access when permission check fails
        pass
    # Cache permission information
    self._permission_cache[cache_key] = permissions
    return permissions
def _get_current_username(self) -> str:
"""Get current username"""
if self._current_username:
return self._current_username
try:
with self._connection.cursor() as cursor:
cursor.execute("SELECT CURRENT_USER()")
result = cursor.fetchone()
if result:
self._current_username = result[0]
return str(self._current_username)
except Exception:
logger.exception("Failed to get current username")
return "unknown"
def _get_user_permissions(self, username: str) -> set[str]:
    """Get the user's basic permission set from ``SHOW GRANTS``.

    Results are cached per username for the lifetime of this manager.

    Args:
        username: Username used only as the cache key; the query always
            reflects the connection's current user.

    Returns:
        Set of privilege names ("SELECT", "INSERT", "UPDATE", "DELETE")
        found in the grants; empty set when none can be determined.
    """
    cache_key = f"user_permissions:{username}"
    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]
    permissions = set()
    try:
        with self._connection.cursor() as cursor:
            # Use correct ClickZetta syntax to check current user permissions
            cursor.execute("SHOW GRANTS")
            grants = cursor.fetchall()
            # Parse permission results, find user's basic permissions
            for grant in grants:
                if len(grant) >= 3:  # Typical format: (privilege, object_type, object_name, ...)
                    privilege = grant[0].upper()
                    _ = grant[1].upper() if len(grant) > 1 else ""
                    # Collect all relevant privileges regardless of object;
                    # ALL expands to the four concrete privileges.
                    if privilege in ["SELECT", "INSERT", "UPDATE", "DELETE", "ALL"]:
                        if privilege == "ALL":
                            permissions.update(["SELECT", "INSERT", "UPDATE", "DELETE"])
                        else:
                            permissions.add(privilege)
    except Exception as e:
        logger.warning("Could not check user permissions for %s: %s", username, e)
        # Safe default: deny access when permission check fails
        pass
    # Cache permission information
    self._permission_cache[cache_key] = permissions
    return permissions
def _get_external_volume_permissions(self, volume_name: str) -> set[str]:
    """Get user permissions for specified External Volume.

    Runs ``SHOW GRANTS ON VOLUME <name>`` and maps the returned grant rows
    onto the lower-case permission set {"read", "write", "alter"}. Results
    are cached per volume name. If the grant query fails, a basic
    ``SHOW VOLUMES`` visibility check is attempted; if that also fails,
    read access is assumed as a last resort (optimistic fallback).

    Args:
        volume_name: External Volume name

    Returns:
        Set of user permissions for this Volume
    """
    cache_key = f"external_volume:{volume_name}"
    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]
    permissions: set[str] = set()
    try:
        with self._connection.cursor() as cursor:
            # Use correct ClickZetta syntax to check Volume permissions
            logger.info("Checking permissions for volume: %s", volume_name)
            cursor.execute(f"SHOW GRANTS ON VOLUME {volume_name}")
            grants = cursor.fetchall()
            logger.info("Raw grants result for %s: %s", volume_name, grants)
            # Parse permission results
            # Expected row format (per the grant parsing below):
            # (granted_type, privilege, conditions, granted_on, object_name, granted_to,
            #  grantee_name, grantor_name, grant_option, granted_time)
            for grant in grants:
                logger.info("Processing grant: %s", grant)
                # Rows shorter than 5 columns cannot carry the fields we need; skip them.
                if len(grant) >= 5:
                    granted_type = grant[0]
                    privilege = grant[1].upper()
                    granted_on = grant[3]
                    object_name = grant[4]
                    logger.info(
                        "Grant details - type: %s, privilege: %s, granted_on: %s, object_name: %s",
                        granted_type,
                        privilege,
                        granted_on,
                        object_name,
                    )
                    # Check if it's permission for this Volume (direct grant whose object
                    # name ends with the volume name) or a hierarchical grant on volumes.
                    if (
                        granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)
                    ) or (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
                        logger.info("Matching grant found for %s", volume_name)
                        # Substring tests: privileges such as "READ,WRITE" map to several flags.
                        if "READ" in privilege:
                            permissions.add("read")
                            logger.info("Added READ permission for %s", volume_name)
                        if "WRITE" in privilege:
                            permissions.add("write")
                            logger.info("Added WRITE permission for %s", volume_name)
                        if "ALTER" in privilege:
                            permissions.add("alter")
                            logger.info("Added ALTER permission for %s", volume_name)
                        if privilege == "ALL":
                            permissions.update(["read", "write", "alter"])
                            logger.info("Added ALL permissions for %s", volume_name)
            logger.info("Final permissions for %s: %s", volume_name, permissions)
            # If no explicit permissions found, try viewing Volume list to verify basic permissions
            if not permissions:
                try:
                    cursor.execute("SHOW VOLUMES")
                    volumes = cursor.fetchall()
                    for volume in volumes:
                        if len(volume) > 0 and volume[0] == volume_name:
                            permissions.add("read")  # At least has read permission
                            logger.debug("Volume %s found in SHOW VOLUMES, assuming read permission", volume_name)
                            break
                except Exception:
                    logger.debug("Cannot access volume %s, no basic permission", volume_name)
    except Exception as e:
        logger.warning("Could not check external volume permissions for %s: %s", volume_name, e)
        # When permission check fails, try basic Volume access verification
        try:
            with self._connection.cursor() as cursor:
                cursor.execute("SHOW VOLUMES")
                volumes = cursor.fetchall()
                for volume in volumes:
                    if len(volume) > 0 and volume[0] == volume_name:
                        logger.info("Basic volume access verified for %s", volume_name)
                        permissions.add("read")
                        permissions.add("write")  # Assume has write permission
                        break
        except Exception as basic_e:
            logger.warning("Basic volume access check failed for %s: %s", volume_name, basic_e)
            # Last fallback: assume basic permissions
            permissions.add("read")
    # Cache permission information
    self._permission_cache[cache_key] = permissions
    return permissions
def clear_permission_cache(self):
    """Drop every cached permission entry so future checks re-query the server."""
    # In-place clear so any existing references to the dict stay valid.
    self._permission_cache.clear()
    logger.debug("Permission cache cleared")
@property
def volume_type(self) -> str | None:
    """The configured volume type ("table", "user" or "external"); may be None."""
    return self._volume_type
def get_permission_summary(self, dataset_id: str | None = None) -> dict[str, bool]:
    """Get permission summary for every operation in ``VolumePermission``.

    Args:
        dataset_id: Dataset ID (only relevant for table volumes).

    Returns:
        Mapping of lower-case operation name (e.g. ``"read"``) to whether
        the current user may perform it.
    """
    # One check_permission call per enum member, collected in a dict comprehension.
    return {operation.name.lower(): self.check_permission(operation, dataset_id) for operation in VolumePermission}
def check_inherited_permission(self, file_path: str, operation: VolumePermission) -> bool:
    """Check permission inheritance for file path.

    Combines a scope-level permission check (dataset for table volumes,
    own directory for user volumes, the volume itself for external
    volumes) with path-safety checks (traversal sequences, sensitive
    locations). Fails closed: any unexpected error yields False.

    Args:
        file_path: File path
        operation: Operation to perform

    Returns:
        True if user has permission, False otherwise
    """
    try:
        # Parse file path into its components.
        path_parts = file_path.strip("/").split("/")
        if not path_parts:
            logger.warning("Invalid file path for permission inheritance check")
            return False
        # For Table Volume, first layer is dataset_id
        if self._volume_type == "table":
            if len(path_parts) < 1:
                return False
            dataset_id = path_parts[0]
            # Check permissions for dataset before inspecting the path itself.
            has_dataset_permission = self.check_permission(operation, dataset_id)
            if not has_dataset_permission:
                logger.debug("Permission denied for dataset %s", dataset_id)
                return False
            # Check path traversal attack
            if self._contains_path_traversal(file_path):
                logger.warning("Path traversal attack detected: %s", file_path)
                return False
            # Check if accessing sensitive directory
            if self._is_sensitive_path(file_path):
                logger.warning("Access to sensitive path denied: %s", file_path)
                return False
            logger.debug("Permission inherited for path %s", file_path)
            return True
        elif self._volume_type == "user":
            # User Volume permission inheritance: users may only enter their own directory.
            current_user = self._get_current_username()
            # Check if attempting to access other user's directory
            if len(path_parts) > 1 and path_parts[0] != current_user:
                logger.warning("User %s attempted to access %s's directory", current_user, path_parts[0])
                return False
            # Check basic permissions
            return self.check_permission(operation)
        elif self._volume_type == "external":
            # External Volume permission inheritance:
            # rely entirely on the volume-level permission check.
            return self.check_permission(operation)
        else:
            logger.warning("Unknown volume type for permission inheritance: %s", self._volume_type)
            return False
    except Exception:
        # Fail closed on any unexpected error.
        logger.exception("Permission inheritance check failed")
        return False
def _contains_path_traversal(self, file_path: str) -> bool:
"""Check if path contains path traversal attack"""
# Check common path traversal patterns
traversal_patterns = [
"../",
"..\\",
"..%2f",
"..%2F",
"..%5c",
"..%5C",
"%2e%2e%2f",
"%2e%2e%5c",
"....//",
"....\\\\",
]
file_path_lower = file_path.lower()
for pattern in traversal_patterns:
if pattern in file_path_lower:
return True
# Check absolute path
if file_path.startswith("/") or file_path.startswith("\\"):
return True
# Check Windows drive path
if len(file_path) >= 2 and file_path[1] == ":":
return True
return False
def _is_sensitive_path(self, file_path: str) -> bool:
"""Check if path is sensitive path"""
sensitive_patterns = [
"passwd",
"shadow",
"hosts",
"config",
"secrets",
"private",
"key",
"certificate",
"cert",
"ssl",
"database",
"backup",
"dump",
"log",
"tmp",
]
file_path_lower = file_path.lower()
return any(pattern in file_path_lower for pattern in sensitive_patterns)
def validate_operation(self, operation: str, dataset_id: str | None = None) -> bool:
    """Validate operation permission.

    Maps a storage-level operation name onto the corresponding
    ``VolumePermission`` and delegates to ``check_permission``.

    Args:
        operation: Operation name (save|load|exists|delete|scan)
        dataset_id: Dataset ID

    Returns:
        True if operation is allowed, False otherwise
    """
    operation_mapping = {
        "save": VolumePermission.WRITE,
        "load": VolumePermission.READ,
        "load_once": VolumePermission.READ,
        "load_stream": VolumePermission.READ,
        "download": VolumePermission.READ,
        "exists": VolumePermission.READ,
        "delete": VolumePermission.DELETE,
        "scan": VolumePermission.LIST,
    }
    volume_permission = operation_mapping.get(operation)
    if volume_permission is None:
        logger.warning("Unknown operation: %s", operation)
        return False
    return self.check_permission(volume_permission, dataset_id)
class VolumePermissionError(Exception):
"""Volume permission error exception"""
def __init__(self, message: str, operation: str, volume_type: str, dataset_id: str | None = None):
self.operation = operation
self.volume_type = volume_type
self.dataset_id = dataset_id
super().__init__(message)
def check_volume_permission(permission_manager: VolumePermissionManager, operation: str, dataset_id: str | None = None):
    """Permission check helper: raise when *operation* is not permitted.

    Args:
        permission_manager: Permission manager
        operation: Operation name
        dataset_id: Dataset ID

    Raises:
        VolumePermissionError: If no permission
    """
    # Fast path: allowed operations return silently.
    if permission_manager.validate_operation(operation, dataset_id):
        return
    dataset_suffix = f" (dataset: {dataset_id})" if dataset_id else ""
    error_message = (
        f"Permission denied for operation '{operation}' on {permission_manager.volume_type} volume{dataset_suffix}"
    )
    raise VolumePermissionError(
        error_message,
        operation=operation,
        volume_type=permission_manager.volume_type or "unknown",
        dataset_id=dataset_id,
    )

View File

@@ -5,7 +5,6 @@ class StorageType(StrEnum):
ALIYUN_OSS = "aliyun-oss"
AZURE_BLOB = "azure-blob"
BAIDU_OBS = "baidu-obs"
CLICKZETTA_VOLUME = "clickzetta-volume"
GOOGLE_STORAGE = "google-storage"
HUAWEI_OBS = "huawei-obs"
LOCAL = "local"

View File

@@ -11,6 +11,13 @@ class CreatorUserRole(StrEnum):
ACCOUNT = "account"
END_USER = "end_user"
@classmethod
def _missing_(cls, value):
if value == "end-user":
return cls.END_USER
else:
return super()._missing_(value)
class WorkflowRunTriggeredFrom(StrEnum):
DEBUGGING = "debugging"

View File

@@ -22,14 +22,14 @@ from sqlalchemy import (
from sqlalchemy.orm import Mapped, mapped_column
from typing_extensions import deprecated
from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE
from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE
from dify_graph.constants import (
CONVERSATION_VARIABLE_NODE_ID,
SYSTEM_VARIABLE_NODE_ID,
)
from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
from dify_graph.entities.pause_reason import HumanInputRequired, PauseReason, PauseReasonType, SchedulingPause
from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowExecutionStatus
from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey
from dify_graph.file.constants import maybe_file_object
from dify_graph.file.models import File
from dify_graph.variables import utils as variable_utils
@@ -936,8 +936,11 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo
elif self.node_type == BuiltinNodeTypes.DATASOURCE and "datasource_info" in execution_metadata:
datasource_info = execution_metadata["datasource_info"]
extras["icon"] = datasource_info.get("icon")
elif self.node_type == TRIGGER_PLUGIN_NODE_TYPE and TRIGGER_INFO_METADATA_KEY in execution_metadata:
trigger_info = execution_metadata[TRIGGER_INFO_METADATA_KEY] or {}
elif (
self.node_type == TRIGGER_PLUGIN_NODE_TYPE
and WorkflowNodeExecutionMetadataKey.TRIGGER_INFO in execution_metadata
):
trigger_info = execution_metadata[WorkflowNodeExecutionMetadataKey.TRIGGER_INFO] or {}
provider_id = trigger_info.get("provider_id")
if provider_id:
extras["icon"] = TriggerManager.get_trigger_plugin_icon(

View File

@@ -1,6 +1,6 @@
[project]
name = "dify-api"
version = "1.13.1"
version = "1.13.2"
requires-python = ">=3.11,<3.13"
dependencies = [
@@ -206,7 +206,6 @@ vdb = [
"alibabacloud_tea_openapi~=0.4.3",
"chromadb==0.5.20",
"clickhouse-connect~=0.14.1",
"clickzetta-connector-python>=0.8.102",
"couchbase~=4.5.0",
"elasticsearch==8.14.0",
"opensearch-py==3.1.0",

View File

@@ -49,7 +49,6 @@ core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py
core/rag/datasource/vdb/baidu/baidu_vector.py
core/rag/datasource/vdb/chroma/chroma_vector.py
core/rag/datasource/vdb/clickzetta/clickzetta_vector.py
core/rag/datasource/vdb/couchbase/couchbase_vector.py
core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
core/rag/datasource/vdb/huawei/huawei_cloud_vector.py
@@ -144,8 +143,6 @@ extensions/storage/aliyun_oss_storage.py
extensions/storage/aws_s3_storage.py
extensions/storage/azure_blob_storage.py
extensions/storage/baidu_obs_storage.py
extensions/storage/clickzetta_volume/clickzetta_volume_storage.py
extensions/storage/clickzetta_volume/file_lifecycle.py
extensions/storage/google_cloud_storage.py
extensions/storage/huawei_obs_storage.py
extensions/storage/opendal_storage.py

View File

@@ -28,7 +28,6 @@
"baidubce.auth.bce_credentials",
"baidubce.bce_client_configuration",
"baidubce.services.bos.bos_client",
"clickzetta",
"google.cloud",
"obs",
"qcloud_cos",
@@ -52,4 +51,4 @@
"reportAttributeAccessIssue": "hint",
"pythonVersion": "3.11",
"pythonPlatform": "All"
}
}

View File

@@ -1,168 +0,0 @@
"""Integration tests for ClickZetta Volume Storage."""
import os
import tempfile
import unittest
from pathlib import Path
import pytest
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
ClickZettaVolumeConfig,
ClickZettaVolumeStorage,
)
class TestClickZettaVolumeStorage(unittest.TestCase):
"""Test cases for ClickZetta Volume Storage."""
def setUp(self):
"""Set up test environment."""
self.config = ClickZettaVolumeConfig(
username=os.getenv("CLICKZETTA_USERNAME", "test_user"),
password=os.getenv("CLICKZETTA_PASSWORD", "test_pass"),
instance=os.getenv("CLICKZETTA_INSTANCE", "test_instance"),
service=os.getenv("CLICKZETTA_SERVICE", "uat-api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"),
schema_name=os.getenv("CLICKZETTA_SCHEMA", "dify"),
volume_type="table",
table_prefix="test_dataset_",
)
@pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
def test_user_volume_operations(self):
"""Test basic operations with User Volume."""
config = self.config
config.volume_type = "user"
storage = ClickZettaVolumeStorage(config)
# Test file operations
test_filename = "test_file.txt"
test_content = b"Hello, ClickZetta Volume!"
# Save file
storage.save(test_filename, test_content)
# Check if file exists
assert storage.exists(test_filename)
# Load file
loaded_content = storage.load_once(test_filename)
assert loaded_content == test_content
# Test streaming
stream_content = b""
for chunk in storage.load_stream(test_filename):
stream_content += chunk
assert stream_content == test_content
# Test download
with tempfile.NamedTemporaryFile() as temp_file:
storage.download(test_filename, temp_file.name)
downloaded_content = Path(temp_file.name).read_bytes()
assert downloaded_content == test_content
# Test scan
files = storage.scan("", files=True, directories=False)
assert test_filename in files
# Delete file
storage.delete(test_filename)
assert not storage.exists(test_filename)
@pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
def test_table_volume_operations(self):
"""Test basic operations with Table Volume."""
config = self.config
config.volume_type = "table"
storage = ClickZettaVolumeStorage(config)
# Test file operations with dataset_id
dataset_id = "12345"
test_filename = f"{dataset_id}/test_file.txt"
test_content = b"Hello, Table Volume!"
# Save file
storage.save(test_filename, test_content)
# Check if file exists
assert storage.exists(test_filename)
# Load file
loaded_content = storage.load_once(test_filename)
assert loaded_content == test_content
# Test scan for dataset
files = storage.scan(dataset_id, files=True, directories=False)
assert "test_file.txt" in files
# Delete file
storage.delete(test_filename)
assert not storage.exists(test_filename)
def test_config_validation(self):
"""Test configuration validation."""
# Test missing required fields
with pytest.raises(ValueError):
ClickZettaVolumeConfig(
username="", # Empty username should fail
password="pass",
instance="instance",
)
# Test invalid volume type
with pytest.raises(ValueError):
ClickZettaVolumeConfig(username="user", password="pass", instance="instance", volume_type="invalid_type")
# Test external volume without volume_name
with pytest.raises(ValueError):
ClickZettaVolumeConfig(
username="user",
password="pass",
instance="instance",
volume_type="external",
# Missing volume_name
)
def test_volume_path_generation(self):
"""Test volume path generation for different types."""
storage = ClickZettaVolumeStorage(self.config)
# Test table volume path
path = storage._get_volume_path("test.txt", "12345")
assert path == "test_dataset_12345/test.txt"
# Test path with existing dataset_id prefix
path = storage._get_volume_path("12345/test.txt")
assert path == "12345/test.txt"
# Test user volume
storage._config.volume_type = "user"
path = storage._get_volume_path("test.txt")
assert path == "test.txt"
def test_sql_prefix_generation(self):
"""Test SQL prefix generation for different volume types."""
storage = ClickZettaVolumeStorage(self.config)
# Test table volume SQL prefix
prefix = storage._get_volume_sql_prefix("12345")
assert prefix == "TABLE VOLUME test_dataset_12345"
# Test user volume SQL prefix
storage._config.volume_type = "user"
prefix = storage._get_volume_sql_prefix()
assert prefix == "USER VOLUME"
# Test external volume SQL prefix
storage._config.volume_type = "external"
storage._config.volume_name = "my_external_volume"
prefix = storage._get_volume_sql_prefix()
assert prefix == "VOLUME my_external_volume"
if __name__ == "__main__":
unittest.main()

View File

@@ -1,25 +0,0 @@
# Clickzetta Integration Tests
## Running Tests
To run the Clickzetta integration tests, you need to set the following environment variables:
```bash
export CLICKZETTA_USERNAME=your_username
export CLICKZETTA_PASSWORD=your_password
export CLICKZETTA_INSTANCE=your_instance
export CLICKZETTA_SERVICE=api.clickzetta.com
export CLICKZETTA_WORKSPACE=your_workspace
export CLICKZETTA_VCLUSTER=your_vcluster
export CLICKZETTA_SCHEMA=dify
```
Then run the tests:
```bash
pytest api/tests/integration_tests/vdb/clickzetta/
```
## Security Note
Never commit credentials to the repository. Always use environment variables or secure credential management systems.

View File

@@ -1,223 +0,0 @@
import contextlib
import os
import pytest
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector
from core.rag.models.document import Document
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis
class TestClickzettaVector(AbstractVectorTest):
"""
Test cases for Clickzetta vector database integration.
"""
@pytest.fixture
def vector_store(self):
"""Create a Clickzetta vector store instance for testing."""
# Skip test if Clickzetta credentials are not configured
if not os.getenv("CLICKZETTA_USERNAME"):
pytest.skip("CLICKZETTA_USERNAME is not configured")
if not os.getenv("CLICKZETTA_PASSWORD"):
pytest.skip("CLICKZETTA_PASSWORD is not configured")
if not os.getenv("CLICKZETTA_INSTANCE"):
pytest.skip("CLICKZETTA_INSTANCE is not configured")
config = ClickzettaConfig(
username=os.getenv("CLICKZETTA_USERNAME", ""),
password=os.getenv("CLICKZETTA_PASSWORD", ""),
instance=os.getenv("CLICKZETTA_INSTANCE", ""),
service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"),
schema=os.getenv("CLICKZETTA_SCHEMA", "dify_test"),
batch_size=10, # Small batch size for testing
enable_inverted_index=True,
analyzer_type="chinese",
analyzer_mode="smart",
vector_distance_function="cosine_distance",
)
with setup_mock_redis():
vector = ClickzettaVector(collection_name="test_collection_" + str(os.getpid()), config=config)
yield vector
# Cleanup: delete the test collection
with contextlib.suppress(Exception):
vector.delete()
def test_clickzetta_vector_basic_operations(self, vector_store):
"""Test basic CRUD operations on Clickzetta vector store."""
# Prepare test data
texts = [
"这是第一个测试文档,包含一些中文内容。",
"This is the second test document with English content.",
"第三个文档混合了English和中文内容。",
]
embeddings = [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
]
documents = [
Document(page_content=text, metadata={"doc_id": f"doc_{i}", "source": "test"})
for i, text in enumerate(texts)
]
# Test create (initial insert)
vector_store.create(texts=documents, embeddings=embeddings)
# Test text_exists
assert vector_store.text_exists("doc_0")
assert not vector_store.text_exists("doc_999")
# Test search_by_vector
query_vector = [0.1, 0.2, 0.3, 0.4]
results = vector_store.search_by_vector(query_vector, top_k=2)
assert len(results) > 0
assert results[0].page_content == texts[0] # Should match the first document
# Test search_by_full_text (Chinese)
results = vector_store.search_by_full_text("中文", top_k=3)
assert len(results) >= 2 # Should find documents with Chinese content
# Test search_by_full_text (English)
results = vector_store.search_by_full_text("English", top_k=3)
assert len(results) >= 2 # Should find documents with English content
# Test delete_by_ids
vector_store.delete_by_ids(["doc_0"])
assert not vector_store.text_exists("doc_0")
assert vector_store.text_exists("doc_1")
# Test delete_by_metadata_field
vector_store.delete_by_metadata_field("source", "test")
assert not vector_store.text_exists("doc_1")
assert not vector_store.text_exists("doc_2")
def test_clickzetta_vector_advanced_search(self, vector_store):
"""Test advanced search features of Clickzetta vector store."""
# Prepare test data with more complex metadata
documents = []
embeddings = []
for i in range(10):
doc = Document(
page_content=f"Document {i}: " + get_example_text(),
metadata={
"doc_id": f"adv_doc_{i}",
"category": "technical" if i % 2 == 0 else "general",
"document_id": f"doc_{i // 3}", # Group documents
"importance": i,
},
)
documents.append(doc)
# Create varied embeddings
embeddings.append([0.1 * i, 0.2 * i, 0.3 * i, 0.4 * i])
vector_store.create(texts=documents, embeddings=embeddings)
# Test vector search with document filter
query_vector = [0.5, 1.0, 1.5, 2.0]
results = vector_store.search_by_vector(query_vector, top_k=5, document_ids_filter=["doc_0", "doc_1"])
assert len(results) > 0
# All results should belong to doc_0 or doc_1 groups
for result in results:
assert result.metadata["document_id"] in ["doc_0", "doc_1"]
# Test score threshold
results = vector_store.search_by_vector(query_vector, top_k=10, score_threshold=0.5)
# Check that all results have a score above threshold
for result in results:
assert result.metadata.get("score", 0) >= 0.5
def test_clickzetta_batch_operations(self, vector_store):
"""Test batch insertion operations."""
# Prepare large batch of documents
batch_size = 25
documents = []
embeddings = []
for i in range(batch_size):
doc = Document(
page_content=f"Batch document {i}: This is a test document for batch processing.",
metadata={"doc_id": f"batch_doc_{i}", "batch": "test_batch"},
)
documents.append(doc)
embeddings.append([0.1 * (i % 10), 0.2 * (i % 10), 0.3 * (i % 10), 0.4 * (i % 10)])
# Test batch insert
vector_store.add_texts(documents=documents, embeddings=embeddings)
# Verify all documents were inserted
for i in range(batch_size):
assert vector_store.text_exists(f"batch_doc_{i}")
# Clean up
vector_store.delete_by_metadata_field("batch", "test_batch")
def test_clickzetta_edge_cases(self, vector_store):
"""Test edge cases and error handling."""
# Test empty operations
vector_store.create(texts=[], embeddings=[])
vector_store.add_texts(documents=[], embeddings=[])
vector_store.delete_by_ids([])
# Test special characters in content
special_doc = Document(
page_content="Special chars: 'quotes', \"double\", \\backslash, \n newline",
metadata={"doc_id": "special_doc", "test": "edge_case"},
)
embeddings = [[0.1, 0.2, 0.3, 0.4]]
vector_store.add_texts(documents=[special_doc], embeddings=embeddings)
assert vector_store.text_exists("special_doc")
# Test search with special characters
results = vector_store.search_by_full_text("quotes", top_k=1)
if results: # Full-text search might not be available
assert len(results) > 0
# Clean up
vector_store.delete_by_ids(["special_doc"])
def test_clickzetta_full_text_search_modes(self, vector_store):
"""Test different full-text search capabilities."""
# Prepare documents with various language content
documents = [
Document(
page_content="云器科技提供强大的Lakehouse解决方案", metadata={"doc_id": "cn_doc_1", "lang": "chinese"}
),
Document(
page_content="Clickzetta provides powerful Lakehouse solutions",
metadata={"doc_id": "en_doc_1", "lang": "english"},
),
Document(
page_content="Lakehouse是现代数据架构的重要组成部分", metadata={"doc_id": "cn_doc_2", "lang": "chinese"}
),
Document(
page_content="Modern data architecture includes Lakehouse technology",
metadata={"doc_id": "en_doc_2", "lang": "english"},
),
]
embeddings = [[0.1, 0.2, 0.3, 0.4] for _ in documents]
vector_store.create(texts=documents, embeddings=embeddings)
# Test Chinese full-text search
results = vector_store.search_by_full_text("Lakehouse", top_k=4)
assert len(results) >= 2 # Should find at least documents with "Lakehouse"
# Test English full-text search
results = vector_store.search_by_full_text("solutions", top_k=2)
assert len(results) >= 1 # Should find English documents with "solutions"
# Test mixed search
results = vector_store.search_by_full_text("数据架构", top_k=2)
assert len(results) >= 1 # Should find Chinese documents with this phrase
# Clean up
vector_store.delete_by_metadata_field("lang", "chinese")
vector_store.delete_by_metadata_field("lang", "english")

View File

@@ -1,165 +0,0 @@
#!/usr/bin/env python3
"""
Test Clickzetta integration in Docker environment
"""
import os
import time
import httpx
from clickzetta import connect
def test_clickzetta_connection():
"""Test direct connection to Clickzetta"""
print("=== Testing direct Clickzetta connection ===")
try:
conn = connect(
username=os.getenv("CLICKZETTA_USERNAME", "test_user"),
password=os.getenv("CLICKZETTA_PASSWORD", "test_password"),
instance=os.getenv("CLICKZETTA_INSTANCE", "test_instance"),
service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "test_workspace"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default"),
database=os.getenv("CLICKZETTA_SCHEMA", "dify"),
)
with conn.cursor() as cursor:
# Test basic connectivity
cursor.execute("SELECT 1 as test")
result = cursor.fetchone()
print(f"✓ Connection test: {result}")
# Check if our test table exists
cursor.execute("SHOW TABLES IN dify")
tables = cursor.fetchall()
print(f"✓ Existing tables: {[t[1] for t in tables if t[0] == 'dify']}")
# Check if test collection exists
test_collection = "collection_test_dataset"
if test_collection in [t[1] for t in tables if t[0] == "dify"]:
cursor.execute(f"DESCRIBE dify.{test_collection}")
columns = cursor.fetchall()
print(f"✓ Table structure for {test_collection}:")
for col in columns:
print(f" - {col[0]}: {col[1]}")
# Check for indexes
cursor.execute(f"SHOW INDEXES IN dify.{test_collection}")
indexes = cursor.fetchall()
print(f"✓ Indexes on {test_collection}:")
for idx in indexes:
print(f" - {idx}")
return True
except Exception as e:
print(f"✗ Connection test failed: {e}")
return False
def test_dify_api():
"""Test Dify API with Clickzetta backend"""
print("\n=== Testing Dify API ===")
base_url = "http://localhost:5001"
# Wait for API to be ready
max_retries = 30
for i in range(max_retries):
try:
response = httpx.get(f"{base_url}/console/api/health")
if response.status_code == 200:
print("✓ Dify API is ready")
break
except:
if i == max_retries - 1:
print("✗ Dify API is not responding")
return False
time.sleep(2)
# Check vector store configuration
try:
# This is a simplified check - in production, you'd use proper auth
print("✓ Dify is configured to use Clickzetta as vector store")
return True
except Exception as e:
print(f"✗ API test failed: {e}")
return False
def verify_table_structure():
"""Verify the table structure meets Dify requirements"""
print("\n=== Verifying Table Structure ===")
expected_columns = {
"id": "VARCHAR",
"page_content": "VARCHAR",
"metadata": "VARCHAR", # JSON stored as VARCHAR in Clickzetta
"vector": "ARRAY<FLOAT>",
}
expected_metadata_fields = ["doc_id", "doc_hash", "document_id", "dataset_id"]
print("✓ Expected table structure:")
for col, dtype in expected_columns.items():
print(f" - {col}: {dtype}")
print("\n✓ Required metadata fields:")
for field in expected_metadata_fields:
print(f" - {field}")
print("\n✓ Index requirements:")
print(" - Vector index (HNSW) on 'vector' column")
print(" - Full-text index on 'page_content' (optional)")
print(" - Functional index on metadata->>'$.doc_id' (recommended)")
print(" - Functional index on metadata->>'$.document_id' (recommended)")
return True
def main():
"""Run all tests"""
print("Starting Clickzetta integration tests for Dify Docker\n")
tests = [
("Direct Clickzetta Connection", test_clickzetta_connection),
("Dify API Status", test_dify_api),
("Table Structure Verification", verify_table_structure),
]
results = []
for test_name, test_func in tests:
try:
success = test_func()
results.append((test_name, success))
except Exception as e:
print(f"\n{test_name} crashed: {e}")
results.append((test_name, False))
# Summary
print("\n" + "=" * 50)
print("Test Summary:")
print("=" * 50)
passed = sum(1 for _, success in results if success)
total = len(results)
for test_name, success in results:
status = "✅ PASSED" if success else "❌ FAILED"
print(f"{test_name}: {status}")
print(f"\nTotal: {passed}/{total} tests passed")
if passed == total:
print("\n🎉 All tests passed! Clickzetta is ready for Dify Docker deployment.")
print("\nNext steps:")
print("1. Run: cd docker && docker-compose -f docker-compose.yaml -f docker-compose.clickzetta.yaml up -d")
print("2. Access Dify at http://localhost:3000")
print("3. Create a dataset and test vector storage with Clickzetta")
return 0
else:
print("\n⚠️ Some tests failed. Please check the errors above.")
return 1
if __name__ == "__main__":
exit(main())

View File

@@ -0,0 +1,106 @@
from unittest import mock
import pytest
from core.model_manager import ModelInstance
from dify_graph.model_runtime.entities import ImagePromptMessageContent, PromptMessageRole, TextPromptMessageContent
from dify_graph.model_runtime.entities.message_entities import SystemPromptMessage
from dify_graph.nodes.llm import llm_utils
from dify_graph.nodes.llm.entities import LLMNodeChatModelMessage
from dify_graph.nodes.llm.exc import NoPromptFoundError
from dify_graph.runtime import VariablePool
def _fetch_prompt_messages_with_mocked_content(content):
variable_pool = VariablePool.empty()
model_instance = mock.MagicMock(spec=ModelInstance)
prompt_template = [
LLMNodeChatModelMessage(
text="You are a classifier.",
role=PromptMessageRole.SYSTEM,
edition_type="basic",
)
]
with (
mock.patch(
"dify_graph.nodes.llm.llm_utils.fetch_model_schema",
return_value=mock.MagicMock(features=[]),
),
mock.patch(
"dify_graph.nodes.llm.llm_utils.handle_list_messages",
return_value=[SystemPromptMessage(content=content)],
),
mock.patch(
"dify_graph.nodes.llm.llm_utils.handle_memory_chat_mode",
return_value=[],
),
):
return llm_utils.fetch_prompt_messages(
sys_query=None,
sys_files=[],
context=None,
memory=None,
model_instance=model_instance,
prompt_template=prompt_template,
stop=["END"],
memory_config=None,
vision_enabled=False,
vision_detail=ImagePromptMessageContent.DETAIL.HIGH,
variable_pool=variable_pool,
jinja2_variables=[],
template_renderer=None,
)
def test_fetch_prompt_messages_skips_messages_when_all_contents_are_filtered_out():
with pytest.raises(NoPromptFoundError):
_fetch_prompt_messages_with_mocked_content(
[
ImagePromptMessageContent(
format="url",
url="https://example.com/image.png",
mime_type="image/png",
),
]
)
def test_fetch_prompt_messages_flattens_single_text_content_after_filtering_unsupported_multimodal_items():
prompt_messages, stop = _fetch_prompt_messages_with_mocked_content(
[
TextPromptMessageContent(data="You are a classifier."),
ImagePromptMessageContent(
format="url",
url="https://example.com/image.png",
mime_type="image/png",
),
]
)
assert stop == ["END"]
assert prompt_messages == [SystemPromptMessage(content="You are a classifier.")]
def test_fetch_prompt_messages_keeps_list_content_when_multiple_supported_items_remain():
    """Two surviving text items stay as a list; the unsupported image is dropped."""
    mixed_content = [
        TextPromptMessageContent(data="You are"),
        TextPromptMessageContent(data=" a classifier."),
        ImagePromptMessageContent(
            format="url",
            url="https://example.com/image.png",
            mime_type="image/png",
        ),
    ]
    prompt_messages, stop_words = _fetch_prompt_messages_with_mocked_content(mixed_content)
    expected_content = [
        TextPromptMessageContent(data="You are"),
        TextPromptMessageContent(data=" a classifier."),
    ]
    assert stop_words == ["END"]
    assert prompt_messages == [SystemPromptMessage(content=expected_content)]

View File

@@ -0,0 +1,63 @@
from collections.abc import Mapping
from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE
from core.workflow.nodes.trigger_plugin.trigger_event_node import TriggerEventNode
from dify_graph.entities import GraphInitParams
from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter
from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from dify_graph.runtime import GraphRuntimeState, VariablePool
from dify_graph.system_variable import SystemVariable
from tests.workflow_test_utils import build_test_graph_init_params
def _build_context(graph_config: Mapping[str, object]) -> tuple[GraphInitParams, GraphRuntimeState]:
    """Assemble the init params and runtime state a node needs to run in tests."""
    params = build_test_graph_init_params(
        graph_config=graph_config,
        user_from="account",
        invoke_from="debugger",
    )
    pool = VariablePool(
        system_variables=SystemVariable(user_id="user", files=[]),
        user_inputs={"payload": "value"},
    )
    state = GraphRuntimeState(variable_pool=pool, start_at=0.0)
    return params, state
def _build_node_config() -> NodeConfigDict:
    """Build a minimal, validated trigger-event node configuration."""
    node_data = {
        "type": TRIGGER_PLUGIN_NODE_TYPE,
        "title": "Trigger Event",
        "plugin_id": "plugin-id",
        "provider_id": "provider-id",
        "event_name": "event-name",
        "subscription_id": "subscription-id",
        "plugin_unique_identifier": "plugin-unique-identifier",
        "event_parameters": {},
    }
    return NodeConfigDictAdapter.validate_python({"id": "node-1", "data": node_data})
def test_trigger_event_node_run_populates_trigger_info_metadata() -> None:
    """_run should succeed and record provider/event/plugin ids as TRIGGER_INFO metadata."""
    params, state = _build_context(graph_config={})
    node = TriggerEventNode(
        id="node-1",
        config=_build_node_config(),
        graph_init_params=params,
        graph_runtime_state=state,
    )
    outcome = node._run()
    expected_trigger_info = {
        "provider_id": "provider-id",
        "event_name": "event-name",
        "plugin_unique_identifier": "plugin-unique-identifier",
    }
    assert outcome.status == WorkflowNodeExecutionStatus.SUCCEEDED
    assert outcome.metadata[WorkflowNodeExecutionMetadataKey.TRIGGER_INFO] == expected_trigger_info

View File

@@ -0,0 +1,19 @@
from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from dify_graph.node_events.base import NodeRunResult
def test_node_run_result_accepts_trigger_info_metadata() -> None:
    """NodeRunResult should round-trip a dict stored under the TRIGGER_INFO metadata key."""
    trigger_info = {
        "provider_id": "provider-id",
        "event_name": "event-name",
    }
    result = NodeRunResult(
        status=WorkflowNodeExecutionStatus.SUCCEEDED,
        metadata={WorkflowNodeExecutionMetadataKey.TRIGGER_INFO: dict(trigger_info)},
    )
    assert result.metadata[WorkflowNodeExecutionMetadataKey.TRIGGER_INFO] == trigger_info

View File

@@ -0,0 +1,19 @@
import pytest
from models.enums import CreatorUserRole
def test_creator_user_role_missing_maps_hyphen_to_enum():
    """The hyphenated alias "end-user" resolves to END_USER via the _missing_ hook."""
    resolved = CreatorUserRole("end-user")
    assert resolved is CreatorUserRole.END_USER
def test_creator_user_role_missing_raises_for_unknown():
    """A value matching neither a member nor an alias must raise ValueError."""
    unknown_value = "unknown"
    with pytest.raises(ValueError):
        CreatorUserRole(unknown_value)

5307
api/uv.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -21,7 +21,7 @@ services:
# API service
api:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -132,7 +132,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.13.1
image: langgenius/dify-web:1.13.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@@ -728,7 +728,7 @@ services:
# API service
api:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -770,7 +770,7 @@ services:
# worker service
# The Celery worker for processing all queues (dataset, workflow, mail, etc.)
worker:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -809,7 +809,7 @@ services:
# worker_beat service
# Celery beat for scheduling periodic tasks.
worker_beat:
image: langgenius/dify-api:1.13.1
image: langgenius/dify-api:1.13.2
restart: always
environment:
# Use the shared environment variables.
@@ -839,7 +839,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:1.13.1
image: langgenius/dify-web:1.13.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@@ -1,7 +1,7 @@
{
"name": "dify-web",
"type": "module",
"version": "1.13.1",
"version": "1.13.2",
"private": true,
"packageManager": "pnpm@10.32.1",
"imports": {