mirror of
https://github.com/langgenius/dify.git
synced 2026-02-26 11:25:10 +00:00
Compare commits
1 Commits
main
...
32589-main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b9898692f |
@@ -110,10 +110,10 @@ class Storage:
|
||||
def load_stream(self, filename: str) -> Generator:
|
||||
return self.storage_runner.load_stream(filename)
|
||||
|
||||
def download(self, filename, target_filepath):
|
||||
def download(self, filename: str, target_filepath):
|
||||
self.storage_runner.download(filename, target_filepath)
|
||||
|
||||
def exists(self, filename):
|
||||
def exists(self, filename: str):
|
||||
return self.storage_runner.exists(filename)
|
||||
|
||||
def delete(self, filename: str):
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
project-includes = ["."]
|
||||
project-excludes = [
|
||||
"tests/",
|
||||
".venv",
|
||||
"migrations/",
|
||||
"core/rag",
|
||||
]
|
||||
python-platform = "linux"
|
||||
python-version = "3.11.0"
|
||||
|
||||
@@ -50,26 +50,8 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_factory.return_value = mock_instance
|
||||
yield mock_factory
|
||||
|
||||
@pytest.fixture
|
||||
def account_and_tenant(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""Create an account with an owner tenant for testing.
|
||||
|
||||
Returns a tuple of (account, tenant) where tenant is guaranteed to be non-None.
|
||||
"""
|
||||
fake = Faker()
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
assert tenant is not None
|
||||
return account, tenant
|
||||
|
||||
def test_deal_dataset_vector_index_task_remove_action_success(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test successful removal of dataset vector index.
|
||||
@@ -81,7 +63,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
4. Completes without errors
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -127,7 +118,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
assert mock_processor.clean.call_count >= 0 # For now, just check it doesn't fail
|
||||
|
||||
def test_deal_dataset_vector_index_task_add_action_success(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test successful addition of dataset vector index.
|
||||
@@ -141,7 +132,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
6. Updates document status to completed
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -227,7 +227,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_update_action_success(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test successful update of dataset vector index.
|
||||
@@ -242,7 +242,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
7. Updates document status to completed
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset with parent-child index
|
||||
dataset = Dataset(
|
||||
@@ -329,7 +338,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_dataset_not_found_error(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior when dataset is not found.
|
||||
@@ -349,7 +358,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_not_called()
|
||||
|
||||
def test_deal_dataset_vector_index_task_add_action_no_documents(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test add action when no documents exist for the dataset.
|
||||
@@ -358,7 +367,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
a dataset exists but has no documents to process.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset without documents
|
||||
dataset = Dataset(
|
||||
@@ -381,7 +399,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_not_called()
|
||||
|
||||
def test_deal_dataset_vector_index_task_add_action_no_segments(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test add action when documents exist but have no segments.
|
||||
@@ -390,7 +408,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
documents exist but contain no segments to process.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -437,7 +464,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_not_called()
|
||||
|
||||
def test_deal_dataset_vector_index_task_update_action_no_documents(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test update action when no documents exist for the dataset.
|
||||
@@ -446,7 +473,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
a dataset exists but has no documents to process during update.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset without documents
|
||||
dataset = Dataset(
|
||||
@@ -470,7 +506,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_not_called()
|
||||
|
||||
def test_deal_dataset_vector_index_task_add_action_with_exception_handling(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test add action with exception handling during processing.
|
||||
@@ -479,7 +515,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
during document processing and updates document status to error.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -566,7 +611,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
assert "Test exception during indexing" in updated_document.error
|
||||
|
||||
def test_deal_dataset_vector_index_task_with_custom_index_type(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior with custom index type (QA_INDEX).
|
||||
@@ -575,7 +620,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
and initializes the appropriate index processor.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset with custom index type
|
||||
dataset = Dataset(
|
||||
@@ -642,7 +696,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_with_default_index_type(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior with default index type (PARAGRAPH_INDEX).
|
||||
@@ -651,7 +705,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
when dataset.doc_form is None.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset without doc_form (should use default)
|
||||
dataset = Dataset(
|
||||
@@ -718,7 +781,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_multiple_documents_processing(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task processing with multiple documents and segments.
|
||||
@@ -727,7 +790,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
and their segments in sequence.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -821,7 +893,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
assert mock_processor.load.call_count == 3
|
||||
|
||||
def test_deal_dataset_vector_index_task_document_status_transitions(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test document status transitions during task execution.
|
||||
@@ -830,7 +902,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
'completed' to 'indexing' and back to 'completed' during processing.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -918,7 +999,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
assert updated_document.indexing_status == "completed"
|
||||
|
||||
def test_deal_dataset_vector_index_task_with_disabled_documents(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior with disabled documents.
|
||||
@@ -927,7 +1008,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
during processing.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -1039,7 +1129,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_with_archived_documents(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior with archived documents.
|
||||
@@ -1048,7 +1138,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
during processing.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
@@ -1160,7 +1259,7 @@ class TestDealDatasetVectorIndexTask:
|
||||
mock_processor.load.assert_called_once()
|
||||
|
||||
def test_deal_dataset_vector_index_task_with_incomplete_documents(
|
||||
self, db_session_with_containers, mock_index_processor_factory, account_and_tenant
|
||||
self, db_session_with_containers, mock_index_processor_factory, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test task behavior with documents that have incomplete indexing status.
|
||||
@@ -1169,7 +1268,16 @@ class TestDealDatasetVectorIndexTask:
|
||||
incomplete indexing status during processing.
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = account_and_tenant
|
||||
|
||||
# Create test data
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
|
||||
@@ -143,8 +143,234 @@ def mock_upload_file():
|
||||
# ============================================================================
|
||||
# Test Basic Cleanup
|
||||
# ============================================================================
|
||||
# Note: Basic cleanup behavior is now covered by testcontainers-based
|
||||
# integration tests; no unit tests remain in this section.
|
||||
|
||||
|
||||
class TestBasicCleanup:
|
||||
"""Test cases for basic dataset cleanup functionality."""
|
||||
|
||||
def test_clean_dataset_task_empty_dataset(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test cleanup of an empty dataset with no documents or segments.
|
||||
|
||||
Scenario:
|
||||
- Dataset has no documents or segments
|
||||
- Should still clean vector database and delete related records
|
||||
|
||||
Expected behavior:
|
||||
- IndexProcessorFactory is called to clean vector database
|
||||
- No storage deletions occur
|
||||
- Related records (DatasetProcessRule, etc.) are deleted
|
||||
- Session is committed and closed
|
||||
"""
|
||||
# Arrange
|
||||
mock_db_session.session.scalars.return_value.all.return_value = []
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_index_processor_factory["factory"].assert_called_once_with("paragraph_index")
|
||||
mock_index_processor_factory["processor"].clean.assert_called_once()
|
||||
mock_storage.delete.assert_not_called()
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
mock_db_session.session.close.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_with_documents_and_segments(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
mock_document,
|
||||
mock_segment,
|
||||
):
|
||||
"""
|
||||
Test cleanup of dataset with documents and segments.
|
||||
|
||||
Scenario:
|
||||
- Dataset has one document and one segment
|
||||
- No image files in segment content
|
||||
|
||||
Expected behavior:
|
||||
- Documents and segments are deleted
|
||||
- Vector database is cleaned
|
||||
- Session is committed
|
||||
"""
|
||||
# Arrange
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[mock_segment], # segments
|
||||
]
|
||||
mock_get_image_upload_file_ids.return_value = []
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_db_session.session.delete.assert_any_call(mock_document)
|
||||
# Segments are deleted in batch; verify a DELETE on document_segments was issued
|
||||
execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.session.execute.call_args_list]
|
||||
assert any("DELETE FROM document_segments" in sql for sql in execute_sqls)
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_deletes_related_records(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that all related records are deleted.
|
||||
|
||||
Expected behavior:
|
||||
- DatasetProcessRule records are deleted
|
||||
- DatasetQuery records are deleted
|
||||
- AppDatasetJoin records are deleted
|
||||
- DatasetMetadata records are deleted
|
||||
- DatasetMetadataBinding records are deleted
|
||||
"""
|
||||
# Arrange
|
||||
mock_query = mock_db_session.session.query.return_value
|
||||
mock_query.where.return_value = mock_query
|
||||
mock_query.delete.return_value = 1
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - verify query.where.delete was called multiple times
|
||||
# for different models (DatasetProcessRule, DatasetQuery, etc.)
|
||||
assert mock_query.delete.call_count >= 5
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Doc Form Validation
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestDocFormValidation:
|
||||
"""Test cases for doc_form validation and default fallback."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_doc_form",
|
||||
[
|
||||
None,
|
||||
"",
|
||||
" ",
|
||||
"\t",
|
||||
"\n",
|
||||
" \t\n ",
|
||||
],
|
||||
)
|
||||
def test_clean_dataset_task_invalid_doc_form_uses_default(
|
||||
self,
|
||||
invalid_doc_form,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that invalid doc_form values use default paragraph index type.
|
||||
|
||||
Scenario:
|
||||
- doc_form is None, empty, or whitespace-only
|
||||
- Should use default IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
Expected behavior:
|
||||
- Default index type is used for cleanup
|
||||
- No errors are raised
|
||||
- Cleanup proceeds normally
|
||||
"""
|
||||
# Arrange - import to verify the default value
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form=invalid_doc_form,
|
||||
)
|
||||
|
||||
# Assert - IndexProcessorFactory should be called with default type
|
||||
mock_index_processor_factory["factory"].assert_called_once_with(IndexStructureType.PARAGRAPH_INDEX)
|
||||
mock_index_processor_factory["processor"].clean.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_valid_doc_form_used_directly(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that valid doc_form values are used directly.
|
||||
|
||||
Expected behavior:
|
||||
- Provided doc_form is passed to IndexProcessorFactory
|
||||
"""
|
||||
# Arrange
|
||||
valid_doc_form = "qa_index"
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form=valid_doc_form,
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_index_processor_factory["factory"].assert_called_once_with(valid_doc_form)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Error Handling
|
||||
# ============================================================================
|
||||
@@ -153,6 +379,156 @@ def mock_upload_file():
|
||||
class TestErrorHandling:
|
||||
"""Test cases for error handling and recovery."""
|
||||
|
||||
def test_clean_dataset_task_vector_cleanup_failure_continues(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
mock_document,
|
||||
mock_segment,
|
||||
):
|
||||
"""
|
||||
Test that document cleanup continues even if vector cleanup fails.
|
||||
|
||||
Scenario:
|
||||
- IndexProcessor.clean() raises an exception
|
||||
- Document and segment deletion should still proceed
|
||||
|
||||
Expected behavior:
|
||||
- Exception is caught and logged
|
||||
- Documents and segments are still deleted
|
||||
- Session is committed
|
||||
"""
|
||||
# Arrange
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[mock_segment], # segments
|
||||
]
|
||||
mock_index_processor_factory["processor"].clean.side_effect = Exception("Vector database error")
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - documents and segments should still be deleted
|
||||
mock_db_session.session.delete.assert_any_call(mock_document)
|
||||
# Segments are deleted in batch; verify a DELETE on document_segments was issued
|
||||
execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.session.execute.call_args_list]
|
||||
assert any("DELETE FROM document_segments" in sql for sql in execute_sqls)
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_storage_delete_failure_continues(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that cleanup continues even if storage deletion fails.
|
||||
|
||||
Scenario:
|
||||
- Segment contains image file references
|
||||
- Storage.delete() raises an exception
|
||||
- Cleanup should continue
|
||||
|
||||
Expected behavior:
|
||||
- Exception is caught and logged
|
||||
- Image file record is still deleted from database
|
||||
- Other cleanup operations proceed
|
||||
"""
|
||||
# Arrange
|
||||
# Need at least one document for segment processing to occur (code is in else block)
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "website" # Non-upload type to avoid file deletion
|
||||
|
||||
mock_segment = MagicMock()
|
||||
mock_segment.id = str(uuid.uuid4())
|
||||
mock_segment.content = "Test content with image"
|
||||
|
||||
mock_upload_file = MagicMock()
|
||||
mock_upload_file.id = str(uuid.uuid4())
|
||||
mock_upload_file.key = "images/test-image.jpg"
|
||||
|
||||
image_file_id = mock_upload_file.id
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents - need at least one for segment processing
|
||||
[mock_segment], # segments
|
||||
]
|
||||
mock_get_image_upload_file_ids.return_value = [image_file_id]
|
||||
mock_db_session.session.query.return_value.where.return_value.all.return_value = [mock_upload_file]
|
||||
mock_storage.delete.side_effect = Exception("Storage service unavailable")
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - storage delete was attempted for image file
|
||||
mock_storage.delete.assert_called_with(mock_upload_file.key)
|
||||
# Upload files are deleted in batch; verify a DELETE on upload_files was issued
|
||||
execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.session.execute.call_args_list]
|
||||
assert any("DELETE FROM upload_files" in sql for sql in execute_sqls)
|
||||
|
||||
def test_clean_dataset_task_database_error_rollback(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that database session is rolled back on error.
|
||||
|
||||
Scenario:
|
||||
- Database operation raises an exception
|
||||
- Session should be rolled back to prevent dirty state
|
||||
|
||||
Expected behavior:
|
||||
- Session.rollback() is called
|
||||
- Session.close() is called in finally block
|
||||
"""
|
||||
# Arrange
|
||||
mock_db_session.session.commit.side_effect = Exception("Database commit failed")
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_db_session.session.rollback.assert_called_once()
|
||||
mock_db_session.session.close.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_rollback_failure_still_closes_session(
|
||||
self,
|
||||
dataset_id,
|
||||
@@ -378,6 +754,296 @@ class TestSegmentAttachmentCleanup:
|
||||
assert any("DELETE FROM segment_attachment_bindings" in sql for sql in execute_sqls)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Upload File Cleanup
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestUploadFileCleanup:
|
||||
"""Test cases for upload file cleanup."""
|
||||
|
||||
def test_clean_dataset_task_deletes_document_upload_files(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that document upload files are deleted.
|
||||
|
||||
Scenario:
|
||||
- Document has data_source_type = "upload_file"
|
||||
- data_source_info contains upload_file_id
|
||||
|
||||
Expected behavior:
|
||||
- Upload file is deleted from storage
|
||||
- Upload file record is deleted from database
|
||||
"""
|
||||
# Arrange
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "upload_file"
|
||||
mock_document.data_source_info = '{"upload_file_id": "test-file-id"}'
|
||||
mock_document.data_source_info_dict = {"upload_file_id": "test-file-id"}
|
||||
|
||||
mock_upload_file = MagicMock()
|
||||
mock_upload_file.id = "test-file-id"
|
||||
mock_upload_file.key = "uploads/test-file.txt"
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[], # segments
|
||||
]
|
||||
mock_db_session.session.query.return_value.where.return_value.all.return_value = [mock_upload_file]
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_storage.delete.assert_called_with(mock_upload_file.key)
|
||||
# Upload files are deleted in batch; verify a DELETE on upload_files was issued
|
||||
execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.session.execute.call_args_list]
|
||||
assert any("DELETE FROM upload_files" in sql for sql in execute_sqls)
|
||||
|
||||
def test_clean_dataset_task_handles_missing_upload_file(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that missing upload files are handled gracefully.
|
||||
|
||||
Scenario:
|
||||
- Document references an upload_file_id that doesn't exist
|
||||
|
||||
Expected behavior:
|
||||
- No error is raised
|
||||
- Cleanup continues normally
|
||||
"""
|
||||
# Arrange
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "upload_file"
|
||||
mock_document.data_source_info = '{"upload_file_id": "nonexistent-file"}'
|
||||
mock_document.data_source_info_dict = {"upload_file_id": "nonexistent-file"}
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[], # segments
|
||||
]
|
||||
mock_db_session.session.query.return_value.where.return_value.all.return_value = []
|
||||
|
||||
# Act - should not raise exception
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_storage.delete.assert_not_called()
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_handles_non_upload_file_data_source(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that non-upload_file data sources are skipped.
|
||||
|
||||
Scenario:
|
||||
- Document has data_source_type = "website"
|
||||
|
||||
Expected behavior:
|
||||
- No file deletion is attempted
|
||||
"""
|
||||
# Arrange
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "website"
|
||||
mock_document.data_source_info = None
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[], # segments
|
||||
]
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - storage delete should not be called for document files
|
||||
# (only for image files in segments, which are empty here)
|
||||
mock_storage.delete.assert_not_called()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Image File Cleanup
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestImageFileCleanup:
|
||||
"""Test cases for image file cleanup in segments."""
|
||||
|
||||
def test_clean_dataset_task_deletes_image_files_in_segments(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that image files referenced in segment content are deleted.
|
||||
|
||||
Scenario:
|
||||
- Segment content contains image file references
|
||||
- get_image_upload_file_ids returns file IDs
|
||||
|
||||
Expected behavior:
|
||||
- Each image file is deleted from storage
|
||||
- Each image file record is deleted from database
|
||||
"""
|
||||
# Arrange
|
||||
# Need at least one document for segment processing to occur (code is in else block)
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "website" # Non-upload type
|
||||
|
||||
mock_segment = MagicMock()
|
||||
mock_segment.id = str(uuid.uuid4())
|
||||
mock_segment.content = '<img src="file://image-1"> <img src="file://image-2">'
|
||||
|
||||
image_file_ids = ["image-1", "image-2"]
|
||||
mock_get_image_upload_file_ids.return_value = image_file_ids
|
||||
|
||||
mock_image_files = []
|
||||
for file_id in image_file_ids:
|
||||
mock_file = MagicMock()
|
||||
mock_file.id = file_id
|
||||
mock_file.key = f"images/{file_id}.jpg"
|
||||
mock_image_files.append(mock_file)
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents - need at least one for segment processing
|
||||
[mock_segment], # segments
|
||||
]
|
||||
|
||||
# Setup a mock query chain that returns files in batch (align with .in_().all())
|
||||
mock_query = MagicMock()
|
||||
mock_where = MagicMock()
|
||||
mock_query.where.return_value = mock_where
|
||||
mock_where.all.return_value = mock_image_files
|
||||
mock_db_session.session.query.return_value = mock_query
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - each expected image key was deleted at least once
|
||||
calls = [c.args[0] for c in mock_storage.delete.call_args_list]
|
||||
assert "images/image-1.jpg" in calls
|
||||
assert "images/image-2.jpg" in calls
|
||||
|
||||
def test_clean_dataset_task_handles_missing_image_file(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test that missing image files are handled gracefully.
|
||||
|
||||
Scenario:
|
||||
- Segment references image file ID that doesn't exist in database
|
||||
|
||||
Expected behavior:
|
||||
- No error is raised
|
||||
- Cleanup continues
|
||||
"""
|
||||
# Arrange
|
||||
# Need at least one document for segment processing to occur (code is in else block)
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "website" # Non-upload type
|
||||
|
||||
mock_segment = MagicMock()
|
||||
mock_segment.id = str(uuid.uuid4())
|
||||
mock_segment.content = '<img src="file://nonexistent-image">'
|
||||
|
||||
mock_get_image_upload_file_ids.return_value = ["nonexistent-image"]
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents - need at least one for segment processing
|
||||
[mock_segment], # segments
|
||||
]
|
||||
|
||||
# Image file not found
|
||||
mock_db_session.session.query.return_value.where.return_value.all.return_value = []
|
||||
|
||||
# Act - should not raise exception
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_storage.delete.assert_not_called()
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test Edge Cases
|
||||
# ============================================================================
|
||||
@@ -386,6 +1052,114 @@ class TestSegmentAttachmentCleanup:
|
||||
class TestEdgeCases:
|
||||
"""Test edge cases and boundary conditions."""
|
||||
|
||||
def test_clean_dataset_task_multiple_documents_and_segments(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test cleanup of multiple documents and segments.
|
||||
|
||||
Scenario:
|
||||
- Dataset has 5 documents and 10 segments
|
||||
|
||||
Expected behavior:
|
||||
- All documents and segments are deleted
|
||||
"""
|
||||
# Arrange
|
||||
mock_documents = []
|
||||
for i in range(5):
|
||||
doc = MagicMock()
|
||||
doc.id = str(uuid.uuid4())
|
||||
doc.tenant_id = tenant_id
|
||||
doc.data_source_type = "website" # Non-upload type
|
||||
mock_documents.append(doc)
|
||||
|
||||
mock_segments = []
|
||||
for i in range(10):
|
||||
seg = MagicMock()
|
||||
seg.id = str(uuid.uuid4())
|
||||
seg.content = f"Segment content {i}"
|
||||
mock_segments.append(seg)
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
mock_documents,
|
||||
mock_segments,
|
||||
]
|
||||
mock_get_image_upload_file_ids.return_value = []
|
||||
|
||||
# Act
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert - all documents and segments should be deleted (documents per-entity, segments in batch)
|
||||
delete_calls = mock_db_session.session.delete.call_args_list
|
||||
deleted_items = [call[0][0] for call in delete_calls]
|
||||
|
||||
for doc in mock_documents:
|
||||
assert doc in deleted_items
|
||||
# Verify a batch DELETE on document_segments occurred
|
||||
execute_sqls = [" ".join(str(c[0][0]).split()) for c in mock_db_session.session.execute.call_args_list]
|
||||
assert any("DELETE FROM document_segments" in sql for sql in execute_sqls)
|
||||
|
||||
def test_clean_dataset_task_document_with_empty_data_source_info(
|
||||
self,
|
||||
dataset_id,
|
||||
tenant_id,
|
||||
collection_binding_id,
|
||||
mock_db_session,
|
||||
mock_storage,
|
||||
mock_index_processor_factory,
|
||||
mock_get_image_upload_file_ids,
|
||||
):
|
||||
"""
|
||||
Test handling of document with empty data_source_info.
|
||||
|
||||
Scenario:
|
||||
- Document has data_source_type = "upload_file"
|
||||
- data_source_info is None or empty
|
||||
|
||||
Expected behavior:
|
||||
- No error is raised
|
||||
- File deletion is skipped
|
||||
"""
|
||||
# Arrange
|
||||
mock_document = MagicMock()
|
||||
mock_document.id = str(uuid.uuid4())
|
||||
mock_document.tenant_id = tenant_id
|
||||
mock_document.data_source_type = "upload_file"
|
||||
mock_document.data_source_info = None
|
||||
|
||||
mock_db_session.session.scalars.return_value.all.side_effect = [
|
||||
[mock_document], # documents
|
||||
[], # segments
|
||||
]
|
||||
|
||||
# Act - should not raise exception
|
||||
clean_dataset_task(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id,
|
||||
indexing_technique="high_quality",
|
||||
index_struct='{"type": "paragraph"}',
|
||||
collection_binding_id=collection_binding_id,
|
||||
doc_form="paragraph_index",
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_storage.delete.assert_not_called()
|
||||
mock_db_session.session.commit.assert_called_once()
|
||||
|
||||
def test_clean_dataset_task_session_always_closed(
|
||||
self,
|
||||
dataset_id,
|
||||
|
||||
Reference in New Issue
Block a user