From 3abfbc024684aea7b23f14064a6c987e653e52f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E4=B9=8B=E6=9C=AC=E6=BE=AA?= Date: Wed, 25 Feb 2026 01:51:38 +0800 Subject: [PATCH] test: migrate remaining DocumentSegment navigation SQL tests to testcontainers (#32523) Co-authored-by: KinomotoMio <200703522+KinomotoMio@users.noreply.github.com> --- .../models/test_dataset_models.py | 218 ++++++++++++++++++ .../unit_tests/models/test_dataset_models.py | 142 ------------ 2 files changed, 218 insertions(+), 142 deletions(-) diff --git a/api/tests/test_containers_integration_tests/models/test_dataset_models.py b/api/tests/test_containers_integration_tests/models/test_dataset_models.py index d2c3e1e58e..6c541a8ad2 100644 --- a/api/tests/test_containers_integration_tests/models/test_dataset_models.py +++ b/api/tests/test_containers_integration_tests/models/test_dataset_models.py @@ -269,3 +269,221 @@ class TestDatasetDocumentProperties: db_session_with_containers.flush() assert doc.hit_count == 25 + + +class TestDocumentSegmentNavigationProperties: + """Integration tests for DocumentSegment navigation properties.""" + + @pytest.fixture(autouse=True) + def _auto_rollback(self, db_session_with_containers: Session) -> Generator[None, None, None]: + """Automatically rollback session changes after each test.""" + yield + db_session_with_containers.rollback() + + def test_document_segment_dataset_property(self, db_session_with_containers: Session) -> None: + """Test segment can access its parent dataset.""" + # Arrange + tenant_id = str(uuid4()) + created_by = str(uuid4()) + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + ) + db_session_with_containers.add(dataset) + db_session_with_containers.flush() + + document = Document( + tenant_id=tenant_id, + dataset_id=dataset.id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + ) + db_session_with_containers.add(document) + db_session_with_containers.flush() + + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=created_by, + ) + db_session_with_containers.add(segment) + db_session_with_containers.flush() + + # Act + related_dataset = segment.dataset + + # Assert + assert related_dataset is not None + assert related_dataset.id == dataset.id + + def test_document_segment_document_property(self, db_session_with_containers: Session) -> None: + """Test segment can access its parent document.""" + # Arrange + tenant_id = str(uuid4()) + created_by = str(uuid4()) + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + ) + db_session_with_containers.add(dataset) + db_session_with_containers.flush() + + document = Document( + tenant_id=tenant_id, + dataset_id=dataset.id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + ) + db_session_with_containers.add(document) + db_session_with_containers.flush() + + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=created_by, + ) + db_session_with_containers.add(segment) + db_session_with_containers.flush() + + # Act + related_document = segment.document + + # Assert + assert related_document is not None + assert related_document.id == document.id + + def test_document_segment_previous_segment(self, db_session_with_containers: Session) -> None: + """Test segment can access previous segment.""" + # Arrange + tenant_id = str(uuid4()) + created_by = str(uuid4()) + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + ) + db_session_with_containers.add(dataset) + db_session_with_containers.flush() + + document = Document( + tenant_id=tenant_id, + dataset_id=dataset.id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + ) + db_session_with_containers.add(document) + db_session_with_containers.flush() + + previous_segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Previous", + word_count=1, + tokens=2, + created_by=created_by, + ) + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=2, + content="Current", + word_count=1, + tokens=2, + created_by=created_by, + ) + db_session_with_containers.add_all([previous_segment, segment]) + db_session_with_containers.flush() + + # Act + prev_seg = segment.previous_segment + + # Assert + assert prev_seg is not None + assert prev_seg.position == 1 + + def test_document_segment_next_segment(self, db_session_with_containers: Session) -> None: + """Test segment can access next segment.""" + # Arrange + tenant_id = str(uuid4()) + created_by = str(uuid4()) + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + ) + db_session_with_containers.add(dataset) + db_session_with_containers.flush() + + document = Document( + tenant_id=tenant_id, + dataset_id=dataset.id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + ) + db_session_with_containers.add(document) + db_session_with_containers.flush() + + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=1, + content="Current", + word_count=1, + tokens=2, + created_by=created_by, + ) + next_segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset.id, + document_id=document.id, + position=2, + content="Next", + word_count=1, + tokens=2, + created_by=created_by, + ) + db_session_with_containers.add_all([segment, next_segment]) + db_session_with_containers.flush() + + # Act + next_seg = segment.next_segment + + # Assert + assert next_seg is not None + assert next_seg.position == 2 diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py index c0e912fa1e..9bb7c05a91 100644 --- a/api/tests/unit_tests/models/test_dataset_models.py +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -954,148 +954,6 @@ class TestChildChunk: assert child_chunk.index_node_hash == index_node_hash -class TestDocumentSegmentNavigation: - """Test suite for DocumentSegment navigation properties.""" - - def test_document_segment_dataset_property(self): - """Test segment can access its parent dataset.""" - # Arrange - dataset_id = str(uuid4()) - segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=dataset_id, - document_id=str(uuid4()), - position=1, - content="Test", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - mock_dataset = Dataset( - tenant_id=str(uuid4()), - name="Test Dataset", - data_source_type="upload_file", - created_by=str(uuid4()), - ) - mock_dataset.id = dataset_id - - # Mock the database session scalar - with patch("models.dataset.db.session.scalar", return_value=mock_dataset): - # Act - dataset = segment.dataset - - # Assert - assert dataset is not None - assert dataset.id == dataset_id - - def test_document_segment_document_property(self): - """Test segment can access its parent document.""" - # Arrange - document_id = str(uuid4()) - segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - document_id=document_id, - position=1, - content="Test", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - mock_document = Document( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - position=1, - data_source_type="upload_file", - batch="batch_001", - name="test.pdf", - created_from="web", - created_by=str(uuid4()), - ) - mock_document.id = document_id - - # Mock the database session scalar - with patch("models.dataset.db.session.scalar", return_value=mock_document): - # Act - document = segment.document - - # Assert - assert document is not None - assert document.id == document_id - - def test_document_segment_previous_segment(self): - """Test segment can access previous segment.""" - # Arrange - document_id = str(uuid4()) - segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - document_id=document_id, - position=2, - content="Test", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - previous_segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - document_id=document_id, - position=1, - content="Previous", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - # Mock the database session scalar - with patch("models.dataset.db.session.scalar", return_value=previous_segment): - # Act - prev_seg = segment.previous_segment - - # Assert - assert prev_seg is not None - assert prev_seg.position == 1 - - def test_document_segment_next_segment(self): - """Test segment can access next segment.""" - # Arrange - document_id = str(uuid4()) - segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - document_id=document_id, - position=1, - content="Test", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - next_segment = DocumentSegment( - tenant_id=str(uuid4()), - dataset_id=str(uuid4()), - document_id=document_id, - position=2, - content="Next", - word_count=1, - tokens=2, - created_by=str(uuid4()), - ) - - # Mock the database session scalar - with patch("models.dataset.db.session.scalar", return_value=next_segment): - # Act - next_seg = segment.next_segment - - # Assert - assert next_seg is not None - assert next_seg.position == 2 - - class TestModelIntegration: """Test suite for model integration scenarios."""