From 99cc98320aa2bf560fdc3d9cd68f3462fda861ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=A8=E4=B9=8B=E6=9C=AC=E6=BE=AA?=
 <kinomotomiovo@gmail.com>
Date: Wed, 25 Feb 2026 13:15:07 +0800
Subject: [PATCH] test: migrate dataset collection binding SQL tests to
 testcontainers (#32539)

Co-authored-by: KinomotoMio <200703522+KinomotoMio@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .../services/dataset_collection_binding.py    | 254 +++++
 .../services/dataset_collection_binding.py    | 932 ------------------
 2 files changed, 254 insertions(+), 932 deletions(-)
 create mode 100644 api/tests/test_containers_integration_tests/services/dataset_collection_binding.py
 delete mode 100644 api/tests/unit_tests/services/dataset_collection_binding.py

diff --git a/api/tests/test_containers_integration_tests/services/dataset_collection_binding.py b/api/tests/test_containers_integration_tests/services/dataset_collection_binding.py
new file mode 100644
index 0000000000..73df2d9ed9
--- /dev/null
+++ b/api/tests/test_containers_integration_tests/services/dataset_collection_binding.py
@@ -0,0 +1,254 @@
+"""
+Integration tests for DatasetCollectionBindingService.
+
+This module contains testcontainers-based integration tests for the DatasetCollectionBindingService class,
+which handles dataset collection binding operations for vector database collections.
+"""
+
+from itertools import starmap
+from uuid import uuid4
+
+import pytest
+
+from extensions.ext_database import db
+from models.dataset import DatasetCollectionBinding
+from services.dataset_service import DatasetCollectionBindingService
+
+
+class DatasetCollectionBindingTestDataFactory:
+    """
+    Factory class for creating test data for dataset collection binding integration tests.
+
+    This factory provides a static method to create and persist `DatasetCollectionBinding`
+    instances in the test database.
+
+    The factory method helps maintain consistency across tests and reduces
+    code duplication when setting up test scenarios.
+    """
+
+    @staticmethod
+    def create_collection_binding(
+        provider_name: str = "openai",
+        model_name: str = "text-embedding-ada-002",
+        collection_name: str = "collection-abc",
+        collection_type: str = "dataset",
+    ) -> DatasetCollectionBinding:
+        """
+        Create a DatasetCollectionBinding with specified attributes.
+
+        Args:
+            provider_name: Name of the embedding model provider (e.g., "openai", "cohere")
+            model_name: Name of the embedding model (e.g., "text-embedding-ada-002")
+            collection_name: Name of the vector database collection
+            collection_type: Type of collection (default: "dataset")
+
+        Returns:
+            DatasetCollectionBinding instance
+        """
+        binding = DatasetCollectionBinding(
+            provider_name=provider_name,
+            model_name=model_name,
+            collection_name=collection_name,
+            type=collection_type,
+        )
+        db.session.add(binding)
+        db.session.commit()
+        return binding
+
+
+class TestDatasetCollectionBindingServiceGetBinding:
+    """
+    Integration tests for the DatasetCollectionBindingService.get_dataset_collection_binding method.
+
+    This test class covers the main collection binding retrieval/creation functionality,
+    including various provider/model combinations, collection types, and edge cases.
+    """
+
+    def test_get_dataset_collection_binding_existing_binding_success(self, db_session_with_containers):
+        """
+        Test successful retrieval of an existing collection binding.
+
+        Verifies that when a binding already exists in the database for the given
+        provider, model, and collection type, the method returns the existing binding
+        without creating a new one.
+        """
+        # Arrange
+        provider_name = "openai"
+        model_name = "text-embedding-ada-002"
+        collection_type = "dataset"
+        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding(
+            provider_name=provider_name,
+            model_name=model_name,
+            collection_name="existing-collection",
+            collection_type=collection_type,
+        )
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding(
+            provider_name, model_name, collection_type
+        )
+
+        # Assert
+        assert result.id == existing_binding.id
+        assert result.collection_name == "existing-collection"
+
+    def test_get_dataset_collection_binding_create_new_binding_success(self, db_session_with_containers):
+        """
+        Test successful creation of a new collection binding when none exists.
+
+        Verifies that when no existing binding is found for the given provider,
+        model, and collection type, a new binding is created and returned.
+        """
+        # Arrange
+        provider_name = f"provider-{uuid4()}"
+        model_name = f"model-{uuid4()}"
+        collection_type = "dataset"
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding(
+            provider_name, model_name, collection_type
+        )
+
+        # Assert
+        assert result is not None
+        assert result.provider_name == provider_name
+        assert result.model_name == model_name
+        assert result.type == collection_type
+        assert result.collection_name is not None
+
+    def test_get_dataset_collection_binding_different_collection_type(self, db_session_with_containers):
+        """Test get_dataset_collection_binding with different collection type."""
+        # Arrange
+        provider_name = "openai"
+        model_name = "text-embedding-ada-002"
+        collection_type = "custom_type"
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding(
+            provider_name, model_name, collection_type
+        )
+
+        # Assert
+        assert result.type == collection_type
+        assert result.provider_name == provider_name
+        assert result.model_name == model_name
+
+    def test_get_dataset_collection_binding_default_collection_type(self, db_session_with_containers):
+        """Test get_dataset_collection_binding with default collection type parameter."""
+        # Arrange
+        provider_name = "openai"
+        model_name = "text-embedding-ada-002"
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding(provider_name, model_name)
+
+        # Assert
+        assert result.type == "dataset"
+        assert result.provider_name == provider_name
+        assert result.model_name == model_name
+
+    def test_get_dataset_collection_binding_different_provider_model_combination(self, db_session_with_containers):
+        """Test get_dataset_collection_binding with various provider/model combinations."""
+        # Arrange
+        combinations = [
+            ("openai", "text-embedding-ada-002"),
+            ("cohere", "embed-english-v3.0"),
+            ("huggingface", "sentence-transformers/all-MiniLM-L6-v2"),
+        ]
+
+        # Act
+        results = list(starmap(DatasetCollectionBindingService.get_dataset_collection_binding, combinations))
+
+        # Assert
+        assert len(results) == 3
+        for result, (provider, model) in zip(results, combinations):
+            assert result.provider_name == provider
+            assert result.model_name == model
+
+
+class TestDatasetCollectionBindingServiceGetBindingByIdAndType:
+    """
+    Integration tests for the DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type method.
+
+    This test class covers retrieval of specific collection bindings by ID and type,
+    including successful retrieval and error handling for missing bindings.
+    """
+
+    def test_get_dataset_collection_binding_by_id_and_type_success(self, db_session_with_containers):
+        """Test successful retrieval of collection binding by ID and type."""
+        # Arrange
+        binding = DatasetCollectionBindingTestDataFactory.create_collection_binding(
+            provider_name="openai",
+            model_name="text-embedding-ada-002",
+            collection_name="test-collection",
+            collection_type="dataset",
+        )
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(binding.id, "dataset")
+
+        # Assert
+        assert result.id == binding.id
+        assert result.provider_name == "openai"
+        assert result.model_name == "text-embedding-ada-002"
+        assert result.collection_name == "test-collection"
+        assert result.type == "dataset"
+
+    def test_get_dataset_collection_binding_by_id_and_type_not_found_error(self, db_session_with_containers):
+        """Test error handling when collection binding is not found by ID and type."""
+        # Arrange
+        non_existent_id = str(uuid4())
+
+        # Act & Assert
+        with pytest.raises(ValueError, match="Dataset collection binding not found"):
+            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(non_existent_id, "dataset")
+
+    def test_get_dataset_collection_binding_by_id_and_type_different_collection_type(self, db_session_with_containers):
+        """Test retrieval by ID and type with different collection type."""
+        # Arrange
+        binding = DatasetCollectionBindingTestDataFactory.create_collection_binding(
+            provider_name="openai",
+            model_name="text-embedding-ada-002",
+            collection_name="test-collection",
+            collection_type="custom_type",
+        )
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
+            binding.id, "custom_type"
+        )
+
+        # Assert
+        assert result.id == binding.id
+        assert result.type == "custom_type"
+
+    def test_get_dataset_collection_binding_by_id_and_type_default_collection_type(self, db_session_with_containers):
+        """Test retrieval by ID with default collection type."""
+        # Arrange
+        binding = DatasetCollectionBindingTestDataFactory.create_collection_binding(
+            provider_name="openai",
+            model_name="text-embedding-ada-002",
+            collection_name="test-collection",
+            collection_type="dataset",
+        )
+
+        # Act
+        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(binding.id)
+
+        # Assert
+        assert result.id == binding.id
+        assert result.type == "dataset"
+
+    def test_get_dataset_collection_binding_by_id_and_type_wrong_type_error(self, db_session_with_containers):
+        """Test error when binding exists but with wrong collection type."""
+        # Arrange
+        binding = DatasetCollectionBindingTestDataFactory.create_collection_binding(
+            provider_name="openai",
+            model_name="text-embedding-ada-002",
+            collection_name="test-collection",
+            collection_type="dataset",
+        )
+
+        # Act & Assert
+        with pytest.raises(ValueError, match="Dataset collection binding not found"):
+            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(binding.id, "wrong_type")
diff --git a/api/tests/unit_tests/services/dataset_collection_binding.py b/api/tests/unit_tests/services/dataset_collection_binding.py
deleted file mode 100644
index 2a939a5c1d..0000000000
--- a/api/tests/unit_tests/services/dataset_collection_binding.py
+++ /dev/null
@@ -1,932 +0,0 @@
-"""
-Comprehensive unit tests for DatasetCollectionBindingService.
-
-This module contains extensive unit tests for the DatasetCollectionBindingService class,
-which handles dataset collection binding operations for vector database collections.
-
-The DatasetCollectionBindingService provides methods for:
-- Retrieving or creating dataset collection bindings by provider, model, and type
-- Retrieving specific collection bindings by ID and type
-- Managing collection bindings for different collection types (dataset, etc.)
-
-Collection bindings are used to map embedding models (provider + model name) to
-specific vector database collections, allowing datasets to share collections when
-they use the same embedding model configuration.
-
-This test suite ensures:
-- Correct retrieval of existing bindings
-- Proper creation of new bindings when they don't exist
-- Accurate filtering by provider, model, and collection type
-- Proper error handling for missing bindings
-- Database transaction handling (add, commit)
-- Collection name generation using Dataset.gen_collection_name_by_id
-
-================================================================================
-ARCHITECTURE OVERVIEW
-================================================================================
-
-The DatasetCollectionBindingService is a critical component in the Dify platform's
-vector database management system. It serves as an abstraction layer between the
-application logic and the underlying vector database collections.
-
-Key Concepts:
-1. Collection Binding: A mapping between an embedding model configuration
-   (provider + model name) and a vector database collection name. This allows
-   multiple datasets to share the same collection when they use identical
-   embedding models, improving resource efficiency.
-
-2. Collection Type: Different types of collections can exist (e.g., "dataset",
-   "custom_type"). This allows for separation of collections based on their
-   intended use case or data structure.
-
-3. Provider and Model: The combination of provider_name (e.g., "openai",
-   "cohere", "huggingface") and model_name (e.g., "text-embedding-ada-002")
-   uniquely identifies an embedding model configuration.
-
-4. Collection Name Generation: When a new binding is created, a unique collection
-   name is generated using Dataset.gen_collection_name_by_id() with a UUID.
-   This ensures each binding has a unique collection identifier.
-
-================================================================================
-TESTING STRATEGY
-================================================================================
-
-This test suite follows a comprehensive testing strategy that covers:
-
-1. Happy Path Scenarios:
-   - Successful retrieval of existing bindings
-   - Successful creation of new bindings
-   - Proper handling of default parameters
-
-2. Edge Cases:
-   - Different collection types
-   - Various provider/model combinations
-   - Default vs explicit parameter usage
-
-3. Error Handling:
-   - Missing bindings (for get_by_id_and_type)
-   - Database query failures
-   - Invalid parameter combinations
-
-4. Database Interaction:
-   - Query construction and execution
-   - Transaction management (add, commit)
-   - Query chaining (where, order_by, first)
-
-5. Mocking Strategy:
-   - Database session mocking
-   - Query builder chain mocking
-   - UUID generation mocking
-   - Collection name generation mocking
-
-================================================================================
-"""
-
-"""
-Import statements for the test module.
-
-This section imports all necessary dependencies for testing the
-DatasetCollectionBindingService, including:
-- unittest.mock for creating mock objects
-- pytest for test framework functionality
-- uuid for UUID generation (used in collection name generation)
-- Models and services from the application codebase
-"""
-
-from unittest.mock import Mock, patch
-
-import pytest
-
-from models.dataset import Dataset, DatasetCollectionBinding
-from services.dataset_service import DatasetCollectionBindingService
-
-# ============================================================================
-# Test Data Factory
-# ============================================================================
-# The Test Data Factory pattern is used here to centralize the creation of
-# test objects and mock instances. This approach provides several benefits:
-#
-# 1. Consistency: All test objects are created using the same factory methods,
-#    ensuring consistent structure across all tests.
-#
-# 2. Maintainability: If the structure of DatasetCollectionBinding or Dataset
-#    changes, we only need to update the factory methods rather than every
-#    individual test.
-#
-# 3. Reusability: Factory methods can be reused across multiple test classes,
-#    reducing code duplication.
-#
-# 4. Readability: Tests become more readable when they use descriptive factory
-#    method calls instead of complex object construction logic.
-#
-# ============================================================================
-
-
-class DatasetCollectionBindingTestDataFactory:
-    """
-    Factory class for creating test data and mock objects for dataset collection binding tests.
-
-    This factory provides static methods to create mock objects for:
-    - DatasetCollectionBinding instances
-    - Database query results
-    - Collection name generation results
-
-    The factory methods help maintain consistency across tests and reduce
-    code duplication when setting up test scenarios.
-    """
-
-    @staticmethod
-    def create_collection_binding_mock(
-        binding_id: str = "binding-123",
-        provider_name: str = "openai",
-        model_name: str = "text-embedding-ada-002",
-        collection_name: str = "collection-abc",
-        collection_type: str = "dataset",
-        created_at=None,
-        **kwargs,
-    ) -> Mock:
-        """
-        Create a mock DatasetCollectionBinding with specified attributes.
-
-        Args:
-            binding_id: Unique identifier for the binding
-            provider_name: Name of the embedding model provider (e.g., "openai", "cohere")
-            model_name: Name of the embedding model (e.g., "text-embedding-ada-002")
-            collection_name: Name of the vector database collection
-            collection_type: Type of collection (default: "dataset")
-            created_at: Optional datetime for creation timestamp
-            **kwargs: Additional attributes to set on the mock
-
-        Returns:
-            Mock object configured as a DatasetCollectionBinding instance
-        """
-        binding = Mock(spec=DatasetCollectionBinding)
-        binding.id = binding_id
-        binding.provider_name = provider_name
-        binding.model_name = model_name
-        binding.collection_name = collection_name
-        binding.type = collection_type
-        binding.created_at = created_at
-        for key, value in kwargs.items():
-            setattr(binding, key, value)
-        return binding
-
-    @staticmethod
-    def create_dataset_mock(
-        dataset_id: str = "dataset-123",
-        **kwargs,
-    ) -> Mock:
-        """
-        Create a mock Dataset for testing collection name generation.
-
-        Args:
-            dataset_id: Unique identifier for the dataset
-            **kwargs: Additional attributes to set on the mock
-
-        Returns:
-            Mock object configured as a Dataset instance
-        """
-        dataset = Mock(spec=Dataset)
-        dataset.id = dataset_id
-        for key, value in kwargs.items():
-            setattr(dataset, key, value)
-        return dataset
-
-
-# ============================================================================
-# Tests for get_dataset_collection_binding
-# ============================================================================
-
-
-class TestDatasetCollectionBindingServiceGetBinding:
-    """
-    Comprehensive unit tests for DatasetCollectionBindingService.get_dataset_collection_binding method.
-
-    This test class covers the main collection binding retrieval/creation functionality,
-    including various provider/model combinations, collection types, and edge cases.
-
-    The get_dataset_collection_binding method:
-    1. Queries for existing binding by provider_name, model_name, and collection_type
-    2. Orders results by created_at (ascending) and takes the first match
-    3. If no binding exists, creates a new one with:
-       - The provided provider_name and model_name
-       - A generated collection_name using Dataset.gen_collection_name_by_id
-       - The provided collection_type
-    4. Adds the new binding to the database session and commits
-    5. Returns the binding (either existing or newly created)
-
-    Test scenarios include:
-    - Retrieving existing bindings
-    - Creating new bindings when none exist
-    - Different collection types
-    - Database transaction handling
-    - Collection name generation
-    """
-
-    @pytest.fixture
-    def mock_db_session(self):
-        """
-        Mock database session for testing database operations.
-
-        Provides a mocked database session that can be used to verify:
-        - Query construction and execution
-        - Add operations for new bindings
-        - Commit operations for transaction completion
-
-        The mock is configured to return a query builder that supports
-        chaining operations like .where(), .order_by(), and .first().
-        """
-        with patch("services.dataset_service.db.session") as mock_db:
-            yield mock_db
-
-    def test_get_dataset_collection_binding_existing_binding_success(self, mock_db_session):
-        """
-        Test successful retrieval of an existing collection binding.
-
-        Verifies that when a binding already exists in the database for the given
-        provider, model, and collection type, the method returns the existing binding
-        without creating a new one.
-
-        This test ensures:
-        - The query is constructed correctly with all three filters
-        - Results are ordered by created_at
-        - The first matching binding is returned
-        - No new binding is created (db.session.add is not called)
-        - No commit is performed (db.session.commit is not called)
-        """
-        # Arrange
-        provider_name = "openai"
-        model_name = "text-embedding-ada-002"
-        collection_type = "dataset"
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id="binding-123",
-            provider_name=provider_name,
-            model_name=model_name,
-            collection_type=collection_type,
-        )
-
-        # Mock the query chain: query().where().order_by().first()
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act
-        result = DatasetCollectionBindingService.get_dataset_collection_binding(
-            provider_name=provider_name, model_name=model_name, collection_type=collection_type
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.id == "binding-123"
-        assert result.provider_name == provider_name
-        assert result.model_name == model_name
-        assert result.type == collection_type
-
-        # Verify query was constructed correctly
-        # The query should be constructed with DatasetCollectionBinding as the model
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-
-        # Verify the where clause was applied to filter by provider, model, and type
-        mock_query.where.assert_called_once()
-
-        # Verify the results were ordered by created_at (ascending)
-        # This ensures we get the oldest binding if multiple exist
-        mock_where.order_by.assert_called_once()
-
-        # Verify no new binding was created
-        # Since an existing binding was found, we should not create a new one
-        mock_db_session.add.assert_not_called()
-
-        # Verify no commit was performed
-        # Since no new binding was created, no database transaction is needed
-        mock_db_session.commit.assert_not_called()
-
-    def test_get_dataset_collection_binding_create_new_binding_success(self, mock_db_session):
-        """
-        Test successful creation of a new collection binding when none exists.
-
-        Verifies that when no binding exists in the database for the given
-        provider, model, and collection type, the method creates a new binding
-        with a generated collection name and commits it to the database.
-
-        This test ensures:
-        - The query returns None (no existing binding)
-        - A new DatasetCollectionBinding is created with correct attributes
-        - Dataset.gen_collection_name_by_id is called to generate collection name
-        - The new binding is added to the database session
-        - The transaction is committed
-        - The newly created binding is returned
-        """
-        # Arrange
-        provider_name = "cohere"
-        model_name = "embed-english-v3.0"
-        collection_type = "dataset"
-        generated_collection_name = "collection-generated-xyz"
-
-        # Mock the query chain to return None (no existing binding)
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = None  # No existing binding
-        mock_db_session.query.return_value = mock_query
-
-        # Mock Dataset.gen_collection_name_by_id to return a generated name
-        with patch("services.dataset_service.Dataset.gen_collection_name_by_id") as mock_gen_name:
-            mock_gen_name.return_value = generated_collection_name
-
-            # Mock uuid.uuid4 for the collection name generation
-            mock_uuid = "test-uuid-123"
-            with patch("services.dataset_service.uuid.uuid4", return_value=mock_uuid):
-                # Act
-                result = DatasetCollectionBindingService.get_dataset_collection_binding(
-                    provider_name=provider_name, model_name=model_name, collection_type=collection_type
-                )
-
-        # Assert
-        assert result is not None
-        assert result.provider_name == provider_name
-        assert result.model_name == model_name
-        assert result.type == collection_type
-        assert result.collection_name == generated_collection_name
-
-        # Verify Dataset.gen_collection_name_by_id was called with the generated UUID
-        # This method generates a unique collection name based on the UUID
-        # The UUID is converted to string before passing to the method
-        mock_gen_name.assert_called_once_with(str(mock_uuid))
-
-        # Verify new binding was added to the database session
-        # The add method should be called exactly once with the new binding instance
-        mock_db_session.add.assert_called_once()
-
-        # Extract the binding that was added to verify its properties
-        added_binding = mock_db_session.add.call_args[0][0]
-
-        # Verify the added binding is an instance of DatasetCollectionBinding
-        # This ensures we're creating the correct type of object
-        assert isinstance(added_binding, DatasetCollectionBinding)
-
-        # Verify all the binding properties are set correctly
-        # These should match the input parameters to the method
-        assert added_binding.provider_name == provider_name
-        assert added_binding.model_name == model_name
-        assert added_binding.type == collection_type
-
-        # Verify the collection name was set from the generated name
-        # This ensures the binding has a valid collection identifier
-        assert added_binding.collection_name == generated_collection_name
-
-        # Verify the transaction was committed
-        # This ensures the new binding is persisted to the database
-        mock_db_session.commit.assert_called_once()
-
-    def test_get_dataset_collection_binding_different_collection_type(self, mock_db_session):
-        """
-        Test retrieval with a different collection type (not "dataset").
-
-        Verifies that the method correctly filters by collection_type, allowing
-        different types of collections to coexist with the same provider/model
-        combination.
-
-        This test ensures:
-        - Collection type is properly used as a filter in the query
-        - Different collection types can have separate bindings
-        - The correct binding is returned based on type
-        """
-        # Arrange
-        provider_name = "openai"
-        model_name = "text-embedding-ada-002"
-        collection_type = "custom_type"
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id="binding-456",
-            provider_name=provider_name,
-            model_name=model_name,
-            collection_type=collection_type,
-        )
-
-        # Mock the query chain
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act
-        result = DatasetCollectionBindingService.get_dataset_collection_binding(
-            provider_name=provider_name, model_name=model_name, collection_type=collection_type
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.type == collection_type
-
-        # Verify query was constructed with the correct type filter
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-        mock_query.where.assert_called_once()
-
-    def test_get_dataset_collection_binding_default_collection_type(self, mock_db_session):
-        """
-        Test retrieval with default collection type ("dataset").
-
-        Verifies that when collection_type is not provided, it defaults to "dataset"
-        as specified in the method signature.
-
-        This test ensures:
-        - The default value "dataset" is used when type is not specified
-        - The query correctly filters by the default type
-        """
-        # Arrange
-        provider_name = "openai"
-        model_name = "text-embedding-ada-002"
-        # collection_type defaults to "dataset" in method signature
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id="binding-789",
-            provider_name=provider_name,
-            model_name=model_name,
-            collection_type="dataset",  # Default type
-        )
-
-        # Mock the query chain
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act - call without specifying collection_type (uses default)
-        result = DatasetCollectionBindingService.get_dataset_collection_binding(
-            provider_name=provider_name, model_name=model_name
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.type == "dataset"
-
-        # Verify query was constructed correctly
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-
-    def test_get_dataset_collection_binding_different_provider_model_combination(self, mock_db_session):
-        """
-        Test retrieval with different provider/model combinations.
-
-        Verifies that bindings are correctly filtered by both provider_name and
-        model_name, ensuring that different model combinations have separate bindings.
-
-        This test ensures:
-        - Provider and model are both used as filters
-        - Different combinations result in different bindings
-        - The correct binding is returned for each combination
-        """
-        # Arrange
-        provider_name = "huggingface"
-        model_name = "sentence-transformers/all-MiniLM-L6-v2"
-        collection_type = "dataset"
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id="binding-hf-123",
-            provider_name=provider_name,
-            model_name=model_name,
-            collection_type=collection_type,
-        )
-
-        # Mock the query chain
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act
-        result = DatasetCollectionBindingService.get_dataset_collection_binding(
-            provider_name=provider_name, model_name=model_name, collection_type=collection_type
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.provider_name == provider_name
-        assert result.model_name == model_name
-
-        # Verify query filters were applied correctly
-        # The query should filter by both provider_name and model_name
-        # This ensures different model combinations have separate bindings
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-
-        # Verify the where clause was applied with all three filters:
-        # - provider_name filter
-        # - model_name filter
-        # - collection_type filter
-        mock_query.where.assert_called_once()
-
-
-# ============================================================================
-# Tests for get_dataset_collection_binding_by_id_and_type
-# ============================================================================
-# This section contains tests for the get_dataset_collection_binding_by_id_and_type
-# method, which retrieves a specific collection binding by its ID and type.
-#
-# Key differences from get_dataset_collection_binding:
-# 1. This method queries by ID and type, not by provider/model/type
-# 2. This method does NOT create a new binding if one doesn't exist
-# 3. This method raises ValueError if the binding is not found
-# 4. This method is typically used when you already know the binding ID
-#
-# Use cases:
-# - Retrieving a binding that was previously created
-# - Validating that a binding exists before using it
-# - Accessing binding metadata when you have the ID
-#
-# ============================================================================
-
-
-class TestDatasetCollectionBindingServiceGetBindingByIdAndType:
-    """
-    Comprehensive unit tests for DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type method.
-
-    This test class covers collection binding retrieval by ID and type,
-    including success scenarios and error handling for missing bindings.
-
-    The get_dataset_collection_binding_by_id_and_type method:
-    1. Queries for a binding by collection_binding_id and collection_type
-    2. Orders results by created_at (ascending) and takes the first match
-    3. If no binding exists, raises ValueError("Dataset collection binding not found")
-    4. Returns the found binding
-
-    Unlike get_dataset_collection_binding, this method does NOT create a new
-    binding if one doesn't exist - it only retrieves existing bindings.
-
-    Test scenarios include:
-    - Successful retrieval of existing bindings
-    - Error handling for missing bindings
-    - Different collection types
-    - Default collection type behavior
-    """
-
-    @pytest.fixture
-    def mock_db_session(self):
-        """
-        Mock database session for testing database operations.
-
-        Provides a mocked database session that can be used to verify:
-        - Query construction with ID and type filters
-        - Ordering by created_at
-        - First result retrieval
-
-        The mock is configured to return a query builder that supports
-        chaining operations like .where(), .order_by(), and .first().
-        """
-        with patch("services.dataset_service.db.session") as mock_db:
-            yield mock_db
-
-    def test_get_dataset_collection_binding_by_id_and_type_success(self, mock_db_session):
-        """
-        Test successful retrieval of a collection binding by ID and type.
-
-        Verifies that when a binding exists in the database with the given
-        ID and collection type, the method returns the binding.
-
-        This test ensures:
-        - The query is constructed correctly with ID and type filters
-        - Results are ordered by created_at
-        - The first matching binding is returned
-        - No error is raised
-        """
-        # Arrange
-        collection_binding_id = "binding-123"
-        collection_type = "dataset"
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id=collection_binding_id,
-            provider_name="openai",
-            model_name="text-embedding-ada-002",
-            collection_type=collection_type,
-        )
-
-        # Mock the query chain: query().where().order_by().first()
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act
-        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-            collection_binding_id=collection_binding_id, collection_type=collection_type
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.id == collection_binding_id
-        assert result.type == collection_type
-
-        # Verify query was constructed correctly
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-        mock_query.where.assert_called_once()
-        mock_where.order_by.assert_called_once()
-
-    def test_get_dataset_collection_binding_by_id_and_type_not_found_error(self, mock_db_session):
-        """
-        Test error handling when binding is not found.
-
-        Verifies that when no binding exists in the database with the given
-        ID and collection type, the method raises a ValueError with the
-        message "Dataset collection binding not found".
-
-        This test ensures:
-        - The query returns None (no existing binding)
-        - ValueError is raised with the correct message
-        - No binding is returned
-        """
-        # Arrange
-        collection_binding_id = "non-existent-binding"
-        collection_type = "dataset"
-
-        # Mock the query chain to return None (no existing binding)
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = None  # No existing binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act & Assert
-        with pytest.raises(ValueError, match="Dataset collection binding not found"):
-            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-                collection_binding_id=collection_binding_id, collection_type=collection_type
-            )
-
-        # Verify query was attempted
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-        mock_query.where.assert_called_once()
-
-    def test_get_dataset_collection_binding_by_id_and_type_different_collection_type(self, mock_db_session):
-        """
-        Test retrieval with a different collection type.
-
-        Verifies that the method correctly filters by collection_type, ensuring
-        that bindings with the same ID but different types are treated as
-        separate entities.
-
-        This test ensures:
-        - Collection type is properly used as a filter in the query
-        - Different collection types can have separate bindings with same ID
-        - The correct binding is returned based on type
-        """
-        # Arrange
-        collection_binding_id = "binding-456"
-        collection_type = "custom_type"
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id=collection_binding_id,
-            provider_name="cohere",
-            model_name="embed-english-v3.0",
-            collection_type=collection_type,
-        )
-
-        # Mock the query chain
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act
-        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-            collection_binding_id=collection_binding_id, collection_type=collection_type
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.id == collection_binding_id
-        assert result.type == collection_type
-
-        # Verify query was constructed with the correct type filter
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-        mock_query.where.assert_called_once()
-
-    def test_get_dataset_collection_binding_by_id_and_type_default_collection_type(self, mock_db_session):
-        """
-        Test retrieval with default collection type ("dataset").
-
-        Verifies that when collection_type is not provided, it defaults to "dataset"
-        as specified in the method signature.
-
-        This test ensures:
-        - The default value "dataset" is used when type is not specified
-        - The query correctly filters by the default type
-        - The correct binding is returned
-        """
-        # Arrange
-        collection_binding_id = "binding-789"
-        # collection_type defaults to "dataset" in method signature
-
-        existing_binding = DatasetCollectionBindingTestDataFactory.create_collection_binding_mock(
-            binding_id=collection_binding_id,
-            provider_name="openai",
-            model_name="text-embedding-ada-002",
-            collection_type="dataset",  # Default type
-        )
-
-        # Mock the query chain
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = existing_binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act - call without specifying collection_type (uses default)
-        result = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-            collection_binding_id=collection_binding_id
-        )
-
-        # Assert
-        assert result == existing_binding
-        assert result.id == collection_binding_id
-        assert result.type == "dataset"
-
-        # Verify query was constructed correctly
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-        mock_query.where.assert_called_once()
-
-    def test_get_dataset_collection_binding_by_id_and_type_wrong_type_error(self, mock_db_session):
-        """
-        Test error handling when binding exists but with wrong collection type.
-
-        Verifies that when a binding exists with the given ID but a different
-        collection type, the method raises a ValueError because the binding
-        doesn't match both the ID and type criteria.
-
-        This test ensures:
-        - The query correctly filters by both ID and type
-        - Bindings with matching ID but different type are not returned
-        - ValueError is raised when no matching binding is found
-        """
-        # Arrange
-        collection_binding_id = "binding-123"
-        collection_type = "dataset"
-
-        # Mock the query chain to return None (binding exists but with different type)
-        mock_query = Mock()
-        mock_where = Mock()
-        mock_order_by = Mock()
-        mock_query.where.return_value = mock_where
-        mock_where.order_by.return_value = mock_order_by
-        mock_order_by.first.return_value = None  # No matching binding
-        mock_db_session.query.return_value = mock_query
-
-        # Act & Assert
-        with pytest.raises(ValueError, match="Dataset collection binding not found"):
-            DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(
-                collection_binding_id=collection_binding_id, collection_type=collection_type
-            )
-
-        # Verify query was attempted with both ID and type filters
-        # The query should filter by both collection_binding_id and collection_type
-        # This ensures we only get bindings that match both criteria
-        mock_db_session.query.assert_called_once_with(DatasetCollectionBinding)
-
-        # Verify the where clause was applied with both filters:
-        # - collection_binding_id filter (exact match)
-        # - collection_type filter (exact match)
-        mock_query.where.assert_called_once()
-
-        # Note: The order_by and first() calls are also part of the query chain,
-        # but we don't need to verify them separately since they're part of the
-        # standard query pattern used by both methods in this service.
-
-
-# ============================================================================
-# Additional Test Scenarios and Edge Cases
-# ============================================================================
-# The following section could contain additional test scenarios if needed:
-#
-# Potential additional tests:
-# 1. Test with multiple existing bindings (verify ordering by created_at)
-# 2. Test with very long provider/model names (boundary testing)
-# 3. Test with special characters in provider/model names
-# 4. Test concurrent binding creation (thread safety)
-# 5. Test database rollback scenarios
-# 6. Test with None values for optional parameters
-# 7. Test with empty strings for required parameters
-# 8. Test collection name generation uniqueness
-# 9. Test with different UUID formats
-# 10. Test query performance with large datasets
-#
-# These scenarios are not currently implemented but could be added if needed
-# based on real-world usage patterns or discovered edge cases.
-#
-# ============================================================================
-
-
-# ============================================================================
-# Integration Notes and Best Practices
-# ============================================================================
-#
-# When using DatasetCollectionBindingService in production code, consider:
-#
-# 1. Error Handling:
-#    - Always handle ValueError exceptions when calling
-#      get_dataset_collection_binding_by_id_and_type
-#    - Check return values from get_dataset_collection_binding to ensure
-#      bindings were created successfully
-#
-# 2. Performance Considerations:
-#    - The service queries the database on every call, so consider caching
-#      bindings if they're accessed frequently
-#    - Collection bindings are typically long-lived, so caching is safe
-#
-# 3. Transaction Management:
-#    - New bindings are automatically committed to the database
-#    - If you need to rollback, ensure you're within a transaction context
-#
-# 4. Collection Type Usage:
-#    - Use "dataset" for standard dataset collections
-#    - Use custom types only when you need to separate collections by purpose
-#    - Be consistent with collection type naming across your application
-#
-# 5. Provider and Model Naming:
-#    - Use consistent provider names (e.g., "openai", not "OpenAI" or "OPENAI")
-#    - Use exact model names as provided by the model provider
-#    - These names are case-sensitive and must match exactly
-#
-# ============================================================================
-
-
-# ============================================================================
-# Database Schema Reference
-# ============================================================================
-#
-# The DatasetCollectionBinding model has the following structure:
-#
-# - id: StringUUID (primary key, auto-generated)
-# - provider_name: String(255) (required, e.g., "openai", "cohere")
-# - model_name: String(255) (required, e.g., "text-embedding-ada-002")
-# - type: String(40) (required, default: "dataset")
-# - collection_name: String(64) (required, unique collection identifier)
-# - created_at: DateTime (auto-generated timestamp)
-#
-# Indexes:
-# - Primary key on id
-# - Composite index on (provider_name, model_name) for efficient lookups
-#
-# Relationships:
-# - One binding can be referenced by multiple datasets
-# - Datasets reference bindings via collection_binding_id
-#
-# ============================================================================
-
-
-# ============================================================================
-# Mocking Strategy Documentation
-# ============================================================================
-#
-# This test suite uses extensive mocking to isolate the unit under test.
-# Here's how the mocking strategy works:
-#
-# 1. Database Session Mocking:
-#    - db.session is patched to prevent actual database access
-#    - Query chains are mocked to return predictable results
-#    - Add and commit operations are tracked for verification
-#
-# 2. Query Chain Mocking:
-#    - query() returns a mock query object
-#    - where() returns a mock where object
-#    - order_by() returns a mock order_by object
-#    - first() returns the final result (binding or None)
-#
-# 3. UUID Generation Mocking:
-#    - uuid.uuid4() is mocked to return predictable UUIDs
-#    - This ensures collection names are generated consistently in tests
-#
-# 4. Collection Name Generation Mocking:
-#    - Dataset.gen_collection_name_by_id() is mocked
-#    - This allows us to verify the method is called correctly
-#    - We can control the generated collection name for testing
-#
-# Benefits of this approach:
-# - Tests run quickly (no database I/O)
-# - Tests are deterministic (no random UUIDs)
-# - Tests are isolated (no side effects)
-# - Tests are maintainable (clear mock setup)
-#
-# ============================================================================