Compare commits

...

2 Commits

Author SHA1 Message Date
-LAN-
17127b1dd1 test: fix external dataset update integration fixtures 2026-04-08 17:55:03 +08:00
-LAN-
9b3d494fe1 Enforce tenant checks for external knowledge bindings 2026-04-08 17:39:37 +08:00
7 changed files with 113 additions and 7 deletions

View File

@@ -303,13 +303,17 @@ class Dataset(Base):
if self.provider != "external":
return None
external_knowledge_binding = db.session.scalar(
select(ExternalKnowledgeBindings).where(ExternalKnowledgeBindings.dataset_id == self.id)
select(ExternalKnowledgeBindings).where(
ExternalKnowledgeBindings.dataset_id == self.id,
ExternalKnowledgeBindings.tenant_id == self.tenant_id,
)
)
if not external_knowledge_binding:
return None
external_knowledge_api = db.session.scalar(
select(ExternalKnowledgeApis).where(
ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id
ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id,
ExternalKnowledgeApis.tenant_id == self.tenant_id,
)
)
if external_knowledge_api is None or external_knowledge_api.settings is None:

View File

@@ -518,6 +518,7 @@ class DatasetService:
raise ValueError("External knowledge id is required.")
if not external_knowledge_api_id:
raise ValueError("External knowledge api id is required.")
ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id, dataset.tenant_id)
# Update metadata fields
dataset.updated_by = user.id if user else None
dataset.updated_at = naive_utc_now()

View File

@@ -314,7 +314,10 @@ class ExternalDatasetService:
external_knowledge_api = db.session.scalar(
select(ExternalKnowledgeApis)
.where(ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id)
.where(
ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id,
ExternalKnowledgeApis.tenant_id == tenant_id,
)
.limit(1)
)
if external_knowledge_api is None or external_knowledge_api.settings is None:

View File

@@ -1,3 +1,4 @@
import json
from unittest.mock import Mock, patch
from uuid import uuid4
@@ -7,7 +8,7 @@ from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, ExternalKnowledgeBindings
from models.dataset import Dataset, ExternalKnowledgeApis, ExternalKnowledgeBindings
from models.enums import DataSourceType
from services.dataset_service import DatasetService
from services.errors.account import NoPermissionError
@@ -103,6 +104,34 @@ class DatasetUpdateTestDataFactory:
db_session_with_containers.commit()
return binding
@staticmethod
def create_external_knowledge_api(
db_session_with_containers: Session,
tenant_id: str,
created_by: str,
api_id: str | None = None,
name: str = "test-api",
) -> ExternalKnowledgeApis:
"""Create a real external knowledge API template for tenant-scoped update validation."""
external_api = ExternalKnowledgeApis(
tenant_id=tenant_id,
created_by=created_by,
updated_by=created_by,
name=name,
description="test description",
settings=json.dumps(
{
"endpoint": "https://example.com",
"api_key": "test-api-key",
}
),
)
if api_id is not None:
external_api.id = api_id
db_session_with_containers.add(external_api)
db_session_with_containers.commit()
return external_api
class TestDatasetServiceUpdateDataset:
"""
@@ -138,6 +167,11 @@ class TestDatasetServiceUpdateDataset:
)
binding_id = binding.id
db_session_with_containers.expunge(binding)
external_api = DatasetUpdateTestDataFactory.create_external_knowledge_api(
db_session_with_containers,
tenant_id=tenant.id,
created_by=user.id,
)
update_data = {
"name": "new_name",
@@ -145,7 +179,7 @@ class TestDatasetServiceUpdateDataset:
"external_retrieval_model": "new_model",
"permission": "only_me",
"external_knowledge_id": "new_knowledge_id",
"external_knowledge_api_id": str(uuid4()),
"external_knowledge_api_id": external_api.id,
}
result = DatasetService.update_dataset(dataset.id, update_data, user)
@@ -218,11 +252,16 @@ class TestDatasetServiceUpdateDataset:
created_by=user.id,
provider="external",
)
external_api = DatasetUpdateTestDataFactory.create_external_knowledge_api(
db_session_with_containers,
tenant_id=tenant.id,
created_by=user.id,
)
update_data = {
"name": "new_name",
"external_knowledge_id": "knowledge_id",
"external_knowledge_api_id": str(uuid4()),
"external_knowledge_api_id": external_api.id,
}
with pytest.raises(ValueError) as context:

View File

@@ -12,7 +12,7 @@ This test suite covers:
import json
import pickle
from datetime import UTC, datetime
from unittest.mock import patch
from unittest.mock import Mock, patch
from uuid import uuid4
from core.rag.index_processor.constant.index_type import IndexTechniqueType
@@ -25,6 +25,7 @@ from models.dataset import (
Document,
DocumentSegment,
Embedding,
ExternalKnowledgeBindings,
)
from models.enums import (
DataSourceType,
@@ -180,6 +181,24 @@ class TestDatasetModelValidation:
assert result["top_k"] == 2
assert result["score_threshold"] == 0.0
def test_dataset_external_knowledge_info_returns_none_for_cross_tenant_template(self):
"""Test external datasets fail closed when the bound template is outside the tenant."""
dataset = Dataset(
tenant_id=str(uuid4()),
name="External Dataset",
data_source_type=DataSourceType.UPLOAD_FILE,
created_by=str(uuid4()),
provider="external",
)
binding = Mock(spec=ExternalKnowledgeBindings)
binding.external_knowledge_id = "knowledge-1"
binding.external_knowledge_api_id = str(uuid4())
with patch("models.dataset.db") as mock_db:
mock_db.session.scalar.side_effect = [binding, None]
assert dataset.external_knowledge_info is None
def test_dataset_retrieval_model_dict_property(self):
"""Test retrieval_model_dict property with default values."""
# Arrange

View File

@@ -532,6 +532,9 @@ class TestDatasetServiceCreationAndUpdate:
with (
patch.object(DatasetService, "_update_external_knowledge_binding") as update_binding,
patch(
"services.dataset_service.ExternalDatasetService.get_external_knowledge_api", return_value=object()
) as get_external_knowledge_api,
patch("services.dataset_service.naive_utc_now", return_value=now),
patch("services.dataset_service.db") as mock_db,
):
@@ -557,6 +560,7 @@ class TestDatasetServiceCreationAndUpdate:
assert dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM
assert dataset.updated_by == "user-1"
assert dataset.updated_at is now
get_external_knowledge_api.assert_called_once_with("api-1", dataset.tenant_id)
update_binding.assert_called_once_with("dataset-1", "knowledge-1", "api-1")
mock_db.session.add.assert_called_once_with(dataset)
mock_db.session.commit.assert_called_once()
@@ -574,6 +578,31 @@ class TestDatasetServiceCreationAndUpdate:
with pytest.raises(ValueError, match=message):
DatasetService._update_external_dataset(dataset, payload, SimpleNamespace(id="user-1"))
def test_update_external_dataset_rejects_cross_tenant_external_api_id(self):
dataset = DatasetServiceUnitDataFactory.create_dataset_mock(dataset_id="dataset-1")
with (
patch(
"services.dataset_service.ExternalDatasetService.get_external_knowledge_api",
side_effect=ValueError("api template not found"),
) as get_external_knowledge_api,
patch.object(DatasetService, "_update_external_knowledge_binding") as update_binding,
patch("services.dataset_service.db") as mock_db,
):
with pytest.raises(ValueError, match="api template not found"):
DatasetService._update_external_dataset(
dataset,
{
"external_knowledge_id": "knowledge-1",
"external_knowledge_api_id": "foreign-api",
},
SimpleNamespace(id="user-1"),
)
get_external_knowledge_api.assert_called_once_with("foreign-api", dataset.tenant_id)
update_binding.assert_not_called()
mock_db.session.commit.assert_not_called()
def test_update_external_knowledge_binding_updates_changed_binding_values(self):
binding = SimpleNamespace(external_knowledge_id="old-knowledge", external_knowledge_api_id="old-api")
session = MagicMock()

View File

@@ -1556,6 +1556,17 @@ class TestExternalDatasetServiceFetchRetrieval:
with pytest.raises(ValueError, match="external knowledge binding not found"):
ExternalDatasetService.fetch_external_knowledge_retrieval("tenant-123", "dataset-123", "query", {})
@patch("services.external_knowledge_service.db")
def test_fetch_external_knowledge_retrieval_cross_tenant_api_template_error(self, mock_db, factory):
"""Test error when a binding points to an API template outside the dataset tenant."""
# Arrange
binding = factory.create_external_knowledge_binding_mock()
mock_db.session.scalar.side_effect = [binding, None]
# Act & Assert
with pytest.raises(ValueError, match="external api template not found"):
ExternalDatasetService.fetch_external_knowledge_retrieval("tenant-123", "dataset-123", "query", {})
@patch("services.external_knowledge_service.ExternalDatasetService.process_external_api")
@patch("services.external_knowledge_service.db")
def test_fetch_external_knowledge_retrieval_empty_results(self, mock_db, mock_process, factory):