mirror of
https://github.com/langgenius/dify.git
synced 2026-03-02 13:35:10 +00:00
Compare commits
6 Commits
move-token
...
yanli/docx
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
60c858aa48 | ||
|
|
46a3a2ae09 | ||
|
|
30af50cb47 | ||
|
|
0d6a4bac0f | ||
|
|
c693cb9789 | ||
|
|
fc91a7a38b |
@@ -114,7 +114,6 @@ class PdfExtractor(BaseExtractor):
|
|||||||
"""
|
"""
|
||||||
image_content = []
|
image_content = []
|
||||||
upload_files = []
|
upload_files = []
|
||||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,))
|
image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,))
|
||||||
@@ -164,7 +163,7 @@ class PdfExtractor(BaseExtractor):
|
|||||||
used_at=naive_utc_now(),
|
used_at=naive_utc_now(),
|
||||||
)
|
)
|
||||||
upload_files.append(upload_file)
|
upload_files.append(upload_file)
|
||||||
image_content.append(f"")
|
image_content.append(f"")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Failed to extract image from PDF: %s", e)
|
logger.warning("Failed to extract image from PDF: %s", e)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -87,7 +87,6 @@ class WordExtractor(BaseExtractor):
|
|||||||
def _extract_images_from_docx(self, doc):
|
def _extract_images_from_docx(self, doc):
|
||||||
image_count = 0
|
image_count = 0
|
||||||
image_map = {}
|
image_map = {}
|
||||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
|
||||||
|
|
||||||
for r_id, rel in doc.part.rels.items():
|
for r_id, rel in doc.part.rels.items():
|
||||||
if "image" in rel.target_ref:
|
if "image" in rel.target_ref:
|
||||||
@@ -126,7 +125,7 @@ class WordExtractor(BaseExtractor):
|
|||||||
used_at=naive_utc_now(),
|
used_at=naive_utc_now(),
|
||||||
)
|
)
|
||||||
db.session.add(upload_file)
|
db.session.add(upload_file)
|
||||||
image_map[r_id] = f""
|
image_map[r_id] = f""
|
||||||
else:
|
else:
|
||||||
image_ext = rel.target_ref.split(".")[-1]
|
image_ext = rel.target_ref.split(".")[-1]
|
||||||
if image_ext is None:
|
if image_ext is None:
|
||||||
@@ -154,7 +153,7 @@ class WordExtractor(BaseExtractor):
|
|||||||
used_at=naive_utc_now(),
|
used_at=naive_utc_now(),
|
||||||
)
|
)
|
||||||
db.session.add(upload_file)
|
db.session.add(upload_file)
|
||||||
image_map[rel.target_part] = f""
|
image_map[rel.target_part] = f""
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
return image_map
|
return image_map
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import re
|
|||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from json import JSONDecodeError
|
from json import JSONDecodeError
|
||||||
|
from operator import itemgetter
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
@@ -804,41 +805,53 @@ class DocumentSegment(Base):
|
|||||||
def sign_content(self) -> str:
|
def sign_content(self) -> str:
|
||||||
return self.get_sign_content()
|
return self.get_sign_content()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_signed_query_params(*, sign_target: str, upload_file_id: str) -> str:
|
||||||
|
nonce = os.urandom(16).hex()
|
||||||
|
timestamp = str(int(time.time()))
|
||||||
|
data_to_sign = f"{sign_target}|{upload_file_id}|{timestamp}|{nonce}"
|
||||||
|
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
|
||||||
|
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
|
||||||
|
encoded_sign = base64.urlsafe_b64encode(sign).decode()
|
||||||
|
return f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
|
||||||
|
|
||||||
|
def _get_accessible_upload_file_ids(self, upload_file_ids: set[str]) -> set[str]:
|
||||||
|
if not upload_file_ids:
|
||||||
|
return set()
|
||||||
|
|
||||||
|
matched_upload_file_ids = db.session.scalars(
|
||||||
|
select(UploadFile.id).where(
|
||||||
|
UploadFile.tenant_id == self.tenant_id,
|
||||||
|
UploadFile.id.in_(list(upload_file_ids)),
|
||||||
|
)
|
||||||
|
).all()
|
||||||
|
return {str(upload_file_id) for upload_file_id in matched_upload_file_ids}
|
||||||
|
|
||||||
def get_sign_content(self) -> str:
|
def get_sign_content(self) -> str:
|
||||||
signed_urls: list[tuple[int, int, str]] = []
|
signed_urls: list[tuple[int, int, str]] = []
|
||||||
text = self.content
|
text = self.content
|
||||||
|
|
||||||
# For data before v0.10.0
|
upload_file_preview_patterns = {
|
||||||
pattern = r"/files/([a-f0-9\-]+)/image-preview(?:\?.*?)?"
|
"image-preview": r"(?:https?://[^\s\)\"\']+)?/files/([a-f0-9\-]+)/image-preview(?:\?[^\s\)\"\']*)?",
|
||||||
matches = re.finditer(pattern, text)
|
"file-preview": r"(?:https?://[^\s\)\"\']+)?/files/([a-f0-9\-]+)/file-preview(?:\?[^\s\)\"\']*)?",
|
||||||
for match in matches:
|
}
|
||||||
upload_file_id = match.group(1)
|
upload_file_matches: list[tuple[re.Match[str], str, str]] = []
|
||||||
nonce = os.urandom(16).hex()
|
upload_file_ids: set[str] = set()
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}"
|
|
||||||
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
|
|
||||||
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
|
|
||||||
encoded_sign = base64.urlsafe_b64encode(sign).decode()
|
|
||||||
|
|
||||||
params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
|
for preview_type, pattern in upload_file_preview_patterns.items():
|
||||||
base_url = f"/files/{upload_file_id}/image-preview"
|
for match in re.finditer(pattern, text):
|
||||||
signed_url = f"{base_url}?{params}"
|
upload_file_id = match.group(1)
|
||||||
signed_urls.append((match.start(), match.end(), signed_url))
|
upload_file_matches.append((match, preview_type, upload_file_id))
|
||||||
|
upload_file_ids.add(upload_file_id)
|
||||||
|
|
||||||
# For data after v0.10.0
|
accessible_upload_file_ids = self._get_accessible_upload_file_ids(upload_file_ids)
|
||||||
pattern = r"/files/([a-f0-9\-]+)/file-preview(?:\?.*?)?"
|
|
||||||
matches = re.finditer(pattern, text)
|
|
||||||
for match in matches:
|
|
||||||
upload_file_id = match.group(1)
|
|
||||||
nonce = os.urandom(16).hex()
|
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}"
|
|
||||||
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
|
|
||||||
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
|
|
||||||
encoded_sign = base64.urlsafe_b64encode(sign).decode()
|
|
||||||
|
|
||||||
params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
|
for match, preview_type, upload_file_id in upload_file_matches:
|
||||||
base_url = f"/files/{upload_file_id}/file-preview"
|
if upload_file_id not in accessible_upload_file_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
|
params = self._build_signed_query_params(sign_target=preview_type, upload_file_id=upload_file_id)
|
||||||
|
base_url = f"/files/{upload_file_id}/{preview_type}"
|
||||||
signed_url = f"{base_url}?{params}"
|
signed_url = f"{base_url}?{params}"
|
||||||
signed_urls.append((match.start(), match.end(), signed_url))
|
signed_urls.append((match.start(), match.end(), signed_url))
|
||||||
|
|
||||||
@@ -849,19 +862,13 @@ class DocumentSegment(Base):
|
|||||||
for match in matches:
|
for match in matches:
|
||||||
upload_file_id = match.group(1)
|
upload_file_id = match.group(1)
|
||||||
file_extension = match.group(2)
|
file_extension = match.group(2)
|
||||||
nonce = os.urandom(16).hex()
|
params = self._build_signed_query_params(sign_target="file-preview", upload_file_id=upload_file_id)
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}"
|
|
||||||
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
|
|
||||||
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
|
|
||||||
encoded_sign = base64.urlsafe_b64encode(sign).decode()
|
|
||||||
|
|
||||||
params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
|
|
||||||
base_url = f"/files/tools/{upload_file_id}.{file_extension}"
|
base_url = f"/files/tools/{upload_file_id}.{file_extension}"
|
||||||
signed_url = f"{base_url}?{params}"
|
signed_url = f"{base_url}?{params}"
|
||||||
signed_urls.append((match.start(), match.end(), signed_url))
|
signed_urls.append((match.start(), match.end(), signed_url))
|
||||||
|
|
||||||
# Reconstruct the text with signed URLs
|
# Reconstruct the text with signed URLs
|
||||||
|
signed_urls.sort(key=itemgetter(0))
|
||||||
offset = 0
|
offset = 0
|
||||||
for start, end, signed_url in signed_urls:
|
for start, end, signed_url in signed_urls:
|
||||||
text = text[: start + offset] + signed_url + text[end + offset :]
|
text = text[: start + offset] + signed_url + text[end + offset :]
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ def test_extract_images_formats(mock_dependencies, monkeypatch, image_bytes, exp
|
|||||||
mock_raw.FPDF_PAGEOBJ_IMAGE = 1
|
mock_raw.FPDF_PAGEOBJ_IMAGE = 1
|
||||||
result = extractor._extract_images(mock_page)
|
result = extractor._extract_images(mock_page)
|
||||||
|
|
||||||
assert f"" in result
|
assert f"" in result
|
||||||
assert len(saves) == 1
|
assert len(saves) == 1
|
||||||
assert saves[0][1] == image_bytes
|
assert saves[0][1] == image_bytes
|
||||||
assert len(db_stub.session.added) == 1
|
assert len(db_stub.session.added) == 1
|
||||||
@@ -180,7 +180,7 @@ def test_extract_images_failures(mock_dependencies):
|
|||||||
result = extractor._extract_images(mock_page)
|
result = extractor._extract_images(mock_page)
|
||||||
|
|
||||||
# Should have one success
|
# Should have one success
|
||||||
assert "" in result
|
assert "" in result
|
||||||
assert len(saves) == 1
|
assert len(saves) == 1
|
||||||
assert saves[0][1] == jpeg_bytes
|
assert saves[0][1] == jpeg_bytes
|
||||||
assert db_stub.session.committed is True
|
assert db_stub.session.committed is True
|
||||||
|
|||||||
@@ -121,8 +121,7 @@ def test_extract_images_from_docx(monkeypatch):
|
|||||||
db_stub = SimpleNamespace(session=DummySession())
|
db_stub = SimpleNamespace(session=DummySession())
|
||||||
monkeypatch.setattr(we, "db", db_stub)
|
monkeypatch.setattr(we, "db", db_stub)
|
||||||
|
|
||||||
# Patch config values used for URL composition and storage type
|
# Patch config value used in this code path
|
||||||
monkeypatch.setattr(we.dify_config, "FILES_URL", "http://files.local", raising=False)
|
|
||||||
monkeypatch.setattr(we.dify_config, "STORAGE_TYPE", "local", raising=False)
|
monkeypatch.setattr(we.dify_config, "STORAGE_TYPE", "local", raising=False)
|
||||||
|
|
||||||
# Patch UploadFile to avoid real DB models
|
# Patch UploadFile to avoid real DB models
|
||||||
@@ -164,7 +163,7 @@ def test_extract_images_from_docx(monkeypatch):
|
|||||||
|
|
||||||
# Returned map should contain entries for external (keyed by rId) and internal (keyed by target_part)
|
# Returned map should contain entries for external (keyed by rId) and internal (keyed by target_part)
|
||||||
assert set(image_map.keys()) == {"rId1", internal_part}
|
assert set(image_map.keys()) == {"rId1", internal_part}
|
||||||
assert all(v.startswith(" and v.endswith("/file-preview)") for v in image_map.values())
|
assert all(v.startswith(" and v.endswith("/file-preview)") for v in image_map.values())
|
||||||
|
|
||||||
# Storage should receive both payloads
|
# Storage should receive both payloads
|
||||||
payloads = {data for _, data in saves}
|
payloads = {data for _, data in saves}
|
||||||
@@ -176,39 +175,6 @@ def test_extract_images_from_docx(monkeypatch):
|
|||||||
assert db_stub.session.committed is True
|
assert db_stub.session.committed is True
|
||||||
|
|
||||||
|
|
||||||
def test_extract_images_from_docx_uses_internal_files_url():
|
|
||||||
"""Test that INTERNAL_FILES_URL takes precedence over FILES_URL for plugin access."""
|
|
||||||
# Test the URL generation logic directly
|
|
||||||
from configs import dify_config
|
|
||||||
|
|
||||||
# Mock the configuration values
|
|
||||||
original_files_url = getattr(dify_config, "FILES_URL", None)
|
|
||||||
original_internal_files_url = getattr(dify_config, "INTERNAL_FILES_URL", None)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Set both URLs - INTERNAL should take precedence
|
|
||||||
dify_config.FILES_URL = "http://external.example.com"
|
|
||||||
dify_config.INTERNAL_FILES_URL = "http://internal.docker:5001"
|
|
||||||
|
|
||||||
# Test the URL generation logic (same as in word_extractor.py)
|
|
||||||
upload_file_id = "test_file_id"
|
|
||||||
|
|
||||||
# This is the pattern we fixed in the word extractor
|
|
||||||
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
|
|
||||||
generated_url = f"{base_url}/files/{upload_file_id}/file-preview"
|
|
||||||
|
|
||||||
# Verify that INTERNAL_FILES_URL is used instead of FILES_URL
|
|
||||||
assert "http://internal.docker:5001" in generated_url, f"Expected internal URL, got: {generated_url}"
|
|
||||||
assert "http://external.example.com" not in generated_url, f"Should not use external URL, got: {generated_url}"
|
|
||||||
|
|
||||||
finally:
|
|
||||||
# Restore original values
|
|
||||||
if original_files_url is not None:
|
|
||||||
dify_config.FILES_URL = original_files_url
|
|
||||||
if original_internal_files_url is not None:
|
|
||||||
dify_config.INTERNAL_FILES_URL = original_internal_files_url
|
|
||||||
|
|
||||||
|
|
||||||
def test_extract_hyperlinks(monkeypatch):
|
def test_extract_hyperlinks(monkeypatch):
|
||||||
# Mock db and storage to avoid issues during image extraction (even if no images are present)
|
# Mock db and storage to avoid issues during image extraction (even if no images are present)
|
||||||
monkeypatch.setattr(we, "storage", SimpleNamespace(save=lambda k, d: None))
|
monkeypatch.setattr(we, "storage", SimpleNamespace(save=lambda k, d: None))
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from datetime import UTC, datetime
|
|||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import models.dataset as dataset_module
|
||||||
from models.dataset import (
|
from models.dataset import (
|
||||||
AppDatasetJoin,
|
AppDatasetJoin,
|
||||||
ChildChunk,
|
ChildChunk,
|
||||||
@@ -489,6 +490,15 @@ class TestDocumentModelRelationships:
|
|||||||
class TestDocumentSegmentIndexing:
|
class TestDocumentSegmentIndexing:
|
||||||
"""Test suite for DocumentSegment model indexing and operations."""
|
"""Test suite for DocumentSegment model indexing and operations."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _mock_scalars_result(upload_file_ids: list[str]):
|
||||||
|
class _ScalarsResult:
|
||||||
|
@staticmethod
|
||||||
|
def all() -> list[str]:
|
||||||
|
return upload_file_ids
|
||||||
|
|
||||||
|
return _ScalarsResult()
|
||||||
|
|
||||||
def test_document_segment_creation_with_required_fields(self):
|
def test_document_segment_creation_with_required_fields(self):
|
||||||
"""Test creating a document segment with all required fields."""
|
"""Test creating a document segment with all required fields."""
|
||||||
# Arrange
|
# Arrange
|
||||||
@@ -547,6 +557,139 @@ class TestDocumentSegmentIndexing:
|
|||||||
assert segment.index_node_hash == index_node_hash
|
assert segment.index_node_hash == index_node_hash
|
||||||
assert segment.keywords == keywords
|
assert segment.keywords == keywords
|
||||||
|
|
||||||
|
def test_document_segment_sign_content_strips_absolute_files_host(self):
|
||||||
|
"""Test that sign_content strips scheme/host from absolute /files URLs and returns a signed relative URL."""
|
||||||
|
# Arrange
|
||||||
|
upload_file_id = "1602650a-4fe4-423c-85a2-af76c083e3c4"
|
||||||
|
segment = DocumentSegment(
|
||||||
|
tenant_id=str(uuid4()),
|
||||||
|
dataset_id=str(uuid4()),
|
||||||
|
document_id=str(uuid4()),
|
||||||
|
position=1,
|
||||||
|
content=f"",
|
||||||
|
word_count=1,
|
||||||
|
tokens=1,
|
||||||
|
created_by=str(uuid4()),
|
||||||
|
)
|
||||||
|
mock_scalars_result = self._mock_scalars_result([upload_file_id])
|
||||||
|
|
||||||
|
# Act
|
||||||
|
with (
|
||||||
|
patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True),
|
||||||
|
patch("models.dataset.db.session.scalars", return_value=mock_scalars_result),
|
||||||
|
patch("models.dataset.time.time", return_value=1700000000),
|
||||||
|
patch("models.dataset.os.urandom", return_value=b"\x00" * 16),
|
||||||
|
):
|
||||||
|
signed = segment.get_sign_content()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert "internal.docker:5001" not in signed
|
||||||
|
assert f"/files/{upload_file_id}/file-preview?timestamp=" in signed
|
||||||
|
assert "&nonce=" in signed
|
||||||
|
assert "&sign=" in signed
|
||||||
|
|
||||||
|
def test_document_segment_sign_content_strips_absolute_files_host_for_image_preview(self):
|
||||||
|
"""Test that sign_content strips scheme/host from absolute image-preview URLs."""
|
||||||
|
# Arrange
|
||||||
|
upload_file_id = "e2a4f7b1-1234-5678-9abc-def012345678"
|
||||||
|
segment = DocumentSegment(
|
||||||
|
tenant_id=str(uuid4()),
|
||||||
|
dataset_id=str(uuid4()),
|
||||||
|
document_id=str(uuid4()),
|
||||||
|
position=1,
|
||||||
|
content=f"",
|
||||||
|
word_count=1,
|
||||||
|
tokens=1,
|
||||||
|
created_by=str(uuid4()),
|
||||||
|
)
|
||||||
|
mock_scalars_result = self._mock_scalars_result([upload_file_id])
|
||||||
|
|
||||||
|
# Act
|
||||||
|
with (
|
||||||
|
patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True),
|
||||||
|
patch("models.dataset.db.session.scalars", return_value=mock_scalars_result),
|
||||||
|
patch("models.dataset.time.time", return_value=1700000000),
|
||||||
|
patch("models.dataset.os.urandom", return_value=b"\x00" * 16),
|
||||||
|
):
|
||||||
|
signed = segment.get_sign_content()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert "internal.docker:5001" not in signed
|
||||||
|
assert f"/files/{upload_file_id}/image-preview?timestamp=" in signed
|
||||||
|
assert "&nonce=" in signed
|
||||||
|
assert "&sign=" in signed
|
||||||
|
|
||||||
|
def test_document_segment_sign_content_skips_upload_files_outside_tenant(self):
|
||||||
|
"""Test that sign_content only signs upload files belonging to the segment tenant."""
|
||||||
|
# Arrange
|
||||||
|
allowed_upload_file_id = "1602650a-4fe4-423c-85a2-af76c083e3c4"
|
||||||
|
denied_upload_file_id = "f8f35fca-568f-4626-adf0-4f30de96aa32"
|
||||||
|
segment = DocumentSegment(
|
||||||
|
tenant_id=str(uuid4()),
|
||||||
|
dataset_id=str(uuid4()),
|
||||||
|
document_id=str(uuid4()),
|
||||||
|
position=1,
|
||||||
|
content=(
|
||||||
|
f"allowed:  "
|
||||||
|
f"denied: "
|
||||||
|
),
|
||||||
|
word_count=1,
|
||||||
|
tokens=1,
|
||||||
|
created_by=str(uuid4()),
|
||||||
|
)
|
||||||
|
mock_scalars_result = self._mock_scalars_result([allowed_upload_file_id])
|
||||||
|
|
||||||
|
# Act
|
||||||
|
with (
|
||||||
|
patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True),
|
||||||
|
patch("models.dataset.db.session.scalars", return_value=mock_scalars_result),
|
||||||
|
patch("models.dataset.time.time", return_value=1700000000),
|
||||||
|
patch("models.dataset.os.urandom", return_value=b"\x00" * 16),
|
||||||
|
):
|
||||||
|
signed = segment.get_sign_content()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert f"/files/{allowed_upload_file_id}/file-preview?timestamp=" in signed
|
||||||
|
assert f"/files/{denied_upload_file_id}/file-preview?timestamp=" not in signed
|
||||||
|
assert f"/files/{denied_upload_file_id}/file-preview)" in signed
|
||||||
|
|
||||||
|
def test_document_segment_sign_content_handles_mixed_preview_order(self):
|
||||||
|
"""Test that sign_content preserves content when file-preview appears before image-preview."""
|
||||||
|
# Arrange
|
||||||
|
file_preview_id = "1602650a-4fe4-423c-85a2-af76c083e3c4"
|
||||||
|
image_preview_id = "e2a4f7b1-1234-5678-9abc-def012345678"
|
||||||
|
segment = DocumentSegment(
|
||||||
|
tenant_id=str(uuid4()),
|
||||||
|
dataset_id=str(uuid4()),
|
||||||
|
document_id=str(uuid4()),
|
||||||
|
position=1,
|
||||||
|
content=(
|
||||||
|
f"file-first:  "
|
||||||
|
f"then-image: "
|
||||||
|
),
|
||||||
|
word_count=1,
|
||||||
|
tokens=1,
|
||||||
|
created_by=str(uuid4()),
|
||||||
|
)
|
||||||
|
mock_scalars_result = self._mock_scalars_result([file_preview_id, image_preview_id])
|
||||||
|
|
||||||
|
# Act
|
||||||
|
with (
|
||||||
|
patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True),
|
||||||
|
patch("models.dataset.db.session.scalars", return_value=mock_scalars_result),
|
||||||
|
patch("models.dataset.time.time", return_value=1700000000),
|
||||||
|
patch("models.dataset.os.urandom", return_value=b"\x00" * 16),
|
||||||
|
):
|
||||||
|
signed = segment.get_sign_content()
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
file_signed = f"/files/{file_preview_id}/file-preview?timestamp="
|
||||||
|
image_signed = f"/files/{image_preview_id}/image-preview?timestamp="
|
||||||
|
assert file_signed in signed
|
||||||
|
assert image_signed in signed
|
||||||
|
assert signed.index(file_signed) < signed.index(image_signed)
|
||||||
|
assert signed.count("&sign=") == 2
|
||||||
|
|
||||||
def test_document_segment_with_answer_field(self):
|
def test_document_segment_with_answer_field(self):
|
||||||
"""Test creating a document segment with answer field for QA model."""
|
"""Test creating a document segment with answer field for QA model."""
|
||||||
# Arrange
|
# Arrange
|
||||||
|
|||||||
Reference in New Issue
Block a user