fix: remote filename will be 'inline' if Content-Disposition: inline (#25877)

Fixed an issue where the filename would be 'inline' if the response header contained `Content-Disposition: inline` when retrieving a file by URL.

Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
リイノ Lin
2025-09-25 10:39:44 +08:00
committed by GitHub
parent e937c8c72e
commit 6841a09667
2 changed files with 147 additions and 9 deletions

View File

@@ -8,6 +8,7 @@ from typing import Any
import httpx
from sqlalchemy import select
from sqlalchemy.orm import Session
from werkzeug.http import parse_options_header
from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
from core.file import File, FileBelongsTo, FileTransferMethod, FileType, FileUploadConfig, helpers
@@ -247,6 +248,25 @@ def _build_from_remote_url(
)
def _extract_filename(url_path: str, content_disposition: str | None) -> str | None:
filename = None
# Try to extract from Content-Disposition header first
if content_disposition:
_, params = parse_options_header(content_disposition)
# RFC 5987 https://datatracker.ietf.org/doc/html/rfc5987: filename* takes precedence over filename
filename = params.get("filename*") or params.get("filename")
# Fallback to URL path if no filename from header
if not filename:
filename = os.path.basename(url_path)
return filename or None
def _guess_mime_type(filename: str) -> str:
"""Guess MIME type from filename, returning empty string if None."""
guessed_mime, _ = mimetypes.guess_type(filename)
return guessed_mime or ""
def _get_remote_file_info(url: str):
file_size = -1
parsed_url = urllib.parse.urlparse(url)
@@ -254,23 +274,26 @@ def _get_remote_file_info(url: str):
filename = os.path.basename(url_path)
# Initialize mime_type from filename as fallback
mime_type, _ = mimetypes.guess_type(filename)
if mime_type is None:
mime_type = ""
mime_type = _guess_mime_type(filename)
resp = ssrf_proxy.head(url, follow_redirects=True)
if resp.status_code == httpx.codes.OK:
if content_disposition := resp.headers.get("Content-Disposition"):
filename = str(content_disposition.split("filename=")[-1].strip('"'))
# Re-guess mime_type from updated filename
mime_type, _ = mimetypes.guess_type(filename)
if mime_type is None:
mime_type = ""
content_disposition = resp.headers.get("Content-Disposition")
extracted_filename = _extract_filename(url_path, content_disposition)
if extracted_filename:
filename = extracted_filename
mime_type = _guess_mime_type(filename)
file_size = int(resp.headers.get("Content-Length", file_size))
# Fallback to Content-Type header if mime_type is still empty
if not mime_type:
mime_type = resp.headers.get("Content-Type", "").split(";")[0].strip()
if not filename:
extension = mimetypes.guess_extension(mime_type) or ".bin"
filename = f"{uuid.uuid4().hex}{extension}"
if not mime_type:
mime_type = _guess_mime_type(filename)
return mime_type, filename, file_size