feat(media): add media type hint support for file extension detection, fixes #795 and fixes #751

- Add optional `media_type_hint` parameter to `_get_file_extension_from_content_or_headers` method
- When `media_type_hint` is "video" and no extension can be determined from content/headers/URL, default to `.mp4`
- Pass image metadata type as hint in both `process_example_images` and `process_example_images_batch` methods
- Add unit tests to verify media type hint behavior and priority
This commit is contained in:
Will Miao
2026-01-31 19:39:37 +08:00
parent 92d471daf5
commit 719e18adb6
2 changed files with 40 additions and 4 deletions

View File

@@ -75,6 +75,31 @@ def test_get_file_extension_defaults_to_jpg() -> None:
assert ext == ".jpg"
def test_get_file_extension_from_media_type_hint_video() -> None:
    """A 'video' hint produces .mp4 when no other source yields an extension."""
    # The URL path ends in a bare UUID, so it carries no file suffix.
    extensionless_url = "https://c.genur.art/536be3c9-e506-4365-b078-bfbc5df9ceec"
    detect = (
        processor_module.ExampleImagesProcessor
        ._get_file_extension_from_content_or_headers
    )

    # Empty payload and empty headers leave the hint as the only signal.
    detected = detect(b"", {}, extensionless_url, "video")

    assert detected == ".mp4"
def test_get_file_extension_from_media_type_hint_image() -> None:
    """An 'image' hint still resolves to the .jpg fallback extension."""
    extensionless_url = "https://example.com/no-extension"
    detect = (
        processor_module.ExampleImagesProcessor
        ._get_file_extension_from_content_or_headers
    )

    # No payload bytes, no headers, and no suffix in the URL path.
    detected = detect(b"", {}, extensionless_url, "image")

    assert detected == ".jpg"
def test_get_file_extension_media_type_hint_low_priority() -> None:
    """The hint is a last resort: an extension in the URL takes precedence."""
    # The hint ("image") deliberately disagrees with the URL suffix (".mp4"),
    # so the assertion only holds if the URL is consulted before the hint.
    url_with_suffix = "https://example.com/video.mp4"
    conflicting_hint = "image"
    detect = (
        processor_module.ExampleImagesProcessor
        ._get_file_extension_from_content_or_headers
    )

    detected = detect(b"", {}, url_with_suffix, conflicting_hint)

    assert detected == ".mp4"
class StubScanner:
def __init__(self, models: list[Dict[str, Any]]) -> None:
self._cache = SimpleNamespace(raw_data=models)