diff --git a/py/utils/exif_utils.py b/py/utils/exif_utils.py index 08e0cd14..5fc62d4c 100644 --- a/py/utils/exif_utils.py +++ b/py/utils/exif_utils.py @@ -43,7 +43,7 @@ class ExifUtils: struct.unpack('>I', data[:4])[0] == 12 and data[4:8] == b'JXL ' and data[8:12] == bytes([0x0d, 0x0a, 0x87, 0x0a]) - and struct.unpack('>I', data[12:16])[0] == 20 + and struct.unpack('>I', data[12:16])[0] >= 16 and data[16:20] == b'ftyp' and data[20:24] == b'jxl ' ) @@ -57,6 +57,9 @@ class ExifUtils: return True return False + # Max decompressed size for brotli metadata (2 MB) + _BROTLI_MAX_DECOMPRESSED = 2 * 1024 * 1024 + @staticmethod def _extract_isobmff_brotli(image_path: str) -> Optional[dict]: try: @@ -88,6 +91,13 @@ class ExifUtils: if _BROTLI_AVAILABLE: try: decompressed = brotli.decompress(compressed) + if len(decompressed) > ExifUtils._BROTLI_MAX_DECOMPRESSED: + logger.warning( + "Brotli metadata too large (%d bytes, max %d), ignoring", + len(decompressed), + ExifUtils._BROTLI_MAX_DECOMPRESSED, + ) + decompressed = None except Exception: decompressed = None else: diff --git a/requirements.txt b/requirements.txt index af2ea748..37b7cea7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,4 @@ beautifulsoup4 platformdirs pyyaml # brotli — ISOBMFF (AVIF/JXL) metadata decompression -brotli>=1.2.0 \ No newline at end of file +brotli>=1.2.0 diff --git a/tests/utils/test_exif_utils.py b/tests/utils/test_exif_utils.py index 5d22f521..eee2ca43 100644 --- a/tests/utils/test_exif_utils.py +++ b/tests/utils/test_exif_utils.py @@ -141,3 +141,150 @@ def test_update_image_metadata_preserves_png_workflow(tmp_path): img.info["parameters"] == 'prompt text\nRecipe metadata: {"title":"recipe"}' ) + + +# --- ISOBMFF / brotli extraction tests --- + +import struct + +import brotli + + +def _build_jxl_with_brob(payload_json: dict) -> bytes: + """Build a minimal JXL container with a brob box containing brotli-compressed JSON.""" + # ISOBMFF box 1: JXL signature box (size=12, type='JXL ', signature) + box1 = struct.pack(">I", 12) + b"JXL " + bytes([0x0d, 0x0a, 0x87, 0x0a]) + # ISOBMFF box 2: ftyp (size=16, type='ftyp', major='jxl ', minor=0) + box2 = struct.pack(">I", 16) + b"ftyp" + b"jxl " + struct.pack(">I", 0) + # ISOBMFF box 3: brob — payload is b'comf' + brotli(json) + compressed = brotli.compress(json.dumps(payload_json).encode("utf-8")) + brob_payload = b"comf" + compressed + box3 = struct.pack(">I", 8 + len(brob_payload)) + b"brob" + brob_payload + return box1 + box2 + box3 + + +def _build_avif_with_brob(payload_json: dict) -> bytes: + """Build a minimal AVIF container with a brob box containing brotli-compressed JSON.""" + compressed = brotli.compress(json.dumps(payload_json).encode("utf-8")) + brob_payload = b"comf" + compressed + ftyp_box = struct.pack(">I", 20) + b"ftyp" + b"avif" + struct.pack(">I", 0) + b"avif" + brob_box = struct.pack(">I", 8 + len(brob_payload)) + b"brob" + brob_payload + return ftyp_box + brob_box + + +class TestIsobmffBrotliExtraction: + """Tests for ISOBMFF brotli metadata extraction in ExifUtils.""" + + def test_extract_jxl_brotli_happy_path(self, tmp_path): + """JXL container with valid brob box extracts prompt and workflow.""" + payload = {"prompt": "a cute cat", "workflow": {"nodes": [{"id": 1}]}} + data = _build_jxl_with_brob(payload) + path = tmp_path / "test.jxl" + path.write_bytes(data) + + result = ExifUtils._load_structured_metadata(str(path)) + + assert result["prompt"] == "a cute cat" + assert result["workflow"] == '{"nodes": [{"id": 1}]}' + assert result["parameters"] is None + assert result["comment"] is None + + def test_extract_avif_brotli_happy_path(self, tmp_path): + """AVIF container with valid brob box extracts prompt and workflow.""" + payload = {"prompt": "landscape", "workflow": {"nodes": []}} + data = _build_avif_with_brob(payload) + path = tmp_path / "test.avif" + path.write_bytes(data) + + result = ExifUtils._load_structured_metadata(str(path)) + + assert result["prompt"] == "landscape" + assert result["workflow"] == '{"nodes": []}' + + def test_extract_no_brob_box_returns_none(self, tmp_path): + """JXL container without a brob box returns None from _extract_isobmff_brotli.""" + # Only JXL signature + ftyp, no brob + box1 = struct.pack(">I", 12) + b"JXL " + bytes([0x0d, 0x0a, 0x87, 0x0a]) + box2 = struct.pack(">I", 16) + b"ftyp" + b"jxl " + struct.pack(">I", 0) + path = tmp_path / "test.jxl" + path.write_bytes(box1 + box2) + + # The low-level extraction should return None (no brob box) + result = ExifUtils._extract_isobmff_brotli(str(path)) + assert result is None + + def test_extract_corrupt_brob_returns_none(self, tmp_path): + """Broken brob box payload gracefully returns None.""" + box1 = struct.pack(">I", 12) + b"JXL " + bytes([0x0d, 0x0a, 0x87, 0x0a]) + box2 = struct.pack(">I", 16) + b"ftyp" + b"jxl " + struct.pack(">I", 0) + # brob with garbage payload that doesn't start with b'comf' + garbage = b"\xff\xff\xff\xff" * 32 + box3 = struct.pack(">I", 8 + len(garbage)) + b"brob" + garbage + path = tmp_path / "test.jxl" + path.write_bytes(box1 + box2 + box3) + + result = ExifUtils._extract_isobmff_brotli(str(path)) + assert result is None + + def test_extract_non_isobmff_file_falls_through(self, tmp_path): + """A regular PNG file is not processed as ISOBMFF and returns PIL metadata.""" + png_info = PngImagePlugin.PngInfo() + png_info.add_text("prompt", "from png") + path = tmp_path / "test.png" + Image.new("RGB", (4, 4), color="red").save(path, pnginfo=png_info) + + result = ExifUtils._load_structured_metadata(str(path)) + assert result["prompt"] == "from png" + + def test_extract_skip_on_update_and_optimize(self, tmp_path): + """AVIF/JXL files are skipped for write operations (update/append/optimize).""" + path = tmp_path / "test.avif" + path.write_bytes(b"fake avif data") + + # update_image_metadata should return the path unchanged + result = ExifUtils.update_image_metadata(str(path), "some metadata") + assert result == str(path) + + # append_recipe_metadata should also skip + result = ExifUtils.append_recipe_metadata(str(path), {"title": "test"}) + assert result == str(path) + + # optimize_image should passthrough for AVIF/JXL paths + result_data, ext = ExifUtils.optimize_image(str(path)) + assert ext == ".avif" + assert result_data == b"fake avif data" + + def test_extract_prompt_as_dict(self, tmp_path): + """prompt field as dict is JSON-serialized.""" + payload = {"prompt": {"text": "hello", "negative": "bad"}} + data = _build_jxl_with_brob(payload) + path = tmp_path / "test.jxl" + path.write_bytes(data) + + result = ExifUtils._load_structured_metadata(str(path)) + assert json.loads(result["prompt"]) == {"text": "hello", "negative": "bad"} + + def test_extract_workflow_as_list(self, tmp_path): + """workflow field as list is JSON-serialized.""" + payload = {"workflow": [{"id": 1}, {"id": 2}]} + data = _build_avif_with_brob(payload) + path = tmp_path / "test.avif" + path.write_bytes(data) + + result = ExifUtils._load_structured_metadata(str(path)) + assert json.loads(result["workflow"]) == [{"id": 1}, {"id": 2}] + + def test_over_decompressed_size_limit(self, tmp_path, monkeypatch): + """Decompressed data exceeding _BROTLI_MAX_DECOMPRESSED is rejected.""" + # Monkey-patch the limit to a small value to avoid large test data + monkeypatch.setattr(ExifUtils, "_BROTLI_MAX_DECOMPRESSED", 100) + + large_content = "x" * 200 + payload = {"prompt": large_content} + data = _build_jxl_with_brob(payload) + path = tmp_path / "test.jxl" + path.write_bytes(data) + + # Direct extraction should return None because decompressed size exceeds limit + result = ExifUtils._extract_isobmff_brotli(str(path)) + assert result is None