feat(metadata): implement model data hydration and enhance metadata handling across services, fixes #547

This commit is contained in:
Will Miao
2025-10-09 22:15:07 +08:00
parent f542ade628
commit c9a65c7347
9 changed files with 443 additions and 37 deletions

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import json
import os
from pathlib import Path
from types import SimpleNamespace
from typing import Any, Dict, List, Tuple
@@ -30,7 +32,23 @@ def patch_metadata_manager(monkeypatch: pytest.MonkeyPatch):
saved.append((path, metadata.copy()))
return True
class SimpleMetadata:
    """Minimal dict-backed metadata object returned by the fake loader."""

    def __init__(self, payload: Dict[str, Any]) -> None:
        # This stub starts with an empty unknown-fields mapping.
        self._unknown_fields: Dict[str, Any] = {}
        self._payload = payload

    def to_dict(self) -> Dict[str, Any]:
        """Return a shallow copy of the wrapped payload."""
        return dict(self._payload)
async def fake_load(path: str, *_args: Any, **_kwargs: Any):
    """Read the ``.metadata.json`` sidecar for *path* directly from disk.

    Returns ``(SimpleMetadata, False)`` when the sidecar file exists and
    ``(None, False)`` otherwise. Extra positional and keyword arguments are
    accepted and ignored.
    """
    sidecar_suffix = ".metadata.json"
    if path.endswith(sidecar_suffix):
        metadata_path = path
    else:
        stem, _ext = os.path.splitext(path)
        metadata_path = f"{stem}{sidecar_suffix}"

    if not os.path.exists(metadata_path):
        return None, False

    with open(metadata_path, encoding="utf-8") as handle:
        payload = json.load(handle)
    return SimpleMetadata(payload), False
monkeypatch.setattr(metadata_module.MetadataManager, "save_metadata", staticmethod(fake_save))
monkeypatch.setattr(metadata_module.MetadataManager, "load_metadata", staticmethod(fake_load))
return saved
@@ -64,10 +82,80 @@ async def test_update_metadata_after_import_enriches_entries(monkeypatch: pytest
assert custom[0]["hasMeta"] is True
assert custom[0]["type"] == "image"
assert patch_metadata_manager[0][0] == str(model_file)
assert Path(patch_metadata_manager[0][0]) == model_file
assert scanner.updates
@pytest.mark.asyncio
async def test_update_metadata_after_import_preserves_existing_metadata(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
    patch_metadata_manager,
):
    """Importing a custom image must not drop data already stored on disk.

    The on-disk metadata carries values (default images, an existing custom
    image, ``extraField``) that the in-memory model data handed to the updater
    lacks. After the import, both the saved payload and the scanner update are
    expected to contain the on-disk values plus the newly imported image.
    """
    model_hash = "b" * 64

    # Files on disk: the model itself and the image that gets imported.
    model_file = tmp_path / "preserve.safetensors"
    model_file.write_text("content", encoding="utf-8")
    image_path = tmp_path / "new.png"
    image_path.write_bytes(b"fakepng")

    # Sidecar metadata that is richer than the in-memory model data below.
    default_images = [{"url": "https://example.com/default.png", "type": "image"}]
    existing_payload = {
        "model_name": "Example",
        "file_path": str(model_file),
        "civitai": {
            "id": 42,
            "modelId": 88,
            "name": "Example",
            "trainedWords": ["foo"],
            "images": default_images,
            "customImages": [
                {"id": "existing-id", "type": "image", "url": "", "nsfwLevel": 0}
            ],
        },
        "extraField": "keep-me",
    }
    (tmp_path / "preserve.metadata.json").write_text(
        json.dumps(existing_payload), encoding="utf-8"
    )

    # In-memory view: no default images, no custom images, no extra field.
    model_data = {
        "sha256": model_hash,
        "model_name": "Example",
        "file_path": str(model_file),
        "civitai": {
            "id": 42,
            "modelId": 88,
            "name": "Example",
            "trainedWords": ["foo"],
            "customImages": [],
        },
    }
    scanner = StubScanner([model_data])

    # Neutralise image metadata extraction so only the merge logic is exercised.
    monkeypatch.setattr(metadata_module.ExifUtils, "extract_image_metadata", staticmethod(lambda _path: None))
    monkeypatch.setattr(metadata_module.MetadataUpdater, "_parse_image_metadata", staticmethod(lambda payload: None))

    imported = [(str(image_path), "new-id")]
    regular, custom = await metadata_module.MetadataUpdater.update_metadata_after_import(
        model_hash,
        model_data,
        scanner,
        imported,
    )

    expected_images = existing_payload["civitai"]["images"]
    expected_custom_ids = {"existing-id", "new-id"}

    # Return values reflect the merged state.
    assert regular == expected_images
    assert any(item["id"] == "new-id" for item in custom)

    # The payload written to disk keeps every pre-existing field.
    saved_path, saved_payload = patch_metadata_manager[-1]
    assert Path(saved_path) == model_file
    assert saved_payload["extraField"] == "keep-me"
    saved_civitai = saved_payload["civitai"]
    assert saved_civitai["images"] == expected_images
    assert saved_civitai["trainedWords"] == ["foo"]
    assert {item["id"] for item in saved_civitai["customImages"]} == expected_custom_ids

    # The scanner receives the same merged metadata.
    assert scanner.updates
    last_update = scanner.updates[-1]
    updated_metadata = last_update[2]
    assert updated_metadata["civitai"]["images"] == expected_images
    assert {item["id"] for item in updated_metadata["civitai"]["customImages"]} == expected_custom_ids
async def test_refresh_model_metadata_records_failures(monkeypatch: pytest.MonkeyPatch, tmp_path):
model_hash = "b" * 64
model_file = tmp_path / "model.safetensors"
@@ -79,6 +167,16 @@ async def test_refresh_model_metadata_records_failures(monkeypatch: pytest.Monke
async def fetch_and_update_model(self, **_kwargs):
    """Stub sync call: ignore every keyword argument and return ``(True, None)``."""
    outcome = (True, None)
    return outcome
async def fake_hydrate(model_data: Dict[str, Any]) -> Dict[str, Any]:
    """Flag *model_data* as hydrated in place and return the same dict."""
    model_data.update({"hydrated": True})
    return model_data
monkeypatch.setattr(
metadata_module.MetadataManager,
"hydrate_model_data",
staticmethod(fake_hydrate),
)
monkeypatch.setattr(metadata_module, "_metadata_sync_service", StubMetadataSync())
result = await metadata_module.MetadataUpdater.refresh_model_metadata(
@@ -89,6 +187,7 @@ async def test_refresh_model_metadata_records_failures(monkeypatch: pytest.Monke
{"refreshed_models": set(), "errors": [], "last_error": None},
)
assert result is True
assert cache_item["hydrated"] is True
async def test_update_metadata_from_local_examples_generates_entries(monkeypatch: pytest.MonkeyPatch, tmp_path):
@@ -112,4 +211,4 @@ async def test_update_metadata_from_local_examples_generates_entries(monkeypatch
str(model_dir),
)
assert success is True
assert model_data["civitai"]["images"]
assert model_data["civitai"]["images"]

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import os
from pathlib import Path
from types import SimpleNamespace
@@ -8,7 +9,9 @@ from typing import Any, Dict, Tuple
import pytest
from py.services.settings_manager import get_settings_manager
from py.utils import example_images_metadata as metadata_module
from py.utils import example_images_processor as processor_module
from py.utils.example_images_paths import get_model_folder
@pytest.fixture(autouse=True)
@@ -22,6 +25,27 @@ def restore_settings() -> None:
manager.settings.update(original)
@pytest.fixture(autouse=True)
def patch_metadata_loader(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap ``MetadataManager.load_metadata`` for a disk-backed fake.

    The replacement is installed on the ``MetadataManager`` referenced by both
    the processor module and the metadata module, for every test in this file.
    """

    class SimpleMetadata:
        """Minimal dict-backed metadata object returned by the fake loader."""

        def __init__(self, payload: Dict[str, Any]) -> None:
            # This stub starts with an empty unknown-fields mapping.
            self._unknown_fields: Dict[str, Any] = {}
            self._payload = payload

        def to_dict(self) -> Dict[str, Any]:
            """Return a shallow copy of the wrapped payload."""
            return dict(self._payload)

    async def fake_load(path: str, *_args: Any, **_kwargs: Any):
        """Return ``(SimpleMetadata, False)`` if the sidecar exists, else ``(None, False)``."""
        sidecar_suffix = ".metadata.json"
        if path.endswith(sidecar_suffix):
            metadata_path = path
        else:
            stem, _ext = os.path.splitext(path)
            metadata_path = f"{stem}{sidecar_suffix}"

        if not os.path.exists(metadata_path):
            return None, False

        with open(metadata_path, encoding="utf-8") as handle:
            payload = json.load(handle)
        return SimpleMetadata(payload), False

    # Patch the processor module first, then the metadata module.
    for module in (processor_module, metadata_module):
        monkeypatch.setattr(module.MetadataManager, "load_metadata", staticmethod(fake_load))
def test_get_file_extension_from_magic_bytes() -> None:
jpg_bytes = b"\xff\xd8\xff" + b"rest"
ext = processor_module.ExampleImagesProcessor._get_file_extension_from_content_or_headers(
@@ -146,3 +170,88 @@ async def test_import_images_raises_when_model_not_found(monkeypatch: pytest.Mon
with pytest.raises(processor_module.ExampleImagesImportError):
await processor_module.ExampleImagesProcessor.import_images("a" * 64, [str(tmp_path / "missing.png")])
@pytest.mark.asyncio
async def test_delete_custom_image_preserves_existing_metadata(monkeypatch: pytest.MonkeyPatch, tmp_path) -> None:
    """Deleting a custom image must keep the metadata already stored on disk.

    The scanner's in-memory model data omits the ``civitai.images`` list that
    is present in the on-disk metadata file. After the delete request, the
    saved payload and the scanner update must still contain that list, while
    ``customImages`` becomes empty and the image file is removed.
    """
    settings_manager = get_settings_manager()
    settings_manager.settings["example_images_path"] = str(tmp_path / "examples")

    model_hash = "c" * 64
    model_file = tmp_path / "keep.safetensors"
    model_file.write_text("content", encoding="utf-8")
    metadata_path = tmp_path / "keep.metadata.json"

    # On-disk metadata: includes default images that the cache entry lacks.
    existing_metadata = {
        "model_name": "Keep",
        "file_path": str(model_file),
        "civitai": {
            "images": [{"url": "https://example.com/default.png", "type": "image"}],
            "customImages": [{"id": "existing-id", "url": "", "type": "image"}],
            "trainedWords": ["foo"],
        },
    }
    metadata_path.write_text(json.dumps(existing_metadata), encoding="utf-8")

    # In-memory model data handed to the scanner: no ``images`` key.
    model_data = {
        "sha256": model_hash,
        "model_name": "Keep",
        "file_path": str(model_file),
        "civitai": {
            "customImages": [{"id": "existing-id", "url": "", "type": "image"}],
            "trainedWords": ["foo"],
        },
    }

    class Scanner(StubScanner):
        def has_hash(self, hash_value: str) -> bool:
            return hash_value == model_hash

    scanner = Scanner([model_data])

    async def _return_scanner(cls=None):
        return scanner

    # Every scanner lookup resolves to the same stub.
    monkeypatch.setattr(processor_module.ServiceRegistry, "get_lora_scanner", classmethod(_return_scanner))
    monkeypatch.setattr(processor_module.ServiceRegistry, "get_checkpoint_scanner", classmethod(_return_scanner))
    monkeypatch.setattr(processor_module.ServiceRegistry, "get_embedding_scanner", classmethod(_return_scanner))

    # Create the custom image file that the request is expected to delete.
    model_folder = get_model_folder(model_hash)
    os.makedirs(model_folder, exist_ok=True)
    (Path(model_folder) / "custom_existing-id.png").write_bytes(b"data")

    saved: list[tuple[str, Dict[str, Any]]] = []

    async def fake_save(path: str, payload: Dict[str, Any]) -> bool:
        # Record a snapshot of each save instead of writing to disk.
        saved.append((path, payload.copy()))
        return True

    monkeypatch.setattr(processor_module.MetadataManager, "save_metadata", staticmethod(fake_save))

    class StubRequest:
        """Request stand-in exposing only the async ``json()`` accessor."""

        def __init__(self, payload: Dict[str, Any]) -> None:
            self._payload = payload

        async def json(self) -> Dict[str, Any]:
            return self._payload

    response = await processor_module.ExampleImagesProcessor.delete_custom_image(
        StubRequest({"model_hash": model_hash, "short_id": "existing-id"})
    )

    assert response.status == 200
    body = json.loads(response.text)
    assert body["success"] is True
    assert body["custom_images"] == []
    assert not (Path(model_folder) / "custom_existing-id.png").exists()

    saved_path, saved_payload = saved[-1]
    # Compare as Path objects rather than raw strings so the check does not
    # depend on the exact string form of the saved path; this matches the
    # path assertion used in the metadata updater tests.
    assert Path(saved_path) == model_file
    assert saved_payload["civitai"]["images"] == existing_metadata["civitai"]["images"]
    assert saved_payload["civitai"]["trainedWords"] == ["foo"]
    assert saved_payload["civitai"]["customImages"] == []

    assert scanner.updated
    _, _, updated_metadata = scanner.updated[-1]
    assert updated_metadata["civitai"]["images"] == existing_metadata["civitai"]["images"]
    assert updated_metadata["civitai"]["customImages"] == []