diff --git a/py/services/metadata_sync_service.py b/py/services/metadata_sync_service.py index d4a0b811..e7d1d6d9 100644 --- a/py/services/metadata_sync_service.py +++ b/py/services/metadata_sync_service.py @@ -76,7 +76,7 @@ class MetadataSyncService: files = meta.get("files") images = meta.get("images") source = meta.get("source") - return bool(files) and bool(images) and source != "archive_db" + return bool(files) and bool(images) and source not in ("archive_db", "civarchive") async def update_model_metadata( self, @@ -90,11 +90,11 @@ class MetadataSyncService: existing_civitai = local_metadata.get("civitai") or {} if ( - civitai_metadata.get("source") == "archive_db" + not self.is_civitai_api_metadata(civitai_metadata) and self.is_civitai_api_metadata(existing_civitai) ): logger.info( - "Skip civitai update for %s (%s)", + "Skip civitai update for %s (%s) - existing metadata is higher quality", local_metadata.get("model_name", ""), existing_civitai.get("name", ""), ) diff --git a/tests/services/test_metadata_sync_service.py b/tests/services/test_metadata_sync_service.py index 5d6c7e6b..f415262f 100644 --- a/tests/services/test_metadata_sync_service.py +++ b/tests/services/test_metadata_sync_service.py @@ -481,3 +481,56 @@ async def test_relink_metadata_raises_when_version_missing(): model_id=9, model_version_id=None, ) + +@pytest.mark.asyncio +async def test_fetch_and_update_model_does_not_overwrite_api_metadata_with_archive(tmp_path): + helpers = build_service() + + # Existing high-quality metadata + existing_civitai = { + "source": "api", # will be normalized to civitai_api in some paths, but let's use what is_civitai_api_metadata expects + "files": [{"id": 1}], + "images": [{"url": "img1"}], + "name": "High Quality", + "trainedWords": ["keyword1"] + } + + # Incoming lower-quality metadata from CivArchive (simulating fallback) + civarchive_payload = { + "source": "civarchive", + "model": {"name": "Low Quality", "description": "low quality", "tags": []}, + "images": [], # Missing images + "baseModel": "sdxl", + "trainedWords": ["keyword2"] + } + helpers.default_provider.get_model_by_hash.return_value = (civarchive_payload, None) + + model_path = tmp_path / "model.safetensors" + model_data = { + "model_name": "High Quality", + "metadata_source": "civitai_api", + "civitai": existing_civitai, + "file_path": str(model_path), + } + update_cache = AsyncMock(return_value=True) + + ok, error = await helpers.service.fetch_and_update_model( + sha256="abc", + file_path=str(model_path), + model_data=model_data, + update_cache_func=update_cache, + ) + + assert ok and error is None + # Ensure the civitai block still contains the high-quality data + assert model_data["civitai"]["name"] == "High Quality" + assert "keyword1" in model_data["civitai"]["trainedWords"] + # Source might be updated in model_data root, but the block should be protected if logic works + assert model_data["metadata_source"] == "civarchive" + + # Check that trained words were merged if any (though in this case we might skip the whole update) + # Actually, according to the new logic, the update is SKIPPED entirely for the civitai block + assert model_data["civitai"]["trainedWords"] == ["keyword1"] + + helpers.metadata_manager.save_metadata.assert_awaited() + update_cache.assert_awaited()