fix: skip confirmed not-found models in bulk metadata refresh

When enable_metadata_archive_db=True, the previous filter logic would
repeatedly try to fetch metadata for models that were already confirmed
to not exist on CivitAI (from_civitai=False, civitai_deleted=True).

The fix adds a skip condition to exclude models that:
1. Are confirmed not from CivitAI (from_civitai=False)
2. Are marked as deleted/not found on CivitAI (civitai_deleted=True)
3. Either run without the archive DB enabled, or have already been checked against it (db_checked=True)

This prevents unnecessary API calls to CivArchive for user-trained models
or models from non-CivitAI sources.

Fixes repeated "Error fetching version of CivArchive model by hash" logs
for models that will never be found on CivitAI/CivArchive.
This commit is contained in:
Will Miao
2026-02-02 13:27:18 +08:00
parent 1da476d858
commit 94da404cc5
2 changed files with 150 additions and 3 deletions

View File

@@ -48,9 +48,14 @@ class BulkMetadataRefreshUseCase:
for model in cache.raw_data
if model.get("sha256")
and (not model.get("civitai") or not model["civitai"].get("id"))
and (
(enable_metadata_archive_db and not model.get("db_checked", False))
or (not enable_metadata_archive_db and model.get("from_civitai") is True)
and not (
# Skip models confirmed not on CivitAI when no need to retry
model.get("from_civitai") is False
and model.get("civitai_deleted") is True
and (
not enable_metadata_archive_db
or model.get("db_checked", False)
)
)
]

View File

@@ -242,6 +242,148 @@ async def test_bulk_metadata_refresh_reports_errors() -> None:
assert progress.events[-1]["error"] == "boom"
async def test_bulk_metadata_refresh_skips_confirmed_not_found_models(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Models marked as from_civitai=False and civitai_deleted=True should be skipped."""
    # One model confirmed absent from CivitAI, one that is eligible for refresh.
    confirmed_missing = {
        "file_path": "model1.safetensors",
        "sha256": "hash1",
        "from_civitai": False,
        "civitai_deleted": True,
        "model_name": "NotOnCivitAI",
    }
    refreshable = {
        "file_path": "model2.safetensors",
        "sha256": "hash2",
        "from_civitai": True,
        "model_name": "OnCivitAI",
    }

    scanner = MockScanner()
    scanner._cache.raw_data = [confirmed_missing, refreshable]
    model_service = MockModelService(scanner)
    sync_stub = StubMetadataSync()
    settings_stub = StubSettings(enable_metadata_archive_db=False)
    collector = ProgressCollector()

    async def passthrough_hydrate(model_data: Dict[str, Any]) -> Dict[str, Any]:
        # Hand back the input untouched (as if no metadata file exists on disk).
        return model_data

    monkeypatch.setattr(
        MetadataManager, "hydrate_model_data", staticmethod(passthrough_hydrate)
    )

    use_case = BulkMetadataRefreshUseCase(
        service=model_service,
        metadata_sync=sync_stub,
        settings_service=settings_stub,
        logger=logging.getLogger("test"),
    )
    result = await use_case.execute_with_error_handling(progress_callback=collector)

    assert result["success"] is True
    # model1 is filtered out, so exactly one model reaches the sync layer.
    assert result["processed"] == 1
    assert result["updated"] == 1
    assert len(sync_stub.calls) == 1
    assert sync_stub.calls[0]["file_path"] == "model2.safetensors"
async def test_bulk_metadata_refresh_skips_when_archive_checked(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Models with db_checked=True should be skipped even if archive DB is enabled."""
    # Both models are confirmed missing from CivitAI; only the one not yet
    # checked against the archive DB should still be processed.
    already_checked = {
        "file_path": "model1.safetensors",
        "sha256": "hash1",
        "from_civitai": False,
        "civitai_deleted": True,
        "db_checked": True,
        "model_name": "ArchiveChecked",
    }
    not_yet_checked = {
        "file_path": "model2.safetensors",
        "sha256": "hash2",
        "from_civitai": False,
        "civitai_deleted": True,
        "db_checked": False,
        "model_name": "ArchiveNotChecked",
    }

    scanner = MockScanner()
    scanner._cache.raw_data = [already_checked, not_yet_checked]
    model_service = MockModelService(scanner)
    sync_stub = StubMetadataSync()
    settings_stub = StubSettings(enable_metadata_archive_db=True)
    collector = ProgressCollector()

    async def passthrough_hydrate(model_data: Dict[str, Any]) -> Dict[str, Any]:
        return model_data

    monkeypatch.setattr(
        MetadataManager, "hydrate_model_data", staticmethod(passthrough_hydrate)
    )

    use_case = BulkMetadataRefreshUseCase(
        service=model_service,
        metadata_sync=sync_stub,
        settings_service=settings_stub,
        logger=logging.getLogger("test"),
    )
    result = await use_case.execute_with_error_handling(progress_callback=collector)

    assert result["success"] is True
    # model1 carries db_checked=True, so only model2 is refreshed.
    assert result["processed"] == 1
    assert result["updated"] == 1
    assert len(sync_stub.calls) == 1
    assert sync_stub.calls[0]["file_path"] == "model2.safetensors"
async def test_bulk_metadata_refresh_processes_never_fetched_models(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Models that have never been fetched (from_civitai=None) should be processed."""
    # Neither an explicit None nor an absent from_civitai field counts as a
    # confirmed not-found model, so both must go through the refresh.
    explicit_none = {
        "file_path": "model1.safetensors",
        "sha256": "hash1",
        "from_civitai": None,
        "model_name": "NeverFetched",
    }
    field_absent = {
        "file_path": "model2.safetensors",
        "sha256": "hash2",
        "model_name": "NoFromCivitaiField",
    }

    scanner = MockScanner()
    scanner._cache.raw_data = [explicit_none, field_absent]
    model_service = MockModelService(scanner)
    sync_stub = StubMetadataSync()
    settings_stub = StubSettings(enable_metadata_archive_db=False)
    collector = ProgressCollector()

    async def passthrough_hydrate(model_data: Dict[str, Any]) -> Dict[str, Any]:
        return model_data

    monkeypatch.setattr(
        MetadataManager, "hydrate_model_data", staticmethod(passthrough_hydrate)
    )

    use_case = BulkMetadataRefreshUseCase(
        service=model_service,
        metadata_sync=sync_stub,
        settings_service=settings_stub,
        logger=logging.getLogger("test"),
    )
    result = await use_case.execute_with_error_handling(progress_callback=collector)

    assert result["success"] is True
    # Neither model is skipped by the not-found filter.
    assert result["processed"] == 2
    assert result["updated"] == 2
    assert len(sync_stub.calls) == 2
async def test_download_model_use_case_raises_validation_error() -> None:
coordinator = StubDownloadCoordinator(error="validation")
use_case = DownloadModelUseCase(download_coordinator=coordinator)