Compare commits

..

3 Commits

Author SHA1 Message Date
Will Miao
34791c2ad7 fix(recipe): use resources type field to identify checkpoint instead of modelVersionIds[0]
When importing a CivitAI image as a recipe, modelVersionIds[0] was blindly used as the checkpoint version ID. This array mixes checkpoints and LoRAs without ordering guarantees, causing LoRAs to be saved as the recipe checkpoint.

Fix by:
1. Removing the modelVersionIds[0] fallback in _download_remote_media
2. Parsing resources entries with type:"model" as the checkpoint
3. Adding model type validation in populate_checkpoint_from_civitai

Also add 2 tests for the new behavior and fix 3 tests whose mocks lacked the required model.type field.
2026-05-28 15:46:38 +08:00
Will Miao
3f6824eef6 fix(example-images): exclude failed_models from check_pending_models pending count
Previously check_pending_models() only skipped models already in
processed_models, so models that had permanently failed (no CivitAI
images available, download errors) were forever reported as "pending".
This caused repeated auto-download cycles with no actual work to do.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 12:00:25 +08:00
Will Miao
3919dfa3f4 fix(metadata): suppress rate-limit propagation when model already confirmed deleted
When CivitAI returns 404 (ResourceNotFoundError) and a fallback provider
like CivArchive subsequently rate-limits, the ChainedMetadataProvider
now suppresses the RateLimitError instead of propagating it. Previously,
the rate-limit error would bubble up through _refresh_single_model and
cause the outer retry loop to re-process the same model repeatedly,
producing dozens of duplicate "Model X is no longer available" log
messages and wasting API quota.

The model is NOT permanently marked as ignored — its last_checked_at
timestamp is preserved, so it will be retried on the next refresh cycle
when the rate limit has cleared and CivArchive may still have the data.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 11:56:22 +08:00
8 changed files with 225 additions and 16 deletions

View File

@@ -7,7 +7,7 @@ import re
from typing import Dict, List, Any, Optional, Tuple from typing import Dict, List, Any, Optional, Tuple
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from ..config import config from ..config import config
from ..utils.constants import VALID_LORA_TYPES from ..utils.constants import VALID_LORA_TYPES, VALID_CHECKPOINT_SUB_TYPES
from ..utils.civitai_utils import rewrite_preview_url from ..utils.civitai_utils import rewrite_preview_url
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -173,6 +173,20 @@ class RecipeMetadataParser(ABC):
checkpoint['isDeleted'] = True checkpoint['isDeleted'] = True
return checkpoint return checkpoint
# Validate that the model type is actually a checkpoint.
# Unlike populate_lora_from_civitai which has this check,
# this function was missing type validation — allowing LoRA
# version data to be saved as the recipe's checkpoint when the
# wrong version ID was passed downstream (fixed in v2.7+).
model_type = civitai_data.get('model', {}).get('type', '').lower()
if model_type not in VALID_CHECKPOINT_SUB_TYPES:
logger.warning(
f"Cannot populate checkpoint: model version {civitai_data.get('id')} "
f"has type '{model_type}', expected one of {VALID_CHECKPOINT_SUB_TYPES}. "
f"Skipping checkpoint enrichment."
)
return checkpoint
if 'model' in civitai_data and 'name' in civitai_data['model']: if 'model' in civitai_data and 'name' in civitai_data['model']:
checkpoint['name'] = civitai_data['model']['name'] checkpoint['name'] = civitai_data['model']['name']

View File

@@ -185,8 +185,67 @@ class CivitaiApiMetadataParser(RecipeMetadataParser):
# Process standard resources array # Process standard resources array
if "resources" in metadata and isinstance(metadata["resources"], list): if "resources" in metadata and isinstance(metadata["resources"], list):
for resource in metadata["resources"]: for resource in metadata["resources"]:
resource_type = resource.get("type", "lora")
# Track resources with type "model" — these are checkpoint models.
# The resources array is the most reliable source for checkpoint
# identification because it has an explicit type field and hash,
# unlike modelVersionIds which is a flat list with no type info.
if resource_type == "model":
checkpoint_entry = {
"id": 0,
"modelId": 0,
"name": resource.get("name", "Unknown Model"),
"version": "",
"type": resource.get("type", "model"),
"existsLocally": False,
"localPath": None,
"file_name": resource.get("name", ""),
"hash": resource.get("hash", "") or "",
"thumbnailUrl": "/loras_static/images/no-preview.png",
"baseModel": "",
"size": 0,
"downloadUrl": "",
"isDeleted": False,
}
# Try to look up base model from the checkpoint hash
if checkpoint_entry["hash"] and metadata_provider:
try:
civitai_info = (
await metadata_provider.get_model_by_hash(
checkpoint_entry["hash"]
)
)
civitai_data, error_msg = (
(civitai_info, None)
if not isinstance(civitai_info, tuple)
else civitai_info
)
if civitai_data and error_msg != "Model not found":
if 'model' in civitai_data and 'name' in civitai_data['model']:
checkpoint_entry['name'] = civitai_data['model']['name']
checkpoint_entry['id'] = civitai_data.get('id', 0)
checkpoint_entry['modelId'] = civitai_data.get('modelId', 0)
if 'name' in civitai_data:
checkpoint_entry['version'] = civitai_data['name']
base_model = civitai_data.get('baseModel', '')
if base_model:
checkpoint_entry['baseModel'] = base_model
if not result['base_model']:
result['base_model'] = base_model
except Exception as e:
logger.error(
f"Error fetching checkpoint info for hash "
f"{checkpoint_entry['hash']}: {e}"
)
if result["model"] is None:
result["model"] = checkpoint_entry
continue
# Modified to process resources without a type field as potential LoRAs # Modified to process resources without a type field as potential LoRAs
if resource.get("type", "lora") == "lora": if resource_type == "lora":
lora_hash = resource.get("hash", "") lora_hash = resource.get("hash", "")
# Try to get hash from the hashes field if not present in resource # Try to get hash from the hashes field if not present in resource

View File

@@ -1293,11 +1293,18 @@ class RecipeManagementHandler:
image_info.get("meta") if civitai_image_id and image_info else None image_info.get("meta") if civitai_image_id and image_info else None
) )
if civitai_image_id and image_info: if civitai_image_id and image_info:
# modelVersionId (singular) — the primary version for this
# image on CivitAI. May be absent, or may *not* be the
# checkpoint (e.g. when the image was generated with a LoRA
# as the primary subject). When absent, DO NOT fall back to
# modelVersionIds[0] — that array mixes checkpoints, LoRAs,
# and other model version IDs without ordering guarantees.
# The downstream enrichment flow will find the real
# checkpoint via meta.resources (type:"model" hash) or
# meta.civitaiResources (type:"checkpoint" version ID), so
# leaving model_ver_id as None is safe and avoids the bug
# where a LoRA version ID was treated as the checkpoint.
model_ver_id = image_info.get("modelVersionId") model_ver_id = image_info.get("modelVersionId")
if not model_ver_id:
ids = image_info.get("modelVersionIds")
if isinstance(ids, list) and ids:
model_ver_id = ids[0]
# Inject root-level modelVersionIds into meta so downstream # Inject root-level modelVersionIds into meta so downstream
# parsers (CivitaiApiMetadataParser) can discover ALL resources # parsers (CivitaiApiMetadataParser) can discover ALL resources

View File

@@ -5,7 +5,7 @@ import logging
import random import random
from typing import Optional, Dict, Tuple, Any, List, Sequence from typing import Optional, Dict, Tuple, Any, List, Sequence
from .downloader import get_downloader from .downloader import get_downloader
from .errors import RateLimitError from .errors import RateLimitError, ResourceNotFoundError
try: try:
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -482,6 +482,7 @@ class FallbackMetadataProvider(ModelMetadataProvider):
return None, "Model not found" return None, "Model not found"
async def get_model_versions(self, model_id: str) -> Optional[Dict]: async def get_model_versions(self, model_id: str) -> Optional[Dict]:
not_found_confirmed = False
for provider, label in self._iter_providers(): for provider, label in self._iter_providers():
try: try:
result = await self._call_with_rate_limit( result = await self._call_with_rate_limit(
@@ -492,8 +493,24 @@ class FallbackMetadataProvider(ModelMetadataProvider):
if result: if result:
return result return result
except RateLimitError as exc: except RateLimitError as exc:
if not_found_confirmed:
logger.debug(
"Suppressing rate limit from %s for model %s: "
"already confirmed as not found by another provider",
label,
model_id,
)
return None
exc.provider = exc.provider or label exc.provider = exc.provider or label
raise exc raise exc
except ResourceNotFoundError:
not_found_confirmed = True
logger.debug(
"Provider %s reports model %s as not found",
label,
model_id,
)
continue
except Exception as e: except Exception as e:
logger.debug("Provider %s failed for get_model_versions: %s", label, e) logger.debug("Provider %s failed for get_model_versions: %s", label, e)
continue continue

View File

@@ -397,13 +397,12 @@ class DownloadManager:
models_with_hash = len(all_models_with_hash) models_with_hash = len(all_models_with_hash)
# Calculate pending count: check which models actually need processing # Calculate pending count: check which models actually need processing.
# A model is pending if it has a hash, is not in processed_models, # A model is pending if it has a hash, is not already processed or known-failed,
# and its folder doesn't exist or is empty # and its folder doesn't exist or is empty.
pending_hashes = set() pending_hashes = set()
for model_hash, model_name in all_models_with_hash: for model_hash, model_name in all_models_with_hash:
if model_hash not in processed_models: if model_hash not in processed_models and model_hash not in failed_models:
# Check if model folder exists with files
model_dir = ExampleImagePathResolver.get_model_folder( model_dir = ExampleImagePathResolver.get_model_folder(
model_hash, active_library model_hash, active_library
) )

View File

@@ -467,7 +467,10 @@ async def test_import_remote_recipe(monkeypatch, tmp_path: Path) -> None:
class Provider: class Provider:
async def get_model_version_info(self, model_version_id): async def get_model_version_info(self, model_version_id):
provider_calls.append(model_version_id) provider_calls.append(model_version_id)
return {"baseModel": "Flux Provider"}, None return {
"baseModel": "Flux Provider",
"model": {"type": "Checkpoint", "name": "Flux"},
}, None
async def fake_get_default_metadata_provider(): async def fake_get_default_metadata_provider():
return Provider() return Provider()

View File

@@ -298,3 +298,113 @@ async def test_parse_metadata_handles_modelVersionIds(monkeypatch):
assert lora2["type"] == "lora" assert lora2["type"] == "lora"
assert lora2["hash"] == "aabbccdd0022" assert lora2["hash"] == "aabbccdd0022"
assert lora2["baseModel"] == "SDXL" assert lora2["baseModel"] == "SDXL"
@pytest.mark.asyncio
async def test_parse_metadata_extracts_checkpoint_from_resources_model_type(monkeypatch):
"""resources entries with type:"model" should be captured as the checkpoint,
not skipped (which was the old buggy behavior), and not mixed into loras."""
captured_hashes = []
async def fake_metadata_provider():
class Provider:
async def get_model_by_hash(self, model_hash):
captured_hashes.append(model_hash)
if model_hash == "a1b2c3d4e5":
return ({
"id": 999,
"modelId": 888,
"name": "v1.0",
"model": {"name": "Real Checkpoint", "type": "Checkpoint"},
"baseModel": "SDXL 1.0",
"images": [{"url": "https://image.civitai.com/cp/original=true"}],
"files": [{"type": "Model", "primary": True, "sizeKB": 1024, "name": "cp.safetensors"}]
}, None)
return None, "Model not found"
return Provider()
monkeypatch.setattr(
"py.recipes.parsers.civitai_image.get_default_metadata_provider",
fake_metadata_provider,
)
parser = CivitaiApiMetadataParser()
metadata = {
"prompt": "test",
"resources": [
{"hash": "a1b2c3d4e5", "name": "Real Checkpoint", "type": "model"},
{"hash": "f6g7h8i9j0", "name": "Some LoRA", "type": "lora", "weight": 0.8},
],
"Model hash": "a1b2c3d4e5",
}
result = await parser.parse_metadata(metadata)
# The type:"model" resource should be in result["model"], not in result["loras"]
assert result["model"] is not None, "checkpoint model should be extracted"
assert result["model"]["name"] == "Real Checkpoint"
assert result["model"]["hash"] == "a1b2c3d4e5"
assert result["model"]["type"] == "model"
# The LoRA resource should be in result["loras"]
assert len(result["loras"]) == 1
assert result["loras"][0]["name"] == "Some LoRA"
# The checkpoint hash should have triggered a lookup
assert "a1b2c3d4e5" in captured_hashes
@pytest.mark.asyncio
async def test_parse_metadata_resources_model_type_does_not_duplicate_checkpoint_in_loras(monkeypatch):
"""When a resources entry has type:"model", it should NOT also appear in loras.
Regression test for the bug where the checkpoint model appeared in both places."""
async def fake_metadata_provider():
class Provider:
async def get_model_by_hash(self, model_hash):
if model_hash == "cp123hash":
return ({
"id": 100,
"modelId": 200,
"name": "v2",
"model": {"name": "My Checkpoint", "type": "Checkpoint"},
"baseModel": "SDXL",
"files": [{"type": "Model", "primary": True, "sizeKB": 1024, "name": "cp.safetensors"}]
}, None)
if model_hash == "lora1hash":
return ({
"id": 300,
"modelId": 400,
"name": "v1",
"model": {"name": "Style LoRA", "type": "LORA"},
"baseModel": "SDXL",
"files": [{"type": "Model", "primary": True, "sizeKB": 512, "name": "style.safetensors"}]
}, None)
return None, "Model not found"
return Provider()
monkeypatch.setattr(
"py.recipes.parsers.civitai_image.get_default_metadata_provider",
fake_metadata_provider,
)
parser = CivitaiApiMetadataParser()
metadata = {
"resources": [
{"hash": "cp123hash", "name": "My Checkpoint", "type": "model"},
{"hash": "lora1hash", "name": "Style LoRA", "type": "lora", "weight": 0.5},
],
}
result = await parser.parse_metadata(metadata)
# Checkpoint must NOT appear in loras
lora_names = {l["name"] for l in result["loras"]}
assert "My Checkpoint" not in lora_names
assert "Style LoRA" in lora_names
# Checkpoint must be in result["model"]
assert result["model"] is not None
assert result["model"]["name"] == "My Checkpoint"

View File

@@ -94,7 +94,7 @@ async def test_repair_all_recipes_with_enriched_checkpoint_id(setup_scanner):
"id": 5678, "id": 5678,
"modelId": 1234, "modelId": 1234,
"name": "v1.0", "name": "v1.0",
"model": {"name": "Full Model Name"}, "model": {"name": "Full Model Name", "type": "Checkpoint"},
"baseModel": "SDXL 1.0", "baseModel": "SDXL 1.0",
"images": [{"url": "https://image.url/thumb.jpg"}], "images": [{"url": "https://image.url/thumb.jpg"}],
"files": [{"type": "Model", "hashes": {"SHA256": "ABCDEF"}, "name": "full_filename.safetensors"}] "files": [{"type": "Model", "hashes": {"SHA256": "ABCDEF"}, "name": "full_filename.safetensors"}]
@@ -142,7 +142,7 @@ async def test_repair_all_recipes_supports_civitai_red_source_url(setup_scanner)
"id": 5678, "id": 5678,
"modelId": 1234, "modelId": 1234,
"name": "v1.0", "name": "v1.0",
"model": {"name": "Full Model Name"}, "model": {"name": "Full Model Name", "type": "Checkpoint"},
"baseModel": "SDXL 1.0", "baseModel": "SDXL 1.0",
"images": [{"url": "https://image.url/thumb.jpg"}], "images": [{"url": "https://image.url/thumb.jpg"}],
"files": [ "files": [
@@ -183,7 +183,7 @@ async def test_repair_all_recipes_with_enriched_checkpoint_hash(setup_scanner):
"id": 999, "id": 999,
"modelId": 888, "modelId": 888,
"name": "v2.0", "name": "v2.0",
"model": {"name": "Hashed Model"}, "model": {"name": "Hashed Model", "type": "Checkpoint"},
"baseModel": "SD 1.5", "baseModel": "SD 1.5",
"files": [{"type": "Model", "hashes": {"SHA256": "hash123"}, "name": "hashed.safetensors"}] "files": [{"type": "Model", "hashes": {"SHA256": "hash123"}, "name": "hashed.safetensors"}]
}, None) }, None)