fix(recipe): use resources type field to identify checkpoint instead of modelVersionIds[0]

When importing a CivitAI image as a recipe, modelVersionIds[0] was blindly used as the checkpoint version ID. This array mixes checkpoints and LoRAs without ordering guarantees, causing LoRAs to be saved as the recipe checkpoint. Fix by: 1. Removing the modelVersionIds[0] fallback in _download_remote_media 2. Parsing resources entries with type:"model" as the checkpoint 3. Adding model type validation in populate_checkpoint_from_civitai Also add 2 tests for the new behavior and fix 3 tests whose mocks lacked the required model.type field.
2026-06-09 12:39:23 -03:00 · 2026-05-28 15:46:33 +08:00
parent 3f6824eef6
commit 34791c2ad7
6 changed files with 203 additions and 10 deletions
--- a/py/recipes/base.py
+++ b/py/recipes/base.py
@@ -7,7 +7,7 @@ import re
 from typing import Dict, List, Any, Optional, Tuple
 from abc import ABC, abstractmethod
 from ..config import config
-from ..utils.constants import VALID_LORA_TYPES
+from ..utils.constants import VALID_LORA_TYPES, VALID_CHECKPOINT_SUB_TYPES
 from ..utils.civitai_utils import rewrite_preview_url
 logger = logging.getLogger(__name__)
@@ -173,6 +173,20 @@ class RecipeMetadataParser(ABC):
                checkpoint['isDeleted'] = True
                return checkpoint
            # Validate that the model type is actually a checkpoint.
            # Unlike populate_lora_from_civitai which has this check,
            # this function was missing type validation — allowing LoRA
            # version data to be saved as the recipe's checkpoint when the
            # wrong version ID was passed downstream (fixed in v2.7+).
            model_type = civitai_data.get('model', {}).get('type', '').lower()
            if model_type not in VALID_CHECKPOINT_SUB_TYPES:
                logger.warning(
                    f"Cannot populate checkpoint: model version {civitai_data.get('id')} "
                    f"has type '{model_type}', expected one of {VALID_CHECKPOINT_SUB_TYPES}. "
                    f"Skipping checkpoint enrichment."
                )
                return checkpoint
            if 'model' in civitai_data and 'name' in civitai_data['model']:
                checkpoint['name'] = civitai_data['model']['name']
--- a/py/recipes/parsers/civitai_image.py
+++ b/py/recipes/parsers/civitai_image.py
@@ -185,8 +185,67 @@ class CivitaiApiMetadataParser(RecipeMetadataParser):
            # Process standard resources array
            if "resources" in metadata and isinstance(metadata["resources"], list):
                for resource in metadata["resources"]:
                    resource_type = resource.get("type", "lora")
                    # Track resources with type "model" — these are checkpoint models.
                    # The resources array is the most reliable source for checkpoint
                    # identification because it has an explicit type field and hash,
                    # unlike modelVersionIds which is a flat list with no type info.
                    if resource_type == "model":
                        checkpoint_entry = {
                            "id": 0,
                            "modelId": 0,
                            "name": resource.get("name", "Unknown Model"),
                            "version": "",
                            "type": resource.get("type", "model"),
                            "existsLocally": False,
                            "localPath": None,
                            "file_name": resource.get("name", ""),
                            "hash": resource.get("hash", "") or "",
                            "thumbnailUrl": "/loras_static/images/no-preview.png",
                            "baseModel": "",
                            "size": 0,
                            "downloadUrl": "",
                            "isDeleted": False,
                        }
                        # Try to look up base model from the checkpoint hash
                        if checkpoint_entry["hash"] and metadata_provider:
                            try:
                                civitai_info = (
                                    await metadata_provider.get_model_by_hash(
                                        checkpoint_entry["hash"]
                                    )
                                )
                                civitai_data, error_msg = (
                                    (civitai_info, None)
                                    if not isinstance(civitai_info, tuple)
                                    else civitai_info
                                )
                                if civitai_data and error_msg != "Model not found":
                                    if 'model' in civitai_data and 'name' in civitai_data['model']:
                                        checkpoint_entry['name'] = civitai_data['model']['name']
                                    checkpoint_entry['id'] = civitai_data.get('id', 0)
                                    checkpoint_entry['modelId'] = civitai_data.get('modelId', 0)
                                    if 'name' in civitai_data:
                                        checkpoint_entry['version'] = civitai_data['name']
                                    base_model = civitai_data.get('baseModel', '')
                                    if base_model:
                                        checkpoint_entry['baseModel'] = base_model
                                        if not result['base_model']:
                                            result['base_model'] = base_model
                            except Exception as e:
                                logger.error(
                                    f"Error fetching checkpoint info for hash "
                                    f"{checkpoint_entry['hash']}: {e}"
                                )
                        if result["model"] is None:
                            result["model"] = checkpoint_entry
                        continue
                    # Modified to process resources without a type field as potential LoRAs
-                    if resource.get("type", "lora") == "lora":
+                    if resource_type == "lora":
                        lora_hash = resource.get("hash", "")
                        # Try to get hash from the hashes field if not present in resource
--- a/py/routes/handlers/recipe_handlers.py
+++ b/py/routes/handlers/recipe_handlers.py
@@ -1293,11 +1293,18 @@ class RecipeManagementHandler:
                    image_info.get("meta") if civitai_image_id and image_info else None
                )
                if civitai_image_id and image_info:
                    # modelVersionId (singular) — the primary version for this
                    # image on CivitAI.  May be absent, or may *not* be the
                    # checkpoint (e.g. when the image was generated with a LoRA
                    # as the primary subject).  When absent, DO NOT fall back to
                    # modelVersionIds[0] — that array mixes checkpoints, LoRAs,
                    # and other model version IDs without ordering guarantees.
                    # The downstream enrichment flow will find the real
                    # checkpoint via meta.resources (type:"model" hash) or
                    # meta.civitaiResources (type:"checkpoint" version ID), so
                    # leaving model_ver_id as None is safe and avoids the bug
                    # where a LoRA version ID was treated as the checkpoint.
                    model_ver_id = image_info.get("modelVersionId")
                    if not model_ver_id:
                        ids = image_info.get("modelVersionIds")
                        if isinstance(ids, list) and ids:
                            model_ver_id = ids[0]
                    # Inject root-level modelVersionIds into meta so downstream
                    # parsers (CivitaiApiMetadataParser) can discover ALL resources
--- a/tests/routes/test_recipe_routes.py
+++ b/tests/routes/test_recipe_routes.py
@@ -467,7 +467,10 @@ async def test_import_remote_recipe(monkeypatch, tmp_path: Path) -> None:
    class Provider:
        async def get_model_version_info(self, model_version_id):
            provider_calls.append(model_version_id)
-            return {"baseModel": "Flux Provider"}, None
+            return {
                "baseModel": "Flux Provider",
                "model": {"type": "Checkpoint", "name": "Flux"},
            }, None
    async def fake_get_default_metadata_provider():
        return Provider()
--- a/tests/services/test_civitai_image_parser.py
+++ b/tests/services/test_civitai_image_parser.py
@@ -298,3 +298,113 @@ async def test_parse_metadata_handles_modelVersionIds(monkeypatch):
    assert lora2["type"] == "lora"
    assert lora2["hash"] == "aabbccdd0022"
    assert lora2["baseModel"] == "SDXL"
@pytest.mark.asyncio
 async def test_parse_metadata_extracts_checkpoint_from_resources_model_type(monkeypatch):
    """resources entries with type:"model" should be captured as the checkpoint,
    not skipped (which was the old buggy behavior), and not mixed into loras."""
    captured_hashes = []
    async def fake_metadata_provider():
        class Provider:
            async def get_model_by_hash(self, model_hash):
                captured_hashes.append(model_hash)
                if model_hash == "a1b2c3d4e5":
                    return ({
                        "id": 999,
                        "modelId": 888,
                        "name": "v1.0",
                        "model": {"name": "Real Checkpoint", "type": "Checkpoint"},
                        "baseModel": "SDXL 1.0",
                        "images": [{"url": "https://image.civitai.com/cp/original=true"}],
                        "files": [{"type": "Model", "primary": True, "sizeKB": 1024, "name": "cp.safetensors"}]
                    }, None)
                return None, "Model not found"
        return Provider()
    monkeypatch.setattr(
        "py.recipes.parsers.civitai_image.get_default_metadata_provider",
        fake_metadata_provider,
    )
    parser = CivitaiApiMetadataParser()
    metadata = {
        "prompt": "test",
        "resources": [
            {"hash": "a1b2c3d4e5", "name": "Real Checkpoint", "type": "model"},
            {"hash": "f6g7h8i9j0", "name": "Some LoRA", "type": "lora", "weight": 0.8},
        ],
        "Model hash": "a1b2c3d4e5",
    }
    result = await parser.parse_metadata(metadata)
    # The type:"model" resource should be in result["model"], not in result["loras"]
    assert result["model"] is not None, "checkpoint model should be extracted"
    assert result["model"]["name"] == "Real Checkpoint"
    assert result["model"]["hash"] == "a1b2c3d4e5"
    assert result["model"]["type"] == "model"
    # The LoRA resource should be in result["loras"]
    assert len(result["loras"]) == 1
    assert result["loras"][0]["name"] == "Some LoRA"
    # The checkpoint hash should have triggered a lookup
    assert "a1b2c3d4e5" in captured_hashes
@pytest.mark.asyncio
 async def test_parse_metadata_resources_model_type_does_not_duplicate_checkpoint_in_loras(monkeypatch):
    """When a resources entry has type:"model", it should NOT also appear in loras.
    Regression test for the bug where the checkpoint model appeared in both places."""
    async def fake_metadata_provider():
        class Provider:
            async def get_model_by_hash(self, model_hash):
                if model_hash == "cp123hash":
                    return ({
                        "id": 100,
                        "modelId": 200,
                        "name": "v2",
                        "model": {"name": "My Checkpoint", "type": "Checkpoint"},
                        "baseModel": "SDXL",
                        "files": [{"type": "Model", "primary": True, "sizeKB": 1024, "name": "cp.safetensors"}]
                    }, None)
                if model_hash == "lora1hash":
                    return ({
                        "id": 300,
                        "modelId": 400,
                        "name": "v1",
                        "model": {"name": "Style LoRA", "type": "LORA"},
                        "baseModel": "SDXL",
                        "files": [{"type": "Model", "primary": True, "sizeKB": 512, "name": "style.safetensors"}]
                    }, None)
                return None, "Model not found"
        return Provider()
    monkeypatch.setattr(
        "py.recipes.parsers.civitai_image.get_default_metadata_provider",
        fake_metadata_provider,
    )
    parser = CivitaiApiMetadataParser()
    metadata = {
        "resources": [
            {"hash": "cp123hash", "name": "My Checkpoint", "type": "model"},
            {"hash": "lora1hash", "name": "Style LoRA", "type": "lora", "weight": 0.5},
        ],
    }
    result = await parser.parse_metadata(metadata)
    # Checkpoint must NOT appear in loras
    lora_names = {l["name"] for l in result["loras"]}
    assert "My Checkpoint" not in lora_names
    assert "Style LoRA" in lora_names
    # Checkpoint must be in result["model"]
    assert result["model"] is not None
    assert result["model"]["name"] == "My Checkpoint"
--- a/tests/services/test_recipe_repair.py
+++ b/tests/services/test_recipe_repair.py
@@ -94,7 +94,7 @@ async def test_repair_all_recipes_with_enriched_checkpoint_id(setup_scanner):
        "id": 5678,
        "modelId": 1234,
        "name": "v1.0",
-        "model": {"name": "Full Model Name"},
+        "model": {"name": "Full Model Name", "type": "Checkpoint"},
        "baseModel": "SDXL 1.0",
        "images": [{"url": "https://image.url/thumb.jpg"}],
        "files": [{"type": "Model", "hashes": {"SHA256": "ABCDEF"}, "name": "full_filename.safetensors"}]
@@ -142,7 +142,7 @@ async def test_repair_all_recipes_supports_civitai_red_source_url(setup_scanner)
            "id": 5678,
            "modelId": 1234,
            "name": "v1.0",
-            "model": {"name": "Full Model Name"},
+            "model": {"name": "Full Model Name", "type": "Checkpoint"},
            "baseModel": "SDXL 1.0",
            "images": [{"url": "https://image.url/thumb.jpg"}],
            "files": [
@@ -183,7 +183,7 @@ async def test_repair_all_recipes_with_enriched_checkpoint_hash(setup_scanner):
        "id": 999,
        "modelId": 888,
        "name": "v2.0",
-        "model": {"name": "Hashed Model"},
+        "model": {"name": "Hashed Model", "type": "Checkpoint"},
        "baseModel": "SD 1.5",
        "files": [{"type": "Model", "hashes": {"SHA256": "hash123"}, "name": "hashed.safetensors"}]
    }, None)