From 53c4165d828bae2c62c9078760ff84b8ce67f9bc Mon Sep 17 00:00:00 2001 From: Will Miao Date: Sat, 29 Nov 2025 08:13:55 +0800 Subject: [PATCH] feat(parser): enhance model metadata extraction in Automatic1111 parser - Add MODEL_NAME_PATTERN regex to extract model names from parameters - Extract model hash from parsed hashes when available in metadata - Add checkpoint model hash and name extraction from parameters section - Implement checkpoint resource processing from Civitai metadata - Improve model information completeness for better recipe tracking --- py/recipes/parsers/automatic.py | 120 +++++++++++++++++- py/recipes/parsers/meta_format.py | 57 +++++++-- py/recipes/parsers/recipe_format.py | 44 ++++++- static/js/managers/import/ImageProcessor.js | 11 ++ .../test_automatic_metadata_parser.py | 120 ++++++++++++++++++ tests/services/test_meta_format_parser.py | 61 +++++++++ tests/services/test_recipe_format_parser.py | 67 ++++++++++ 7 files changed, 467 insertions(+), 13 deletions(-) create mode 100644 tests/services/test_automatic_metadata_parser.py create mode 100644 tests/services/test_meta_format_parser.py create mode 100644 tests/services/test_recipe_format_parser.py diff --git a/py/recipes/parsers/automatic.py b/py/recipes/parsers/automatic.py index b7399c72..f86d134d 100644 --- a/py/recipes/parsers/automatic.py +++ b/py/recipes/parsers/automatic.py @@ -1,6 +1,7 @@ """Parser for Automatic1111 metadata format.""" import re +import os import json import logging from typing import Dict, Any @@ -22,6 +23,7 @@ class AutomaticMetadataParser(RecipeMetadataParser): CIVITAI_METADATA_REGEX = r', Civitai metadata:\s*(\{.*?\})' EXTRANETS_REGEX = r'<(lora|hypernet):([^:]+):(-?[0-9.]+)>' MODEL_HASH_PATTERN = r'Model hash: ([a-zA-Z0-9]+)' + MODEL_NAME_PATTERN = r'Model: ([^,]+)' VAE_HASH_PATTERN = r'VAE hash: ([a-zA-Z0-9]+)' def is_metadata_matching(self, user_comment: str) -> bool: @@ -115,6 +117,12 @@ class AutomaticMetadataParser(RecipeMetadataParser): except json.JSONDecodeError: logger.error("Error parsing hashes JSON") + # Pick up model hash from parsed hashes if available + if "hashes" in metadata and not metadata.get("model_hash"): + model_hash_from_hashes = metadata["hashes"].get("model") + if model_hash_from_hashes: + metadata["model_hash"] = model_hash_from_hashes + # Extract Lora hashes in alternative format lora_hashes_match = re.search(self.LORA_HASHES_REGEX, params_section) if not hashes_match and lora_hashes_match: @@ -137,6 +145,17 @@ class AutomaticMetadataParser(RecipeMetadataParser): params_section = params_section.replace(lora_hashes_match.group(0), '') except Exception as e: logger.error(f"Error parsing Lora hashes: {e}") + + # Extract checkpoint model hash/name when provided outside Civitai resources + model_hash_match = re.search(self.MODEL_HASH_PATTERN, params_section) + if model_hash_match: + metadata["model_hash"] = model_hash_match.group(1).strip() + params_section = params_section.replace(model_hash_match.group(0), '') + + model_name_match = re.search(self.MODEL_NAME_PATTERN, params_section) + if model_name_match: + metadata["model_name"] = model_name_match.group(1).strip() + params_section = params_section.replace(model_name_match.group(0), '') # Extract basic parameters param_pattern = r'([A-Za-z\s]+): ([^,]+)' @@ -178,9 +197,10 @@ class AutomaticMetadataParser(RecipeMetadataParser): metadata["gen_params"] = gen_params - # Extract LoRA information + # Extract LoRA and checkpoint information loras = [] base_model_counts = {} + checkpoint = None # First use Civitai resources if available (more reliable source) if metadata.get("civitai_resources"): @@ -202,6 +222,50 @@ class AutomaticMetadataParser(RecipeMetadataParser): resource["modelVersionId"] = air_modelVersionId # --- End added --- + if resource.get("type") == "checkpoint" and resource.get("modelVersionId"): + version_id = resource.get("modelVersionId") + version_id_str = str(version_id) + checkpoint_entry = { + 'id': version_id, + 'modelId': resource.get("modelId", 0), + 'name': resource.get("modelName", "Unknown Checkpoint"), + 'version': resource.get("modelVersionName", resource.get("versionName", "")), + 'type': resource.get("type", "checkpoint"), + 'existsLocally': False, + 'localPath': None, + 'file_name': resource.get("modelName", ""), + 'hash': resource.get("hash", "") or "", + 'thumbnailUrl': '/loras_static/images/no-preview.png', + 'baseModel': '', + 'size': 0, + 'downloadUrl': '', + 'isDeleted': False + } + + if metadata_provider: + try: + civitai_info = await metadata_provider.get_model_version_info(version_id_str) + checkpoint_entry = await self.populate_checkpoint_from_civitai( + checkpoint_entry, + civitai_info + ) + except Exception as e: + logger.error( + "Error fetching Civitai info for checkpoint version %s: %s", + version_id, + e, + ) + + # Prefer the first checkpoint found + if checkpoint_entry.get("baseModel"): + base_model_value = checkpoint_entry["baseModel"] + base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1 + + if checkpoint is None: + checkpoint = checkpoint_entry + + continue + if resource.get("type") in ["lora", "lycoris", "hypernet"] and resource.get("modelVersionId"): # Initialize lora entry lora_entry = { @@ -237,6 +301,52 @@ class AutomaticMetadataParser(RecipeMetadataParser): loras.append(lora_entry) + # Fallback checkpoint parsing from generic "Model" and "Model hash" fields + if checkpoint is None: + model_hash = metadata.get("model_hash") + if not model_hash and metadata.get("hashes"): + model_hash = metadata["hashes"].get("model") + + model_name = metadata.get("model_name") + file_name = "" + if model_name: + cleaned_name = re.split(r"[\\\\/]", model_name)[-1] + file_name = os.path.splitext(cleaned_name)[0] + + if model_hash or model_name: + checkpoint_entry = { + 'id': 0, + 'modelId': 0, + 'name': model_name or "Unknown Checkpoint", + 'version': '', + 'type': 'checkpoint', + 'hash': model_hash or "", + 'existsLocally': False, + 'localPath': None, + 'file_name': file_name, + 'thumbnailUrl': '/loras_static/images/no-preview.png', + 'baseModel': '', + 'size': 0, + 'downloadUrl': '', + 'isDeleted': False + } + + if metadata_provider and model_hash: + try: + civitai_info = await metadata_provider.get_model_by_hash(model_hash) + checkpoint_entry = await self.populate_checkpoint_from_civitai( + checkpoint_entry, + civitai_info + ) + except Exception as e: + logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}") + + if checkpoint_entry.get("baseModel"): + base_model_value = checkpoint_entry["baseModel"] + base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1 + + checkpoint = checkpoint_entry + # If no LoRAs from Civitai resources or to supplement, extract from metadata["hashes"] if not loras or len(loras) == 0: # Extract lora weights from extranet tags in prompt (for later use) @@ -300,7 +410,9 @@ class AutomaticMetadataParser(RecipeMetadataParser): # Try to get base model from resources or make educated guess base_model = None - if base_model_counts: + if checkpoint and checkpoint.get("baseModel"): + base_model = checkpoint.get("baseModel") + elif base_model_counts: # Use the most common base model from the loras base_model = max(base_model_counts.items(), key=lambda x: x[1])[0] @@ -317,6 +429,10 @@ class AutomaticMetadataParser(RecipeMetadataParser): 'gen_params': filtered_gen_params, 'from_automatic_metadata': True } + + if checkpoint: + result['checkpoint'] = checkpoint + result['model'] = checkpoint return result diff --git a/py/recipes/parsers/meta_format.py b/py/recipes/parsers/meta_format.py index 5eb53af7..2c512103 100644 --- a/py/recipes/parsers/meta_format.py +++ b/py/recipes/parsers/meta_format.py @@ -1,5 +1,6 @@ """Parser for meta format (Lora_N Model hash) metadata.""" +import os import re import logging from typing import Dict, Any @@ -145,14 +146,53 @@ class MetaFormatParser(RecipeMetadataParser): loras.append(lora_entry) - # Extract model information - model = None - if 'model' in metadata: - model = metadata['model'] + # Extract checkpoint information from generic Model/Model hash fields + checkpoint = None + model_hash = metadata.get("model_hash") + model_name = metadata.get("model") + + if model_hash or model_name: + cleaned_name = None + if model_name: + cleaned_name = re.split(r"[\\\\/]", model_name)[-1] + cleaned_name = os.path.splitext(cleaned_name)[0] + + checkpoint_entry = { + 'id': 0, + 'modelId': 0, + 'name': model_name or "Unknown Checkpoint", + 'version': '', + 'type': 'checkpoint', + 'hash': model_hash or "", + 'existsLocally': False, + 'localPath': None, + 'file_name': cleaned_name or (model_name or ""), + 'thumbnailUrl': '/loras_static/images/no-preview.png', + 'baseModel': '', + 'size': 0, + 'downloadUrl': '', + 'isDeleted': False + } + + if metadata_provider and model_hash: + try: + civitai_info = await metadata_provider.get_model_by_hash(model_hash) + checkpoint_entry = await self.populate_checkpoint_from_civitai( + checkpoint_entry, + civitai_info + ) + except Exception as e: + logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}") + + if checkpoint_entry.get("baseModel"): + base_model_value = checkpoint_entry["baseModel"] + base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1 + + checkpoint = checkpoint_entry - # Set base_model to the most common one from civitai_info - base_model = None - if base_model_counts: + # Set base_model to the most common one from civitai_info or checkpoint + base_model = checkpoint["baseModel"] if checkpoint and checkpoint.get("baseModel") else None + if not base_model and base_model_counts: base_model = max(base_model_counts.items(), key=lambda x: x[1])[0] # Extract generation parameters for recipe metadata @@ -170,7 +210,8 @@ class MetaFormatParser(RecipeMetadataParser): 'loras': loras, 'gen_params': gen_params, 'raw_metadata': metadata, - 'from_meta_format': True + 'from_meta_format': True, + **({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {}) } except Exception as e: diff --git a/py/recipes/parsers/recipe_format.py b/py/recipes/parsers/recipe_format.py index 5380cc69..2684e01d 100644 --- a/py/recipes/parsers/recipe_format.py +++ b/py/recipes/parsers/recipe_format.py @@ -94,8 +94,45 @@ class RecipeFormatParser(RecipeMetadataParser): lora_entry['thumbnailUrl'] = '/loras_static/images/no-preview.png' loras.append(lora_entry) - + logger.info(f"Found {len(loras)} loras in recipe metadata") + + # Process checkpoint information if present + checkpoint = None + checkpoint_data = recipe_metadata.get('checkpoint') or {} + if isinstance(checkpoint_data, dict) and checkpoint_data: + version_id = checkpoint_data.get('modelVersionId') or checkpoint_data.get('id') + checkpoint_entry = { + 'id': version_id or 0, + 'modelId': checkpoint_data.get('modelId', 0), + 'name': checkpoint_data.get('name', 'Unknown Checkpoint'), + 'version': checkpoint_data.get('version', ''), + 'type': checkpoint_data.get('type', 'checkpoint'), + 'hash': checkpoint_data.get('hash', ''), + 'existsLocally': False, + 'localPath': None, + 'file_name': checkpoint_data.get('file_name', ''), + 'thumbnailUrl': '/loras_static/images/no-preview.png', + 'baseModel': '', + 'size': 0, + 'downloadUrl': '', + 'isDeleted': False + } + + if metadata_provider: + try: + civitai_info = None + if version_id: + civitai_info = await metadata_provider.get_model_version_info(str(version_id)) + elif checkpoint_entry.get('hash'): + civitai_info = await metadata_provider.get_model_by_hash(checkpoint_entry['hash']) + + if civitai_info: + checkpoint_entry = await self.populate_checkpoint_from_civitai(checkpoint_entry, civitai_info) + except Exception as e: + logger.error(f"Error fetching Civitai info for checkpoint in recipe metadata: {e}") + + checkpoint = checkpoint_entry # Filter gen_params to only include recognized keys filtered_gen_params = {} @@ -105,12 +142,13 @@ class RecipeFormatParser(RecipeMetadataParser): filtered_gen_params[key] = value return { - 'base_model': recipe_metadata.get('base_model', ''), + 'base_model': checkpoint['baseModel'] if checkpoint and checkpoint.get('baseModel') else recipe_metadata.get('base_model', ''), 'loras': loras, 'gen_params': filtered_gen_params, 'tags': recipe_metadata.get('tags', []), 'title': recipe_metadata.get('title', ''), - 'from_recipe_metadata': True + 'from_recipe_metadata': True, + **({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {}) } except Exception as e: diff --git a/static/js/managers/import/ImageProcessor.js b/static/js/managers/import/ImageProcessor.js index 22b7e301..a3b62c2b 100644 --- a/static/js/managers/import/ImageProcessor.js +++ b/static/js/managers/import/ImageProcessor.js @@ -83,6 +83,7 @@ export class ImageProcessor { } this.importManager.recipeData = recipeData; + this._ensureCheckpointMetadata(); // Check if we have an error message if (this.importManager.recipeData.error) { @@ -134,6 +135,7 @@ export class ImageProcessor { } this.importManager.recipeData = recipeData; + this._ensureCheckpointMetadata(); // Check if we have an error message if (this.importManager.recipeData.error) { @@ -188,6 +190,7 @@ export class ImageProcessor { } this.importManager.recipeData = recipeData; + this._ensureCheckpointMetadata(); // Check if we have an error message if (this.importManager.recipeData.error) { @@ -215,4 +218,12 @@ export class ImageProcessor { this.importManager.loadingManager.hide(); } } + + _ensureCheckpointMetadata() { + if (!this.importManager.recipeData) return; + + if (this.importManager.recipeData.model && !this.importManager.recipeData.checkpoint) { + this.importManager.recipeData.checkpoint = this.importManager.recipeData.model; + } + } } diff --git a/tests/services/test_automatic_metadata_parser.py b/tests/services/test_automatic_metadata_parser.py new file mode 100644 index 00000000..c708ce73 --- /dev/null +++ b/tests/services/test_automatic_metadata_parser.py @@ -0,0 +1,120 @@ +import pytest + +from py.recipes.parsers.automatic import AutomaticMetadataParser + + +@pytest.mark.asyncio +async def test_parse_metadata_extracts_checkpoint_from_civitai_resources(monkeypatch): + checkpoint_info = { + "id": 2442439, + "modelId": 123456, + "model": {"name": "Z Image", "type": "checkpoint"}, + "name": "Turbo", + "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}], + "baseModel": "sdxl", + "downloadUrl": "https://civitai.com/api/download/checkpoint", + "files": [ + { + "type": "Model", + "primary": True, + "sizeKB": 2048, + "name": "Z_Image_Turbo.safetensors", + "hashes": {"SHA256": "ABC123FF"}, + } + ], + } + + async def fake_metadata_provider(): + class Provider: + async def get_model_version_info(self, version_id): + assert version_id == "2442439" + return checkpoint_info, None + + return Provider() + + monkeypatch.setattr( + "py.recipes.parsers.automatic.get_default_metadata_provider", + fake_metadata_provider, + ) + + parser = AutomaticMetadataParser() + + metadata_text = ( + "Negative space, fog, BLACK blue color GRADIENT BACKGROUND, a vintage car in the middle, " + "FOG, and a silhouetted figure near the car, in the style of the Blade Runner movie " + "Negative prompt: Steps: 23, Sampler: Undefined, CFG scale: 3.5, Seed: 1760020955, " + "Size: 832x1216, Clip skip: 2, Created Date: 2025-11-28T09:18:43.5269343Z, " + 'Civitai resources: [{"type":"checkpoint","modelVersionId":2442439,"modelName":"Z Image","modelVersionName":"Turbo"}], ' + "Civitai metadata: {}" + ) + + result = await parser.parse_metadata(metadata_text) + + checkpoint = result.get("checkpoint") + assert checkpoint is not None + assert checkpoint["name"] == "Z Image" + assert checkpoint["version"] == "Turbo" + assert checkpoint["type"] == "checkpoint" + assert checkpoint["modelId"] == 123456 + assert checkpoint["hash"] == "abc123ff" + assert checkpoint["file_name"] == "Z_Image_Turbo" + assert checkpoint["thumbnailUrl"].endswith("width=450,optimized=true") + assert result["model"] == checkpoint + assert result["base_model"] == "sdxl" + assert result["loras"] == [] + + +@pytest.mark.asyncio +async def test_parse_metadata_extracts_checkpoint_from_model_hash(monkeypatch): + checkpoint_info = { + "id": 98765, + "modelId": 654321, + "model": {"name": "Flux Illustrious", "type": "checkpoint"}, + "name": "v1", + "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}], + "baseModel": "flux", + "downloadUrl": "https://civitai.com/api/download/checkpoint", + "files": [ + { + "type": "Model", + "primary": True, + "sizeKB": 1024, + "name": "FluxIllustrious_v1.safetensors", + "hashes": {"SHA256": "C3688EE04C"}, + } + ], + } + + async def fake_metadata_provider(): + class Provider: + async def get_model_by_hash(self, model_hash): + assert model_hash == "c3688ee04c" + return checkpoint_info, None + + return Provider() + + monkeypatch.setattr( + "py.recipes.parsers.automatic.get_default_metadata_provider", + fake_metadata_provider, + ) + + parser = AutomaticMetadataParser() + + metadata_text = ( + "A cyberpunk portrait with neon highlights.\n" + "Negative prompt: low quality\n" + "Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 123456, Size: 832x1216, " + "Model hash: c3688ee04c, Model: models/waiNSFWIllustrious_v110.safetensors" + ) + + result = await parser.parse_metadata(metadata_text) + + checkpoint = result.get("checkpoint") + assert checkpoint is not None + assert checkpoint["hash"] == "c3688ee04c" + assert checkpoint["name"] == "Flux Illustrious" + assert checkpoint["version"] == "v1" + assert checkpoint["file_name"] == "FluxIllustrious_v1" + assert result["model"] == checkpoint + assert result["base_model"] == "flux" + assert result["loras"] == [] diff --git a/tests/services/test_meta_format_parser.py b/tests/services/test_meta_format_parser.py new file mode 100644 index 00000000..23a5d91f --- /dev/null +++ b/tests/services/test_meta_format_parser.py @@ -0,0 +1,61 @@ +import pytest + +from py.recipes.parsers.meta_format import MetaFormatParser + + +@pytest.mark.asyncio +async def test_meta_format_parser_extracts_checkpoint_from_model_hash(monkeypatch): + checkpoint_info = { + "id": 222333, + "modelId": 999888, + "model": {"name": "Fluxmania V5P", "type": "checkpoint"}, + "name": "v5p", + "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}], + "baseModel": "flux", + "downloadUrl": "https://civitai.com/api/download/checkpoint", + "files": [ + { + "type": "Model", + "primary": True, + "sizeKB": 1024, + "name": "Fluxmania_V5P.safetensors", + "hashes": {"SHA256": "8AE0583B06"}, + } + ], + } + + async def fake_metadata_provider(): + class Provider: + async def get_model_by_hash(self, model_hash): + assert model_hash == "8ae0583b06" + return checkpoint_info, None + + return Provider() + + monkeypatch.setattr( + "py.recipes.parsers.meta_format.get_default_metadata_provider", + fake_metadata_provider, + ) + + parser = MetaFormatParser() + + metadata_text = ( + "Shimmering metal forms\n" + "Negative prompt: flat color\n" + "Steps: 25, Sampler: dpmpp_2m_sgm_uniform, Seed: 471889513588087, " + "Model: Fluxmania V5P.safetensors, Model hash: 8ae0583b06, VAE: ae.sft, " + "Lora_0 Model name: ArtVador I.safetensors, Lora_0 Model hash: 08f7133a58, " + "Lora_0 Strength model: 0.65, Lora_0 Strength clip: 0.65" + ) + + result = await parser.parse_metadata(metadata_text) + + checkpoint = result.get("checkpoint") + assert checkpoint is not None + assert checkpoint["hash"] == "8ae0583b06" + assert checkpoint["name"] == "Fluxmania V5P" + assert checkpoint["version"] == "v5p" + assert checkpoint["file_name"] == "Fluxmania_V5P" + assert result["model"] == checkpoint + assert result["base_model"] == "flux" + assert len(result["loras"]) == 1 diff --git a/tests/services/test_recipe_format_parser.py b/tests/services/test_recipe_format_parser.py new file mode 100644 index 00000000..4214e72d --- /dev/null +++ b/tests/services/test_recipe_format_parser.py @@ -0,0 +1,67 @@ +import json +import pytest + +from py.recipes.parsers.recipe_format import RecipeFormatParser + + +@pytest.mark.asyncio +async def test_recipe_format_parser_populates_checkpoint(monkeypatch): + checkpoint_info = { + "id": 777111, + "modelId": 333222, + "model": {"name": "Z Image", "type": "checkpoint"}, + "name": "Turbo", + "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}], + "baseModel": "sdxl", + "downloadUrl": "https://civitai.com/api/download/checkpoint", + "files": [ + { + "type": "Model", + "primary": True, + "sizeKB": 2048, + "name": "Z_Image_Turbo.safetensors", + "hashes": {"SHA256": "ABC123FF"}, + } + ], + } + + async def fake_metadata_provider(): + class Provider: + async def get_model_version_info(self, version_id): + assert version_id == "777111" + return checkpoint_info, None + + return Provider() + + monkeypatch.setattr( + "py.recipes.parsers.recipe_format.get_default_metadata_provider", + fake_metadata_provider, + ) + + parser = RecipeFormatParser() + + recipe_metadata = { + "title": "Z Recipe", + "base_model": "", + "loras": [], + "gen_params": {"steps": 20}, + "tags": ["test"], + "checkpoint": { + "modelVersionId": 777111, + "modelId": 333222, + "name": "Z Image", + "version": "Turbo", + }, + } + + metadata_text = f"Recipe metadata: {json.dumps(recipe_metadata)}" + result = await parser.parse_metadata(metadata_text) + + checkpoint = result.get("checkpoint") + assert checkpoint is not None + assert checkpoint["name"] == "Z Image" + assert checkpoint["version"] == "Turbo" + assert checkpoint["hash"] == "abc123ff" + assert checkpoint["file_name"] == "Z_Image_Turbo" + assert result["base_model"] == "sdxl" + assert result["model"] == checkpoint