feat(parser): enhance model metadata extraction in Automatic1111 parser

- Add MODEL_NAME_PATTERN regex to extract model names from parameters
- Extract model hash from parsed hashes when available in metadata
- Add checkpoint model hash and name extraction from parameters section
- Implement checkpoint resource processing from Civitai metadata
- Improve model information completeness for better recipe tracking
This commit is contained in:
Will Miao
2025-11-29 08:13:55 +08:00
parent 8cd4550189
commit 53c4165d82
7 changed files with 467 additions and 13 deletions

View File

@@ -1,6 +1,7 @@
"""Parser for Automatic1111 metadata format.""" """Parser for Automatic1111 metadata format."""
import re import re
import os
import json import json
import logging import logging
from typing import Dict, Any from typing import Dict, Any
@@ -22,6 +23,7 @@ class AutomaticMetadataParser(RecipeMetadataParser):
CIVITAI_METADATA_REGEX = r', Civitai metadata:\s*(\{.*?\})' CIVITAI_METADATA_REGEX = r', Civitai metadata:\s*(\{.*?\})'
EXTRANETS_REGEX = r'<(lora|hypernet):([^:]+):(-?[0-9.]+)>' EXTRANETS_REGEX = r'<(lora|hypernet):([^:]+):(-?[0-9.]+)>'
MODEL_HASH_PATTERN = r'Model hash: ([a-zA-Z0-9]+)' MODEL_HASH_PATTERN = r'Model hash: ([a-zA-Z0-9]+)'
MODEL_NAME_PATTERN = r'Model: ([^,]+)'
VAE_HASH_PATTERN = r'VAE hash: ([a-zA-Z0-9]+)' VAE_HASH_PATTERN = r'VAE hash: ([a-zA-Z0-9]+)'
def is_metadata_matching(self, user_comment: str) -> bool: def is_metadata_matching(self, user_comment: str) -> bool:
@@ -115,6 +117,12 @@ class AutomaticMetadataParser(RecipeMetadataParser):
except json.JSONDecodeError: except json.JSONDecodeError:
logger.error("Error parsing hashes JSON") logger.error("Error parsing hashes JSON")
# Pick up model hash from parsed hashes if available
if "hashes" in metadata and not metadata.get("model_hash"):
model_hash_from_hashes = metadata["hashes"].get("model")
if model_hash_from_hashes:
metadata["model_hash"] = model_hash_from_hashes
# Extract Lora hashes in alternative format # Extract Lora hashes in alternative format
lora_hashes_match = re.search(self.LORA_HASHES_REGEX, params_section) lora_hashes_match = re.search(self.LORA_HASHES_REGEX, params_section)
if not hashes_match and lora_hashes_match: if not hashes_match and lora_hashes_match:
@@ -137,6 +145,17 @@ class AutomaticMetadataParser(RecipeMetadataParser):
params_section = params_section.replace(lora_hashes_match.group(0), '') params_section = params_section.replace(lora_hashes_match.group(0), '')
except Exception as e: except Exception as e:
logger.error(f"Error parsing Lora hashes: {e}") logger.error(f"Error parsing Lora hashes: {e}")
# Extract checkpoint model hash/name when provided outside Civitai resources
model_hash_match = re.search(self.MODEL_HASH_PATTERN, params_section)
if model_hash_match:
metadata["model_hash"] = model_hash_match.group(1).strip()
params_section = params_section.replace(model_hash_match.group(0), '')
model_name_match = re.search(self.MODEL_NAME_PATTERN, params_section)
if model_name_match:
metadata["model_name"] = model_name_match.group(1).strip()
params_section = params_section.replace(model_name_match.group(0), '')
# Extract basic parameters # Extract basic parameters
param_pattern = r'([A-Za-z\s]+): ([^,]+)' param_pattern = r'([A-Za-z\s]+): ([^,]+)'
@@ -178,9 +197,10 @@ class AutomaticMetadataParser(RecipeMetadataParser):
metadata["gen_params"] = gen_params metadata["gen_params"] = gen_params
# Extract LoRA information # Extract LoRA and checkpoint information
loras = [] loras = []
base_model_counts = {} base_model_counts = {}
checkpoint = None
# First use Civitai resources if available (more reliable source) # First use Civitai resources if available (more reliable source)
if metadata.get("civitai_resources"): if metadata.get("civitai_resources"):
@@ -202,6 +222,50 @@ class AutomaticMetadataParser(RecipeMetadataParser):
resource["modelVersionId"] = air_modelVersionId resource["modelVersionId"] = air_modelVersionId
# --- End added --- # --- End added ---
if resource.get("type") == "checkpoint" and resource.get("modelVersionId"):
version_id = resource.get("modelVersionId")
version_id_str = str(version_id)
checkpoint_entry = {
'id': version_id,
'modelId': resource.get("modelId", 0),
'name': resource.get("modelName", "Unknown Checkpoint"),
'version': resource.get("modelVersionName", resource.get("versionName", "")),
'type': resource.get("type", "checkpoint"),
'existsLocally': False,
'localPath': None,
'file_name': resource.get("modelName", ""),
'hash': resource.get("hash", "") or "",
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider:
try:
civitai_info = await metadata_provider.get_model_version_info(version_id_str)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(
"Error fetching Civitai info for checkpoint version %s: %s",
version_id,
e,
)
# Prefer the first checkpoint found
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
if checkpoint is None:
checkpoint = checkpoint_entry
continue
if resource.get("type") in ["lora", "lycoris", "hypernet"] and resource.get("modelVersionId"): if resource.get("type") in ["lora", "lycoris", "hypernet"] and resource.get("modelVersionId"):
# Initialize lora entry # Initialize lora entry
lora_entry = { lora_entry = {
@@ -237,6 +301,52 @@ class AutomaticMetadataParser(RecipeMetadataParser):
loras.append(lora_entry) loras.append(lora_entry)
# Fallback checkpoint parsing from generic "Model" and "Model hash" fields
if checkpoint is None:
model_hash = metadata.get("model_hash")
if not model_hash and metadata.get("hashes"):
model_hash = metadata["hashes"].get("model")
model_name = metadata.get("model_name")
file_name = ""
if model_name:
cleaned_name = re.split(r"[\\\\/]", model_name)[-1]
file_name = os.path.splitext(cleaned_name)[0]
if model_hash or model_name:
checkpoint_entry = {
'id': 0,
'modelId': 0,
'name': model_name or "Unknown Checkpoint",
'version': '',
'type': 'checkpoint',
'hash': model_hash or "",
'existsLocally': False,
'localPath': None,
'file_name': file_name,
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider and model_hash:
try:
civitai_info = await metadata_provider.get_model_by_hash(model_hash)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}")
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
checkpoint = checkpoint_entry
# If no LoRAs from Civitai resources or to supplement, extract from metadata["hashes"] # If no LoRAs from Civitai resources or to supplement, extract from metadata["hashes"]
if not loras or len(loras) == 0: if not loras or len(loras) == 0:
# Extract lora weights from extranet tags in prompt (for later use) # Extract lora weights from extranet tags in prompt (for later use)
@@ -300,7 +410,9 @@ class AutomaticMetadataParser(RecipeMetadataParser):
# Try to get base model from resources or make educated guess # Try to get base model from resources or make educated guess
base_model = None base_model = None
if base_model_counts: if checkpoint and checkpoint.get("baseModel"):
base_model = checkpoint.get("baseModel")
elif base_model_counts:
# Use the most common base model from the loras # Use the most common base model from the loras
base_model = max(base_model_counts.items(), key=lambda x: x[1])[0] base_model = max(base_model_counts.items(), key=lambda x: x[1])[0]
@@ -317,6 +429,10 @@ class AutomaticMetadataParser(RecipeMetadataParser):
'gen_params': filtered_gen_params, 'gen_params': filtered_gen_params,
'from_automatic_metadata': True 'from_automatic_metadata': True
} }
if checkpoint:
result['checkpoint'] = checkpoint
result['model'] = checkpoint
return result return result

View File

@@ -1,5 +1,6 @@
"""Parser for meta format (Lora_N Model hash) metadata.""" """Parser for meta format (Lora_N Model hash) metadata."""
import os
import re import re
import logging import logging
from typing import Dict, Any from typing import Dict, Any
@@ -145,14 +146,53 @@ class MetaFormatParser(RecipeMetadataParser):
loras.append(lora_entry) loras.append(lora_entry)
# Extract model information # Extract checkpoint information from generic Model/Model hash fields
model = None checkpoint = None
if 'model' in metadata: model_hash = metadata.get("model_hash")
model = metadata['model'] model_name = metadata.get("model")
if model_hash or model_name:
cleaned_name = None
if model_name:
cleaned_name = re.split(r"[\\\\/]", model_name)[-1]
cleaned_name = os.path.splitext(cleaned_name)[0]
checkpoint_entry = {
'id': 0,
'modelId': 0,
'name': model_name or "Unknown Checkpoint",
'version': '',
'type': 'checkpoint',
'hash': model_hash or "",
'existsLocally': False,
'localPath': None,
'file_name': cleaned_name or (model_name or ""),
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider and model_hash:
try:
civitai_info = await metadata_provider.get_model_by_hash(model_hash)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}")
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
checkpoint = checkpoint_entry
# Set base_model to the most common one from civitai_info # Set base_model to the most common one from civitai_info or checkpoint
base_model = None base_model = checkpoint["baseModel"] if checkpoint and checkpoint.get("baseModel") else None
if base_model_counts: if not base_model and base_model_counts:
base_model = max(base_model_counts.items(), key=lambda x: x[1])[0] base_model = max(base_model_counts.items(), key=lambda x: x[1])[0]
# Extract generation parameters for recipe metadata # Extract generation parameters for recipe metadata
@@ -170,7 +210,8 @@ class MetaFormatParser(RecipeMetadataParser):
'loras': loras, 'loras': loras,
'gen_params': gen_params, 'gen_params': gen_params,
'raw_metadata': metadata, 'raw_metadata': metadata,
'from_meta_format': True 'from_meta_format': True,
**({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {})
} }
except Exception as e: except Exception as e:

View File

@@ -94,8 +94,45 @@ class RecipeFormatParser(RecipeMetadataParser):
lora_entry['thumbnailUrl'] = '/loras_static/images/no-preview.png' lora_entry['thumbnailUrl'] = '/loras_static/images/no-preview.png'
loras.append(lora_entry) loras.append(lora_entry)
logger.info(f"Found {len(loras)} loras in recipe metadata") logger.info(f"Found {len(loras)} loras in recipe metadata")
# Process checkpoint information if present
checkpoint = None
checkpoint_data = recipe_metadata.get('checkpoint') or {}
if isinstance(checkpoint_data, dict) and checkpoint_data:
version_id = checkpoint_data.get('modelVersionId') or checkpoint_data.get('id')
checkpoint_entry = {
'id': version_id or 0,
'modelId': checkpoint_data.get('modelId', 0),
'name': checkpoint_data.get('name', 'Unknown Checkpoint'),
'version': checkpoint_data.get('version', ''),
'type': checkpoint_data.get('type', 'checkpoint'),
'hash': checkpoint_data.get('hash', ''),
'existsLocally': False,
'localPath': None,
'file_name': checkpoint_data.get('file_name', ''),
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider:
try:
civitai_info = None
if version_id:
civitai_info = await metadata_provider.get_model_version_info(str(version_id))
elif checkpoint_entry.get('hash'):
civitai_info = await metadata_provider.get_model_by_hash(checkpoint_entry['hash'])
if civitai_info:
checkpoint_entry = await self.populate_checkpoint_from_civitai(checkpoint_entry, civitai_info)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint in recipe metadata: {e}")
checkpoint = checkpoint_entry
# Filter gen_params to only include recognized keys # Filter gen_params to only include recognized keys
filtered_gen_params = {} filtered_gen_params = {}
@@ -105,12 +142,13 @@ class RecipeFormatParser(RecipeMetadataParser):
filtered_gen_params[key] = value filtered_gen_params[key] = value
return { return {
'base_model': recipe_metadata.get('base_model', ''), 'base_model': checkpoint['baseModel'] if checkpoint and checkpoint.get('baseModel') else recipe_metadata.get('base_model', ''),
'loras': loras, 'loras': loras,
'gen_params': filtered_gen_params, 'gen_params': filtered_gen_params,
'tags': recipe_metadata.get('tags', []), 'tags': recipe_metadata.get('tags', []),
'title': recipe_metadata.get('title', ''), 'title': recipe_metadata.get('title', ''),
'from_recipe_metadata': True 'from_recipe_metadata': True,
**({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {})
} }
except Exception as e: except Exception as e:

View File

@@ -83,6 +83,7 @@ export class ImageProcessor {
} }
this.importManager.recipeData = recipeData; this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message // Check if we have an error message
if (this.importManager.recipeData.error) { if (this.importManager.recipeData.error) {
@@ -134,6 +135,7 @@ export class ImageProcessor {
} }
this.importManager.recipeData = recipeData; this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message // Check if we have an error message
if (this.importManager.recipeData.error) { if (this.importManager.recipeData.error) {
@@ -188,6 +190,7 @@ export class ImageProcessor {
} }
this.importManager.recipeData = recipeData; this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message // Check if we have an error message
if (this.importManager.recipeData.error) { if (this.importManager.recipeData.error) {
@@ -215,4 +218,12 @@ export class ImageProcessor {
this.importManager.loadingManager.hide(); this.importManager.loadingManager.hide();
} }
} }
_ensureCheckpointMetadata() {
if (!this.importManager.recipeData) return;
if (this.importManager.recipeData.model && !this.importManager.recipeData.checkpoint) {
this.importManager.recipeData.checkpoint = this.importManager.recipeData.model;
}
}
} }

View File

@@ -0,0 +1,120 @@
import pytest
from py.recipes.parsers.automatic import AutomaticMetadataParser
@pytest.mark.asyncio
async def test_parse_metadata_extracts_checkpoint_from_civitai_resources(monkeypatch):
    """A checkpoint listed under "Civitai resources" ends up in the result.

    The default metadata provider is replaced by a stub that answers
    ``get_model_version_info`` with a canned version payload; the test then
    checks the checkpoint fields, the ``model`` alias and ``base_model``.
    """
    version_payload = {
        "id": 2442439,
        "modelId": 123456,
        "model": {"name": "Z Image", "type": "checkpoint"},
        "name": "Turbo",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "sdxl",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 2048,
                "name": "Z_Image_Turbo.safetensors",
                "hashes": {"SHA256": "ABC123FF"},
            }
        ],
    }

    class StubProvider:
        """Stand-in provider exposing only the lookup this test needs."""

        async def get_model_version_info(self, version_id):
            # The id is expected to arrive as a string.
            assert version_id == "2442439"
            return version_payload, None

    async def fake_metadata_provider():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.automatic.get_default_metadata_provider",
        fake_metadata_provider,
    )

    prompt_part = (
        "Negative space, fog, BLACK blue color GRADIENT BACKGROUND, a vintage car in the middle, "
        "FOG, and a silhouetted figure near the car, in the style of the Blade Runner movie "
    )
    settings_part = (
        "Negative prompt: Steps: 23, Sampler: Undefined, CFG scale: 3.5, Seed: 1760020955, "
        "Size: 832x1216, Clip skip: 2, Created Date: 2025-11-28T09:18:43.5269343Z, "
    )
    resources_part = (
        'Civitai resources: [{"type":"checkpoint","modelVersionId":2442439,"modelName":"Z Image","modelVersionName":"Turbo"}], '
        "Civitai metadata: {}"
    )
    metadata_text = prompt_part + settings_part + resources_part

    result = await AutomaticMetadataParser().parse_metadata(metadata_text)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    # NOTE(review): the lowercase hash and suffix-less file name are
    # presumably produced by the Civitai enrichment step - confirm there.
    expected_fields = {
        "name": "Z Image",
        "version": "Turbo",
        "type": "checkpoint",
        "modelId": 123456,
        "hash": "abc123ff",
        "file_name": "Z_Image_Turbo",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected
    assert checkpoint["thumbnailUrl"].endswith("width=450,optimized=true")

    assert result["model"] == checkpoint
    assert result["base_model"] == "sdxl"
    assert result["loras"] == []
@pytest.mark.asyncio
async def test_parse_metadata_extracts_checkpoint_from_model_hash(monkeypatch):
    """Plain "Model hash" / "Model" fields yield a checkpoint entry.

    No Civitai resources are present in the text, so the stub provider only
    implements ``get_model_by_hash`` and answers with a canned payload.
    """
    hash_payload = {
        "id": 98765,
        "modelId": 654321,
        "model": {"name": "Flux Illustrious", "type": "checkpoint"},
        "name": "v1",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "flux",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 1024,
                "name": "FluxIllustrious_v1.safetensors",
                "hashes": {"SHA256": "C3688EE04C"},
            }
        ],
    }

    class StubProvider:
        """Stand-in provider exposing only the hash lookup."""

        async def get_model_by_hash(self, model_hash):
            # The hash must be the one written in the parameters line.
            assert model_hash == "c3688ee04c"
            return hash_payload, None

    async def fake_metadata_provider():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.automatic.get_default_metadata_provider",
        fake_metadata_provider,
    )

    prompt_lines = (
        "A cyberpunk portrait with neon highlights.\n"
        "Negative prompt: low quality\n"
    )
    params_line = (
        "Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 123456, Size: 832x1216, "
        "Model hash: c3688ee04c, Model: models/waiNSFWIllustrious_v110.safetensors"
    )
    metadata_text = prompt_lines + params_line

    result = await AutomaticMetadataParser().parse_metadata(metadata_text)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    # Name, version and file name are expected to come from the provider
    # payload rather than from the "Model:" path in the text.
    expected_fields = {
        "hash": "c3688ee04c",
        "name": "Flux Illustrious",
        "version": "v1",
        "file_name": "FluxIllustrious_v1",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    assert result["model"] == checkpoint
    assert result["base_model"] == "flux"
    assert result["loras"] == []

View File

@@ -0,0 +1,61 @@
import pytest
from py.recipes.parsers.meta_format import MetaFormatParser
@pytest.mark.asyncio
async def test_meta_format_parser_extracts_checkpoint_from_model_hash(monkeypatch):
    """Meta-format text with "Model" / "Model hash" yields a checkpoint.

    The stub provider answers every ``get_model_by_hash`` call with the same
    canned checkpoint payload and asserts on the hash it is asked about.
    """
    hash_payload = {
        "id": 222333,
        "modelId": 999888,
        "model": {"name": "Fluxmania V5P", "type": "checkpoint"},
        "name": "v5p",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "flux",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 1024,
                "name": "Fluxmania_V5P.safetensors",
                "hashes": {"SHA256": "8AE0583B06"},
            }
        ],
    }

    class StubProvider:
        """Stand-in provider exposing only the hash lookup."""

        async def get_model_by_hash(self, model_hash):
            assert model_hash == "8ae0583b06"
            return hash_payload, None

    async def fake_metadata_provider():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.meta_format.get_default_metadata_provider",
        fake_metadata_provider,
    )

    prompt_lines = (
        "Shimmering metal forms\n"
        "Negative prompt: flat color\n"
    )
    params_line = (
        "Steps: 25, Sampler: dpmpp_2m_sgm_uniform, Seed: 471889513588087, "
        "Model: Fluxmania V5P.safetensors, Model hash: 8ae0583b06, VAE: ae.sft, "
        "Lora_0 Model name: ArtVador I.safetensors, Lora_0 Model hash: 08f7133a58, "
        "Lora_0 Strength model: 0.65, Lora_0 Strength clip: 0.65"
    )
    metadata_text = prompt_lines + params_line

    result = await MetaFormatParser().parse_metadata(metadata_text)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "hash": "8ae0583b06",
        "name": "Fluxmania V5P",
        "version": "v5p",
        "file_name": "Fluxmania_V5P",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    assert result["model"] == checkpoint
    assert result["base_model"] == "flux"
    # The single Lora_0 entry in the text must still be reported.
    assert len(result["loras"]) == 1

View File

@@ -0,0 +1,67 @@
import json
import pytest
from py.recipes.parsers.recipe_format import RecipeFormatParser
@pytest.mark.asyncio
async def test_recipe_format_parser_populates_checkpoint(monkeypatch):
    """A ``checkpoint`` dict embedded in recipe metadata is enriched.

    The recipe carries a ``modelVersionId``, so the stub provider implements
    ``get_model_version_info`` only and answers with a canned payload. The
    recipe's own ``base_model`` is empty; the test expects the checkpoint's
    base model in the result.
    """
    version_payload = {
        "id": 777111,
        "modelId": 333222,
        "model": {"name": "Z Image", "type": "checkpoint"},
        "name": "Turbo",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "sdxl",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 2048,
                "name": "Z_Image_Turbo.safetensors",
                "hashes": {"SHA256": "ABC123FF"},
            }
        ],
    }

    class StubProvider:
        """Stand-in provider exposing only the version lookup."""

        async def get_model_version_info(self, version_id):
            # The id is expected to arrive as a string.
            assert version_id == "777111"
            return version_payload, None

    async def fake_metadata_provider():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.recipe_format.get_default_metadata_provider",
        fake_metadata_provider,
    )

    recipe_metadata = {
        "title": "Z Recipe",
        "base_model": "",
        "loras": [],
        "gen_params": {"steps": 20},
        "tags": ["test"],
        "checkpoint": {
            "modelVersionId": 777111,
            "modelId": 333222,
            "name": "Z Image",
            "version": "Turbo",
        },
    }
    metadata_text = "Recipe metadata: " + json.dumps(recipe_metadata)

    result = await RecipeFormatParser().parse_metadata(metadata_text)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "name": "Z Image",
        "version": "Turbo",
        "hash": "abc123ff",
        "file_name": "Z_Image_Turbo",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    assert result["base_model"] == "sdxl"
    assert result["model"] == checkpoint