feat(parser): enhance model metadata extraction in Automatic1111 parser

- Add MODEL_NAME_PATTERN regex to extract model names from parameters
- Extract model hash from parsed hashes when available in metadata
- Add checkpoint model hash and name extraction from parameters section
- Implement checkpoint resource processing from Civitai metadata
- Improve model information completeness for better recipe tracking
This commit is contained in:
Will Miao
2025-11-29 08:13:55 +08:00
parent 8cd4550189
commit 53c4165d82
7 changed files with 467 additions and 13 deletions

View File

@@ -1,6 +1,7 @@
"""Parser for Automatic1111 metadata format."""
import re
import os
import json
import logging
from typing import Dict, Any
@@ -22,6 +23,7 @@ class AutomaticMetadataParser(RecipeMetadataParser):
CIVITAI_METADATA_REGEX = r', Civitai metadata:\s*(\{.*?\})'
EXTRANETS_REGEX = r'<(lora|hypernet):([^:]+):(-?[0-9.]+)>'
MODEL_HASH_PATTERN = r'Model hash: ([a-zA-Z0-9]+)'
MODEL_NAME_PATTERN = r'Model: ([^,]+)'
VAE_HASH_PATTERN = r'VAE hash: ([a-zA-Z0-9]+)'
def is_metadata_matching(self, user_comment: str) -> bool:
@@ -115,6 +117,12 @@ class AutomaticMetadataParser(RecipeMetadataParser):
except json.JSONDecodeError:
logger.error("Error parsing hashes JSON")
# Pick up model hash from parsed hashes if available
if "hashes" in metadata and not metadata.get("model_hash"):
model_hash_from_hashes = metadata["hashes"].get("model")
if model_hash_from_hashes:
metadata["model_hash"] = model_hash_from_hashes
# Extract Lora hashes in alternative format
lora_hashes_match = re.search(self.LORA_HASHES_REGEX, params_section)
if not hashes_match and lora_hashes_match:
@@ -137,6 +145,17 @@ class AutomaticMetadataParser(RecipeMetadataParser):
params_section = params_section.replace(lora_hashes_match.group(0), '')
except Exception as e:
logger.error(f"Error parsing Lora hashes: {e}")
# Extract checkpoint model hash/name when provided outside Civitai resources
model_hash_match = re.search(self.MODEL_HASH_PATTERN, params_section)
if model_hash_match:
metadata["model_hash"] = model_hash_match.group(1).strip()
params_section = params_section.replace(model_hash_match.group(0), '')
model_name_match = re.search(self.MODEL_NAME_PATTERN, params_section)
if model_name_match:
metadata["model_name"] = model_name_match.group(1).strip()
params_section = params_section.replace(model_name_match.group(0), '')
# Extract basic parameters
param_pattern = r'([A-Za-z\s]+): ([^,]+)'
@@ -178,9 +197,10 @@ class AutomaticMetadataParser(RecipeMetadataParser):
metadata["gen_params"] = gen_params
# Extract LoRA information
# Extract LoRA and checkpoint information
loras = []
base_model_counts = {}
checkpoint = None
# First use Civitai resources if available (more reliable source)
if metadata.get("civitai_resources"):
@@ -202,6 +222,50 @@ class AutomaticMetadataParser(RecipeMetadataParser):
resource["modelVersionId"] = air_modelVersionId
# --- End added ---
if resource.get("type") == "checkpoint" and resource.get("modelVersionId"):
version_id = resource.get("modelVersionId")
version_id_str = str(version_id)
checkpoint_entry = {
'id': version_id,
'modelId': resource.get("modelId", 0),
'name': resource.get("modelName", "Unknown Checkpoint"),
'version': resource.get("modelVersionName", resource.get("versionName", "")),
'type': resource.get("type", "checkpoint"),
'existsLocally': False,
'localPath': None,
'file_name': resource.get("modelName", ""),
'hash': resource.get("hash", "") or "",
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider:
try:
civitai_info = await metadata_provider.get_model_version_info(version_id_str)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(
"Error fetching Civitai info for checkpoint version %s: %s",
version_id,
e,
)
# Prefer the first checkpoint found
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
if checkpoint is None:
checkpoint = checkpoint_entry
continue
if resource.get("type") in ["lora", "lycoris", "hypernet"] and resource.get("modelVersionId"):
# Initialize lora entry
lora_entry = {
@@ -237,6 +301,52 @@ class AutomaticMetadataParser(RecipeMetadataParser):
loras.append(lora_entry)
# Fallback checkpoint parsing from generic "Model" and "Model hash" fields
if checkpoint is None:
model_hash = metadata.get("model_hash")
if not model_hash and metadata.get("hashes"):
model_hash = metadata["hashes"].get("model")
model_name = metadata.get("model_name")
file_name = ""
if model_name:
cleaned_name = re.split(r"[\\\\/]", model_name)[-1]
file_name = os.path.splitext(cleaned_name)[0]
if model_hash or model_name:
checkpoint_entry = {
'id': 0,
'modelId': 0,
'name': model_name or "Unknown Checkpoint",
'version': '',
'type': 'checkpoint',
'hash': model_hash or "",
'existsLocally': False,
'localPath': None,
'file_name': file_name,
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider and model_hash:
try:
civitai_info = await metadata_provider.get_model_by_hash(model_hash)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}")
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
checkpoint = checkpoint_entry
# If no LoRAs from Civitai resources or to supplement, extract from metadata["hashes"]
if not loras or len(loras) == 0:
# Extract lora weights from extranet tags in prompt (for later use)
@@ -300,7 +410,9 @@ class AutomaticMetadataParser(RecipeMetadataParser):
# Try to get base model from resources or make educated guess
base_model = None
if base_model_counts:
if checkpoint and checkpoint.get("baseModel"):
base_model = checkpoint.get("baseModel")
elif base_model_counts:
# Use the most common base model from the loras
base_model = max(base_model_counts.items(), key=lambda x: x[1])[0]
@@ -317,6 +429,10 @@ class AutomaticMetadataParser(RecipeMetadataParser):
'gen_params': filtered_gen_params,
'from_automatic_metadata': True
}
if checkpoint:
result['checkpoint'] = checkpoint
result['model'] = checkpoint
return result

View File

@@ -1,5 +1,6 @@
"""Parser for meta format (Lora_N Model hash) metadata."""
import os
import re
import logging
from typing import Dict, Any
@@ -145,14 +146,53 @@ class MetaFormatParser(RecipeMetadataParser):
loras.append(lora_entry)
# Extract model information
model = None
if 'model' in metadata:
model = metadata['model']
# Extract checkpoint information from generic Model/Model hash fields
checkpoint = None
model_hash = metadata.get("model_hash")
model_name = metadata.get("model")
if model_hash or model_name:
cleaned_name = None
if model_name:
cleaned_name = re.split(r"[\\\\/]", model_name)[-1]
cleaned_name = os.path.splitext(cleaned_name)[0]
checkpoint_entry = {
'id': 0,
'modelId': 0,
'name': model_name or "Unknown Checkpoint",
'version': '',
'type': 'checkpoint',
'hash': model_hash or "",
'existsLocally': False,
'localPath': None,
'file_name': cleaned_name or (model_name or ""),
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider and model_hash:
try:
civitai_info = await metadata_provider.get_model_by_hash(model_hash)
checkpoint_entry = await self.populate_checkpoint_from_civitai(
checkpoint_entry,
civitai_info
)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint hash {model_hash}: {e}")
if checkpoint_entry.get("baseModel"):
base_model_value = checkpoint_entry["baseModel"]
base_model_counts[base_model_value] = base_model_counts.get(base_model_value, 0) + 1
checkpoint = checkpoint_entry
# Set base_model to the most common one from civitai_info
base_model = None
if base_model_counts:
# Set base_model to the most common one from civitai_info or checkpoint
base_model = checkpoint["baseModel"] if checkpoint and checkpoint.get("baseModel") else None
if not base_model and base_model_counts:
base_model = max(base_model_counts.items(), key=lambda x: x[1])[0]
# Extract generation parameters for recipe metadata
@@ -170,7 +210,8 @@ class MetaFormatParser(RecipeMetadataParser):
'loras': loras,
'gen_params': gen_params,
'raw_metadata': metadata,
'from_meta_format': True
'from_meta_format': True,
**({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {})
}
except Exception as e:

View File

@@ -94,8 +94,45 @@ class RecipeFormatParser(RecipeMetadataParser):
lora_entry['thumbnailUrl'] = '/loras_static/images/no-preview.png'
loras.append(lora_entry)
logger.info(f"Found {len(loras)} loras in recipe metadata")
# Process checkpoint information if present
checkpoint = None
checkpoint_data = recipe_metadata.get('checkpoint') or {}
if isinstance(checkpoint_data, dict) and checkpoint_data:
version_id = checkpoint_data.get('modelVersionId') or checkpoint_data.get('id')
checkpoint_entry = {
'id': version_id or 0,
'modelId': checkpoint_data.get('modelId', 0),
'name': checkpoint_data.get('name', 'Unknown Checkpoint'),
'version': checkpoint_data.get('version', ''),
'type': checkpoint_data.get('type', 'checkpoint'),
'hash': checkpoint_data.get('hash', ''),
'existsLocally': False,
'localPath': None,
'file_name': checkpoint_data.get('file_name', ''),
'thumbnailUrl': '/loras_static/images/no-preview.png',
'baseModel': '',
'size': 0,
'downloadUrl': '',
'isDeleted': False
}
if metadata_provider:
try:
civitai_info = None
if version_id:
civitai_info = await metadata_provider.get_model_version_info(str(version_id))
elif checkpoint_entry.get('hash'):
civitai_info = await metadata_provider.get_model_by_hash(checkpoint_entry['hash'])
if civitai_info:
checkpoint_entry = await self.populate_checkpoint_from_civitai(checkpoint_entry, civitai_info)
except Exception as e:
logger.error(f"Error fetching Civitai info for checkpoint in recipe metadata: {e}")
checkpoint = checkpoint_entry
# Filter gen_params to only include recognized keys
filtered_gen_params = {}
@@ -105,12 +142,13 @@ class RecipeFormatParser(RecipeMetadataParser):
filtered_gen_params[key] = value
return {
'base_model': recipe_metadata.get('base_model', ''),
'base_model': checkpoint['baseModel'] if checkpoint and checkpoint.get('baseModel') else recipe_metadata.get('base_model', ''),
'loras': loras,
'gen_params': filtered_gen_params,
'tags': recipe_metadata.get('tags', []),
'title': recipe_metadata.get('title', ''),
'from_recipe_metadata': True
'from_recipe_metadata': True,
**({'checkpoint': checkpoint, 'model': checkpoint} if checkpoint else {})
}
except Exception as e:

View File

@@ -83,6 +83,7 @@ export class ImageProcessor {
}
this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message
if (this.importManager.recipeData.error) {
@@ -134,6 +135,7 @@ export class ImageProcessor {
}
this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message
if (this.importManager.recipeData.error) {
@@ -188,6 +190,7 @@ export class ImageProcessor {
}
this.importManager.recipeData = recipeData;
this._ensureCheckpointMetadata();
// Check if we have an error message
if (this.importManager.recipeData.error) {
@@ -215,4 +218,12 @@ export class ImageProcessor {
this.importManager.loadingManager.hide();
}
}
_ensureCheckpointMetadata() {
if (!this.importManager.recipeData) return;
if (this.importManager.recipeData.model && !this.importManager.recipeData.checkpoint) {
this.importManager.recipeData.checkpoint = this.importManager.recipeData.model;
}
}
}

View File

@@ -0,0 +1,120 @@
import pytest
from py.recipes.parsers.automatic import AutomaticMetadataParser
@pytest.mark.asyncio
async def test_parse_metadata_extracts_checkpoint_from_civitai_resources(monkeypatch):
    """Checkpoint listed under "Civitai resources" is surfaced in the result.

    The default metadata provider is replaced by a stub that answers the
    version-info lookup for version 2442439; the test then checks the
    checkpoint fields, the ``model`` alias, the base model and the empty
    LoRA list in the parsed result.
    """
    version_payload = {
        "id": 2442439,
        "modelId": 123456,
        "model": {"name": "Z Image", "type": "checkpoint"},
        "name": "Turbo",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "sdxl",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 2048,
                "name": "Z_Image_Turbo.safetensors",
                "hashes": {"SHA256": "ABC123FF"},
            }
        ],
    }

    class StubProvider:
        async def get_model_version_info(self, version_id):
            # The lookup is expected to receive the version id as a string.
            assert version_id == "2442439"
            return version_payload, None

    async def provide_stub():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.automatic.get_default_metadata_provider",
        provide_stub,
    )

    user_comment = (
        "Negative space, fog, BLACK blue color GRADIENT BACKGROUND, a vintage car in the middle, "
        "FOG, and a silhouetted figure near the car, in the style of the Blade Runner movie "
        "Negative prompt: Steps: 23, Sampler: Undefined, CFG scale: 3.5, Seed: 1760020955, "
        "Size: 832x1216, Clip skip: 2, Created Date: 2025-11-28T09:18:43.5269343Z, "
        'Civitai resources: [{"type":"checkpoint","modelVersionId":2442439,"modelName":"Z Image","modelVersionName":"Turbo"}], '
        "Civitai metadata: {}"
    )

    result = await AutomaticMetadataParser().parse_metadata(user_comment)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "name": "Z Image",
        "version": "Turbo",
        "type": "checkpoint",
        "modelId": 123456,
        "hash": "abc123ff",
        "file_name": "Z_Image_Turbo",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected
    assert checkpoint["thumbnailUrl"].endswith("width=450,optimized=true")

    # "model" must mirror the checkpoint entry.
    assert result["model"] == checkpoint
    assert result["base_model"] == "sdxl"
    assert result["loras"] == []
@pytest.mark.asyncio
async def test_parse_metadata_extracts_checkpoint_from_model_hash(monkeypatch):
    """Checkpoint is resolved from the plain "Model hash" / "Model" fields.

    No Civitai resources are present in the input; the stubbed provider
    answers the by-hash lookup and the test checks that its payload shows up
    in the parsed checkpoint, the ``model`` alias and the base model.
    """
    hash_payload = {
        "id": 98765,
        "modelId": 654321,
        "model": {"name": "Flux Illustrious", "type": "checkpoint"},
        "name": "v1",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "flux",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 1024,
                "name": "FluxIllustrious_v1.safetensors",
                "hashes": {"SHA256": "C3688EE04C"},
            }
        ],
    }

    class StubProvider:
        async def get_model_by_hash(self, model_hash):
            # The hash must arrive exactly as written in the metadata text.
            assert model_hash == "c3688ee04c"
            return hash_payload, None

    async def provide_stub():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.automatic.get_default_metadata_provider",
        provide_stub,
    )

    user_comment = (
        "A cyberpunk portrait with neon highlights.\n"
        "Negative prompt: low quality\n"
        "Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 123456, Size: 832x1216, "
        "Model hash: c3688ee04c, Model: models/waiNSFWIllustrious_v110.safetensors"
    )

    result = await AutomaticMetadataParser().parse_metadata(user_comment)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "hash": "c3688ee04c",
        "name": "Flux Illustrious",
        "version": "v1",
        "file_name": "FluxIllustrious_v1",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    # "model" must mirror the checkpoint entry.
    assert result["model"] == checkpoint
    assert result["base_model"] == "flux"
    assert result["loras"] == []

View File

@@ -0,0 +1,61 @@
import pytest
from py.recipes.parsers.meta_format import MetaFormatParser
@pytest.mark.asyncio
async def test_meta_format_parser_extracts_checkpoint_from_model_hash(monkeypatch):
    """Meta-format input with "Model" / "Model hash" yields a checkpoint.

    The stubbed provider only implements the by-hash lookup and asserts it is
    called with the checkpoint hash. The test checks the checkpoint fields,
    the ``model`` alias, the base model, and that one LoRA is reported.
    """
    hash_payload = {
        "id": 222333,
        "modelId": 999888,
        "model": {"name": "Fluxmania V5P", "type": "checkpoint"},
        "name": "v5p",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "flux",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 1024,
                "name": "Fluxmania_V5P.safetensors",
                "hashes": {"SHA256": "8AE0583B06"},
            }
        ],
    }

    class StubProvider:
        async def get_model_by_hash(self, model_hash):
            assert model_hash == "8ae0583b06"
            return hash_payload, None

    async def provide_stub():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.meta_format.get_default_metadata_provider",
        provide_stub,
    )

    user_comment = (
        "Shimmering metal forms\n"
        "Negative prompt: flat color\n"
        "Steps: 25, Sampler: dpmpp_2m_sgm_uniform, Seed: 471889513588087, "
        "Model: Fluxmania V5P.safetensors, Model hash: 8ae0583b06, VAE: ae.sft, "
        "Lora_0 Model name: ArtVador I.safetensors, Lora_0 Model hash: 08f7133a58, "
        "Lora_0 Strength model: 0.65, Lora_0 Strength clip: 0.65"
    )

    result = await MetaFormatParser().parse_metadata(user_comment)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "hash": "8ae0583b06",
        "name": "Fluxmania V5P",
        "version": "v5p",
        "file_name": "Fluxmania_V5P",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    # "model" must mirror the checkpoint entry.
    assert result["model"] == checkpoint
    assert result["base_model"] == "flux"
    assert len(result["loras"]) == 1

View File

@@ -0,0 +1,67 @@
import json
import pytest
from py.recipes.parsers.recipe_format import RecipeFormatParser
@pytest.mark.asyncio
async def test_recipe_format_parser_populates_checkpoint(monkeypatch):
    """Embedded recipe metadata with a ``checkpoint`` entry is enriched.

    The recipe JSON carries a checkpoint identified by ``modelVersionId``
    777111 and an empty ``base_model``. The stubbed provider answers the
    version-info lookup, and the test checks that the payload's hash, file
    name and base model appear in the parsed result.
    """
    version_payload = {
        "id": 777111,
        "modelId": 333222,
        "model": {"name": "Z Image", "type": "checkpoint"},
        "name": "Turbo",
        "images": [{"url": "https://image.civitai.com/checkpoints/original=true"}],
        "baseModel": "sdxl",
        "downloadUrl": "https://civitai.com/api/download/checkpoint",
        "files": [
            {
                "type": "Model",
                "primary": True,
                "sizeKB": 2048,
                "name": "Z_Image_Turbo.safetensors",
                "hashes": {"SHA256": "ABC123FF"},
            }
        ],
    }

    class StubProvider:
        async def get_model_version_info(self, version_id):
            # The lookup is expected to receive the version id as a string.
            assert version_id == "777111"
            return version_payload, None

    async def provide_stub():
        return StubProvider()

    monkeypatch.setattr(
        "py.recipes.parsers.recipe_format.get_default_metadata_provider",
        provide_stub,
    )

    embedded_recipe = {
        "title": "Z Recipe",
        "base_model": "",
        "loras": [],
        "gen_params": {"steps": 20},
        "tags": ["test"],
        "checkpoint": {
            "modelVersionId": 777111,
            "modelId": 333222,
            "name": "Z Image",
            "version": "Turbo",
        },
    }
    user_comment = f"Recipe metadata: {json.dumps(embedded_recipe)}"

    result = await RecipeFormatParser().parse_metadata(user_comment)

    checkpoint = result.get("checkpoint")
    assert checkpoint is not None

    expected_fields = {
        "name": "Z Image",
        "version": "Turbo",
        "hash": "abc123ff",
        "file_name": "Z_Image_Turbo",
    }
    for field, expected in expected_fields.items():
        assert checkpoint[field] == expected

    # Base model comes from the provider payload, since the recipe's own is empty.
    assert result["base_model"] == "sdxl"
    assert result["model"] == checkpoint