mirror of
https://github.com/willmiao/ComfyUI-Lora-Manager.git
synced 2026-07-05 17:01:16 -03:00
- Rename py/agent_cli/ -> py/metadata_ops/ (module was never agent-related) - Rename tests/agent_cli/ -> tests/metadata_ops/ - Remove 9 low-value/debug INFO log points across agent_handlers.py, agent_service.py, llm_service.py, and metadata_ops/__init__.py - Keep LLM raw response at DEBUG level for diagnostics - Consolidate per-model progress + LLM result into single concise log line with basename instead of full path - Update package/class/method docstrings to clarify this is a pipeline infrastructure, not a true agent loop
337 lines
14 KiB
Python
337 lines
14 KiB
Python
"""Post-processing engine for skill pipeline outputs.
|
|
|
|
The :class:`PostProcessor` takes the LLM's structured JSON output and applies
|
|
it to a model's on-disk metadata via the :mod:`~py.metadata_ops` functions.
|
|
|
|
It handles all the skill-specific business logic — conditions, transformations,
|
|
and orchestration of multiple side-effects (write metadata, download preview,
|
|
refresh cache). All actual I/O is delegated to :mod:`~py.metadata_ops`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PostProcessor:
    """Deterministic post-processor for skill pipeline outputs.

    Takes the structured output an LLM produced for a skill and turns it
    into a set of metadata updates for one model file.

    Usage (called by :class:`~py.services.agent.agent_service.AgentService`)::

        processor = PostProcessor()
        result = await processor.process(
            skill_name="enrich_hf_metadata",
            model_path="/path/to/model.safetensors",
            llm_output={...},
            metadata={...},  # from metadata_ops.read_metadata()
        )
    """

    async def process(
        self,
        *,
        skill_name: str,
        model_path: str,
        llm_output: Dict[str, Any],
        metadata: Dict[str, Any],
        readme_content: str = "",
    ) -> Dict[str, Any]:
        """Route *llm_output* to the correct skill post-processor.

        *readme_content* is optional raw markdown content (e.g. HF README)
        that is converted to HTML and stored as ``modelDescription`` for
        the description tab.

        Returns a dict with keys ``success`` (bool), ``updated_fields`` (list),
        ``preview_downloaded`` (bool), and ``errors`` (list).  A successful
        ``enrich_hf_metadata`` run additionally carries ``updates`` (the dict
        of values that was handed to the metadata writer).
        """
        if skill_name == "enrich_hf_metadata":
            return await self._process_enrich_hf_metadata(
                model_path, llm_output, metadata, readme_content,
            )
        # Unknown skill: report the failure with the same key set as the
        # success path so callers can read every documented key.
        return {
            "success": False,
            "updated_fields": [],
            "preview_downloaded": False,
            "errors": [f"No post-processor registered for skill: {skill_name}"],
        }

    # ------------------------------------------------------------------
    # enrich_hf_metadata
    # ------------------------------------------------------------------

    async def _process_enrich_hf_metadata(
        self,
        model_path: str,
        llm_output: Dict[str, Any],
        metadata: Dict[str, Any],
        readme_content: str = "",
    ) -> Dict[str, Any]:
        """Build and apply metadata updates for the ``enrich_hf_metadata`` skill.

        Collects candidate values from *llm_output* and *readme_content*
        into an ``updates`` dict, optionally downloads a preview image,
        passes the updates to ``apply_metadata_updates`` and refreshes the
        cache when anything changed.

        A model counts as HF-sourced when ``metadata["from_civitai"]`` is
        present and falsy; a missing key is treated as Civitai-sourced.
        HF-sourced models have their fields overwritten unconditionally,
        other models only where the current value is empty.

        Returns a dict with ``success``, ``updated_fields``,
        ``preview_downloaded``, ``updates`` and ``errors``.
        """
        # Imported lazily, as in the rest of this module's call path.
        from ...metadata_ops import (
            apply_metadata_updates,
            download_preview,
            refresh_cache,
        )
        from .skills.enrich_hf_metadata.readme_processor import (
            convert_readme_to_html,
            extract_gallery_images,
            extract_gallery_table_images,
            extract_relevant_section,
            extract_simple_markdown_images,
            extract_html_img_tags,
            extract_repo_from_hf_url,
        )

        updated_fields: List[str] = []
        preview_downloaded = False

        # -- Determine whether this is an HF-sourced model -----------------
        is_hf_model = not metadata.get("from_civitai", True)

        # -- Collect updates -----------------------------------------------
        updates: Dict[str, Any] = {}

        # base_model
        new_base = self._clean_text(llm_output.get("base_model"))
        current_base = metadata.get("base_model", "") or ""
        if new_base and self._should_overwrite(current_base, is_hf_model):
            updates["base_model"] = new_base

        # trigger words → civitai.trainedWords
        new_triggers = llm_output.get("trigger_words", [])
        trigger_words_empty = True
        if isinstance(new_triggers, list):
            # Non-string items (None, numbers, nested objects) are skipped
            # rather than crashing on ``.strip()``.
            cleaned = [
                t.strip()
                for t in new_triggers
                if isinstance(t, str) and t.strip()
            ]
            cleaned = [
                t for t in cleaned if t.lower() not in ("none", "null", "n/a")
            ]
            trigger_words_empty = not cleaned
            current_triggers = self._current_trained_words(metadata)
            # NOTE(review): for HF models an empty ``cleaned`` list still
            # replaces existing trainedWords — confirm this is intended.
            if self._should_overwrite_list(current_triggers, is_hf_model):
                self._set_civitai_field(
                    metadata, updates, "trainedWords", cleaned,
                )

        # modelDescription — from raw README content (converted to HTML)
        if readme_content and is_hf_model:
            converted = convert_readme_to_html(readme_content)
            if converted:
                updates["modelDescription"] = converted

        # short_description → civitai.description (for "About this version")
        short_desc = self._clean_text(llm_output.get("short_description"))
        if short_desc and is_hf_model:
            self._set_civitai_field(
                metadata, updates, "description", short_desc,
            )

        # gallery images → civitai.images (from YAML frontmatter widget entries
        # and Sample Gallery markdown tables in the README body)
        gallery_images: List[Dict[str, Any]] = []
        if readme_content and is_hf_model:
            hf_url = metadata.get("hf_url", "") or ""
            repo = extract_repo_from_hf_url(hf_url)
            if repo:
                rec_w = llm_output.get("recommended_width") or 0
                rec_h = llm_output.get("recommended_height") or 0

                # 1. Widget images (YAML frontmatter)
                gallery = extract_gallery_images(
                    readme_content, repo,
                    default_width=rec_w, default_height=rec_h,
                )

                # 2. Sample Gallery table images (markdown body), deduplicated
                existing_urls = {
                    img["url"] for img in gallery if img.get("url")
                }
                table_images = extract_gallery_table_images(
                    readme_content, repo,
                    existing_urls=existing_urls,
                    default_width=rec_w, default_height=rec_h,
                )
                existing_urls.update(
                    img["url"] for img in table_images if img.get("url")
                )

                # 3. Simple markdown images in the body
                simple_images = extract_simple_markdown_images(
                    readme_content, repo,
                    existing_urls=existing_urls,
                    default_width=rec_w, default_height=rec_h,
                )
                existing_urls.update(
                    img["url"] for img in simple_images if img.get("url")
                )

                # 4. HTML `<img>` tags (used by many collection repos)
                html_images = extract_html_img_tags(
                    readme_content, repo,
                    existing_urls=existing_urls,
                    default_width=rec_w, default_height=rec_h,
                )

                all_images = (
                    gallery + table_images + simple_images + html_images
                )
                if all_images:
                    gallery_images = all_images
                    self._set_civitai_field(
                        metadata, updates, "images", all_images,
                    )

        # tags
        new_tags = llm_output.get("tags", [])
        if isinstance(new_tags, list) and new_tags:
            existing_tags = metadata.get("tags") or []
            merged = self._merge_tags(existing_tags, new_tags)
            if len(merged) > len(existing_tags) or is_hf_model:
                updates["tags"] = merged

        # metadata_source & llm_enriched_at (always set)
        updates["metadata_source"] = "agent:enrich_hf_metadata"
        updates["llm_enriched_at"] = datetime.now(timezone.utc).isoformat()

        # Store LLM confidence in metadata so it's accessible for evaluation.
        # A numeric score is stored as its string form instead of raising.
        confidence = llm_output.get("confidence")
        if isinstance(confidence, (int, float)) and not isinstance(
            confidence, bool
        ):
            raw_confidence = str(confidence) if confidence else ""
        else:
            raw_confidence = self._clean_text(confidence)
        if raw_confidence:
            updates["_llm_confidence"] = raw_confidence

        # Fallback: extract instance_prompt from YAML frontmatter when the LLM
        # returned empty trigger words but the README has instance_prompt.
        # The fallback obeys the same overwrite rule as the primary trigger
        # word path so existing words on non-HF models are not clobbered.
        if trigger_words_empty:
            instance_prompt = _extract_yaml_instance_prompt(readme_content)
            if instance_prompt and self._should_overwrite_list(
                self._current_trained_words(metadata), is_hf_model,
            ):
                self._set_civitai_field(
                    metadata, updates, "trainedWords", [instance_prompt],
                )

        preview_remote_url = self._clean_text(llm_output.get("preview_url"))
        # Fallback: if the LLM couldn't find a preview image in the cleaned
        # README, find the first gallery image from the *model-specific
        # section* of the README (not the repo-wide first image, which
        # belongs to a different model in collection repos).
        if not preview_remote_url and readme_content and is_hf_model:
            model_basename = os.path.splitext(os.path.basename(model_path))[0]
            relevant_section = extract_relevant_section(
                readme_content, model_basename,
            )
            if relevant_section and relevant_section != readme_content:
                for img in gallery_images:
                    img_url = img.get("url", "")
                    if img_url and img_url in relevant_section:
                        preview_remote_url = img_url
                        break
            # Last resort: use the first gallery image from the full README.
            if not preview_remote_url and gallery_images:
                preview_remote_url = gallery_images[0].get("url", "")

        # Only download when there is no existing preview file on disk.
        current_preview = metadata.get("preview_url") or ""
        if preview_remote_url and not (
            current_preview and os.path.exists(current_preview)
        ):
            local_path = await download_preview(model_path, preview_remote_url)
            if local_path:
                preview_downloaded = True
                updates["preview_url"] = local_path

        # notes — plain-text summary of usage info from the LLM
        new_notes = self._clean_text(llm_output.get("notes"))
        if new_notes:
            updates["notes"] = new_notes

        # usage_tips — JSON string (e.g. {"strength_min":0.85,"strength_max":1.4})
        # An already-parsed dict is serialised back to a JSON string.
        tips = llm_output.get("usage_tips")
        if isinstance(tips, dict):
            raw_tips = json.dumps(tips) if tips else ""
        else:
            raw_tips = self._clean_text(tips)
        if raw_tips and raw_tips != "{}":
            try:
                json.loads(raw_tips)
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "LLM returned invalid usage_tips JSON: %s", raw_tips[:200]
                )
            else:
                updates["usage_tips"] = raw_tips

        if updates:
            updated_fields = await apply_metadata_updates(model_path, updates)

        # -- Refresh scanner cache ------------------------------------------
        if updated_fields or preview_downloaded:
            await refresh_cache(model_path)

        return {
            "success": True,
            "updated_fields": updated_fields,
            "preview_downloaded": preview_downloaded,
            "updates": updates,
            "errors": [],
        }

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return *value* stripped when it is a string, otherwise ``""``."""
        return value.strip() if isinstance(value, str) else ""

    @staticmethod
    def _current_trained_words(metadata: Dict[str, Any]) -> List[str]:
        """Return ``metadata["civitai"]["trainedWords"]`` or an empty list."""
        return (metadata.get("civitai") or {}).get("trainedWords") or []

    @staticmethod
    def _set_civitai_field(
        metadata: Dict[str, Any],
        updates: Dict[str, Any],
        key: str,
        value: Any,
    ) -> None:
        """Set ``updates["civitai"][key]`` without losing other civitai data.

        The new civitai dict starts as a copy of ``metadata["civitai"]``,
        is overlaid with any civitai dict already pending in *updates*,
        and then receives *value* under *key*.  *metadata* is not mutated.
        """
        merged = dict(metadata.get("civitai") or {})
        pending = updates.get("civitai")
        if isinstance(pending, dict):
            merged.update(pending)
        merged[key] = value
        updates["civitai"] = merged

    @staticmethod
    def _should_overwrite(current_value: str, is_hf_model: bool) -> bool:
        """Return ``True`` when a scalar field should be overwritten.

        That is the case for HF-sourced models, and otherwise when the
        current value is empty or (case-insensitively) ``"unknown"``.
        """
        return is_hf_model or not current_value or current_value.lower() in (
            "", "unknown",
        )

    @staticmethod
    def _should_overwrite_list(current_list: List[str], is_hf_model: bool) -> bool:
        """Return ``True`` when a list field should be overwritten.

        That is the case for HF-sourced models, and otherwise when the
        current list is empty.
        """
        return is_hf_model or not current_list

    @staticmethod
    def _merge_tags(existing: List[str], new: List[str]) -> List[str]:
        """Merge *new* tags into *existing*, all lowercased.

        This matches the behaviour of :class:`TagUpdateService` which
        normalises every tag to lowercase for case-insensitive dedup.

        Order of first appearance is preserved (existing tags first).
        Blank tags and non-string items are dropped.
        """
        merged: List[str] = []
        seen: set = set()
        for tag in (*existing, *new):
            if not isinstance(tag, str):
                continue
            t = tag.strip().lower()
            if t and t not in seen:
                merged.append(t)
                seen.add(t)
        return merged
|
|
|
|
|
|
# ------------------------------------------------------------------
|
|
# Module-level helpers
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
def _extract_yaml_instance_prompt(readme_content: str) -> str:
|
|
"""Extract ``instance_prompt`` from the YAML frontmatter of a HF README.
|
|
|
|
Returns the prompt text, or empty string if not found. Handles
|
|
``null`` / ``~`` YAML null values by returning empty string.
|
|
"""
|
|
if not readme_content or not readme_content.startswith("---"):
|
|
return ""
|
|
|
|
# Find end of frontmatter
|
|
end = readme_content.find("---", 3)
|
|
if end == -1:
|
|
return ""
|
|
frontmatter = readme_content[3:end]
|
|
|
|
for line in frontmatter.split("\n"):
|
|
line = line.strip()
|
|
m = re.match(r"^instance_prompt:\s*(.*)", line)
|
|
if m:
|
|
val = m.group(1).strip().strip('"').strip("'")
|
|
if val.lower() in ("null", "~", "none", ""):
|
|
return ""
|
|
return val
|
|
|
|
return ""
|