feat(agent): extract HF widget gallery images into civitai.images with recommended dimensions

- Add extract_gallery_images() to parse YAML widget entries from README
  frontmatter, convert relative image URLs to absolute HF URLs, and
  build civitai.images-compatible entries with prompt metadata
- LLM now extracts recommended_width/recommended_height from README
  (e.g. "Best Dimensions"), used as gallery image dimensions
- extract_gallery_images() accepts default_width/height parameters,
  falling back to 512x512 when LLM provides no recommendation
- Frontend ShowcaseView.js: defensive NaN guard for 0 width/height
- post_processor: consistently merge civitai updates across triggers,
  description, and gallery blocks with distinct variable names
- SKILL.md: add recommended_width/recommended_height to output schema
- 62 tests pass, including gallery extraction and dimension tests
This commit is contained in:
Will Miao
2026-07-03 07:07:19 +08:00
parent 88349bf944
commit ee8250c26c
6 changed files with 314 additions and 9 deletions

View File

@@ -76,7 +76,11 @@ class PostProcessor:
download_preview,
refresh_cache,
)
from .skills.enrich_hf_metadata.md_to_html import convert_readme_to_html
from .skills.enrich_hf_metadata.md_to_html import (
convert_readme_to_html,
extract_gallery_images,
extract_repo_from_hf_url,
)
updated_fields: List[str] = []
preview_downloaded = False
@@ -93,6 +97,7 @@ class PostProcessor:
if new_base and self._should_overwrite(current_base, is_hf_model):
updates["base_model"] = new_base
# trigger words → civitai.trainedWords
new_triggers = llm_output.get("trigger_words", [])
if isinstance(new_triggers, list):
cleaned = [t.strip() for t in new_triggers if t.strip()]
@@ -100,9 +105,11 @@ class PostProcessor:
current_civitai = metadata.get("civitai") or {}
current_triggers = current_civitai.get("trainedWords") or []
if self._should_overwrite_list(current_triggers, is_hf_model):
civitai_updates = dict(current_civitai)
civitai_updates["trainedWords"] = cleaned
updates["civitai"] = civitai_updates
trig_civitai = dict(current_civitai)
if "civitai" in updates and isinstance(updates["civitai"], dict):
trig_civitai.update(updates["civitai"])
trig_civitai["trainedWords"] = cleaned
updates["civitai"] = trig_civitai
# modelDescription — from raw README content (converted to HTML)
if readme_content and is_hf_model:
@@ -114,11 +121,30 @@ class PostProcessor:
short_desc = (llm_output.get("short_description") or "").strip()
if short_desc and is_hf_model:
current_civitai = metadata.get("civitai") or {}
civitai_updates = dict(current_civitai)
desc_civitai = dict(current_civitai)
if "civitai" in updates and isinstance(updates["civitai"], dict):
civitai_updates.update(updates["civitai"])
civitai_updates["description"] = short_desc
updates["civitai"] = civitai_updates
desc_civitai.update(updates["civitai"])
desc_civitai["description"] = short_desc
updates["civitai"] = desc_civitai
# gallery images → civitai.images (from YAML frontmatter widget entries)
if readme_content and is_hf_model:
hf_url = metadata.get("hf_url", "") or ""
repo = extract_repo_from_hf_url(hf_url)
if repo:
rec_w = llm_output.get("recommended_width") or 0
rec_h = llm_output.get("recommended_height") or 0
gallery = extract_gallery_images(
readme_content, repo,
default_width=rec_w, default_height=rec_h,
)
if gallery:
current_civitai = metadata.get("civitai") or {}
gallery_civitai = dict(current_civitai)
if "civitai" in updates and isinstance(updates["civitai"], dict):
gallery_civitai.update(updates["civitai"])
gallery_civitai["images"] = gallery
updates["civitai"] = gallery_civitai
# tags
new_tags = llm_output.get("tags", [])

View File

@@ -78,6 +78,9 @@ Sources to consider:
Return empty array if no meaningful content tags remain after filtering.
### recommended_width, recommended_height
The recommended image generation resolution for this model, in pixels. Look for sections like "Best Dimensions", "Recommended size", "Suggested resolution", or similar phrasing in the README. Resolutions are written as width x height (e.g. "768 x 1024" means width 768 and height 1024). If the table/list has multiple entries (e.g. "768 x 1024 (Best)" and "1024 x 1024 (Default)"), use the one marked "Best"; if none is marked "Best", use the one marked as the default. Return integers. If no resolution can be determined, return 0 for both.
### preview_url
The URL of the most suitable preview image from the README. Look for image tags (e.g. `![alt](url)`) and the YAML frontmatter `widget:` section (which often has `output.url` fields). Choose the first image that appears to be a generation example (not a logo or diagram). Construct the absolute URL as `https://huggingface.co/{{repo}}/resolve/main/{filename}`. If no suitable image is found, return an empty string.
@@ -98,6 +101,8 @@ Return ONLY a JSON object with exactly these fields (no markdown fences, no extr
"trigger_words": ["<word1>", "<word2>"],
"short_description": "<1-2 sentence summary>",
"tags": ["<tag1>", "<tag2>"],
"recommended_width": 768,
"recommended_height": 1024,
"preview_url": "<image URL or empty string>",
"confidence": "<high|medium|low>"
}

View File

@@ -12,6 +12,120 @@ import re
from typing import List, Tuple
# Captures ``user/repo`` immediately after the host.  Each path segment stops
# at a slash, query string (``?``), fragment (``#``) or whitespace so that
# trailing URL parts never leak into the repo identifier.
_REPO_URL_PATTERN = re.compile(
    r"https?://huggingface\.co/([^/?#\s]+/[^/?#\s]+)"
)


def extract_repo_from_hf_url(hf_url: str) -> str:
    """Extract ``user/repo`` from a HuggingFace URL.

    Args:
        hf_url: URL such as ``https://huggingface.co/user/repo`` optionally
            followed by a sub-path (``/tree/main``), a query string or a
            fragment.  Surrounding whitespace is ignored.

    Returns:
        The ``user/repo`` identifier, or an empty string when *hf_url* is not
        a string or does not start with a HuggingFace ``user/repo`` URL.
    """
    if not isinstance(hf_url, str):
        return ""
    m = _REPO_URL_PATTERN.match(hf_url.strip())
    return m.group(1) if m else ""
def extract_gallery_images(
markdown_text: str,
repo: str,
default_width: int = 512,
default_height: int = 512,
) -> List[dict]:
"""Extract widget/gallery images from the YAML frontmatter of a HF README.
Args:
markdown_text: Raw README content.
repo: HF repo identifier (``user/repo``).
default_width: Fallback width when the README provides no dimension.
default_height: Fallback height when the README provides no dimension.
Returns a list of dicts compatible with the ``civitai.images`` metadata
format, each containing ``url`` (absolute HF URL), ``meta.prompt``,
``width``, ``height``, and ``type``. Returns an empty list when no
widget entries are found or when *repo* is empty.
"""
if not markdown_text or not repo:
return []
frontmatter = _extract_frontmatter(markdown_text)
if not frontmatter:
return []
images: List[dict] = []
base_url = f"https://huggingface.co/{repo}/resolve/main"
w = default_width or 512
h = default_height or 512
# Find the `widget:` section
widget_match = re.search(r"^widget:\s*$", frontmatter, re.MULTILINE)
if not widget_match:
return images
# Split entries starting with `- text:`
entries = re.split(r"\n- text:", frontmatter[widget_match.end():])
for entry in entries:
if not entry.strip():
continue
entry = entry.strip()
# Extract text (prompt)
text = ""
# Quoted inline: `"some prompt"`
qm = re.match(r'^"((?:[^"\\]|\\.)*)"', entry)
if qm:
text = qm.group(1)
else:
# Multi-line YAML scalar: `>-\n line1\n line2`
mm = re.match(r"^>(?:-\s*)?\n((?:.+(?:\n|$))+)", entry, re.MULTILINE)
if mm:
raw = mm.group(1)
# Take lines until a line starts with a YAML key (word + colon)
text_lines: list[str] = []
for line in raw.split("\n"):
if re.match(r"^\s*\w+:", line):
break
text_lines.append(line)
text = " ".join(
line.strip() for line in text_lines if line.strip()
)
# Extract output.url
url = ""
url_match = re.search(
r"^\s*output:\s*\n\s+url:\s*(.+?)\s*$", entry, re.MULTILINE
)
if url_match:
raw_path = url_match.group(1).strip().strip("'\"")
if raw_path and not raw_path.startswith("http"):
url = f"{base_url}/{raw_path.lstrip('/')}"
elif raw_path.startswith("http"):
url = raw_path
if url:
image: dict = {
"url": url,
"type": "image",
"nsfwLevel": 0,
"width": w,
"height": h,
"meta": {"prompt": text, "negativePrompt": ""},
"hasMeta": bool(text),
"hasPositivePrompt": bool(text),
}
images.append(image)
return images
def _extract_frontmatter(text: str) -> str:
"""Return the YAML frontmatter content (without the ``---`` delimiters).
Returns empty string when no frontmatter is found.
"""
if text.startswith("---"):
idx = text.find("---", 3)
if idx != -1:
return text[3:idx]
return ""
def convert_readme_to_html(markdown_text: str | None) -> str:
"""Convert HF README markdown to sanitised HTML."""
if not markdown_text:

View File

@@ -174,7 +174,10 @@ function renderMediaItem(img, index, exampleFiles) {
const localUrl = localFile ? localFile.path : '';
// Calculate appropriate aspect ratio
const aspectRatio = (img.height / img.width) * 100;
// Defensive fallback: 0 width/height → 4:3 default (prevents NaN layout)
const safeW = img.width || 4;
const safeH = img.height || 3;
const aspectRatio = (safeH / safeW) * 100;
const containerWidth = 800; // modal content maximum width
const minHeightPercent = 40;
const maxHeightPercent = (window.innerHeight * 0.6 / containerWidth) * 100;

View File

@@ -469,3 +469,100 @@ class TestConvertReadmeToHtml:
result = convert_readme_to_html(md)
assert "<pre>" not in result
assert "<h2>heading after spacing</h2>" in result
# ======================================================================
# extract_gallery_images — YAML widget → civitai.images
# ======================================================================
class TestExtractGalleryImages:
# Tests for extract_gallery_images() and extract_repo_from_hf_url(), both
# imported inside each test from md_to_html.
# _REPO is the repo id used to build the expected absolute image URLs.
_REPO = "prithivMLmods/Flux-Long-Toon-LoRA"
# _README is a fixture with two widget entries: one double-quoted prompt and
# one folded (``>-``) multi-line prompt, followed by a further frontmatter key.
_README = """---
tags:
- lora
widget:
- text: "a cat"
output:
url: images/cat.png
- text: >-
multi line
prompt here
output:
url: images/dog.png
base_model: flux
---
# Content after frontmatter
"""
# Both entries become images: relative paths are expanded to
# https://huggingface.co/<repo>/resolve/main/<path> and the folded prompt is
# joined into a single line.
def test_extracts_widget_images(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
images = extract_gallery_images(self._README, self._REPO)
assert len(images) == 2
assert images[0]["url"] == (
"https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
"/resolve/main/images/cat.png"
)
assert images[0]["meta"]["prompt"] == "a cat"
assert images[0]["type"] == "image"
assert images[0]["hasMeta"] is True
assert images[0]["hasPositivePrompt"] is True
assert images[1]["url"] == (
"https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
"/resolve/main/images/dog.png"
)
assert images[1]["meta"]["prompt"] == "multi line prompt here"
# Without explicit dimensions the first image is reported as 512x512.
def test_default_dimensions_used(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
images = extract_gallery_images(self._README, self._REPO)
assert images[0]["width"] == 512
assert images[0]["height"] == 512
# Caller-supplied default_width/default_height are applied to the first image.
def test_custom_dimensions_applied(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
images = extract_gallery_images(
self._README, self._REPO,
default_width=768, default_height=1024,
)
assert images[0]["width"] == 768
assert images[0]["height"] == 1024
# An empty README and a README without frontmatter both yield no images.
def test_empty_readme_returns_empty(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
assert extract_gallery_images("", self._REPO) == []
assert extract_gallery_images("no frontmatter here", self._REPO) == []
# An empty repo id yields no images even when the README has widget entries.
def test_empty_repo_returns_empty(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
assert extract_gallery_images(self._README, "") == []
# Frontmatter without a ``widget:`` key yields no images.
def test_no_widget_returns_empty(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_gallery_images
md = "---\ntags:\n - lora\n---\n\nContent"
assert extract_gallery_images(md, self._REPO) == []
# A HuggingFace model URL maps to ``user/repo``; an empty string or a
# non-URL string maps to "".
def test_extract_repo_from_hf_url(self):
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
extract_repo_from_hf_url
assert extract_repo_from_hf_url(
"https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
) == "prithivMLmods/Flux-Long-Toon-LoRA"
assert extract_repo_from_hf_url("") == ""
assert extract_repo_from_hf_url("not a url") == ""

View File

@@ -69,6 +69,8 @@ class TestEnrichHfMetadata:
"trigger_words": [],
"short_description": "",
"tags": [],
"recommended_width": 0,
"recommended_height": 0,
"preview_url": "",
"confidence": "low",
}
@@ -244,6 +246,64 @@ class TestEnrichHfMetadata:
applied = mock_apply.call_args[0][1]
assert "modelDescription" not in applied
# -- gallery images → civitai.images ---------------------------------
@pytest.mark.asyncio
async def test_gallery_images_extracted_from_readme(self, processor):
"""Widget entries in README → civitai.images."""
# README fixture with a single widget entry (quoted prompt + relative URL).
readme = """---
widget:
- text: "a cat"
output:
url: images/cat.png
---
Content
"""
# The three py.agent_cli helpers are patched; mock_apply records the call
# made to apply_metadata_updates so its arguments can be inspected below.
with (
mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
mock.patch("py.agent_cli.download_preview", return_value=None),
mock.patch("py.agent_cli.refresh_cache"),
):
# from_civitai is False and hf_url points at ``user/repo``.
await processor.process(
skill_name="enrich_hf_metadata",
model_path="/p.safetensors",
llm_output=self.MIN_LLM_OUTPUT,
metadata={
"from_civitai": False,
"hf_url": "https://huggingface.co/user/repo",
},
readme_content=readme,
)
# Second positional argument of the recorded call — presumably the dict of
# metadata updates; confirm against apply_metadata_updates' signature.
applied = mock_apply.call_args[0][1]
images = applied.get("civitai", {}).get("images", [])
assert len(images) == 1
# The relative widget path is expanded against the repo taken from hf_url.
assert images[0]["url"] == (
"https://huggingface.co/user/repo/resolve/main/images/cat.png"
)
assert images[0]["meta"]["prompt"] == "a cat"
@pytest.mark.asyncio
async def test_gallery_images_skipped_for_civitai_model(self, processor):
"""Gallery images NOT extracted for CivitAI models."""
# Same patching as the positive gallery test; only from_civitai differs.
with (
mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
mock.patch("py.agent_cli.download_preview", return_value=None),
mock.patch("py.agent_cli.refresh_cache"),
):
# The README does contain a widget entry, so a missing ``images`` key can
# only come from the from_civitai=True flag.
await processor.process(
skill_name="enrich_hf_metadata",
model_path="/p.safetensors",
llm_output=self.MIN_LLM_OUTPUT,
metadata={
"from_civitai": True,
"hf_url": "https://huggingface.co/user/repo",
},
readme_content="---\nwidget:\n- text: a\n output:\n url: x.png\n---\n",
)
# Second positional argument of the recorded apply_metadata_updates call.
applied = mock_apply.call_args[0][1]
civitai = applied.get("civitai", {})
assert "images" not in civitai
# -- tags ------------------------------------------------------------
@pytest.mark.asyncio