feat(agent): extract HF widget gallery images into civitai.images with recommended dimensions

- Add extract_gallery_images() to parse YAML widget entries from README frontmatter, convert relative image URLs to absolute HF URLs, and build civitai.images-compatible entries with prompt metadata
- LLM now extracts recommended_width/recommended_height from the README (e.g. "Best Dimensions"); these are used as the gallery image dimensions
- extract_gallery_images() accepts default_width/default_height parameters, falling back to 512x512 when the LLM provides no recommendation
- Frontend ShowcaseView.js: defensive NaN guard for 0 width/height
- post_processor: consistently merge civitai updates across the trigger-word, description, and gallery blocks, using distinct variable names
- SKILL.md: add recommended_width/recommended_height to the output schema
- 62 tests pass, including gallery extraction and dimension tests
2026-07-05 17:01:16 -03:00 · 2026-07-03 07:07:19 +08:00
parent 88349bf944
commit ee8250c26c
6 changed files with 314 additions and 9 deletions
--- a/py/services/agent/post_processor.py
+++ b/py/services/agent/post_processor.py
@@ -76,7 +76,11 @@ class PostProcessor:
            download_preview,
            refresh_cache,
        )
-        from .skills.enrich_hf_metadata.md_to_html import convert_readme_to_html
+        from .skills.enrich_hf_metadata.md_to_html import (
+            convert_readme_to_html,
+            extract_gallery_images,
+            extract_repo_from_hf_url,
+        )

        updated_fields: List[str] = []
        preview_downloaded = False
@@ -93,6 +97,7 @@ class PostProcessor:
        if new_base and self._should_overwrite(current_base, is_hf_model):
            updates["base_model"] = new_base

+        # trigger words → civitai.trainedWords
        new_triggers = llm_output.get("trigger_words", [])
        if isinstance(new_triggers, list):
            cleaned = [t.strip() for t in new_triggers if t.strip()]
@@ -100,9 +105,11 @@ class PostProcessor:
            current_civitai = metadata.get("civitai") or {}
            current_triggers = current_civitai.get("trainedWords") or []
            if self._should_overwrite_list(current_triggers, is_hf_model):
-                civitai_updates = dict(current_civitai)
-                civitai_updates["trainedWords"] = cleaned
-                updates["civitai"] = civitai_updates
+                trig_civitai = dict(current_civitai)
+                if "civitai" in updates and isinstance(updates["civitai"], dict):
+                    trig_civitai.update(updates["civitai"])
+                trig_civitai["trainedWords"] = cleaned
+                updates["civitai"] = trig_civitai

        # modelDescription — from raw README content (converted to HTML)
        if readme_content and is_hf_model:
@@ -114,11 +121,30 @@ class PostProcessor:
        short_desc = (llm_output.get("short_description") or "").strip()
        if short_desc and is_hf_model:
            current_civitai = metadata.get("civitai") or {}
-            civitai_updates = dict(current_civitai)
+            desc_civitai = dict(current_civitai)
            if "civitai" in updates and isinstance(updates["civitai"], dict):
-                civitai_updates.update(updates["civitai"])
-            civitai_updates["description"] = short_desc
-            updates["civitai"] = civitai_updates
+                desc_civitai.update(updates["civitai"])
+            desc_civitai["description"] = short_desc
+            updates["civitai"] = desc_civitai
+
+        # gallery images → civitai.images (from YAML frontmatter widget entries)
+        if readme_content and is_hf_model:
+            hf_url = metadata.get("hf_url", "") or ""
+            repo = extract_repo_from_hf_url(hf_url)
+            if repo:
+                rec_w = llm_output.get("recommended_width") or 0
+                rec_h = llm_output.get("recommended_height") or 0
+                gallery = extract_gallery_images(
+                    readme_content, repo,
+                    default_width=rec_w, default_height=rec_h,
+                )
+                if gallery:
+                    current_civitai = metadata.get("civitai") or {}
+                    gallery_civitai = dict(current_civitai)
+                    if "civitai" in updates and isinstance(updates["civitai"], dict):
+                        gallery_civitai.update(updates["civitai"])
+                    gallery_civitai["images"] = gallery
+                    updates["civitai"] = gallery_civitai

        # tags
        new_tags = llm_output.get("tags", [])
--- a/py/services/agent/skills/enrich_hf_metadata/SKILL.md
+++ b/py/services/agent/skills/enrich_hf_metadata/SKILL.md
@@ -78,6 +78,9 @@ Sources to consider:

 Return empty array if no meaningful content tags remain after filtering.

+### recommended_width, recommended_height
+The recommended image generation resolution for this model, in pixels. Look for sections like "Best Dimensions", "Recommended size", "Suggested resolution", or similar phrasing in the README. Prefer the explicitly marked "Best" or default resolution. If the table/list has multiple entries (e.g. "768 x 1024 (Best)" and "1024 x 1024 (Default)"), use the one marked "Best". Return integers. If no resolution can be determined, return 0 for both.
+
 ### preview_url
 The URL of the most suitable preview image from the README. Look for image tags (e.g. `![alt](url)`) and the YAML frontmatter `widget:` section (which often has `output.url` fields). Choose the first image that appears to be a generation example (not a logo or diagram). Construct the absolute URL as `https://huggingface.co/{{repo}}/resolve/main/{filename}`. If no suitable image is found, return an empty string.

@@ -98,6 +101,8 @@ Return ONLY a JSON object with exactly these fields (no markdown fences, no extr
  "trigger_words": ["<word1>", "<word2>"],
  "short_description": "<1-2 sentence summary>",
  "tags": ["<tag1>", "<tag2>"],
+  "recommended_width": 768,
+  "recommended_height": 1024,
  "preview_url": "<image URL or empty string>",
  "confidence": "<high|medium|low>"
 }
--- a/py/services/agent/skills/enrich_hf_metadata/md_to_html.py
+++ b/py/services/agent/skills/enrich_hf_metadata/md_to_html.py
@@ -12,6 +12,120 @@ import re
 from typing import List, Tuple


+_REPO_URL_PATTERN = re.compile(r"https?://huggingface\.co/([^/]+/[^/]+)")
+
+
+def extract_repo_from_hf_url(hf_url: str) -> str:
+    """Extract ``user/repo`` from a HuggingFace URL."""
+    m = _REPO_URL_PATTERN.match(hf_url)
+    return m.group(1) if m else ""
+
+
+def extract_gallery_images(
+    markdown_text: str,
+    repo: str,
+    default_width: int = 512,
+    default_height: int = 512,
+) -> List[dict]:
+    """Extract widget/gallery images from the YAML frontmatter of a HF README.
+
+    Args:
+        markdown_text: Raw README content.
+        repo: HF repo identifier (``user/repo``).
+        default_width: Fallback width when the README provides no dimension.
+        default_height: Fallback height when the README provides no dimension.
+
+    Returns a list of dicts compatible with the ``civitai.images`` metadata
+    format, each containing ``url`` (absolute HF URL), ``meta.prompt``,
+    ``width``, ``height``, and ``type``.  Returns an empty list when no
+    widget entries are found or when *repo* is empty.
+    """
+    if not markdown_text or not repo:
+        return []
+
+    frontmatter = _extract_frontmatter(markdown_text)
+    if not frontmatter:
+        return []
+
+    images: List[dict] = []
+    base_url = f"https://huggingface.co/{repo}/resolve/main"
+    w = default_width or 512
+    h = default_height or 512
+
+    # Find the `widget:` section
+    widget_match = re.search(r"^widget:\s*$", frontmatter, re.MULTILINE)
+    if not widget_match:
+        return images
+
+    # Split entries starting with `- text:`
+    entries = re.split(r"\n- text:", frontmatter[widget_match.end():])
+    for entry in entries:
+        if not entry.strip():
+            continue
+
+        entry = entry.strip()
+
+        # Extract text (prompt)
+        text = ""
+        # Quoted inline: `"some prompt"`
+        qm = re.match(r'^"((?:[^"\\]|\\.)*)"', entry)
+        if qm:
+            text = qm.group(1)
+        else:
+            # Multi-line YAML scalar: `>-\n    line1\n    line2`
+            mm = re.match(r"^>(?:-\s*)?\n((?:.+(?:\n|$))+)", entry, re.MULTILINE)
+            if mm:
+                raw = mm.group(1)
+                # Take lines until a line starts with a YAML key (word + colon)
+                text_lines: list[str] = []
+                for line in raw.split("\n"):
+                    if re.match(r"^\s*\w+:", line):
+                        break
+                    text_lines.append(line)
+                text = " ".join(
+                    line.strip() for line in text_lines if line.strip()
+                )
+
+        # Extract output.url
+        url = ""
+        url_match = re.search(
+            r"^\s*output:\s*\n\s+url:\s*(.+?)\s*$", entry, re.MULTILINE
+        )
+        if url_match:
+            raw_path = url_match.group(1).strip().strip("'\"")
+            if raw_path and not raw_path.startswith("http"):
+                url = f"{base_url}/{raw_path.lstrip('/')}"
+            elif raw_path.startswith("http"):
+                url = raw_path
+
+        if url:
+            image: dict = {
+                "url": url,
+                "type": "image",
+                "nsfwLevel": 0,
+                "width": w,
+                "height": h,
+                "meta": {"prompt": text, "negativePrompt": ""},
+                "hasMeta": bool(text),
+                "hasPositivePrompt": bool(text),
+            }
+            images.append(image)
+
+    return images
+
+
+def _extract_frontmatter(text: str) -> str:
+    """Return the YAML frontmatter content (without the ``---`` delimiters).
+
+    Returns empty string when no frontmatter is found.
+    """
+    if text.startswith("---"):
+        idx = text.find("---", 3)
+        if idx != -1:
+            return text[3:idx]
+    return ""
+
+
 def convert_readme_to_html(markdown_text: str | None) -> str:
    """Convert HF README markdown to sanitised HTML."""
    if not markdown_text:
--- a/static/js/components/shared/showcase/ShowcaseView.js
+++ b/static/js/components/shared/showcase/ShowcaseView.js
@@ -174,7 +174,10 @@ function renderMediaItem(img, index, exampleFiles) {
    const localUrl = localFile ? localFile.path : '';
    
    // Calculate appropriate aspect ratio
-    const aspectRatio = (img.height / img.width) * 100;
+    // Defensive fallback: 0 width/height → 4:3 default (prevents NaN layout)
+    const safeW = img.width || 4;
+    const safeH = img.height || 3;
+    const aspectRatio = (safeH / safeW) * 100;
    const containerWidth = 800; // modal content maximum width
    const minHeightPercent = 40; 
    const maxHeightPercent = (window.innerHeight * 0.6 / containerWidth) * 100;
--- a/tests/agent_cli/test_agent_cli.py
+++ b/tests/agent_cli/test_agent_cli.py
@@ -469,3 +469,100 @@ class TestConvertReadmeToHtml:
        result = convert_readme_to_html(md)
        assert "<pre>" not in result
        assert "<h2>heading after spacing</h2>" in result
+
+
+# ======================================================================
+# extract_gallery_images  —  YAML widget → civitai.images
+# ======================================================================
+
+
+class TestExtractGalleryImages:
+
+    _REPO = "prithivMLmods/Flux-Long-Toon-LoRA"
+    _README = """---
+tags:
+- lora
+widget:
+- text: "a cat"
+  output:
+    url: images/cat.png
+- text: >-
+    multi line
+    prompt here
+  output:
+    url: images/dog.png
+base_model: flux
+---
+# Content after frontmatter
+"""
+
+    def test_extracts_widget_images(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        images = extract_gallery_images(self._README, self._REPO)
+        assert len(images) == 2
+
+        assert images[0]["url"] == (
+            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
+            "/resolve/main/images/cat.png"
+        )
+        assert images[0]["meta"]["prompt"] == "a cat"
+        assert images[0]["type"] == "image"
+        assert images[0]["hasMeta"] is True
+        assert images[0]["hasPositivePrompt"] is True
+
+        assert images[1]["url"] == (
+            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
+            "/resolve/main/images/dog.png"
+        )
+        assert images[1]["meta"]["prompt"] == "multi line prompt here"
+
+    def test_default_dimensions_used(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        images = extract_gallery_images(self._README, self._REPO)
+        assert images[0]["width"] == 512
+        assert images[0]["height"] == 512
+
+    def test_custom_dimensions_applied(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        images = extract_gallery_images(
+            self._README, self._REPO,
+            default_width=768, default_height=1024,
+        )
+        assert images[0]["width"] == 768
+        assert images[0]["height"] == 1024
+
+    def test_empty_readme_returns_empty(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        assert extract_gallery_images("", self._REPO) == []
+        assert extract_gallery_images("no frontmatter here", self._REPO) == []
+
+    def test_empty_repo_returns_empty(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        assert extract_gallery_images(self._README, "") == []
+
+    def test_no_widget_returns_empty(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_gallery_images
+
+        md = "---\ntags:\n  - lora\n---\n\nContent"
+        assert extract_gallery_images(md, self._REPO) == []
+
+    def test_extract_repo_from_hf_url(self):
+        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
+            extract_repo_from_hf_url
+
+        assert extract_repo_from_hf_url(
+            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
+        ) == "prithivMLmods/Flux-Long-Toon-LoRA"
+        assert extract_repo_from_hf_url("") == ""
+        assert extract_repo_from_hf_url("not a url") == ""
--- a/tests/services/test_post_processor.py
+++ b/tests/services/test_post_processor.py
@@ -69,6 +69,8 @@ class TestEnrichHfMetadata:
        "trigger_words": [],
        "short_description": "",
        "tags": [],
+        "recommended_width": 0,
+        "recommended_height": 0,
        "preview_url": "",
        "confidence": "low",
    }
@@ -244,6 +246,64 @@ class TestEnrichHfMetadata:
        applied = mock_apply.call_args[0][1]
        assert "modelDescription" not in applied

+    # -- gallery images → civitai.images ---------------------------------
+
+    @pytest.mark.asyncio
+    async def test_gallery_images_extracted_from_readme(self, processor):
+        """Widget entries in README → civitai.images."""
+        readme = """---
+widget:
+- text: "a cat"
+  output:
+    url: images/cat.png
+---
+Content
+"""
+        with (
+            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
+            mock.patch("py.agent_cli.download_preview", return_value=None),
+            mock.patch("py.agent_cli.refresh_cache"),
+        ):
+            await processor.process(
+                skill_name="enrich_hf_metadata",
+                model_path="/p.safetensors",
+                llm_output=self.MIN_LLM_OUTPUT,
+                metadata={
+                    "from_civitai": False,
+                    "hf_url": "https://huggingface.co/user/repo",
+                },
+                readme_content=readme,
+            )
+        applied = mock_apply.call_args[0][1]
+        images = applied.get("civitai", {}).get("images", [])
+        assert len(images) == 1
+        assert images[0]["url"] == (
+            "https://huggingface.co/user/repo/resolve/main/images/cat.png"
+        )
+        assert images[0]["meta"]["prompt"] == "a cat"
+
+    @pytest.mark.asyncio
+    async def test_gallery_images_skipped_for_civitai_model(self, processor):
+        """Gallery images NOT extracted for CivitAI models."""
+        with (
+            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
+            mock.patch("py.agent_cli.download_preview", return_value=None),
+            mock.patch("py.agent_cli.refresh_cache"),
+        ):
+            await processor.process(
+                skill_name="enrich_hf_metadata",
+                model_path="/p.safetensors",
+                llm_output=self.MIN_LLM_OUTPUT,
+                metadata={
+                    "from_civitai": True,
+                    "hf_url": "https://huggingface.co/user/repo",
+                },
+                readme_content="---\nwidget:\n- text: a\n  output:\n    url: x.png\n---\n",
+            )
+        applied = mock_apply.call_args[0][1]
+        civitai = applied.get("civitai", {})
+        assert "images" not in civitai
+
    # -- tags ------------------------------------------------------------

    @pytest.mark.asyncio