feat(agent): extract HF widget gallery images into civitai.images with recommended dimensions

- Add extract_gallery_images() to parse YAML widget entries from README frontmatter, convert relative image URLs to absolute HF URLs, and build civitai.images-compatible entries with prompt metadata
- LLM now extracts recommended_width/recommended_height from README (e.g. "Best Dimensions"), used as gallery image dimensions
- extract_gallery_images() accepts default_width/height parameters, falling back to 512x512 when LLM provides no recommendation
- Frontend ShowcaseView.js: defensive NaN guard for 0 width/height
- post_processor: consistently merge civitai updates across triggers, description, and gallery blocks with distinct variable names
- SKILL.md: add recommended_width/recommended_height to output schema
- 62 tests pass, including gallery extraction and dimension tests
2026-07-05 17:01:16 -03:00 · 2026-07-03 07:07:19 +08:00
parent 88349bf944
commit ee8250c26c
6 changed files with 314 additions and 9 deletions
--- a/py/services/agent/post_processor.py
+++ b/py/services/agent/post_processor.py
@@ -76,7 +76,11 @@ class PostProcessor:
            download_preview,
            refresh_cache,
        )
-        from .skills.enrich_hf_metadata.md_to_html import convert_readme_to_html
+        from .skills.enrich_hf_metadata.md_to_html import (
            convert_readme_to_html,
            extract_gallery_images,
            extract_repo_from_hf_url,
        )
        updated_fields: List[str] = []
        preview_downloaded = False
@@ -93,6 +97,7 @@ class PostProcessor:
        if new_base and self._should_overwrite(current_base, is_hf_model):
            updates["base_model"] = new_base
        # trigger words → civitai.trainedWords
        new_triggers = llm_output.get("trigger_words", [])
        if isinstance(new_triggers, list):
            cleaned = [t.strip() for t in new_triggers if t.strip()]
@@ -100,9 +105,11 @@ class PostProcessor:
            current_civitai = metadata.get("civitai") or {}
            current_triggers = current_civitai.get("trainedWords") or []
            if self._should_overwrite_list(current_triggers, is_hf_model):
-                civitai_updates = dict(current_civitai)
+                trig_civitai = dict(current_civitai)
-                civitai_updates["trainedWords"] = cleaned
+                if "civitai" in updates and isinstance(updates["civitai"], dict):
-                updates["civitai"] = civitai_updates
+                    trig_civitai.update(updates["civitai"])
                trig_civitai["trainedWords"] = cleaned
                updates["civitai"] = trig_civitai
        # modelDescription — from raw README content (converted to HTML)
        if readme_content and is_hf_model:
@@ -114,11 +121,30 @@ class PostProcessor:
        short_desc = (llm_output.get("short_description") or "").strip()
        if short_desc and is_hf_model:
            current_civitai = metadata.get("civitai") or {}
-            civitai_updates = dict(current_civitai)
+            desc_civitai = dict(current_civitai)
            if "civitai" in updates and isinstance(updates["civitai"], dict):
-                civitai_updates.update(updates["civitai"])
+                desc_civitai.update(updates["civitai"])
-            civitai_updates["description"] = short_desc
+            desc_civitai["description"] = short_desc
-            updates["civitai"] = civitai_updates
+            updates["civitai"] = desc_civitai
        # gallery images → civitai.images (from YAML frontmatter widget entries)
        if readme_content and is_hf_model:
            hf_url = metadata.get("hf_url", "") or ""
            repo = extract_repo_from_hf_url(hf_url)
            if repo:
                rec_w = llm_output.get("recommended_width") or 0
                rec_h = llm_output.get("recommended_height") or 0
                gallery = extract_gallery_images(
                    readme_content, repo,
                    default_width=rec_w, default_height=rec_h,
                )
                if gallery:
                    current_civitai = metadata.get("civitai") or {}
                    gallery_civitai = dict(current_civitai)
                    if "civitai" in updates and isinstance(updates["civitai"], dict):
                        gallery_civitai.update(updates["civitai"])
                    gallery_civitai["images"] = gallery
                    updates["civitai"] = gallery_civitai
        # tags
        new_tags = llm_output.get("tags", [])
--- a/py/services/agent/skills/enrich_hf_metadata/SKILL.md
+++ b/py/services/agent/skills/enrich_hf_metadata/SKILL.md
@@ -78,6 +78,9 @@ Sources to consider:
 Return empty array if no meaningful content tags remain after filtering.
 ### recommended_width, recommended_height
 The recommended image generation resolution for this model, in pixels. Look for sections like "Best Dimensions", "Recommended size", "Suggested resolution", or similar phrasing in the README. Prefer the explicitly marked "Best" or default resolution. If the table/list has multiple entries (e.g. "768 x 1024 (Best)" and "1024 x 1024 (Default)"), use the one marked "Best". Return integers. If no resolution can be determined, return 0 for both.
 ### preview_url
 The URL of the most suitable preview image from the README. Look for image tags (e.g. `![alt](url)`) and the YAML frontmatter `widget:` section (which often has `output.url` fields). Choose the first image that appears to be a generation example (not a logo or diagram). Construct the absolute URL as `https://huggingface.co/{{repo}}/resolve/main/{filename}`. If no suitable image is found, return an empty string.
@@ -98,6 +101,8 @@ Return ONLY a JSON object with exactly these fields (no markdown fences, no extr
  "trigger_words": ["<word1>", "<word2>"],
  "short_description": "<1-2 sentence summary>",
  "tags": ["<tag1>", "<tag2>"],
  "recommended_width": 768,
  "recommended_height": 1024,
  "preview_url": "<image URL or empty string>",
  "confidence": "<high|medium|low>"
 }
--- a/py/services/agent/skills/enrich_hf_metadata/md_to_html.py
+++ b/py/services/agent/skills/enrich_hf_metadata/md_to_html.py
@@ -12,6 +12,120 @@ import re
 from typing import List, Tuple
 # Each path segment stops at "/", "?", "#" or whitespace so that query strings
 # and fragments (e.g. ``.../user/repo?download=true``) never leak into the
 # repo identifier that is later used to build ``resolve/main`` URLs.
 _REPO_URL_PATTERN = re.compile(
    r"https?://huggingface\.co/([^/?#\s]+/[^/?#\s]+)"
 )

 def extract_repo_from_hf_url(hf_url: str) -> str:
    """Extract ``user/repo`` from a HuggingFace URL.

    Args:
        hf_url: A URL such as ``https://huggingface.co/user/repo`` or a
            deeper link into the repo (``.../user/repo/tree/main``).
            Surrounding whitespace is ignored.

    Returns:
        The ``user/repo`` identifier, or an empty string when *hf_url* is
        not a string or does not start with a HuggingFace repo URL.
    """
    # Metadata values come from JSON and may be missing/None; treat anything
    # that is not a string as "no URL" instead of raising.
    if not isinstance(hf_url, str):
        return ""
    m = _REPO_URL_PATTERN.match(hf_url.strip())
    return m.group(1) if m else ""
 def extract_gallery_images(
    markdown_text: str,
    repo: str,
    default_width: int = 512,
    default_height: int = 512,
 ) -> List[dict]:
    """Extract widget/gallery images from the YAML frontmatter of a HF README.
    Args:
        markdown_text: Raw README content.
        repo: HF repo identifier (``user/repo``).
        default_width: Fallback width when the README provides no dimension.
        default_height: Fallback height when the README provides no dimension.
    Returns a list of dicts compatible with the ``civitai.images`` metadata
    format, each containing ``url`` (absolute HF URL), ``meta.prompt``,
    ``width``, ``height``, and ``type``.  Returns an empty list when no
    widget entries are found or when *repo* is empty.
    """
    if not markdown_text or not repo:
        return []
    frontmatter = _extract_frontmatter(markdown_text)
    if not frontmatter:
        return []
    images: List[dict] = []
    base_url = f"https://huggingface.co/{repo}/resolve/main"
    w = default_width or 512
    h = default_height or 512
    # Find the `widget:` section
    widget_match = re.search(r"^widget:\s*$", frontmatter, re.MULTILINE)
    if not widget_match:
        return images
    # Split entries starting with `- text:`
    entries = re.split(r"\n- text:", frontmatter[widget_match.end():])
    for entry in entries:
        if not entry.strip():
            continue
        entry = entry.strip()
        # Extract text (prompt)
        text = ""
        # Quoted inline: `"some prompt"`
        qm = re.match(r'^"((?:[^"\\]|\\.)*)"', entry)
        if qm:
            text = qm.group(1)
        else:
            # Multi-line YAML scalar: `>-\n    line1\n    line2`
            mm = re.match(r"^>(?:-\s*)?\n((?:.+(?:\n|$))+)", entry, re.MULTILINE)
            if mm:
                raw = mm.group(1)
                # Take lines until a line starts with a YAML key (word + colon)
                text_lines: list[str] = []
                for line in raw.split("\n"):
                    if re.match(r"^\s*\w+:", line):
                        break
                    text_lines.append(line)
                text = " ".join(
                    line.strip() for line in text_lines if line.strip()
                )
        # Extract output.url
        url = ""
        url_match = re.search(
            r"^\s*output:\s*\n\s+url:\s*(.+?)\s*$", entry, re.MULTILINE
        )
        if url_match:
            raw_path = url_match.group(1).strip().strip("'\"")
            if raw_path and not raw_path.startswith("http"):
                url = f"{base_url}/{raw_path.lstrip('/')}"
            elif raw_path.startswith("http"):
                url = raw_path
        if url:
            image: dict = {
                "url": url,
                "type": "image",
                "nsfwLevel": 0,
                "width": w,
                "height": h,
                "meta": {"prompt": text, "negativePrompt": ""},
                "hasMeta": bool(text),
                "hasPositivePrompt": bool(text),
            }
            images.append(image)
    return images
 def _extract_frontmatter(text: str) -> str:
    """Return the YAML frontmatter content (without the ``---`` delimiters).
    Returns empty string when no frontmatter is found.
    """
    if text.startswith("---"):
        idx = text.find("---", 3)
        if idx != -1:
            return text[3:idx]
    return ""
 def convert_readme_to_html(markdown_text: str | None) -> str:
    """Convert HF README markdown to sanitised HTML."""
    if not markdown_text:
--- a/static/js/components/shared/showcase/ShowcaseView.js
+++ b/static/js/components/shared/showcase/ShowcaseView.js
@@ -174,7 +174,10 @@ function renderMediaItem(img, index, exampleFiles) {
    const localUrl = localFile ? localFile.path : '';
    // Calculate appropriate aspect ratio
-    const aspectRatio = (img.height / img.width) * 100;
+    // Defensive fallback: 0 width/height → 4:3 default (prevents NaN layout)
    const safeW = img.width || 4;
    const safeH = img.height || 3;
    const aspectRatio = (safeH / safeW) * 100;
    const containerWidth = 800; // modal content maximum width
    const minHeightPercent = 40; 
    const maxHeightPercent = (window.innerHeight * 0.6 / containerWidth) * 100;
--- a/tests/agent_cli/test_agent_cli.py
+++ b/tests/agent_cli/test_agent_cli.py
@@ -469,3 +469,100 @@ class TestConvertReadmeToHtml:
        result = convert_readme_to_html(md)
        assert "<pre>" not in result
        assert "<h2>heading after spacing</h2>" in result
 # ======================================================================
 # extract_gallery_images  —  YAML widget → civitai.images
 # ======================================================================
 class TestExtractGalleryImages:
    """Tests for ``extract_gallery_images`` and ``extract_repo_from_hf_url``."""
    _REPO = "prithivMLmods/Flux-Long-Toon-LoRA"
    # README fixture: frontmatter with two widget entries (one quoted prompt,
    # one ``>-`` multi-line prompt) followed by a ``base_model`` key.
    _README = """---
 tags:
 - lora
 widget:
 - text: "a cat"
  output:
    url: images/cat.png
 - text: >-
    multi line
    prompt here
  output:
    url: images/dog.png
 base_model: flux
 ---
 # Content after frontmatter
 """
    def test_extracts_widget_images(self):
        """Both widget entries yield absolute resolve/main URLs and prompts."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        images = extract_gallery_images(self._README, self._REPO)
        assert len(images) == 2
        assert images[0]["url"] == (
            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
            "/resolve/main/images/cat.png"
        )
        assert images[0]["meta"]["prompt"] == "a cat"
        assert images[0]["type"] == "image"
        assert images[0]["hasMeta"] is True
        assert images[0]["hasPositivePrompt"] is True
        assert images[1]["url"] == (
            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
            "/resolve/main/images/dog.png"
        )
        # The folded multi-line prompt is joined into one space-separated line.
        assert images[1]["meta"]["prompt"] == "multi line prompt here"
    def test_default_dimensions_used(self):
        """Without explicit defaults the first image is 512x512."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        images = extract_gallery_images(self._README, self._REPO)
        assert images[0]["width"] == 512
        assert images[0]["height"] == 512
    def test_custom_dimensions_applied(self):
        """Explicit default_width/default_height are stored on the image."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        images = extract_gallery_images(
            self._README, self._REPO,
            default_width=768, default_height=1024,
        )
        assert images[0]["width"] == 768
        assert images[0]["height"] == 1024
    def test_empty_readme_returns_empty(self):
        """An empty README or one without frontmatter yields no images."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        assert extract_gallery_images("", self._REPO) == []
        assert extract_gallery_images("no frontmatter here", self._REPO) == []
    def test_empty_repo_returns_empty(self):
        """An empty repo identifier yields no images."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        assert extract_gallery_images(self._README, "") == []
    def test_no_widget_returns_empty(self):
        """Frontmatter without a ``widget:`` section yields no images."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_gallery_images
        md = "---\ntags:\n  - lora\n---\n\nContent"
        assert extract_gallery_images(md, self._REPO) == []
    def test_extract_repo_from_hf_url(self):
        """A HF URL maps to ``user/repo``; empty or non-URL input maps to ""."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
            extract_repo_from_hf_url
        assert extract_repo_from_hf_url(
            "https://huggingface.co/prithivMLmods/Flux-Long-Toon-LoRA"
        ) == "prithivMLmods/Flux-Long-Toon-LoRA"
        assert extract_repo_from_hf_url("") == ""
        assert extract_repo_from_hf_url("not a url") == ""
--- a/tests/services/test_post_processor.py
+++ b/tests/services/test_post_processor.py
@@ -69,6 +69,8 @@ class TestEnrichHfMetadata:
        "trigger_words": [],
        "short_description": "",
        "tags": [],
        "recommended_width": 0,
        "recommended_height": 0,
        "preview_url": "",
        "confidence": "low",
    }
@@ -244,6 +246,64 @@ class TestEnrichHfMetadata:
        applied = mock_apply.call_args[0][1]
        assert "modelDescription" not in applied
    # -- gallery images → civitai.images ---------------------------------
    @pytest.mark.asyncio
    async def test_gallery_images_extracted_from_readme(self, processor):
        """Widget entries in README → civitai.images."""
        # Minimal README: frontmatter with a single widget entry.
        readme = """---
 widget:
 - text: "a cat"
  output:
    url: images/cat.png
 ---
 Content
 """
        # Replace the py.agent_cli helpers with mocks; the test only inspects
        # the arguments handed to apply_metadata_updates.
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=None),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=self.MIN_LLM_OUTPUT,
                metadata={
                    "from_civitai": False,
                    "hf_url": "https://huggingface.co/user/repo",
                },
                readme_content=readme,
            )
        # Second positional argument of apply_metadata_updates — presumably
        # the updates dict; confirm against py.agent_cli.
        applied = mock_apply.call_args[0][1]
        images = applied.get("civitai", {}).get("images", [])
        assert len(images) == 1
        # The relative widget path is resolved against the repo taken from hf_url.
        assert images[0]["url"] == (
            "https://huggingface.co/user/repo/resolve/main/images/cat.png"
        )
        assert images[0]["meta"]["prompt"] == "a cat"
    @pytest.mark.asyncio
    async def test_gallery_images_skipped_for_civitai_model(self, processor):
        """Gallery images NOT extracted for CivitAI models."""
        # Replace the py.agent_cli helpers with mocks; the test only inspects
        # the arguments handed to apply_metadata_updates.
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=None),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=self.MIN_LLM_OUTPUT,
                metadata={
                    # NOTE(review): from_civitai=True is presumably what marks
                    # the model as non-HF for the processor — confirm.
                    "from_civitai": True,
                    "hf_url": "https://huggingface.co/user/repo",
                },
                # The README does contain a widget entry, so a missing
                # "images" key below is due to the skip, not to empty input.
                readme_content="---\nwidget:\n- text: a\n  output:\n    url: x.png\n---\n",
            )
        applied = mock_apply.call_args[0][1]
        civitai = applied.get("civitai", {})
        assert "images" not in civitai
    # -- tags ------------------------------------------------------------
    @pytest.mark.asyncio