mirror of
https://github.com/willmiao/ComfyUI-Lora-Manager.git
synced 2026-07-05 17:01:16 -03:00
feat(agent): optimize enrich_hf_metadata with README cleaning, Ollama native API, and expanded fields
- Add clean_readme_for_llm() to strip noise from README before LLM injection - Keep widget section text (valuable tag signal) and unmarked code blocks (trigger words) - Preserve standalone image alt text instead of removing entirely - Switch Ollama to native /api/chat with think:false to fix empty content on thinking models - Extract Sample Gallery table images and deduplicate with widget images - Only strip code blocks with explicit language tags (bash) - Add notes and usage_tips fields to SKILL.md output format and post-processor - Clean up dead code, fix regex edge cases, remove double type annotation
This commit is contained in:
@@ -28,6 +28,7 @@ from ..llm_service import LLMService
|
||||
from ..websocket_manager import ws_manager
|
||||
from .post_processor import PostProcessor
|
||||
from .skill_registry import SkillRegistry
|
||||
from .skills.enrich_hf_metadata.md_to_html import clean_readme_for_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -368,7 +369,8 @@ class AgentService:
|
||||
context["repo"] = repo or ""
|
||||
if repo:
|
||||
readme = await self._fetch_readme(repo)
|
||||
context["readme_content"] = readme[:8000] if readme else "(README not available)"
|
||||
cleaned = clean_readme_for_llm(readme) if readme else ""
|
||||
context["readme_content"] = cleaned if cleaned else "(README not available)"
|
||||
context["readme_content_full"] = readme or ""
|
||||
|
||||
try:
|
||||
|
||||
@@ -10,6 +10,7 @@ refresh cache). All actual I/O is delegated to :mod:`~py.agent_cli`.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
@@ -79,6 +80,7 @@ class PostProcessor:
|
||||
from .skills.enrich_hf_metadata.md_to_html import (
|
||||
convert_readme_to_html,
|
||||
extract_gallery_images,
|
||||
extract_gallery_table_images,
|
||||
extract_repo_from_hf_url,
|
||||
)
|
||||
|
||||
@@ -127,23 +129,38 @@ class PostProcessor:
|
||||
desc_civitai["description"] = short_desc
|
||||
updates["civitai"] = desc_civitai
|
||||
|
||||
# gallery images → civitai.images (from YAML frontmatter widget entries)
|
||||
# gallery images → civitai.images (from YAML frontmatter widget entries
|
||||
# and Sample Gallery markdown tables in the README body)
|
||||
gallery_images: List[Dict[str, Any]] = []
|
||||
if readme_content and is_hf_model:
|
||||
hf_url = metadata.get("hf_url", "") or ""
|
||||
repo = extract_repo_from_hf_url(hf_url)
|
||||
if repo:
|
||||
rec_w = llm_output.get("recommended_width") or 0
|
||||
rec_h = llm_output.get("recommended_height") or 0
|
||||
|
||||
# 1. Widget images (YAML frontmatter)
|
||||
gallery = extract_gallery_images(
|
||||
readme_content, repo,
|
||||
default_width=rec_w, default_height=rec_h,
|
||||
)
|
||||
if gallery:
|
||||
|
||||
# 2. Sample Gallery table images (markdown body), deduplicated
|
||||
existing_urls = {img["url"] for img in gallery if img.get("url")}
|
||||
table_images = extract_gallery_table_images(
|
||||
readme_content, repo,
|
||||
existing_urls=existing_urls,
|
||||
default_width=rec_w, default_height=rec_h,
|
||||
)
|
||||
|
||||
all_images = gallery + table_images
|
||||
if all_images:
|
||||
gallery_images = all_images
|
||||
current_civitai = metadata.get("civitai") or {}
|
||||
gallery_civitai = dict(current_civitai)
|
||||
if "civitai" in updates and isinstance(updates["civitai"], dict):
|
||||
gallery_civitai.update(updates["civitai"])
|
||||
gallery_civitai["images"] = gallery
|
||||
gallery_civitai["images"] = all_images
|
||||
updates["civitai"] = gallery_civitai
|
||||
|
||||
# tags
|
||||
@@ -159,6 +176,11 @@ class PostProcessor:
|
||||
updates["llm_enriched_at"] = datetime.now(timezone.utc).isoformat()
|
||||
|
||||
preview_remote_url = (llm_output.get("preview_url") or "").strip()
|
||||
# Fallback: if the LLM couldn't find a preview image in the cleaned
|
||||
# README, use the first gallery image extracted from the YAML widget
|
||||
# section.
|
||||
if not preview_remote_url and gallery_images:
|
||||
preview_remote_url = gallery_images[0].get("url", "")
|
||||
current_preview = metadata.get("preview_url") or ""
|
||||
if preview_remote_url and not (current_preview and os.path.exists(current_preview)):
|
||||
local_path = await download_preview(model_path, preview_remote_url)
|
||||
@@ -166,6 +188,22 @@ class PostProcessor:
|
||||
preview_downloaded = True
|
||||
updates["preview_url"] = local_path
|
||||
|
||||
# notes — plain-text summary of usage info from the LLM
|
||||
new_notes = (llm_output.get("notes") or "").strip()
|
||||
if new_notes:
|
||||
updates["notes"] = new_notes
|
||||
|
||||
# usage_tips — JSON string (e.g. {"strength_min":0.85,"strength_max":1.4})
|
||||
raw_tips = (llm_output.get("usage_tips") or "").strip()
|
||||
if raw_tips and raw_tips != "{}":
|
||||
try:
|
||||
json.loads(raw_tips)
|
||||
updates["usage_tips"] = raw_tips
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning(
|
||||
"LLM returned invalid usage_tips JSON: %s", raw_tips[:200]
|
||||
)
|
||||
|
||||
if updates:
|
||||
updated_fields = await apply_metadata_updates(model_path, updates)
|
||||
|
||||
|
||||
@@ -84,6 +84,25 @@ The recommended image generation resolution for this model, in pixels. Look for
|
||||
### preview_url
|
||||
The URL of the most suitable preview image from the README. Look for image tags (e.g. `![alt text](images/sample.png)`) and the YAML frontmatter `widget:` section (which often has `output.url` fields). Choose the first image that appears to be a generation example (not a logo or diagram). Construct the absolute URL as `https://huggingface.co/{{repo}}/resolve/main/{filename}`. If no suitable image is found, return an empty string.
|
||||
|
||||
### notes
|
||||
A plain-text summary of the model card's key practical usage information. Combine trigger words, style modifiers, recommended parameters (steps, CFG, resolution, sampler), and any setup tips into a readable paragraph. Return empty string if the README has no useful usage info.
|
||||
|
||||
### usage_tips
|
||||
A JSON string with structured usage recommendations. Extract from the README any explicit ranges or recommended values (e.g. "Set LoRA strength: **0.85 - 1.4**", "CLIP strength: 0.5"). Possible fields (include only those you can determine):
|
||||
|
||||
```json
|
||||
{
|
||||
"strength_min": 0.85,
|
||||
"strength_max": 1.4,
|
||||
"strength_range": "0.85-1.4",
|
||||
"strength": 0.6,
|
||||
"clip_strength": 0.5,
|
||||
"clip_skip": 2
|
||||
}
|
||||
```
|
||||
|
||||
Return the JSON string (e.g. `'{"strength_min":0.85,"strength_max":1.4}'`). Return `"{}"` if nothing useful is found.
|
||||
|
||||
### confidence
|
||||
Your confidence level in the extracted data:
|
||||
- "high" — most fields were explicitly stated in the README
|
||||
@@ -104,6 +123,8 @@ Return ONLY a JSON object with exactly these fields (no markdown fences, no extr
|
||||
"recommended_width": 768,
|
||||
"recommended_height": 1024,
|
||||
"preview_url": "<image URL or empty string>",
|
||||
"notes": "<plain-text usage summary or empty string>",
|
||||
"usage_tips": "<JSON string like '{\"strength_min\":0.85,\"strength_max\":1.4}' or '{}'>",
|
||||
"confidence": "<high|medium|low>"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
"""Inline markdown-to-HTML converter for HF README content.
|
||||
"""Inline markdown-to-HTML converter and LLM-prompt cleaner for HF README content.
|
||||
|
||||
No external dependencies. Strips YAML frontmatter, ``<Gallery />`` sections,
|
||||
badge images, and HTML comments before rendering. Only used by the
|
||||
``enrich_hf_metadata`` skill.
|
||||
|
||||
Also provides :func:`clean_readme_for_llm` which pre-processes the raw README
|
||||
before it is injected into the LLM prompt, removing content that has zero value
|
||||
for metadata extraction (language-tagged code blocks, training tables,
|
||||
boilerplate, massive lists, etc.).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -118,6 +123,88 @@ def extract_gallery_images(
|
||||
return images
|
||||
|
||||
|
||||
def extract_gallery_table_images(
|
||||
markdown_text: str,
|
||||
repo: str,
|
||||
existing_urls: set | None = None,
|
||||
default_width: int = 512,
|
||||
default_height: int = 512,
|
||||
) -> list[dict]:
|
||||
"""Extract images from ``| Preview | Prompt |`` markdown gallery tables.
|
||||
|
||||
Many HF READMEs include a sample-gallery table in the body (outside
|
||||
the YAML frontmatter) that shows generation examples with their
|
||||
prompts. This function parses those tables and merges results with
|
||||
the widget-sourced images from :func:`extract_gallery_images`.
|
||||
|
||||
Returns a list of dicts in the same ``civitai.images`` format as
|
||||
:func:`extract_gallery_images`. Already-seen URLs (from *existing_urls*)
|
||||
are skipped.
|
||||
"""
|
||||
if not markdown_text or not repo:
|
||||
return []
|
||||
|
||||
base_url = f"https://huggingface.co/{repo}/resolve/main"
|
||||
images: list[dict] = []
|
||||
seen_urls: set = set(existing_urls) if existing_urls else set()
|
||||
lines = markdown_text.split("\n")
|
||||
n = len(lines)
|
||||
i = 0
|
||||
|
||||
while i < n:
|
||||
line = lines[i]
|
||||
if "|" not in line or i + 1 >= n:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for table separator row
|
||||
if not re.match(r"^\|[\s:-]+\|", lines[i + 1]):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
header_lower = line.strip().lower()
|
||||
first_cell = header_lower.strip("|").split("|")[0].strip() if "|" in header_lower else ""
|
||||
is_gallery = any(kw in first_cell for kw in ("preview", "sample", "gallery", "image", "thumbnail"))
|
||||
if not is_gallery:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Skip header + separator
|
||||
i += 2
|
||||
while i < n and "|" in lines[i]:
|
||||
cells = [c.strip() for c in lines[i].strip().strip("|").split("|")]
|
||||
if len(cells) >= 2:
|
||||
first = cells[0]
|
||||
prompt = cells[1]
|
||||
|
||||
url_match = re.search(r"!\[([^\]]*)\]\(([^)]+)\)", first)
|
||||
if url_match:
|
||||
raw_path = url_match.group(2)
|
||||
if raw_path.startswith("http"):
|
||||
url = raw_path
|
||||
else:
|
||||
# Normalise: remove leading / and ./ prefixes
|
||||
clean = raw_path.lstrip("./").lstrip("/")
|
||||
url = f"{base_url}/{clean}"
|
||||
|
||||
if url not in seen_urls:
|
||||
seen_urls.add(url)
|
||||
images.append({
|
||||
"url": url,
|
||||
"type": "image",
|
||||
"nsfwLevel": 0,
|
||||
"width": default_width,
|
||||
"height": default_height,
|
||||
"meta": {"prompt": prompt, "negativePrompt": ""},
|
||||
"hasMeta": bool(prompt),
|
||||
"hasPositivePrompt": bool(prompt),
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def _extract_frontmatter(text: str) -> str:
|
||||
"""Return the YAML frontmatter content (without the ``---`` delimiters).
|
||||
|
||||
@@ -145,7 +232,260 @@ def convert_readme_to_html(markdown_text: str | None) -> str:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-processing: strip unwanted sections
|
||||
# README cleaning for LLM prompt injection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: Section headers that signal boilerplate content with zero metadata value.
|
||||
_BOILERPLATE_HEADERS: tuple[str, ...] = (
|
||||
"download model",
|
||||
"license",
|
||||
"citation",
|
||||
"links",
|
||||
"disclaimer",
|
||||
"architecture notes",
|
||||
"training details",
|
||||
"dataset",
|
||||
"provenance",
|
||||
)
|
||||
|
||||
#: Table header keywords that identify training-parameter tables.
|
||||
_TRAINING_PARAM_KEYWORDS: tuple[str, ...] = (
|
||||
"lr scheduler",
|
||||
"optimizer",
|
||||
"network dim",
|
||||
"network alpha",
|
||||
"noise offset",
|
||||
"multires noise",
|
||||
"repeat",
|
||||
"epoch",
|
||||
"batch size",
|
||||
"gradient accumulation",
|
||||
"learning rate",
|
||||
"rslora",
|
||||
"dtype",
|
||||
)
|
||||
|
||||
#: Maximum chars before a single-line comma list is considered massive.
|
||||
_MASSIVE_LIST_LINE_MIN_LEN = 150
|
||||
#: Minimum consecutive enumeration lines to trigger massive-list stripping.
|
||||
_MASSIVE_LIST_THRESHOLD = 8
|
||||
|
||||
|
||||
def clean_readme_for_llm(markdown_text: str | None, max_length: int = 6000) -> str:
    """Prepare a HuggingFace README for an LLM metadata-extraction prompt.

    The text is passed through a fixed sequence of cleaning steps, each of
    which takes and returns a string:

    * ``_strip_gallery`` — gallery markup (helper defined elsewhere in this
      module)
    * ``_strip_fenced_code_blocks`` — fenced code blocks that carry a
      language tag; untagged blocks are kept
    * ``_strip_standalone_images`` — images on their own line (markdown
      images are reduced to their alt text, ``<img>`` tags are dropped)
    * ``_strip_training_tables`` — training-parameter tables
    * ``_strip_boilerplate_sections`` — Download / License / Citation / …
    * ``_strip_massive_lists`` — long runs of enumeration-style lines
    * ``_strip_badge_images`` and ``_strip_html_comments`` — helpers defined
      elsewhere in this module
    * ``_compress_blank_lines`` — collapse surplus blank lines

    This function only produces the prompt text.  Callers that need the
    unmodified README (HTML conversion, gallery extraction) must keep it
    separately.

    Args:
        markdown_text: Raw README.md content, or ``None``.
        max_length: Hard ceiling on output length (default 6 000 chars).

    Returns:
        The cleaned markdown, cut to *max_length* characters and stripped of
        surrounding whitespace; ``""`` for empty or ``None`` input.
    """
    if not markdown_text:
        return ""

    # Order matters — broader strips first, then finer ones.
    pipeline = (
        _strip_gallery,
        _strip_fenced_code_blocks,
        _strip_standalone_images,
        _strip_training_tables,
        _strip_boilerplate_sections,
        _strip_massive_lists,
        _strip_badge_images,
        _strip_html_comments,
        _compress_blank_lines,
    )

    cleaned = markdown_text
    for step in pipeline:
        cleaned = step(cleaned)

    # Slicing is a no-op when the text is already short enough.
    return cleaned[:max_length].strip()
|
||||
|
||||
|
||||
def _strip_fenced_code_blocks(text: str) -> str:
|
||||
"""Strip fenced code blocks that have an explicit programming-language tag.
|
||||
|
||||
Blocks without a language tag (just `` ``` ``) are preserved — they
|
||||
often contain trigger words, example prompts, or config snippets
|
||||
rather than actual runnable code.
|
||||
"""
|
||||
# Match opening ``` immediately followed by a word character (the language
|
||||
# tag), then any content, then closing ```. Plain ``` at the start of a
|
||||
# line is left intact. A leading \n is optional (handles blocks at the
|
||||
# start of the text).
|
||||
return re.sub(
|
||||
r"(?:\n|^)```[a-zA-Z_][a-zA-Z0-9_]*\s*\n.*?\n```",
|
||||
"",
|
||||
text,
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def _strip_standalone_images(text: str) -> str:
|
||||
"""Strip image embeds that occupy their own line.
|
||||
|
||||
Preserves the alt text from markdown images (```` → ``alt``)
|
||||
since it often describes what the model generates, which is useful signal
|
||||
for tag/description extraction.
|
||||
"""
|
||||
# Markdown: ```` on its own line → keep alt text
|
||||
text = re.sub(
|
||||
r"^\s*!\[([^\]]*)\]\([^)]+\)\s*$",
|
||||
r"\1",
|
||||
text,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
# HTML: ``<img src="..." ...>`` on its own line → remove entirely
|
||||
text = re.sub(
|
||||
r'^\s*<img\s[^>]+/?>(?:</img>)?\s*$',
|
||||
"",
|
||||
text,
|
||||
flags=re.MULTILINE | re.IGNORECASE,
|
||||
)
|
||||
return text
|
||||
|
||||
|
||||
def _strip_training_tables(text: str) -> str:
    """Drop markdown tables that describe training parameters.

    A table starts at a line containing ``|`` that is directly followed by
    a separator row (``|---|---|``) and extends over every following line
    that still contains ``|``.  The header row and the first data row are
    lower-cased and searched for any entry of ``_TRAINING_PARAM_KEYWORDS``;
    on a hit the whole table is removed.  Other tables (e.g. "Best
    Dimensions") are kept unchanged.
    """
    rows = text.split("\n")
    total = len(rows)
    kept: list[str] = []
    pos = 0

    while pos < total:
        current = rows[pos]
        starts_table = (
            "|" in current
            and pos + 1 < total
            and re.match(r"^\|[\s:-]+\|", rows[pos + 1]) is not None
        )
        if not starts_table:
            kept.append(current)
            pos += 1
            continue

        # Find the first line after the header that is no longer a table row.
        end = pos + 1
        while end < total and "|" in rows[end]:
            end += 1
        table = rows[pos:end]
        pos = end

        # table[0] is the header, table[1] the separator, table[2] (if any)
        # the first data row.
        first_data_row = table[2] if len(table) > 2 else ""
        probe = (current + "\n" + first_data_row).lower()
        if not any(keyword in probe for keyword in _TRAINING_PARAM_KEYWORDS):
            kept.extend(table)

    return "\n".join(kept)
|
||||
|
||||
|
||||
def _strip_boilerplate_sections(text: str) -> str:
|
||||
"""Strip sections whose headings match known boilerplate patterns.
|
||||
|
||||
When a heading (``## Download model``, ``## License``, etc.) is
|
||||
detected, the heading and all content until the next heading of
|
||||
equal-or-higher level is removed.
|
||||
"""
|
||||
lines = text.split("\n")
|
||||
out: list[str] = []
|
||||
i = 0
|
||||
n = len(lines)
|
||||
skip_until_level: int | None = None
|
||||
|
||||
while i < n:
|
||||
line = lines[i]
|
||||
h_match = re.match(r"^(#{1,4})\s+(.+?)\s*#*$", line)
|
||||
if h_match:
|
||||
level = len(h_match.group(1))
|
||||
title = h_match.group(2).strip().lower()
|
||||
|
||||
is_boilerplate = any(
|
||||
title == kw or title.startswith(kw + " ") or title.startswith(kw + ":")
|
||||
for kw in _BOILERPLATE_HEADERS
|
||||
)
|
||||
|
||||
if is_boilerplate:
|
||||
skip_until_level = level
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if skip_until_level is not None and level <= skip_until_level:
|
||||
skip_until_level = None
|
||||
|
||||
if skip_until_level is None:
|
||||
out.append(line)
|
||||
i += 1
|
||||
|
||||
return "\n".join(out)
|
||||
|
||||
|
||||
def _strip_massive_lists(text: str) -> str:
    """Remove long runs of enumeration-style lines.

    An *enumeration line* ends with a comma, or is a ``-``/``*``/``+``
    bullet that contains a comma.  A run is opened by an enumeration line
    or by any non-empty line of at least ``_MASSIVE_LIST_LINE_MIN_LEN``
    characters, and continues over following enumeration lines; blank
    lines neither count nor end the run.  When the run holds at least
    ``_MASSIVE_LIST_THRESHOLD`` counted lines it is dropped, including any
    blank lines scanned along the way; otherwise the opening line is kept
    and scanning resumes on the next line.
    """

    def _is_enumeration(candidate: str) -> bool:
        # Trailing comma, or a bullet item that itself contains a comma.
        return candidate.endswith(",") or (
            re.match(r"^[-*+]\s", candidate) is not None and "," in candidate
        )

    rows = text.split("\n")
    total = len(rows)
    kept: list[str] = []
    pos = 0

    while pos < total:
        head = rows[pos].strip()
        opens_run = bool(head) and (
            len(head) >= _MASSIVE_LIST_LINE_MIN_LEN or _is_enumeration(head)
        )

        if opens_run:
            run_length = 1
            cursor = pos + 1
            while cursor < total:
                candidate = rows[cursor].strip()
                if candidate and not _is_enumeration(candidate):
                    break
                if candidate:
                    run_length += 1
                cursor += 1

            if run_length >= _MASSIVE_LIST_THRESHOLD:
                # Jump past the whole run without emitting any of it.
                pos = cursor
                continue

        kept.append(rows[pos])
        pos += 1

    return "\n".join(kept)
|
||||
|
||||
|
||||
def _compress_blank_lines(text: str) -> str:
|
||||
"""Collapse runs of 3+ blank lines down to 2."""
|
||||
return re.sub(r"\n{3,}", "\n\n", text)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-processing: strip unwanted sections (HTML conversion helpers)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
@@ -333,10 +333,45 @@ class LLMService:
|
||||
|
||||
cfg = self._ensure_configured()
|
||||
api_base = self._resolve_api_base(cfg["provider"], cfg["api_base"])
|
||||
url = f"{api_base}/chat/completions"
|
||||
model_name = model or cfg["model"]
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
is_ollama = cfg["provider"] == "ollama"
|
||||
|
||||
if is_ollama:
|
||||
# Use Ollama's native /api/chat endpoint which does NOT expose
|
||||
# a separate reasoning/thinking field (the model's full output
|
||||
# lands directly in message.content). The OpenAI-compatible
|
||||
# endpoint splits thinking into the "reasoning" field, making
|
||||
# content empty when thinking consumes all available tokens.
|
||||
base = api_base.rstrip("/")
|
||||
if base.endswith("/v1"):
|
||||
base = base[:-3]
|
||||
url = f"{base}/api/chat"
|
||||
else:
|
||||
url = f"{api_base}/chat/completions"
|
||||
|
||||
payload: Dict[str, Any]
|
||||
if is_ollama:
|
||||
payload = {
|
||||
"model": model_name,
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
# Suppress separate thinking trace — thinking still happens
|
||||
# internally (accuracy preserved) but output goes directly to
|
||||
# message.content instead of being split across content +
|
||||
# thinking. Without this the model can exhaust num_predict
|
||||
# on thinking alone and leave content empty.
|
||||
"think": False,
|
||||
"options": {
|
||||
"temperature": temperature,
|
||||
},
|
||||
}
|
||||
if response_format is not None:
|
||||
payload["format"] = "json"
|
||||
if max_tokens is not None:
|
||||
payload["options"]["num_predict"] = max_tokens
|
||||
else:
|
||||
payload = {
|
||||
"model": model_name,
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
@@ -387,8 +422,25 @@ class LLMService:
|
||||
|
||||
# Parse response
|
||||
try:
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
if is_ollama:
|
||||
content = (data.get("message") or {}).get("content") or ""
|
||||
usage = {"completion_tokens": data.get("eval_count", 0)}
|
||||
finish_reason = data.get("done_reason", "")
|
||||
if not content:
|
||||
logger.warning(
|
||||
"LLM returned empty content. Provider=ollama, "
|
||||
"done_reason=%s, eval_count=%s",
|
||||
finish_reason,
|
||||
data.get("eval_count", 0),
|
||||
)
|
||||
else:
|
||||
content = data["choices"][0]["message"].get("content") or ""
|
||||
usage = data.get("usage", {})
|
||||
if not content:
|
||||
logger.warning(
|
||||
"LLM returned empty content. Full response truncated: %s",
|
||||
json.dumps(data, ensure_ascii=False)[:1000],
|
||||
)
|
||||
return {
|
||||
"content": content,
|
||||
"usage": usage,
|
||||
@@ -442,13 +494,16 @@ class LLMService:
|
||||
{"role": "user", "content": user_prompt},
|
||||
]
|
||||
|
||||
# First attempt with JSON mode
|
||||
# First attempt with JSON mode.
|
||||
# Use a generous max_tokens so thinking-enabled models (e.g.
|
||||
# gemma4 via Ollama) have room to reason AND still emit content.
|
||||
effective_max = max_tokens or 131072
|
||||
result = await self.chat_completion(
|
||||
messages=messages,
|
||||
model=model,
|
||||
temperature=temperature,
|
||||
response_format={"type": "json_object"},
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=effective_max,
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -458,11 +513,15 @@ class LLMService:
|
||||
"LLM JSON parse failed on first attempt: %s. Retrying.", exc
|
||||
)
|
||||
|
||||
# Retry with explicit instruction to return valid JSON
|
||||
# Retry WITHOUT response_format — some providers (Ollama with
|
||||
# thinking-enabled models like gemma4) may return empty content
|
||||
# when json_object mode is active. Fall back to a textual
|
||||
# instruction instead.
|
||||
previous_content = result.get("content", "") or ""
|
||||
retry_messages = messages + [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": result["content"],
|
||||
"content": previous_content or "(empty response)",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@@ -478,14 +537,21 @@ class LLMService:
|
||||
messages=retry_messages,
|
||||
model=model,
|
||||
temperature=0.0, # More deterministic for retry
|
||||
response_format={"type": "json_object"},
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=effective_max,
|
||||
)
|
||||
|
||||
content = result.get("content", "") or ""
|
||||
if not content:
|
||||
raise LLMResponseError(
|
||||
"LLM response could not be parsed as JSON after retry: "
|
||||
f"Expecting value: line 1 column 1 (char 0)\n"
|
||||
f"Raw content: {content[:500]}"
|
||||
)
|
||||
|
||||
try:
|
||||
return json.loads(result["content"])
|
||||
except (json.JSONDecodeError, TypeError) as exc:
|
||||
return json.loads(content)
|
||||
except (json.JSONDecodeError, TypeError) as parse_err:
|
||||
raise LLMResponseError(
|
||||
f"LLM response could not be parsed as JSON after retry: {exc}\n"
|
||||
f"Raw content: {result['content'][:500]}"
|
||||
) from exc
|
||||
f"LLM response could not be parsed as JSON after retry: {parse_err}\n"
|
||||
f"Raw content: {content[:500]}"
|
||||
) from parse_err
|
||||
|
||||
@@ -583,3 +583,443 @@ widget:
|
||||
assert len(images) == 1
|
||||
assert "two samurais doing a muay thai fight" in images[0]["meta"]["prompt"]
|
||||
assert "Textured abstract style" in images[0]["meta"]["prompt"]
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# extract_gallery_table_images — Sample Gallery markdown tables
|
||||
# ======================================================================
|
||||
|
||||
|
||||
class TestExtractGalleryTableImages:
    """Tests for ``extract_gallery_table_images`` (Sample Gallery tables)."""

    _REPO = "Limbicnation/pixel-art-lora"
    # The first cell of every data row must hold a markdown image; the
    # extractor takes the image target as URL and the second cell as prompt.
    _README = """## Sample Gallery

| Preview | Prompt |
|---------|--------|
| ![knight](samples/knight.png) | pixel art sprite, a brave knight |
| ![dragon](samples/dragon.png) | pixel art sprite, a fire dragon |
"""

    @staticmethod
    def _extract(md: str, repo: str = _REPO, existing: set | None = None):
        """Call the function under test, importing it lazily."""
        from py.services.agent.skills.enrich_hf_metadata.md_to_html import (
            extract_gallery_table_images,
        )
        return extract_gallery_table_images(md, repo, existing_urls=existing)

    def test_extracts_table_images(self):
        images = self._extract(self._README)
        assert len(images) == 2
        assert "knight.png" in images[0]["url"]
        assert images[0]["meta"]["prompt"] == "pixel art sprite, a brave knight"
        assert "dragon.png" in images[1]["url"]

    def test_skips_existing_urls(self):
        existing = {"https://huggingface.co/Limbicnation/pixel-art-lora/resolve/main/samples/knight.png"}
        images = self._extract(self._README, existing=existing)
        assert len(images) == 1
        assert "knight.png" not in images[0]["url"]

    def test_empty_readme_returns_empty(self):
        assert self._extract("") == []

    def test_no_gallery_table_returns_empty(self):
        md = "## Description\nSome text."
        assert self._extract(md) == []

    def test_non_gallery_table_skipped(self):
        md = "| Param | Value |\n|---|---|\n| Steps | 4 |"
        assert self._extract(md) == []

    def test_absolute_url_preserved(self):
        md = "| Preview | Prompt |\n|---|---|\n| ![img](https://cdn.example.com/img.png) | text |"
        images = self._extract(md, repo="user/repo")
        assert len(images) == 1
        assert images[0]["url"] == "https://cdn.example.com/img.png"
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# clean_readme_for_llm — pre-process README before LLM injection
|
||||
# ======================================================================
|
||||
|
||||
|
||||
class TestCleanReadmeForLlm:
|
||||
|
||||
@staticmethod
|
||||
def _clean(md: str, max_length: int = 6000) -> str:
|
||||
from py.services.agent.skills.enrich_hf_metadata.md_to_html import \
|
||||
clean_readme_for_llm
|
||||
return clean_readme_for_llm(md, max_length=max_length)
|
||||
|
||||
# -- basic guards --------------------------------------------------------
|
||||
|
||||
def test_none_returns_empty(self):
|
||||
assert self._clean(None) == "" # type: ignore[arg-type]
|
||||
|
||||
def test_empty_returns_empty(self):
|
||||
assert self._clean("") == ""
|
||||
|
||||
def test_plain_text_passes_through(self):
|
||||
result = self._clean("Just some description text.")
|
||||
assert "Just some description text." in result
|
||||
|
||||
# -- widget section stripping -------------------------------------------
|
||||
|
||||
def test_widget_text_preserved_in_cleaned_output(self):
|
||||
"""Widget section text is preserved — it provides useful signal
|
||||
for tag and description extraction (example prompts describe what
|
||||
the model generates)."""
|
||||
md = """---
|
||||
tags:
|
||||
- lora
|
||||
- anime
|
||||
widget:
|
||||
- text: "a test prompt"
|
||||
output:
|
||||
url: images/test.png
|
||||
- text: >-
|
||||
another long
|
||||
prompt here
|
||||
output:
|
||||
url: images/test2.png
|
||||
base_model: black-forest-labs/FLUX.1-dev
|
||||
instance_prompt: trigger word
|
||||
---
|
||||
# Model Description
|
||||
This is the actual content.
|
||||
"""
|
||||
result = self._clean(md)
|
||||
# Widget text content preserved (valuable signal for tags)
|
||||
# YAML folded scalars (``>-``) may split text across lines
|
||||
assert "a test prompt" in result
|
||||
assert "another long" in result
|
||||
assert "prompt here" in result
|
||||
# Non-widget frontmatter preserved
|
||||
assert "base_model: black-forest-labs/FLUX.1-dev" in result
|
||||
assert "instance_prompt: trigger word" in result
|
||||
assert "tags:" in result
|
||||
assert "- lora" in result
|
||||
assert "- anime" in result
|
||||
assert "Model Description" in result
|
||||
|
||||
def test_widget_last_key_in_frontmatter(self):
|
||||
"""Widget text at end of frontmatter is preserved."""
|
||||
md = """---
|
||||
tags:
|
||||
- lora
|
||||
widget:
|
||||
- output:
|
||||
url: img.png
|
||||
text: prompt
|
||||
---
|
||||
# Content
|
||||
"""
|
||||
result = self._clean(md)
|
||||
assert "prompt" in result
|
||||
assert "tags:" in result
|
||||
|
||||
def test_no_widget_untouched(self):
|
||||
md = """---
|
||||
tags:
|
||||
- lora
|
||||
base_model: flux
|
||||
---
|
||||
# Content
|
||||
"""
|
||||
result = self._clean(md)
|
||||
assert "tags:" in result
|
||||
assert "base_model: flux" in result
|
||||
|
||||
# -- gallery stripping ---------------------------------------------------
|
||||
|
||||
def test_gallery_tag_stripped(self):
|
||||
md = "Some text\n<Gallery />\nmore text"
|
||||
result = self._clean(md)
|
||||
assert "<Gallery" not in result
|
||||
|
||||
# -- code block stripping ------------------------------------------------
|
||||
|
||||
def test_fenced_code_block_stripped(self):
|
||||
md = """## Usage
|
||||
```python
|
||||
import torch
|
||||
pipe = DiffusionPipeline.from_pretrained('base')
|
||||
```
|
||||
## Description
|
||||
Some text.
|
||||
"""
|
||||
result = self._clean(md)
|
||||
assert "import torch" not in result
|
||||
assert "DiffusionPipeline" not in result
|
||||
assert "## Usage" in result
|
||||
assert "## Description" in result
|
||||
|
||||
def test_bash_code_block_stripped(self):
|
||||
md = """## Setup
|
||||
```bash
|
||||
pip install diffusers
|
||||
huggingface-cli download repo
|
||||
```
|
||||
"""
|
||||
result = self._clean(md)
|
||||
assert "pip install" not in result
|
||||
assert "## Setup" in result
|
||||
|
||||
def test_code_block_sections_remain_separated(self):
|
||||
md = "## Install\n```bash\npip install x\n```\n\n## Usage\nSome text."
|
||||
result = self._clean(md)
|
||||
assert "pip install" not in result
|
||||
assert "## Install" in result
|
||||
assert "## Usage" in result
|
||||
assert "Some text." in result
|
||||
|
||||
def test_unmarked_code_block_preserved(self):
|
||||
"""Unmarked fenced code blocks (just ```) are kept since they
|
||||
often contain trigger words rather than code."""
|
||||
md = """### Trigger Words
|
||||
|
||||
Always include:
|
||||
|
||||
```
|
||||
pixel art sprite, game asset, transparent background
|
||||
```
|
||||
"""
|
||||
result = self._clean(md)
|
||||
assert "pixel art sprite" in result
|
||||
assert "game asset" in result
|
||||
assert "transparent background" in result
|
||||
|
||||
def test_unmarked_code_block_with_python_preserved(self):
    """An untagged fence is kept even when it holds Python code.

    This false positive is accepted on purpose: trigger-word blocks are
    untagged too, and they must not be lost.
    """
    readme = (
        "## Setup\n"
        "```\n"
        "import torch\n"
        "print('hello')\n"
        "```\n"
        "## Desc\n"
        "Text."
    )
    assert "import torch" in self._clean(readme)
|
||||
|
||||
# -- standalone image stripping ------------------------------------------
|
||||
|
||||
def test_standalone_image_stripped(self):
    """Standalone Markdown images lose their URL but keep their alt text.

    The README handed to the cleaner has to contain real ``![alt](url)``
    images: the assertions on "sample" and "another" can only hold when
    those alt texts are present in the input, and the URL assertion is
    only meaningful when a ``cdn.hf.co`` URL is present to be removed.
    """
    # NOTE(review): the image file names are a reconstruction; the
    # assertions depend only on the cdn.hf.co host and the two alt texts.
    md = (
        "## Gallery\n\n"
        "![sample](https://cdn.hf.co/img1.png)\n\n"
        "![another](https://cdn.hf.co/img2.png)\n\n"
        "Some text."
    )
    result = self._clean(md)
    assert "cdn.hf.co" not in result
    assert "sample" in result  # alt text preserved
    assert "another" in result  # alt text preserved
    assert "## Gallery" in result
    assert "Some text." in result
|
||||
|
||||
def test_html_img_tag_stripped(self):
    """An HTML ``<img>`` element is removed together with its URL."""
    readme = "\n".join(
        [
            "## Preview",
            '<img src="https://cdn.hf.co/img.webp"></img>',
            "",
            "Description.",
        ]
    )
    cleaned = self._clean(readme)
    assert "cdn.hf.co" not in cleaned
    assert "Description." in cleaned
|
||||
|
||||
def test_inline_image_within_paragraph_preserved(self):
    """Text around an inline image must survive cleaning.

    Inline images inside a paragraph are rare, but the sentence that
    contains one must not be removed along with it. The input therefore
    embeds an actual inline image between the two text fragments.
    """
    # NOTE(review): the alt text and URL of the inline image are a
    # reconstruction; the assertions only look at the surrounding text.
    md = "Click here ![icon](https://cdn.hf.co/icon.png) for more info."
    result = self._clean(md)
    assert "Click here" in result
    assert "for more info" in result
|
||||
|
||||
# -- training table stripping --------------------------------------------
|
||||
|
||||
def test_training_table_stripped(self):
    """A training-parameter table is removed; an ordinary table is kept."""
    readme = "\n".join(
        [
            "## Training",
            "| Parameter | Value |",
            "|---------------|----------|",
            "| LR Scheduler | constant |",
            "| Optimizer | AdamW |",
            "| Network Dim | 64 |",
            "## Best Dimensions",
            "| Resolution | Status |",
            "|-----------|---------|",
            "| 768x1024 | Best |",
            "",
        ]
    )
    cleaned = self._clean(readme)
    for hyperparameter in ("LR Scheduler", "Optimizer", "Network Dim"):
        assert hyperparameter not in cleaned
    # The table that follows is not a training table and has to stay.
    for kept in ("Best Dimensions", "768x1024"):
        assert kept in cleaned
|
||||
|
||||
def test_normal_table_preserved(self):
    """A table without training parameters passes through the cleaner."""
    readme = (
        "## Recommended\n"
        "| Resolution | Status |\n"
        "|-----------|---------|\n"
        "| 1024x1024 | Default |\n"
    )
    assert "1024x1024" in self._clean(readme)
|
||||
|
||||
# -- boilerplate section stripping ---------------------------------------
|
||||
|
||||
def test_boilerplate_license_stripped(self):
    """The License section is dropped; sections before and after it stay."""
    readme = "\n".join(
        [
            "## Description",
            "Some text.",
            "## License",
            "apache-2.0",
            "Some license details here.",
            "## More Content",
            "After license.",
            "",
        ]
    )
    cleaned = self._clean(readme)
    for removed in ("apache-2.0", "## License"):
        assert removed not in cleaned
    for kept in ("## Description", "## More Content", "After license."):
        assert kept in cleaned
|
||||
|
||||
def test_boilerplate_disclaimer_stripped(self):
    """DISCLAIMER and Citation bodies are dropped; the description stays."""
    readme = "\n".join(
        [
            "## Description",
            "Some text.",
            "## DISCLAIMER",
            "Legal text here.",
            "## Citation",
            "Bibtex here.",
            "",
        ]
    )
    cleaned = self._clean(readme)
    for removed in ("Legal text", "Bibtex"):
        assert removed not in cleaned
    assert "Some text." in cleaned
|
||||
|
||||
def test_boilerplate_subsection_not_stripped(self):
    """Boilerplate removal applies to top-level (##) headings only.

    A ``###`` subsection nested under a non-boilerplate heading is left
    untouched.
    """
    readme = (
        "## Usage\n"
        "Some text.\n"
        "### Important Note\n"
        "This is a note within the usage section.\n"
    )
    assert "Important Note" in self._clean(readme)
|
||||
|
||||
# -- massive list stripping ----------------------------------------------
|
||||
|
||||
def test_massive_name_list_stripped(self):
    """Twelve consecutive comma-separated name lines are removed as noise."""
    name_rows = [
        f"Name{i}A, Name{i}B, Name{i}C, Name{i}D, Name{i}E,"
        for i in range(12)
    ]
    readme = "\n".join(["## 2026 Updates:", *name_rows, "## License", "apache"])
    cleaned = self._clean(readme)
    # First and last generated names are both gone, the heading is not.
    assert "Name0A" not in cleaned
    assert "Name11E" not in cleaned
    assert "## 2026 Updates:" in cleaned
    # The License section falls under boilerplate stripping.
    assert "apache" not in cleaned
|
||||
|
||||
def test_short_list_preserved(self):
    """A list shorter than 8 consecutive lines is not treated as noise."""
    tag_rows = [f"tag{i}A, tag{i}B," for i in range(4)]
    readme = "\n".join(["## Tags:", *tag_rows, "## Description", "Some text."])
    cleaned = self._clean(readme)
    assert "tag0A" in cleaned
    assert "tag3B" in cleaned
|
||||
|
||||
# -- max_length truncation -----------------------------------------------
|
||||
|
||||
def test_truncation(self):
    """``max_length`` caps the output size and keeps the leading content."""
    leading = "A" * 100
    readme = "\n".join([leading, "B" * 100])
    cleaned = self._clean(readme, max_length=150)
    assert len(cleaned) <= 150
    assert cleaned.startswith(leading)
|
||||
|
||||
# -- integration: end-to-end realistic README ----------------------------
|
||||
|
||||
def test_realistic_flux_lora_readme(self):
    """End-to-end check on a realistic FLUX LoRA model card.

    The README combines YAML front matter (tags, widget prompts, base
    model), a gallery tag, a training-parameter table, a Python usage
    snippet, and a download section. The cleaned output must be clearly
    shorter than the input while keeping the trigger word, base model,
    tags and widget prompts, and dropping code, training parameters, the
    gallery tag and the download boilerplate.
    """
    # The document is assembled from a list of lines so that re-indenting
    # this method can never change the README's own whitespace.
    #
    # NOTE(review): the nesting under ``widget:`` follows the usual
    # model-card layout (folded ``>-`` text indented below its key,
    # ``output``/``url`` nested under the list item). Confirm against the
    # upstream fixture; without that nesting the front matter is not
    # valid YAML.
    front_matter = [
        "---",
        "tags:",
        "- text-to-image",
        "- lora",
        "- diffusers",
        "- 3D",
        "- Toon",
        "widget:",
        "- text: >-",
        "    Long toons, a close-up of a cartoon character face...",
        "  output:",
        "    url: images/LT4.png",
        "- text: >-",
        "    Long toons, Super Detail, a close-up shot...",
        "  output:",
        "    url: images/LT5.png",
        "base_model: black-forest-labs/FLUX.1-dev",
        "instance_prompt: Long toons",
        "license: creativeml-openrail-m",
        "---",
    ]
    body = [
        "# Flux-Long-Toon-LoRA",
        "",
        "<Gallery />",
        "",
        "**The model is still in the training phase.**",
        "",
        "## Model description",
        "",
        "**prithivMLmods/Flux-Long-Toon-LoRA**",
        "",
        "Image Processing Parameters",
        "",
        "| Parameter | Value | Parameter | Value |",
        "|---------------------------|--------|---------------------------|--------|",
        "| LR Scheduler | constant | Noise Offset | 0.03 |",
        "| Optimizer | AdamW | Multires Noise Discount | 0.1 |",
        "| Network Dim | 64 | Multires Noise Iterations | 10 |",
        "| Network Alpha | 32 | Repeat & Steps | 25 & 3270 |",
        "| Epoch | 18 | Save Every N Epochs | 1 |",
        "",
        "## Best Dimensions",
        "",
        "- 768 x 1024 (Best)",
        "- 1024 x 1024 (Default)",
        "",
        "## Setting Up",
        "```python",
        "import torch",
        "from pipelines import DiffusionPipeline",
        "",
        'base_model = "black-forest-labs/FLUX.1-dev"',
        "pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=torch.bfloat16)",
        "",
        'lora_repo = "prithivMLmods/Flux-Long-Toon-LoRA"',
        'trigger_word = "Long toons"',
        "pipe.load_lora_weights(lora_repo)",
        "```",
        "",
        "## Trigger words",
        "",
        "You should use `Long toons` to trigger the image generation.",
        "",
        "## Download model",
        "",
        "Weights for this model are available in Safetensors format.",
    ]
    # Trailing empty element reproduces the final newline of the card.
    md = "\n".join([*front_matter, *body, ""])
    original_len = len(md)
    result = self._clean(md)

    # Widget text is kept, but with training tables, code blocks and
    # boilerplate removed the output must still shrink substantially.
    assert len(result) < original_len * 0.7, (
        f"Expected <70% of original, got {len(result)}/{original_len}"
    )

    # Signal preserved
    for kept in ("Long toons", "black-forest-labs/FLUX.1-dev", "3D", "Toon"):
        assert kept in result

    # Widget content preserved (text is valuable signal for tags/desc)
    for prompt in ("close-up of a cartoon character face", "Super Detail"):
        assert prompt in result

    # Noise stripped
    for noise in (
        "import torch",
        "DiffusionPipeline",
        "LR Scheduler",
        "<Gallery",
        "Download model",
    ):
        assert noise not in result
|
||||
|
||||
Reference in New Issue
Block a user