feat(agent): add LLM-powered metadata enrichment system with AgentCLI and PostProcessor

Introduce an agent skill framework for LLM-driven metadata enrichment:

- AgentCLI (py/agent_cli/): in-process wrappers around internal services
  using standard relative imports, eliminating the need for sys.path hacks
- LLMService: centralized BYOK (bring-your-own-key) LLM client supporting
  OpenAI, Ollama, and custom OpenAI-compatible endpoints
- PostProcessor: deterministic engine that applies LLM output via AgentCLI
  (replaces old handler.py + _BASE_MODEL_ALIASES approach)
- SkillRegistry: filesystem-based skill discovery (skill.yaml + prompt.md)
- AgentService: orchestrates skill execution with WebSocket progress
- Frontend AgentManager: WebSocket listeners, skill execution, config UI
- Context menu entries (single + bulk) for "Enrich Metadata (Agent)"
- Settings UI for AI Provider configuration (BYOK)
- Full i18n support across 9 locales

Bug fixes found during review:
- aiohttp.web.json_response: status_code= -> status=
- settings_modal cancelEditApiKey: wrong argument position
- AgentManager.isLlmConfigured: allow Ollama without API key
- PostProcessor._merge_tags: lowercase all tags to match TagUpdateService
This commit is contained in:
Will Miao
2026-07-02 20:51:11 +08:00
parent fe90f7f9b1
commit cf898da193
44 changed files with 5937 additions and 2180 deletions

View File

@@ -0,0 +1,237 @@
"""Tests for the LLMService."""
from __future__ import annotations
import asyncio
import json
from unittest import mock
import pytest
from py.services.llm_service import LLMService
from py.services.errors import LLMNotConfiguredError, LLMRateLimitError, LLMResponseError
class MockSettings:
    """In-memory stand-in for the settings object read by LLMService tests.

    Starts from a fixed set of LLM-related defaults; keyword arguments
    override or extend those defaults.
    """

    def __init__(self, **kwargs):
        defaults = {
            "llm_enabled": False,
            "llm_provider": "openai",
            "llm_api_key": "",
            "llm_api_base": "",
            "llm_model": "",
        }
        # Later entries win, so caller-supplied values replace the defaults.
        self._data = {**defaults, **kwargs}

    def get(self, key, default=None):
        """Return the stored value for *key*, or *default* when absent."""
        try:
            return self._data[key]
        except KeyError:
            return default
class MockResponse:
    """Canned aiohttp-style response usable as an async context manager.

    ``json()`` and ``text()`` return the payloads given at construction;
    ``status`` and ``headers`` are plain attributes.
    """

    def __init__(self, status, json_data=None, text_data="", headers=None):
        self.status = status
        self._json_data = json_data
        self._text_data = text_data
        # Any falsy ``headers`` value is replaced by a fresh empty dict.
        self.headers = headers if headers else {}

    async def json(self):
        """Return the preconfigured JSON payload."""
        return self._json_data

    async def text(self):
        """Return the preconfigured text payload."""
        return self._text_data

    async def __aenter__(self):
        return self

    async def __aexit__(self, *args):
        # Returning None means exceptions raised inside the block propagate.
        return None
class MockSession:
    """Mock aiohttp ClientSession.

    Every ``post()`` call returns the response supplied at construction and
    records the most recent request in ``last_url``, ``last_json`` and
    ``last_headers``.
    """

    def __init__(self, response):
        self._response = response
        self.closed = False
        # Initialised up front so a test can inspect them (and see that no
        # request was made) without hitting AttributeError before post().
        self.last_url = None
        self.last_json = None
        self.last_headers = None

    def post(self, url, json=None, headers=None):
        """Record the request arguments and return the canned response."""
        # ``json`` must keep this name: callers pass it as a keyword.
        self.last_url = url
        self.last_json = json
        self.last_headers = headers
        return self._response

    async def __aenter__(self):
        return self

    async def __aexit__(self, *args):
        pass
@pytest.fixture
def llm_service():
    """Provide an LLMService backed by enabled OpenAI mock settings.

    ``LLMService.reset_instance()`` is called first, before the new
    service is constructed.
    """
    LLMService.reset_instance()
    config = {
        "llm_enabled": True,
        "llm_provider": "openai",
        "llm_api_key": "sk-test-key",
        "llm_api_base": "",
        "llm_model": "gpt-4o-mini",
    }
    return LLMService(MockSettings(**config))
class TestLLMServiceConfiguration:
    """Checks for ``is_configured``, ``_resolve_api_base`` and ``_ensure_configured``."""

    def test_is_configured_when_enabled_with_key_and_model(self, llm_service):
        assert llm_service.is_configured() is True

    def test_configured_when_disabled_but_key_and_model_present(self):
        # Renamed from ``test_not_configured_when_disabled``: the assertion
        # below expects the service to be configured, so the old name
        # stated the opposite of what is verified.
        settings = MockSettings(
            llm_enabled=False, llm_api_key="sk-test", llm_model="gpt-4o"
        )
        service = LLMService(settings)
        # Lenient: model + API key is treated as configured even without
        # the toggle, because the user clearly intends to use the feature.
        assert service.is_configured() is True

    def test_not_configured_without_model(self):
        settings = MockSettings(llm_enabled=True, llm_api_key="sk-test", llm_model="")
        service = LLMService(settings)
        assert service.is_configured() is False

    def test_not_configured_without_api_key_for_openai(self):
        settings = MockSettings(llm_enabled=True, llm_api_key="", llm_model="gpt-4o")
        service = LLMService(settings)
        assert service.is_configured() is False

    def test_ollama_configured_without_api_key(self):
        settings = MockSettings(
            llm_enabled=True, llm_provider="ollama", llm_api_key="", llm_model="llama3"
        )
        service = LLMService(settings)
        assert service.is_configured() is True

    def test_resolve_api_base_openai_default(self, llm_service):
        assert llm_service._resolve_api_base("openai", "") == "https://api.openai.com/v1"

    def test_resolve_api_base_ollama_default(self, llm_service):
        assert llm_service._resolve_api_base("ollama", "") == "http://localhost:11434/v1"

    def test_resolve_api_base_custom_override(self, llm_service):
        # The trailing slash of the custom base is expected to be stripped.
        assert llm_service._resolve_api_base("custom", "https://my.api.com/v1/") == "https://my.api.com/v1"

    def test_ensure_configured_raises_when_disabled(self):
        # MockSettings defaults leave the API key and model empty as well,
        # so this covers "disabled and nothing filled in".
        settings = MockSettings(llm_enabled=False)
        service = LLMService(settings)
        with pytest.raises(LLMNotConfiguredError):
            service._ensure_configured()

    def test_ensure_configured_raises_without_model(self):
        settings = MockSettings(llm_enabled=True, llm_api_key="sk-test", llm_model="")
        service = LLMService(settings)
        with pytest.raises(LLMNotConfiguredError):
            service._ensure_configured()
class TestLLMServiceChatCompletion:
    """Drive ``LLMService.chat_completion`` with ``aiohttp.ClientSession`` patched out."""

    @pytest.mark.asyncio
    async def test_chat_completion_success(self, llm_service):
        payload = {
            "choices": [{"message": {"content": "Hello!"}}],
            "usage": {"total_tokens": 10},
            "model": "gpt-4o-mini",
        }
        session = MockSession(MockResponse(200, json_data=payload))
        with mock.patch("aiohttp.ClientSession", return_value=session):
            result = await llm_service.chat_completion(
                messages=[{"role": "user", "content": "Hi"}],
            )
        assert result["content"] == "Hello!"
        assert result["usage"]["total_tokens"] == 10
        assert result["model"] == "gpt-4o-mini"

    @pytest.mark.asyncio
    async def test_chat_completion_raises_on_not_configured(self):
        unconfigured = LLMService(MockSettings(llm_enabled=False))
        with pytest.raises(LLMNotConfiguredError):
            await unconfigured.chat_completion(messages=[])

    @pytest.mark.asyncio
    async def test_chat_completion_raises_on_http_error(self, llm_service):
        session = MockSession(MockResponse(500, text_data="Internal Server Error"))
        with mock.patch("aiohttp.ClientSession", return_value=session), pytest.raises(
            LLMResponseError, match="HTTP 500"
        ):
            await llm_service.chat_completion(messages=[])

    @pytest.mark.asyncio
    async def test_chat_completion_raises_on_rate_limit(self, llm_service):
        throttled = MockResponse(
            429, text_data="Rate limited", headers={"Retry-After": "0"}
        )
        session = MockSession(throttled)
        with mock.patch("aiohttp.ClientSession", return_value=session), pytest.raises(
            LLMRateLimitError
        ):
            await llm_service.chat_completion(
                messages=[], retry_on_rate_limit=False
            )

    @pytest.mark.asyncio
    async def test_chat_completion_raises_on_bad_response_structure(self, llm_service):
        # HTTP 200 whose body lacks the "choices" key.
        session = MockSession(MockResponse(200, json_data={"unexpected": "data"}))
        with mock.patch("aiohttp.ClientSession", return_value=session), pytest.raises(
            LLMResponseError, match="Unexpected LLM response"
        ):
            await llm_service.chat_completion(messages=[])
class TestLLMServiceChatCompletionJson:
    """Drive ``LLMService.chat_completion_json`` with a mocked HTTP session."""

    @pytest.mark.asyncio
    async def test_chat_completion_json_parses_json(self, llm_service):
        body = {
            "choices": [{"message": {"content": '{"key": "value"}'}}],
            "usage": {},
            "model": "gpt-4o-mini",
        }
        session = MockSession(MockResponse(200, json_data=body))
        with mock.patch("aiohttp.ClientSession", return_value=session):
            parsed = await llm_service.chat_completion_json(
                system_prompt="You are helpful.",
                user_prompt="Return JSON.",
            )
        assert parsed == {"key": "value"}

    @pytest.mark.asyncio
    async def test_chat_completion_json_raises_on_non_json(self, llm_service):
        # First attempt: non-JSON; second attempt (retry): also non-JSON.
        # The same canned response is returned for every post() call.
        body = {
            "choices": [{"message": {"content": "not json at all"}}],
            "usage": {},
        }
        session = MockSession(MockResponse(200, json_data=body))
        with mock.patch("aiohttp.ClientSession", return_value=session), pytest.raises(
            LLMResponseError, match="could not be parsed as JSON"
        ):
            await llm_service.chat_completion_json(
                system_prompt="test",
                user_prompt="test",
            )

View File

@@ -0,0 +1,313 @@
"""Tests for the PostProcessor (py/services/agent/post_processor.py).
PostProcessor delegates all I/O to AgentCLI — these tests mock AgentCLI
functions and verify the business logic (conditions, merges, dispatch).
"""
from __future__ import annotations
from datetime import datetime, timezone
from unittest import mock
import pytest
from py.services.agent.post_processor import PostProcessor
@pytest.fixture
def processor():
    """Provide a newly constructed PostProcessor to each test."""
    instance = PostProcessor()
    return instance
# ======================================================================
# process() — routing
# ======================================================================
class TestProcessDispatch:
    """Routing behaviour of ``PostProcessor.process`` keyed on ``skill_name``."""

    @pytest.mark.asyncio
    async def test_unknown_skill_returns_error(self, processor):
        outcome = await processor.process(
            skill_name="nonexistent",
            model_path="/p.safetensors",
            llm_output={},
            metadata={},
        )
        assert result_is_failure(outcome) if False else outcome["success"] is False
        assert "nonexistent" in outcome["errors"][0]

    @pytest.mark.asyncio
    async def test_enrich_hf_metadata_routes_correctly(self, processor):
        with (
            mock.patch(
                "py.agent_cli.apply_metadata_updates",
                return_value=["metadata_source"],
            ),
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            outcome = await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output={},
                metadata={"from_civitai": True},
            )
        assert outcome["success"] is True
# ======================================================================
# enrich_hf_metadata — field-level logic
# ======================================================================
class TestEnrichHfMetadata:
    """Business logic tests for the enrich_hf_metadata post-processor.

    Every test patches the three ``py.agent_cli`` entry points and inspects
    the update dict passed as the second positional argument to
    ``apply_metadata_updates``.
    """

    # Template of an LLM answer with no useful content. Tests must not hand
    # this object (or its inner lists) to the code under test directly;
    # use ``_llm_output()`` so a mutation cannot leak between tests.
    MIN_LLM_OUTPUT = {
        "base_model": "",
        "trigger_words": [],
        "description": "",
        "tags": [],
        "preview_url": "",
        "confidence": "low",
    }

    @classmethod
    def _llm_output(cls, **overrides):
        """Return a fresh copy of ``MIN_LLM_OUTPUT`` with *overrides* applied.

        List values are copied as well, so nothing returned here aliases the
        class-level template.
        """
        fresh = {
            key: list(value) if isinstance(value, list) else value
            for key, value in cls.MIN_LLM_OUTPUT.items()
        }
        fresh.update(overrides)
        return fresh

    # -- base_model ------------------------------------------------------
    @pytest.mark.asyncio
    async def test_base_model_overwrites_empty(self, processor):
        """Empty current base_model → new value is applied."""
        llm = self._llm_output(base_model="Flux.1 D")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"base_model": ""},
            )
        applied = mock_apply.call_args[0][1]
        assert applied["base_model"] == "Flux.1 D"

    @pytest.mark.asyncio
    async def test_base_model_does_not_overwrite_existing_civitai(self, processor):
        """Existing base_model from CivitAI → not overwritten."""
        llm = self._llm_output(base_model="Flux.1 D")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"base_model": "SDXL 1.0", "from_civitai": True},
            )
        # apply IS called (metadata_source, llm_enriched_at) but base_model not in it
        applied = mock_apply.call_args[0][1]
        assert "base_model" not in applied

    @pytest.mark.asyncio
    async def test_base_model_overwrites_existing_hf_model(self, processor):
        """Existing base_model from HF → overwritten (LLM is more reliable)."""
        llm = self._llm_output(base_model="Flux.1 D")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"base_model": "SD 1.5", "from_civitai": False},
            )
        applied = mock_apply.call_args[0][1]
        assert applied["base_model"] == "Flux.1 D"

    @pytest.mark.asyncio
    async def test_base_model_skipped_when_llm_empty(self, processor):
        """LLM returns empty base_model → nothing written."""
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=self._llm_output(),
                metadata={"base_model": ""},
            )
        applied = mock_apply.call_args[0][1]
        assert "base_model" not in applied

    # -- trigger_words ---------------------------------------------------
    @pytest.mark.asyncio
    async def test_trigger_words_merged(self, processor):
        """New trigger words written when current list is empty."""
        llm = self._llm_output(trigger_words=["trigger1", "trigger2"])
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"trainedWords": []},
            )
        applied = mock_apply.call_args[0][1]
        assert applied["trainedWords"] == ["trigger1", "trigger2"]

    # -- description -----------------------------------------------------
    @pytest.mark.asyncio
    async def test_description_set_when_empty(self, processor):
        """Empty current modelDescription → the update dict carries one."""
        llm = self._llm_output(description="A model description")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"modelDescription": ""},
            )
        assert "modelDescription" in mock_apply.call_args[0][1]

    # -- tags ------------------------------------------------------------
    @pytest.mark.asyncio
    async def test_tags_merged_and_deduplicated(self, processor):
        """Existing and LLM tags are combined, lowercased, without duplicates."""
        llm = self._llm_output(tags=["flux", "lora", "STYLE"])
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"tags": ["anime"], "from_civitai": False},
            )
        merged = mock_apply.call_args[0][1]["tags"]
        assert "anime" in merged
        assert "flux" in merged
        # Asserted explicitly: the length check alone would not notice if
        # "lora" were replaced by some other fourth entry.
        assert "lora" in merged
        assert "style" in merged  # lowercased
        # "lora" and "STYLE" → "lora" and "style"
        assert len(merged) == 4  # anime, flux, lora, style

    # -- metadata_source & llm_enriched_at --------------------------------
    @pytest.mark.asyncio
    async def test_audit_fields_always_set(self, processor):
        """Audit fields are written even when the LLM output is empty."""
        with (
            mock.patch("py.agent_cli.apply_metadata_updates") as mock_apply,
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=self._llm_output(),
                metadata={},
            )
        applied = mock_apply.call_args[0][1]
        assert applied["metadata_source"] == "agent:enrich_hf_metadata"
        assert "llm_enriched_at" in applied

    # -- preview download ------------------------------------------------
    @pytest.mark.asyncio
    async def test_preview_downloaded_when_url_provided(self, processor):
        """A preview_url in the LLM output triggers exactly one download."""
        llm = self._llm_output(preview_url="https://ex.com/img.png")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates"),
            mock.patch("py.agent_cli.download_preview") as mock_dl,
            mock.patch("py.agent_cli.refresh_cache"),
        ):
            mock_dl.return_value = True
            result = await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={},
            )
        assert result["preview_downloaded"] is True
        mock_dl.assert_awaited_once_with("/p.safetensors", "https://ex.com/img.png")

    @pytest.mark.asyncio
    async def test_preview_skipped_when_exists(self, processor):
        """If current_preview file exists on disk, skip download."""
        llm = self._llm_output(preview_url="https://ex.com/img.png")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates"),
            mock.patch("py.agent_cli.download_preview") as mock_dl,
            mock.patch("py.agent_cli.refresh_cache"),
            # Makes every os.path.exists() call report True while patched.
            mock.patch("os.path.exists", return_value=True),
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"preview_url": "/existing/preview.webp"},
            )
        mock_dl.assert_not_called()

    # -- cache refresh ---------------------------------------------------
    @pytest.mark.asyncio
    async def test_cache_refreshed_when_updates_applied(self, processor):
        """A non-empty result from apply_metadata_updates → cache refresh."""
        llm = self._llm_output(base_model="Flux.1 D")
        with (
            mock.patch("py.agent_cli.apply_metadata_updates", return_value=["base_model"]),
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache") as mock_ref,
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=llm,
                metadata={"base_model": ""},
            )
        mock_ref.assert_awaited_once_with("/p.safetensors")

    @pytest.mark.asyncio
    async def test_cache_not_refreshed_when_nothing_changed(self, processor):
        """An empty result from apply_metadata_updates → no cache refresh."""
        with (
            mock.patch("py.agent_cli.apply_metadata_updates", return_value=[]),
            mock.patch("py.agent_cli.download_preview", return_value=False),
            mock.patch("py.agent_cli.refresh_cache") as mock_ref,
        ):
            await processor.process(
                skill_name="enrich_hf_metadata",
                model_path="/p.safetensors",
                llm_output=self._llm_output(),
                metadata={"base_model": ""},
            )
        mock_ref.assert_not_called()
# ======================================================================
# Unit: _merge_tags
# ======================================================================
class TestMergeTags:
    """Direct unit test of ``PostProcessor._merge_tags``."""

    def test_deduplicates_case_insensitive(self):
        current_tags = ["anime", "Flux"]
        incoming_tags = ["flux", "LORA", "anime"]
        # All tags are lowercased (matching TagUpdateService behaviour)
        expected = ["anime", "flux", "lora"]
        assert PostProcessor._merge_tags(current_tags, incoming_tags) == expected

View File

@@ -0,0 +1,91 @@
"""Tests for the SkillRegistry."""
from __future__ import annotations
from pathlib import Path
import pytest
from py.services.agent.skill_registry import SkillRegistry
from py.services.agent.skill_definition import SkillDefinition, SkillPermissions
@pytest.fixture
def registry():
    """Provide a SkillRegistry populated from the real skills directory.

    The singleton is reset first, then discovery is run explicitly on the
    new instance before it is handed to the test.
    """
    SkillRegistry.reset_instance()
    fresh_registry = SkillRegistry()
    fresh_registry._discover()
    return fresh_registry
class TestSkillRegistryDiscovery:
    """Discovery results for the ``enrich_hf_metadata`` skill."""

    def test_discovers_enrich_hf_metadata_skill(self, registry):
        assert len(registry.list_skills()) >= 1
        found = registry.get_skill("enrich_hf_metadata")
        assert found is not None
        assert found.name == "enrich_hf_metadata"
        assert found.llm_required is True

    def test_skill_has_correct_model_type_filter(self, registry):
        expected_types = ["lora", "checkpoint", "embedding"]
        assert registry.get_skill("enrich_hf_metadata").model_type_filter == expected_types

    def test_skill_has_permissions(self, registry):
        granted = registry.get_skill("enrich_hf_metadata").permissions
        assert granted.write_metadata is True
        assert granted.write_previews is True
        assert "huggingface.co" in granted.network_domains

    def test_get_skill_returns_none_for_unknown(self, registry):
        missing = registry.get_skill("nonexistent_skill")
        assert missing is None
class TestSkillRegistryLoading:
    """Behaviour of ``load_prompt`` and ``load_handler`` on the registry."""

    def test_load_prompt_returns_content(self, registry):
        text = registry.load_prompt("enrich_hf_metadata")
        assert isinstance(text, str)
        assert len(text) > 100
        # The prompt is expected to mention these output fields.
        assert "base_model" in text
        assert "trigger_words" in text

    def test_load_prompt_raises_for_unknown_skill(self, registry):
        with pytest.raises(FileNotFoundError):
            registry.load_prompt("nonexistent")

    def test_load_handler_raises_when_handler_missing(self, registry):
        # The known skill is expected to have no handler to load.
        with pytest.raises(FileNotFoundError):
            registry.load_handler("enrich_hf_metadata")
class TestSkillDefinition:
    """``SkillDefinition.applies_to_model_type`` with and without a filter."""

    def test_applies_to_model_type_with_filter(self):
        fields = {
            "name": "test",
            "title": "Test",
            "description": "",
            "llm_required": False,
            "model_type_filter": ["lora"],
        }
        lora_only = SkillDefinition(**fields)
        assert lora_only.applies_to_model_type("lora") is True
        assert lora_only.applies_to_model_type("checkpoint") is False

    def test_applies_to_model_type_without_filter(self):
        fields = {
            "name": "test",
            "title": "Test",
            "description": "",
            "llm_required": False,
            "model_type_filter": None,
        }
        unfiltered = SkillDefinition(**fields)
        # With no filter, both model types are accepted.
        assert unfiltered.applies_to_model_type("lora") is True
        assert unfiltered.applies_to_model_type("checkpoint") is True
class TestSkillPermissions:
    """Default field values of a bare ``SkillPermissions()``."""

    def test_defaults(self):
        defaults = SkillPermissions()
        assert defaults.write_metadata is True
        assert defaults.write_previews is True
        assert defaults.network_domains == ()