fix(cache): prevent corrupted cache rows from breaking model listings (#730)

Cache corruption (NULL model_name/file_name from legacy DB rows or partial
writes) caused format_response to raise KeyError/AttributeError, failing the
entire /loras/list request and showing no models in the UI.

Fix across three layers:
- format_response (lora/checkpoint/embedding): replace direct dict[] access
  with .get() fallbacks; return None for entries missing file_path
- handlers: filter None entries from list/excluded/fetch/duplicate/conflict
  endpoints instead of letting them crash or appear as null in responses
- model_scanner: always use validate_batch repaired copies (previously
  discarded when no invalid entries, leaving None values in raw_data)
- persistent_model_cache: add or-empty-string guards on read and write for
  nullable TEXT columns (model_name, file_name, folder, base_model, etc.)
This commit is contained in:
Will Miao
2026-06-30 09:02:42 +08:00
parent 28e7c04b37
commit 16f5222efd
9 changed files with 274 additions and 54 deletions

View File

@@ -1,6 +1,6 @@
import os
import logging
from typing import Dict
from typing import Dict, Optional
from .base_model_service import BaseModelService
from .auto_tag_service import extract_auto_tags
@@ -21,20 +21,37 @@ class EmbeddingService(BaseModelService):
"""
super().__init__("embedding", scanner, EmbeddingMetadata, update_service=update_service)
async def format_response(self, embedding_data: Dict) -> Dict:
"""Format Embedding data for API response"""
async def format_response(self, embedding_data: Dict) -> Optional[Dict]:
"""Format Embedding data for API response.
Returns None when the entry is missing critical fields (corrupted cache
row), so the handler layer can filter it out. See issue #730.
"""
# Guard against corrupted cache entries missing critical fields
file_path = embedding_data.get("file_path")
if not file_path or not isinstance(file_path, str):
logger.warning(
"Skipping corrupted embedding entry (missing file_path): %s",
embedding_data.get("file_name", "<unknown>"),
)
return None
# Get sub_type from cache entry (new canonical field)
sub_type = embedding_data.get("sub_type", "embedding")
file_name = embedding_data.get("file_name") or ""
model_name = embedding_data.get("model_name") or file_name
folder = embedding_data.get("folder") or ""
return {
"model_name": embedding_data["model_name"],
"file_name": embedding_data["file_name"],
"model_name": model_name,
"file_name": file_name,
"preview_url": config.get_preview_static_url(embedding_data.get("preview_url", "")),
"preview_nsfw_level": embedding_data.get("preview_nsfw_level", 0),
"base_model": embedding_data.get("base_model", ""),
"folder": embedding_data["folder"],
"folder": folder,
"sha256": embedding_data.get("sha256", ""),
"file_path": embedding_data["file_path"].replace(os.sep, "/"),
"file_path": file_path.replace(os.sep, "/"),
"file_size": embedding_data.get("size", 0),
"modified": embedding_data.get("modified", ""),
"tags": embedding_data.get("tags", []),