fix(cache): prevent corrupted cache rows from breaking model listings (#730)

Cache corruption (NULL model_name/file_name from legacy DB rows or partial
writes) caused format_response to raise KeyError/AttributeError, failing the
entire /loras/list request and showing no models in the UI.

Fix across four layers:
- format_response (lora/checkpoint/embedding): replace direct dict[] access
  with .get() fallbacks; return None for entries missing file_path
- handlers: filter None entries from list/excluded/fetch/duplicate/conflict
  endpoints instead of letting them crash or appear as null in responses
- model_scanner: always use the repaired copies returned by validate_batch
  (previously they were discarded when no entries were invalid, leaving None
  values in raw_data)
- persistent_model_cache: add or-empty-string guards on read and write for
  nullable TEXT columns (model_name, file_name, folder, base_model, etc.)
This commit is contained in:
Will Miao
2026-06-30 09:02:42 +08:00
parent 28e7c04b37
commit 16f5222efd
9 changed files with 274 additions and 54 deletions

View File

@@ -201,6 +201,45 @@ def test_list_models_returns_formatted_items(mock_service, mock_scanner):
asyncio.run(scenario())
def test_list_models_filters_out_corrupted_entries(mock_service, mock_scanner):
    """Listing drops rows whose format_response result is None (issue #730).

    One of the three paginated rows has ``file_path`` set to None and the
    patched formatter returns None for it. The endpoint must still answer
    with HTTP 200 and expose only the two healthy rows, in order.
    """
    healthy_first = {"file_path": "/tmp/good.safetensors", "name": "Good"}
    corrupted = {"file_path": None, "name": "Corrupted"}  # formatter yields None
    healthy_second = {"file_path": "/tmp/also_good.safetensors", "name": "AlsoGood"}
    mock_service.paginated_items = [healthy_first, corrupted, healthy_second]

    # Wrap the existing formatter so rows without a file_path produce None
    # while every other row is formatted exactly as before.
    delegate = mock_service.format_response

    async def format_or_none(row):
        if row.get("file_path") is not None:
            return await delegate(row)
        return None

    mock_service.format_response = format_or_none

    async def run_request():
        client = await create_test_client(mock_service)
        try:
            response = await client.get("/api/lm/test-models/list")
            payload = await response.json()

            assert response.status == 200

            items = payload["items"]
            # Exactly the two healthy rows survive, in their original order
            assert len(items) == 2
            first, second = items
            assert first["name"] == "Good"
            assert second["name"] == "AlsoGood"
            # A filtered row must not leak through as a null placeholder
            assert None not in items
        finally:
            await client.close()

    asyncio.run(run_request())
def test_model_types_endpoint_returns_counts(mock_service, mock_scanner):
mock_service.model_types = [
{"type": "LoRa", "count": 3},

View File

@@ -199,8 +199,107 @@ class TestEmbeddingServiceFormatResponse:
"from_civitai": True,
"civitai": {},
}
result = await embedding_service.format_response(embedding_data)
assert result["sub_type"] == "embedding"
assert "model_type" not in result # Removed in refactoring
class TestFormatResponseCorruptedEntries:
    """Regression tests for format_response on corrupted cache rows (issue #730).

    The tests pin two behaviours:

    * a row whose ``file_path`` is None or an empty string yields ``None``,
      so the caller can drop it;
    * a row with a usable ``file_path`` but None in other fields is still
      formatted, with ``model_name`` falling back to ``file_name`` and then
      to an empty string.
    """

    @staticmethod
    def _cache_row(model_name, file_name, file_path, folder, sha256, **extra):
        """Build a cache row dict; ``extra`` appends keys such as ``sub_type``."""
        row = {
            "model_name": model_name,
            "file_name": file_name,
            "file_path": file_path,
            "folder": folder,
            "sha256": sha256,
            "tags": [],
            "from_civitai": True,
            "civitai": {},
        }
        row.update(extra)
        return row

    @pytest.fixture
    def mock_scanner(self):
        stub = MagicMock()
        stub._hash_index = MagicMock()
        return stub

    @pytest.fixture
    def lora_service(self, mock_scanner):
        return LoraService(mock_scanner)

    @pytest.fixture
    def checkpoint_service(self, mock_scanner):
        return CheckpointService(mock_scanner)

    @pytest.fixture
    def embedding_service(self, mock_scanner):
        return EmbeddingService(mock_scanner)

    @pytest.mark.asyncio
    async def test_lora_returns_none_on_missing_file_path(self, lora_service):
        """A LoRA row with file_path=None is reported as None."""
        corrupted_row = self._cache_row(
            model_name="Test LoRA",
            file_name="test_lora",
            file_path=None,  # corrupted: no path recorded
            folder="",
            sha256="abc123",
        )

        formatted = await lora_service.format_response(corrupted_row)

        assert formatted is None

    @pytest.mark.asyncio
    async def test_lora_handles_none_model_name_gracefully(self, lora_service):
        """A None model_name (legacy DB row) must not crash format_response."""
        legacy_row = self._cache_row(
            model_name=None,  # NULL from an old DB row
            file_name="test_lora",
            file_path="/models/test_lora.safetensors",
            folder="",
            sha256="abc123",
        )

        formatted = await lora_service.format_response(legacy_row)

        # No exception, and the display name is taken from file_name
        assert formatted is not None
        assert formatted["model_name"] == "test_lora"

    @pytest.mark.asyncio
    async def test_checkpoint_returns_none_on_missing_file_path(self, checkpoint_service):
        """A checkpoint row with an empty file_path is reported as None."""
        corrupted_row = self._cache_row(
            model_name="Test",
            file_name="test",
            file_path="",  # an empty string counts as corrupted too
            folder="",
            sha256="abc",
            sub_type="checkpoint",
        )

        formatted = await checkpoint_service.format_response(corrupted_row)

        assert formatted is None

    @pytest.mark.asyncio
    async def test_embedding_handles_none_fields_gracefully(self, embedding_service):
        """None in optional fields must not crash format_response."""
        sparse_row = self._cache_row(
            model_name=None,
            file_name=None,
            file_path="/models/test.pt",
            folder=None,
            sha256="abc",
            sub_type="embedding",
        )

        formatted = await embedding_service.format_response(sparse_row)

        assert formatted is not None
        assert formatted["file_path"] == "/models/test.pt"
        # model_name -> file_name -> "" is the expected fallback chain
        assert formatted["model_name"] == ""