feat(doctor): add duplicate filename conflict detection and one-click resolution

Detects when multiple model files share the same basename (causing
ambiguity in LoRA resolution), logs warnings during scanning, and
provides a "Resolve Conflicts" button in the Doctor panel. Resolution
renames duplicates with hash-prefixed unique filenames, migrates all
sidecar and preview files, and updates the cache and frontend scroller
in-place so the model modal immediately reflects the new filename.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Will Miao
2026-04-30 15:21:26 +08:00
parent 1d035361a4
commit 5dcfde36ea
18 changed files with 601 additions and 19 deletions

View File

@@ -10,6 +10,7 @@ from unittest.mock import patch, MagicMock
import pytest
from aiohttp import web
from py.services.model_hash_index import ModelHashIndex
from py.routes.handlers.misc_handlers import (
BackupHandler,
DoctorHandler,
@@ -78,10 +79,11 @@ async def dummy_downloader_factory():
class DummyDoctorScanner:
def __init__(self, *, model_type='lora', raw_data=None, rebuild_error=None):
def __init__(self, *, model_type='lora', raw_data=None, rebuild_error=None, hash_index=None):
self.model_type = model_type
self._raw_data = list(raw_data or [])
self._rebuild_error = rebuild_error
self._hash_index = hash_index
self._persistent_cache = SimpleNamespace(
load_cache=lambda _model_type: SimpleNamespace(raw_data=list(self._raw_data))
)
@@ -91,6 +93,16 @@ class DummyDoctorScanner:
raise self._rebuild_error
return SimpleNamespace(raw_data=list(self._raw_data))
async def update_single_model_cache(self, original_path, new_path, metadata):
for item in self._raw_data:
if item.get("file_path") == original_path:
item["file_path"] = new_path
item["file_name"] = metadata.get("file_name", item.get("file_name", ""))
if metadata.get("preview_url"):
item["preview_url"] = metadata["preview_url"]
break
return True
class DummyCivitaiClient:
def __init__(self, *, success=True, result=None):
@@ -1582,3 +1594,107 @@ def test_wsl_to_windows_path_returns_none_on_subprocess_error(tmp_path):
):
result = _wsl_to_windows_path("/mnt/c/test")
assert result is None
# ── DoctorHandler filename conflict tests ──────────────────────────────
@pytest.mark.asyncio
async def test_check_filename_conflicts_returns_ok_when_no_duplicates():
hash_index = ModelHashIndex()
async def scanner_factory():
return DummyDoctorScanner(
model_type="lora", raw_data=[], hash_index=hash_index
)
handler = DoctorHandler(
settings_service=DummySettings({"civitai_api_key": "token"}),
scanner_factories=(("lora", "LoRAs", scanner_factory),),
)
response = await handler.get_doctor_diagnostics(FakeRequest(method="GET"))
payload = json.loads(response.text)
diagnostic_map = {item["id"]: item for item in payload["diagnostics"]}
assert diagnostic_map["filename_conflicts"]["status"] == "ok"
assert "No duplicate filenames" in diagnostic_map["filename_conflicts"]["summary"]
@pytest.mark.asyncio
async def test_check_filename_conflicts_detects_duplicates():
hash_index = ModelHashIndex()
hash_index.add_entry("abc123", "/a/lora.safetensors")
hash_index.add_entry("def456", "/b/lora.safetensors")
async def scanner_factory():
return DummyDoctorScanner(
model_type="lora",
raw_data=[
{"file_path": "/a/lora.safetensors"},
{"file_path": "/b/lora.safetensors"},
],
hash_index=hash_index,
)
handler = DoctorHandler(
settings_service=DummySettings({"civitai_api_key": "token"}),
scanner_factories=(("lora", "LoRAs", scanner_factory),),
)
response = await handler.get_doctor_diagnostics(FakeRequest(method="GET"))
payload = json.loads(response.text)
diagnostic_map = {item["id"]: item for item in payload["diagnostics"]}
conflict_diag = diagnostic_map["filename_conflicts"]
assert conflict_diag["status"] == "warning"
assert "1 filename(s)" in conflict_diag["summary"]
assert any("resolve-filename-conflicts" in str(a) for a in conflict_diag.get("actions", []))
@pytest.mark.asyncio
async def test_resolve_filename_conflicts_returns_renamed_list():
hash_index = ModelHashIndex()
hash_index.add_entry("abc123", "lora.safetensors")
hash_index.add_entry("def456", "lora.safetensors")
async def scanner_factory():
return DummyDoctorScanner(
model_type="lora",
raw_data=[],
hash_index=hash_index,
)
handler = DoctorHandler(
settings_service=DummySettings({"civitai_api_key": "token"}),
scanner_factories=(("lora", "LoRAs", scanner_factory),),
)
response = await handler.resolve_filename_conflicts(FakeRequest(method="POST"))
payload = json.loads(response.text)
assert payload["success"] is True
# Files don't exist on disk, so nothing gets renamed
assert payload["count"] == 0
@pytest.mark.asyncio
async def test_resolve_filename_conflicts_handles_scanner_error_gracefully():
class ErrorScanner:
model_type = "lora"
async def get_cached_data(self):
raise RuntimeError("scanner unavailable")
async def scanner_factory():
return ErrorScanner()
handler = DoctorHandler(
settings_service=DummySettings({"civitai_api_key": "token"}),
scanner_factories=(("lora", "LoRAs", scanner_factory),),
)
response = await handler.resolve_filename_conflicts(FakeRequest(method="POST"))
payload = json.loads(response.text)
assert payload["success"] is True
assert payload["count"] == 0

View File

@@ -0,0 +1,113 @@
import pytest
from py.services.model_hash_index import ModelHashIndex
class TestModelHashIndexRemoveByPath:
def test_remove_by_path_finds_hash_in_hash_to_path(self):
index = ModelHashIndex()
index.add_entry("abc123", "/models/lora.safetensors")
index.remove_by_path("/models/lora.safetensors")
assert len(index) == 0
assert not index.get_duplicate_filenames()
def test_remove_by_path_falls_back_to_duplicate_hashes(self):
"""When a path is only tracked in _duplicate_hashes, remove_by_path
should still find and remove it."""
index = ModelHashIndex()
index.add_entry("abc123", "/models/lora_v1.safetensors")
index.add_entry("abc123", "/models/lora_v2.safetensors")
# lora_v1 is the primary (_hash_to_path), lora_v2 is in _duplicate_hashes
index.remove_by_path("/models/lora_v2.safetensors")
assert len(index) == 1
assert index._hash_to_path.get("abc123") == "/models/lora_v1.safetensors"
assert "abc123" not in index._duplicate_hashes
def test_remove_by_path_cleans_up_duplicate_filenames(self):
"""After removing a path, _duplicate_filenames should be updated."""
index = ModelHashIndex()
index.add_entry("abc123", "/models/mylora.safetensors")
index.add_entry("def456", "/other/mylora.safetensors")
assert "mylora" in index.get_duplicate_filenames()
assert len(index.get_duplicate_filenames()["mylora"]) == 2
index.remove_by_path("/other/mylora.safetensors")
# After removing one duplicate, only one path remains — no longer a duplicate
assert "mylora" not in index.get_duplicate_filenames()
def test_remove_by_path_keeps_duplicate_filenames_with_three_entries(self):
"""With 3 entries for the same filename, removing one should leave 2."""
index = ModelHashIndex()
index.add_entry("abc123", "/models/mylora.safetensors")
index.add_entry("def456", "/other/mylora.safetensors")
index.add_entry("ghi789", "/third/mylora.safetensors")
index.remove_by_path("/other/mylora.safetensors")
assert "mylora" in index.get_duplicate_filenames()
paths = index.get_duplicate_filenames()["mylora"]
assert len(paths) == 2
assert "/other/mylora.safetensors" not in paths
def test_remove_by_path_noop_on_unknown_path(self):
index = ModelHashIndex()
index.add_entry("abc123", "/models/lora.safetensors")
# Should not raise
index.remove_by_path("/nonexistent/lora.safetensors")
assert len(index) == 1
def test_remove_by_path_handles_hash_from_duplicate_hashes_only(self):
"""Remove a path whose hash exists ONLY in _duplicate_hashes,
not in _hash_to_path (edge case from index rebuilds)."""
index = ModelHashIndex()
index.add_entry("abc123", "/a/model.safetensors")
index.add_entry("abc123", "/b/model.safetensors")
# Manually remove the primary entry to simulate edge case
del index._hash_to_path["abc123"]
# Now the path is only referenced in _duplicate_hashes
assert "abc123" in index._duplicate_hashes
index.remove_by_path("/b/model.safetensors")
# The remaining path is promoted to _hash_to_path, duplicates cleared
assert "abc123" not in index._duplicate_hashes
assert index._hash_to_path.get("abc123") == "/a/model.safetensors"
class TestModelHashIndexGetDuplicateFilenames:
def test_empty_index_returns_empty_dict(self):
index = ModelHashIndex()
assert index.get_duplicate_filenames() == {}
def test_no_duplicates_returns_empty_dict(self):
index = ModelHashIndex()
index.add_entry("abc123", "/models/lora.safetensors")
index.add_entry("def456", "/models/other.safetensors")
assert index.get_duplicate_filenames() == {}
def test_duplicate_filenames_detected(self):
index = ModelHashIndex()
index.add_entry("abc123", "/a/mylora.safetensors")
index.add_entry("def456", "/b/mylora.safetensors")
dupes = index.get_duplicate_filenames()
assert "mylora" in dupes
assert len(dupes["mylora"]) == 2
def test_same_hash_same_name_not_a_filename_duplicate(self):
"""Same hash with same filename = hash duplicate, not filename conflict."""
index = ModelHashIndex()
index.add_entry("abc123", "/a/lora.safetensors")
# Same hash, same filename — this is a true duplicate (hash collision)
# but the filename index only tracks different files with same name
# Currently add_entry for same hash+path would update, not create duplicate
# This is correct behavior — filename dupes are for different files
def test_add_entry_idempotent_for_same_path_and_hash(self):
index = ModelHashIndex()
index.add_entry("abc123", "/a/lora.safetensors")
index.add_entry("abc123", "/a/lora.safetensors")
assert len(index) == 1
assert index.get_duplicate_filenames() == {}

View File

@@ -624,3 +624,42 @@ async def test_reconcile_cache_removes_duplicate_alias_when_same_real_file_seen_
cache = await scanner.get_cached_data()
cached_paths = {item["file_path"] for item in cache.raw_data}
assert cached_paths == {_normalize_path(loras_root / "link" / "one.txt")}
@pytest.mark.asyncio
async def test_log_duplicate_filename_summary_logs_warning(tmp_path: Path, caplog):
"""When duplicate filenames exist, _log_duplicate_filename_summary should emit
a single warning log with the conflict count and total file count."""
import logging
caplog.set_level(logging.WARNING)
root = tmp_path / "loras"
root.mkdir()
scanner = DummyScanner(root)
# Simulate duplicate filenames in the hash index
scanner._hash_index.add_entry("aaa111", str(root / "model.safetensors"))
scanner._hash_index.add_entry("bbb222", str(root / "dir" / "model.safetensors"))
scanner._log_duplicate_filename_summary()
assert len(caplog.records) >= 1
log_msg = caplog.records[-1].message
assert "Duplicate filename conflict detected" in log_msg
assert "1 dummy filename(s)" in log_msg
assert "2 files total" in log_msg
@pytest.mark.asyncio
async def test_log_duplicate_filename_summary_silent_when_no_duplicates(tmp_path: Path, caplog):
import logging
caplog.set_level(logging.WARNING)
root = tmp_path / "loras"
root.mkdir()
scanner = DummyScanner(root)
scanner._log_duplicate_filename_summary()
# No warning should be logged when there are no duplicates
for record in caplog.records:
assert "Duplicate filename conflict detected" not in record.message