feat(doctor): add duplicate filename conflict detection and one-click resolution

Detects when multiple model files share the same basename (causing ambiguity in LoRA resolution), logs warnings during scanning, and provides a "Resolve Conflicts" button in the Doctor panel. Resolution renames duplicates with hash-prefixed unique filenames, migrates all sidecar and preview files, and updates the cache and frontend scroller in-place so the model modal immediately reflects the new filename. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-20 17:32:05 -03:00 · 2026-04-30 15:21:26 +08:00
parent 1d035361a4
commit 5dcfde36ea
18 changed files with 601 additions and 19 deletions
--- a/tests/routes/test_misc_routes.py
+++ b/tests/routes/test_misc_routes.py
@@ -10,6 +10,7 @@ from unittest.mock import patch, MagicMock
 import pytest
 from aiohttp import web

+from py.services.model_hash_index import ModelHashIndex
 from py.routes.handlers.misc_handlers import (
    BackupHandler,
    DoctorHandler,
@@ -78,10 +79,11 @@ async def dummy_downloader_factory():


 class DummyDoctorScanner:
-    def __init__(self, *, model_type='lora', raw_data=None, rebuild_error=None):
+    def __init__(self, *, model_type='lora', raw_data=None, rebuild_error=None, hash_index=None):
        self.model_type = model_type
        self._raw_data = list(raw_data or [])
        self._rebuild_error = rebuild_error
+        self._hash_index = hash_index
        self._persistent_cache = SimpleNamespace(
            load_cache=lambda _model_type: SimpleNamespace(raw_data=list(self._raw_data))
        )
@@ -91,6 +93,16 @@ class DummyDoctorScanner:
            raise self._rebuild_error
        return SimpleNamespace(raw_data=list(self._raw_data))

+    async def update_single_model_cache(self, original_path, new_path, metadata):
+        for item in self._raw_data:
+            if item.get("file_path") == original_path:
+                item["file_path"] = new_path
+                item["file_name"] = metadata.get("file_name", item.get("file_name", ""))
+                if metadata.get("preview_url"):
+                    item["preview_url"] = metadata["preview_url"]
+                break
+        return True
+

 class DummyCivitaiClient:
    def __init__(self, *, success=True, result=None):
@@ -1582,3 +1594,107 @@ def test_wsl_to_windows_path_returns_none_on_subprocess_error(tmp_path):
    ):
        result = _wsl_to_windows_path("/mnt/c/test")
        assert result is None
+
+
+# ── DoctorHandler filename conflict tests ──────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_check_filename_conflicts_returns_ok_when_no_duplicates():
+    hash_index = ModelHashIndex()
+    async def scanner_factory():
+        return DummyDoctorScanner(
+            model_type="lora", raw_data=[], hash_index=hash_index
+        )
+
+    handler = DoctorHandler(
+        settings_service=DummySettings({"civitai_api_key": "token"}),
+        scanner_factories=(("lora", "LoRAs", scanner_factory),),
+    )
+
+    response = await handler.get_doctor_diagnostics(FakeRequest(method="GET"))
+    payload = json.loads(response.text)
+
+    diagnostic_map = {item["id"]: item for item in payload["diagnostics"]}
+    assert diagnostic_map["filename_conflicts"]["status"] == "ok"
+    assert "No duplicate filenames" in diagnostic_map["filename_conflicts"]["summary"]
+
+
+@pytest.mark.asyncio
+async def test_check_filename_conflicts_detects_duplicates():
+    hash_index = ModelHashIndex()
+    hash_index.add_entry("abc123", "/a/lora.safetensors")
+    hash_index.add_entry("def456", "/b/lora.safetensors")
+
+    async def scanner_factory():
+        return DummyDoctorScanner(
+            model_type="lora",
+            raw_data=[
+                {"file_path": "/a/lora.safetensors"},
+                {"file_path": "/b/lora.safetensors"},
+            ],
+            hash_index=hash_index,
+        )
+
+    handler = DoctorHandler(
+        settings_service=DummySettings({"civitai_api_key": "token"}),
+        scanner_factories=(("lora", "LoRAs", scanner_factory),),
+    )
+
+    response = await handler.get_doctor_diagnostics(FakeRequest(method="GET"))
+    payload = json.loads(response.text)
+
+    diagnostic_map = {item["id"]: item for item in payload["diagnostics"]}
+    conflict_diag = diagnostic_map["filename_conflicts"]
+    assert conflict_diag["status"] == "warning"
+    assert "1 filename(s)" in conflict_diag["summary"]
+    assert any("resolve-filename-conflicts" in str(a) for a in conflict_diag.get("actions", []))
+
+
+@pytest.mark.asyncio
+async def test_resolve_filename_conflicts_returns_renamed_list():
+    hash_index = ModelHashIndex()
+    hash_index.add_entry("abc123", "lora.safetensors")
+    hash_index.add_entry("def456", "lora.safetensors")
+
+    async def scanner_factory():
+        return DummyDoctorScanner(
+            model_type="lora",
+            raw_data=[],
+            hash_index=hash_index,
+        )
+
+    handler = DoctorHandler(
+        settings_service=DummySettings({"civitai_api_key": "token"}),
+        scanner_factories=(("lora", "LoRAs", scanner_factory),),
+    )
+
+    response = await handler.resolve_filename_conflicts(FakeRequest(method="POST"))
+    payload = json.loads(response.text)
+
+    assert payload["success"] is True
+    # Files don't exist on disk, so nothing gets renamed
+    assert payload["count"] == 0
+
+
+@pytest.mark.asyncio
+async def test_resolve_filename_conflicts_handles_scanner_error_gracefully():
+    class ErrorScanner:
+        model_type = "lora"
+
+        async def get_cached_data(self):
+            raise RuntimeError("scanner unavailable")
+
+    async def scanner_factory():
+        return ErrorScanner()
+
+    handler = DoctorHandler(
+        settings_service=DummySettings({"civitai_api_key": "token"}),
+        scanner_factories=(("lora", "LoRAs", scanner_factory),),
+    )
+
+    response = await handler.resolve_filename_conflicts(FakeRequest(method="POST"))
+    payload = json.loads(response.text)
+
+    assert payload["success"] is True
+    assert payload["count"] == 0
--- a/tests/services/test_model_hash_index.py
+++ b/tests/services/test_model_hash_index.py
@@ -0,0 +1,113 @@
+import pytest
+from py.services.model_hash_index import ModelHashIndex
+
+
+class TestModelHashIndexRemoveByPath:
+    def test_remove_by_path_finds_hash_in_hash_to_path(self):
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/lora.safetensors")
+        index.remove_by_path("/models/lora.safetensors")
+        assert len(index) == 0
+        assert not index.get_duplicate_filenames()
+
+    def test_remove_by_path_falls_back_to_duplicate_hashes(self):
+        """When a path is only tracked in _duplicate_hashes, remove_by_path
+        should still find and remove it."""
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/lora_v1.safetensors")
+        index.add_entry("abc123", "/models/lora_v2.safetensors")
+
+        # lora_v1 is the primary (_hash_to_path), lora_v2 is in _duplicate_hashes
+        index.remove_by_path("/models/lora_v2.safetensors")
+
+        assert len(index) == 1
+        assert index._hash_to_path.get("abc123") == "/models/lora_v1.safetensors"
+        assert "abc123" not in index._duplicate_hashes
+
+    def test_remove_by_path_cleans_up_duplicate_filenames(self):
+        """After removing a path, _duplicate_filenames should be updated."""
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/mylora.safetensors")
+        index.add_entry("def456", "/other/mylora.safetensors")
+
+        assert "mylora" in index.get_duplicate_filenames()
+        assert len(index.get_duplicate_filenames()["mylora"]) == 2
+
+        index.remove_by_path("/other/mylora.safetensors")
+
+        # After removing one duplicate, only one path remains — no longer a duplicate
+        assert "mylora" not in index.get_duplicate_filenames()
+
+    def test_remove_by_path_keeps_duplicate_filenames_with_three_entries(self):
+        """With 3 entries for the same filename, removing one should leave 2."""
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/mylora.safetensors")
+        index.add_entry("def456", "/other/mylora.safetensors")
+        index.add_entry("ghi789", "/third/mylora.safetensors")
+
+        index.remove_by_path("/other/mylora.safetensors")
+
+        assert "mylora" in index.get_duplicate_filenames()
+        paths = index.get_duplicate_filenames()["mylora"]
+        assert len(paths) == 2
+        assert "/other/mylora.safetensors" not in paths
+
+    def test_remove_by_path_noop_on_unknown_path(self):
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/lora.safetensors")
+        # Should not raise
+        index.remove_by_path("/nonexistent/lora.safetensors")
+        assert len(index) == 1
+
+    def test_remove_by_path_handles_hash_from_duplicate_hashes_only(self):
+        """Remove a path whose hash exists ONLY in _duplicate_hashes,
+        not in _hash_to_path (edge case from index rebuilds)."""
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/a/model.safetensors")
+        index.add_entry("abc123", "/b/model.safetensors")
+
+        # Manually remove the primary entry to simulate edge case
+        del index._hash_to_path["abc123"]
+        # Now the path is only referenced in _duplicate_hashes
+        assert "abc123" in index._duplicate_hashes
+
+        index.remove_by_path("/b/model.safetensors")
+        # The remaining path is promoted to _hash_to_path, duplicates cleared
+        assert "abc123" not in index._duplicate_hashes
+        assert index._hash_to_path.get("abc123") == "/a/model.safetensors"
+
+
+class TestModelHashIndexGetDuplicateFilenames:
+    def test_empty_index_returns_empty_dict(self):
+        index = ModelHashIndex()
+        assert index.get_duplicate_filenames() == {}
+
+    def test_no_duplicates_returns_empty_dict(self):
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/models/lora.safetensors")
+        index.add_entry("def456", "/models/other.safetensors")
+        assert index.get_duplicate_filenames() == {}
+
+    def test_duplicate_filenames_detected(self):
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/a/mylora.safetensors")
+        index.add_entry("def456", "/b/mylora.safetensors")
+        dupes = index.get_duplicate_filenames()
+        assert "mylora" in dupes
+        assert len(dupes["mylora"]) == 2
+
+    def test_same_hash_same_name_not_a_filename_duplicate(self):
+        """Same hash with same filename = hash duplicate, not filename conflict."""
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/a/lora.safetensors")
+        # Same hash, same filename — this is a true duplicate (hash collision)
+        # but the filename index only tracks different files with same name
+        # Currently add_entry for same hash+path would update, not create duplicate
+        # This is correct behavior — filename dupes are for different files
+
+    def test_add_entry_idempotent_for_same_path_and_hash(self):
+        index = ModelHashIndex()
+        index.add_entry("abc123", "/a/lora.safetensors")
+        index.add_entry("abc123", "/a/lora.safetensors")
+        assert len(index) == 1
+        assert index.get_duplicate_filenames() == {}
--- a/tests/services/test_model_scanner.py
+++ b/tests/services/test_model_scanner.py
@@ -624,3 +624,42 @@ async def test_reconcile_cache_removes_duplicate_alias_when_same_real_file_seen_
    cache = await scanner.get_cached_data()
    cached_paths = {item["file_path"] for item in cache.raw_data}
    assert cached_paths == {_normalize_path(loras_root / "link" / "one.txt")}
+
+
+@pytest.mark.asyncio
+async def test_log_duplicate_filename_summary_logs_warning(tmp_path: Path, caplog):
+    """When duplicate filenames exist, _log_duplicate_filename_summary should emit
+    a single warning log with the conflict count and total file count."""
+    import logging
+    caplog.set_level(logging.WARNING)
+
+    root = tmp_path / "loras"
+    root.mkdir()
+    scanner = DummyScanner(root)
+
+    # Simulate duplicate filenames in the hash index
+    scanner._hash_index.add_entry("aaa111", str(root / "model.safetensors"))
+    scanner._hash_index.add_entry("bbb222", str(root / "dir" / "model.safetensors"))
+
+    scanner._log_duplicate_filename_summary()
+
+    assert len(caplog.records) >= 1
+    log_msg = caplog.records[-1].message
+    assert "Duplicate filename conflict detected" in log_msg
+    assert "1 dummy filename(s)" in log_msg
+    assert "2 files total" in log_msg
+
+
+@pytest.mark.asyncio
+async def test_log_duplicate_filename_summary_silent_when_no_duplicates(tmp_path: Path, caplog):
+    import logging
+    caplog.set_level(logging.WARNING)
+
+    root = tmp_path / "loras"
+    root.mkdir()
+    scanner = DummyScanner(root)
+    scanner._log_duplicate_filename_summary()
+
+    # No warning should be logged when there are no duplicates
+    for record in caplog.records:
+        assert "Duplicate filename conflict detected" not in record.message