From 553e1868e1a3b7b3e1dfb4ee66af2d168467cb87 Mon Sep 17 00:00:00 2001 From: Will Miao Date: Sun, 1 Feb 2026 12:42:35 +0800 Subject: [PATCH] perf(config): limit symlink scan to first level for faster startup Replace recursive directory traversal with first-level-only symlink scanning to fix severe performance issues on large model collections (220K+ files). - Rename _scan_directory_links to _scan_first_level_symlinks - Only scan symlinks directly under each root directory - Skip traversal of normal subdirectories entirely - Update tests to reflect first-level behavior - Add test_deep_symlink_not_scanned to document intentional limitation Startup time reduced from 15+ minutes to seconds for affected users. Co-Authored-By: Claude Opus 4.5 --- py/config.py | 93 ++++++++++------------------- tests/config/test_symlink_cache.py | 94 +++++++++++++++++------------- 2 files changed, 87 insertions(+), 100 deletions(-) diff --git a/py/config.py b/py/config.py index 2b6911da..dbb8909d 100644 --- a/py/config.py +++ b/py/config.py @@ -441,82 +441,53 @@ class Config: logger.info("Failed to write symlink cache %s: %s", cache_path, exc) def _scan_symbolic_links(self): - """Scan all symbolic links in LoRA, Checkpoint, and Embedding root directories""" + """Scan symbolic links in LoRA, Checkpoint, and Embedding root directories. + + Only scans the first level of each root directory to avoid performance + issues with large file systems. Detects symlinks and Windows junctions + at the root level only (not nested symlinks in subdirectories). + """ start = time.perf_counter() # Reset mappings before rescanning to avoid stale entries self._path_mappings.clear() self._seed_root_symlink_mappings() - visited_dirs: Set[str] = set() for root in self._symlink_roots(): - self._scan_directory_links(root, visited_dirs) + self._scan_first_level_symlinks(root) logger.debug( "Symlink scan finished in %.2f ms with %d mappings", (time.perf_counter() - start) * 1000, len(self._path_mappings), ) - def _scan_directory_links(self, root: str, visited_dirs: Set[str]): - """Iteratively scan directory symlinks to avoid deep recursion.""" + def _scan_first_level_symlinks(self, root: str): + """Scan only the first level of a directory for symlinks. + + This avoids traversing the entire directory tree which can be extremely + slow for large model collections. Only symlinks directly under the root + are detected. + """ try: - # Note: We only use realpath for the initial root if it's not already resolved - # to ensure we have a valid entry point. - root_real = self._normalize_path(os.path.realpath(root)) - except OSError: - root_real = self._normalize_path(root) + with os.scandir(root) as it: + for entry in it: + try: + # Only detect symlinks including Windows junctions + # Skip normal directories to avoid deep traversal + if not self._entry_is_symlink(entry): + continue - if root_real in visited_dirs: - return + # Resolve the symlink target + target_path = os.path.realpath(entry.path) + if not os.path.isdir(target_path): + continue - visited_dirs.add(root_real) - # Stack entries: (display_path, real_resolved_path) - stack: List[Tuple[str, str]] = [(root, root_real)] - - while stack: - current_display, current_real = stack.pop() - try: - with os.scandir(current_display) as it: - for entry in it: - try: - # 1. Detect symlinks including Windows junctions - is_link = self._entry_is_symlink(entry) - - if is_link: - # Only resolve realpath when we actually find a link - target_path = os.path.realpath(entry.path) - if not os.path.isdir(target_path): - continue - - normalized_target = self._normalize_path(target_path) - self.add_path_mapping(entry.path, target_path) - - if normalized_target in visited_dirs: - continue - - visited_dirs.add(normalized_target) - stack.append((target_path, normalized_target)) - continue - - # 2. Process normal directories - if not entry.is_dir(follow_symlinks=False): - continue - - # For normal directories, we avoid realpath() call by - # incrementally building the real path relative to current_real. - # This is safe because 'entry' is NOT a symlink. - entry_real = self._normalize_path(os.path.join(current_real, entry.name)) - - if entry_real in visited_dirs: - continue - - visited_dirs.add(entry_real) - stack.append((entry.path, entry_real)) - except Exception as inner_exc: - logger.debug( - "Error processing directory entry %s: %s", entry.path, inner_exc - ) - except Exception as e: - logger.error(f"Error scanning links in {current_display}: {e}") + self.add_path_mapping(entry.path, target_path) + except Exception as inner_exc: + logger.debug( + "Error processing directory entry %s: %s", entry.path, inner_exc + ) + except Exception as e: + logger.error(f"Error scanning links in {root}: {e}") diff --git a/tests/config/test_symlink_cache.py b/tests/config/test_symlink_cache.py index 9f140349..c0d33567 100644 --- a/tests/config/test_symlink_cache.py +++ b/tests/config/test_symlink_cache.py @@ -230,8 +230,58 @@ def test_new_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path): assert normalized_external in second_cfg._path_mappings -def test_removed_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path): - """Removing a deep symlink should trigger cache invalidation.""" +def test_removed_first_level_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path): + """Removing a first-level symlink should trigger cache invalidation.""" + loras_dir, settings_dir = _setup_paths(monkeypatch, tmp_path) + + # Create first-level symlink (directly under loras root) + external_dir = tmp_path / "external" + external_dir.mkdir() + symlink = loras_dir / "external_models" + symlink.symlink_to(external_dir, target_is_directory=True) + + # Initial scan finds the symlink + first_cfg = config_module.Config() + normalized_external = _normalize(str(external_dir)) + assert normalized_external in first_cfg._path_mappings + + # Remove the symlink + symlink.unlink() + + # Second config should detect invalid cached mapping and rescan + second_cfg = config_module.Config() + assert normalized_external not in second_cfg._path_mappings + + +def test_retargeted_first_level_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path): + """Changing a first-level symlink's target should trigger cache invalidation.""" + loras_dir, settings_dir = _setup_paths(monkeypatch, tmp_path) + + # Create first-level symlink + target_v1 = tmp_path / "external_v1" + target_v1.mkdir() + target_v2 = tmp_path / "external_v2" + target_v2.mkdir() + + symlink = loras_dir / "external_models" + symlink.symlink_to(target_v1, target_is_directory=True) + + # Initial scan + first_cfg = config_module.Config() + assert _normalize(str(target_v1)) in first_cfg._path_mappings + + # Retarget the symlink + symlink.unlink() + symlink.symlink_to(target_v2, target_is_directory=True) + + # Second config should detect changed target and rescan + second_cfg = config_module.Config() + assert _normalize(str(target_v2)) in second_cfg._path_mappings + assert _normalize(str(target_v1)) not in second_cfg._path_mappings + + +def test_deep_symlink_not_scanned(monkeypatch: pytest.MonkeyPatch, tmp_path): + """Deep symlinks (below first level) are not scanned to avoid performance issues.""" loras_dir, settings_dir = _setup_paths(monkeypatch, tmp_path) # Create nested structure with deep symlink @@ -242,46 +292,12 @@ def test_removed_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, t deep_symlink = subdir / "styles" deep_symlink.symlink_to(external_dir, target_is_directory=True) - # Initial scan finds the deep symlink - first_cfg = config_module.Config() + # Config should not detect deep symlinks (only first-level) + cfg = config_module.Config() normalized_external = _normalize(str(external_dir)) - assert normalized_external in first_cfg._path_mappings - - # Remove the deep symlink - deep_symlink.unlink() - - # Second config should detect invalid cached mapping and rescan - second_cfg = config_module.Config() - assert normalized_external not in second_cfg._path_mappings + assert normalized_external not in cfg._path_mappings -def test_retargeted_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path): - """Changing a deep symlink's target should trigger cache invalidation.""" - loras_dir, settings_dir = _setup_paths(monkeypatch, tmp_path) - - # Create nested structure - subdir = loras_dir / "anime" - subdir.mkdir() - target_v1 = tmp_path / "external_v1" - target_v1.mkdir() - target_v2 = tmp_path / "external_v2" - target_v2.mkdir() - - deep_symlink = subdir / "styles" - deep_symlink.symlink_to(target_v1, target_is_directory=True) - - # Initial scan - first_cfg = config_module.Config() - assert _normalize(str(target_v1)) in first_cfg._path_mappings - - # Retarget the symlink - deep_symlink.unlink() - deep_symlink.symlink_to(target_v2, target_is_directory=True) - - # Second config should detect changed target and rescan - second_cfg = config_module.Config() - assert _normalize(str(target_v2)) in second_cfg._path_mappings - assert _normalize(str(target_v1)) not in second_cfg._path_mappings def test_legacy_symlink_cache_automatic_cleanup(monkeypatch: pytest.MonkeyPatch, tmp_path): """Test that legacy symlink cache is automatically cleaned up after migration.""" settings_dir = tmp_path / "settings"