perf(config): limit symlink scan to first level for faster startup

Replace recursive directory traversal with first-level-only symlink scanning
to fix severe startup performance issues on large model collections (220K+ files).
Trade-off: symlinks nested inside subdirectories of a root are no longer detected.

- Rename _scan_directory_links to _scan_first_level_symlinks
- Only scan symlinks directly under each root directory
- Skip traversal of normal subdirectories entirely
- Update tests to reflect first-level behavior
- Add test_deep_symlink_not_scanned to document intentional limitation

Startup time reduced from 15+ minutes to seconds for affected users.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Will Miao
2026-02-01 12:42:35 +08:00
parent 938ceb49b2
commit 553e1868e1
2 changed files with 87 additions and 100 deletions

View File

@@ -441,82 +441,53 @@ class Config:
logger.info("Failed to write symlink cache %s: %s", cache_path, exc)
def _scan_symbolic_links(self):
"""Scan all symbolic links in LoRA, Checkpoint, and Embedding root directories"""
"""Scan symbolic links in LoRA, Checkpoint, and Embedding root directories.
Only scans the first level of each root directory to avoid performance
issues with large file systems. Detects symlinks and Windows junctions
at the root level only (not nested symlinks in subdirectories).
"""
start = time.perf_counter()
# Reset mappings before rescanning to avoid stale entries
self._path_mappings.clear()
self._seed_root_symlink_mappings()
visited_dirs: Set[str] = set()
for root in self._symlink_roots():
self._scan_directory_links(root, visited_dirs)
self._scan_first_level_symlinks(root)
logger.debug(
"Symlink scan finished in %.2f ms with %d mappings",
(time.perf_counter() - start) * 1000,
len(self._path_mappings),
)
def _scan_directory_links(self, root: str, visited_dirs: Set[str]):
"""Iteratively scan directory symlinks to avoid deep recursion."""
def _scan_first_level_symlinks(self, root: str):
"""Scan only the first level of a directory for symlinks.
This avoids traversing the entire directory tree which can be extremely
slow for large model collections. Only symlinks directly under the root
are detected.
"""
try:
# Note: We only use realpath for the initial root if it's not already resolved
# to ensure we have a valid entry point.
root_real = self._normalize_path(os.path.realpath(root))
except OSError:
root_real = self._normalize_path(root)
with os.scandir(root) as it:
for entry in it:
try:
# Only detect symlinks including Windows junctions
# Skip normal directories to avoid deep traversal
if not self._entry_is_symlink(entry):
continue
if root_real in visited_dirs:
return
# Resolve the symlink target
target_path = os.path.realpath(entry.path)
if not os.path.isdir(target_path):
continue
visited_dirs.add(root_real)
# Stack entries: (display_path, real_resolved_path)
stack: List[Tuple[str, str]] = [(root, root_real)]
while stack:
current_display, current_real = stack.pop()
try:
with os.scandir(current_display) as it:
for entry in it:
try:
# 1. Detect symlinks including Windows junctions
is_link = self._entry_is_symlink(entry)
if is_link:
# Only resolve realpath when we actually find a link
target_path = os.path.realpath(entry.path)
if not os.path.isdir(target_path):
continue
normalized_target = self._normalize_path(target_path)
self.add_path_mapping(entry.path, target_path)
if normalized_target in visited_dirs:
continue
visited_dirs.add(normalized_target)
stack.append((target_path, normalized_target))
continue
# 2. Process normal directories
if not entry.is_dir(follow_symlinks=False):
continue
# For normal directories, we avoid realpath() call by
# incrementally building the real path relative to current_real.
# This is safe because 'entry' is NOT a symlink.
entry_real = self._normalize_path(os.path.join(current_real, entry.name))
if entry_real in visited_dirs:
continue
visited_dirs.add(entry_real)
stack.append((entry.path, entry_real))
except Exception as inner_exc:
logger.debug(
"Error processing directory entry %s: %s", entry.path, inner_exc
)
except Exception as e:
logger.error(f"Error scanning links in {current_display}: {e}")
self.add_path_mapping(entry.path, target_path)
except Exception as inner_exc:
logger.debug(
"Error processing directory entry %s: %s", entry.path, inner_exc
)
except Exception as e:
logger.error(f"Error scanning links in {root}: {e}")

View File

@@ -230,8 +230,58 @@ def test_new_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path):
assert normalized_external in second_cfg._path_mappings
def test_removed_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path):
"""Removing a deep symlink should trigger cache invalidation."""
def test_removed_first_level_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path):
    """A mapping must disappear once its first-level symlink is deleted.

    Builds one Config while the link exists, deletes the link, then builds a
    second Config and checks that the link target is no longer mapped.
    """
    loras_root, _settings_root = _setup_paths(monkeypatch, tmp_path)

    # The target lives outside the loras root; the link sits directly under it.
    outside_dir = tmp_path / "external"
    outside_dir.mkdir()
    link = loras_root / "external_models"
    link.symlink_to(outside_dir, target_is_directory=True)

    # With the link in place, the target shows up in the mappings.
    cfg_before = config_module.Config()
    expected_key = _normalize(str(outside_dir))
    assert expected_key in cfg_before._path_mappings

    # Once the link is gone, a fresh Config must not report the target.
    link.unlink()
    cfg_after = config_module.Config()
    assert expected_key not in cfg_after._path_mappings
def test_retargeted_first_level_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path):
    """Re-pointing a first-level symlink must replace the old mapping.

    Builds one Config with the link aimed at the first target, re-creates the
    link aimed at a second target, then checks that a new Config maps only the
    second target.
    """
    loras_root, _settings_root = _setup_paths(monkeypatch, tmp_path)

    # Two candidate targets outside the loras root.
    old_target = tmp_path / "external_v1"
    old_target.mkdir()
    new_target = tmp_path / "external_v2"
    new_target.mkdir()

    # The link sits directly under the loras root and starts at the old target.
    link = loras_root / "external_models"
    link.symlink_to(old_target, target_is_directory=True)

    cfg_before = config_module.Config()
    assert _normalize(str(old_target)) in cfg_before._path_mappings

    # Swap the link over to the new target.
    link.unlink()
    link.symlink_to(new_target, target_is_directory=True)

    # A fresh Config must see the new target and drop the old one.
    cfg_after = config_module.Config()
    assert _normalize(str(new_target)) in cfg_after._path_mappings
    assert _normalize(str(old_target)) not in cfg_after._path_mappings
def test_deep_symlink_not_scanned(monkeypatch: pytest.MonkeyPatch, tmp_path):
"""Deep symlinks (below first level) are not scanned to avoid performance issues."""
loras_dir, settings_dir = _setup_paths(monkeypatch, tmp_path)
# Create nested structure with deep symlink
@@ -242,46 +292,12 @@ def test_removed_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, t
deep_symlink = subdir / "styles"
deep_symlink.symlink_to(external_dir, target_is_directory=True)
# Initial scan finds the deep symlink
first_cfg = config_module.Config()
# Config should not detect deep symlinks (only first-level)
cfg = config_module.Config()
normalized_external = _normalize(str(external_dir))
assert normalized_external in first_cfg._path_mappings
# Remove the deep symlink
deep_symlink.unlink()
# Second config should detect invalid cached mapping and rescan
second_cfg = config_module.Config()
assert normalized_external not in second_cfg._path_mappings
assert normalized_external not in cfg._path_mappings
def test_retargeted_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch, tmp_path):
    """Re-pointing a symlink nested one directory down must replace the old mapping.

    The link lives inside a regular subdirectory of the loras root. After it is
    re-created with a different target, a new Config is expected to map only
    the new target.
    """
    loras_root, _settings_root = _setup_paths(monkeypatch, tmp_path)

    # Regular subdirectory that will contain the link.
    nested_dir = loras_root / "anime"
    nested_dir.mkdir()

    # Two candidate targets outside the loras root.
    old_target = tmp_path / "external_v1"
    old_target.mkdir()
    new_target = tmp_path / "external_v2"
    new_target.mkdir()

    link = nested_dir / "styles"
    link.symlink_to(old_target, target_is_directory=True)

    cfg_before = config_module.Config()
    assert _normalize(str(old_target)) in cfg_before._path_mappings

    # Swap the link over to the new target.
    link.unlink()
    link.symlink_to(new_target, target_is_directory=True)

    # A fresh Config must see the new target and drop the old one.
    cfg_after = config_module.Config()
    assert _normalize(str(new_target)) in cfg_after._path_mappings
    assert _normalize(str(old_target)) not in cfg_after._path_mappings
def test_legacy_symlink_cache_automatic_cleanup(monkeypatch: pytest.MonkeyPatch, tmp_path):
"""Test that legacy symlink cache is automatically cleaned up after migration."""
settings_dir = tmp_path / "settings"