fix(scanner): respect lazy hash for checkpoints, add posix_fadvise, cancel on shutdown (#985)

This commit is contained in:
Will Miao
2026-06-16 23:00:23 +08:00
parent 2939813e1a
commit 58c266ad07
3 changed files with 22 additions and 3 deletions

View File

@@ -436,5 +436,14 @@ class LoraManager:
try: try:
logger.info("LoRA Manager: Cleaning up services") logger.info("LoRA Manager: Cleaning up services")
# Cancel any in-flight scanner initialization tasks so thread-pool
# workers (e.g. _initialize_cache_sync) can break out of their loops
# when the server shuts down (e.g. Ctrl+C on WSL).
for name in ("lora_scanner", "checkpoint_scanner", "embedding_scanner"):
scanner = ServiceRegistry.get_service_sync(name)
if scanner is not None and hasattr(scanner, "cancel_task"):
scanner.cancel_task()
logger.debug("LoRA Manager: Cancelled %s", name)
except Exception as e: except Exception as e:
logger.error(f"Error during cleanup: {e}", exc_info=True) logger.error(f"Error during cleanup: {e}", exc_info=True)

View File

@@ -1067,8 +1067,11 @@ class ModelScanner:
model_data = self._build_cache_entry(metadata, folder=normalized_folder) model_data = self._build_cache_entry(metadata, folder=normalized_folder)
# Compute SHA256 hash when metadata provided none (e.g., CivitAI API response has empty hashes) # Compute SHA256 hash when metadata provided none (e.g., CivitAI API response has empty hashes).
if not model_data.get('sha256') and file_path: # Respect hash_status='pending' (set by CheckpointScanner for large models) to defer
# hash calculation until on-demand — avoids reading entire checkpoint files at startup.
hash_status = model_data.get('hash_status', '')
if not model_data.get('sha256') and hash_status != 'pending' and file_path:
try: try:
logger.info(f"Computing SHA256 hash for {file_path} (was empty from metadata)") logger.info(f"Computing SHA256 hash for {file_path} (was empty from metadata)")
sha256 = await calculate_sha256(file_path) sha256 = await calculate_sha256(file_path)

View File

@@ -34,10 +34,17 @@ def _get_hash_chunk_size_bytes() -> int:
def _advise_dontneed(fd: int, offset: int, length: int) -> bool:
    """Hint the kernel that a byte range of *fd* is no longer needed.

    This is purely advisory and best-effort: it never raises.

    Args:
        fd: Open file descriptor.
        offset: Start of the byte range.
        length: Length of the byte range; ``0`` means "to end of file".

    Returns:
        ``True`` if the hint was issued, ``False`` if ``posix_fadvise`` is not
        available on this platform or the call was rejected with ``OSError``.
    """
    # os.posix_fadvise / POSIX_FADV_DONTNEED are absent on some platforms
    # (e.g. native Windows), so look them up instead of assuming they exist.
    fadvise = getattr(os, "posix_fadvise", None)
    advice = getattr(os, "POSIX_FADV_DONTNEED", None)
    if fadvise is None or advice is None:
        return False
    try:
        fadvise(fd, offset, length, advice)
    except OSError:
        # Deliberately tolerated: the hint is an optimisation only and must
        # never make hashing fail.
        return False
    return True


async def calculate_sha256(file_path: str) -> str:
    """Calculate SHA256 hash of a file (full file content).

    Where the platform provides ``posix_fadvise``, each chunk is released from
    the OS page cache with ``POSIX_FADV_DONTNEED`` *after* it has been hashed,
    so hashing a large file does not leave the whole file in the page cache.
    The advice must follow the read: issued beforehand it only evicts pages
    that were already cached, and the subsequent reads repopulate the cache.
    On platforms without ``posix_fadvise`` the file is simply hashed.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The hex-encoded SHA256 digest of the file's content.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha256_hash = hashlib.sha256()
    chunk_size = _get_hash_chunk_size_bytes()
    with open(file_path, "rb") as f:
        fd = f.fileno()
        offset = 0
        advise = True  # switched off after the first failed/unsupported hint
        for byte_block in iter(lambda: f.read(chunk_size), b""):
            sha256_hash.update(byte_block)
            if advise:
                advise = _advise_dontneed(fd, offset, len(byte_block))
            offset += len(byte_block)
        if advise:
            # Final whole-file pass (length 0 == to EOF) catches read-ahead
            # pages and partial pages at chunk boundaries.
            _advise_dontneed(fd, 0, 0)
    return sha256_hash.hexdigest()