fix(scanner): respect lazy hash for checkpoints, add posix_fadvise, cancel on shutdown (#985)

This commit is contained in:
Will Miao
2026-06-16 23:00:23 +08:00
parent 2939813e1a
commit 58c266ad07
3 changed files with 22 additions and 3 deletions

View File

@@ -34,10 +34,17 @@ def _get_hash_chunk_size_bytes() -> int:
async def calculate_sha256(file_path: str) -> str:
    """Calculate the SHA256 hash of a file (full file content).

    The file is read in chunks of ``_get_hash_chunk_size_bytes()`` bytes so
    the whole content never has to be held in memory at once.

    After the content has been hashed, ``posix_fadvise`` with
    ``POSIX_FADV_DONTNEED`` is issued (where the platform provides it) to ask
    the kernel to drop the pages this read pulled into the OS page cache —
    critical on WSL where cached file pages live inside the VM and are not
    accounted for in guest ``used`` memory, causing VmmemWSL to balloon.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The lowercase hexadecimal SHA256 digest of the file content.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sha256_hash = hashlib.sha256()
    chunk_size = _get_hash_chunk_size_bytes()
    # ``os.posix_fadvise`` is not available on every platform (e.g. Windows);
    # look it up defensively so hashing keeps working there.
    fadvise = getattr(os, "posix_fadvise", None)
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(chunk_size), b""):
            sha256_hash.update(byte_block)
        if fadvise is not None:
            # DONTNEED only evicts pages that are already cached, so it has to
            # be issued AFTER reading; calling it up front would be a no-op
            # for the pages the loop above brings in.
            try:
                fadvise(f.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
            except OSError:
                # Purely advisory hint (some filesystems reject it); a failure
                # here must never invalidate a successfully computed hash.
                pass
    return sha256_hash.hexdigest()