From 415fc5720ccbaab0b0ba368fbc3fdcfcdca2f865 Mon Sep 17 00:00:00 2001 From: Will Miao Date: Mon, 10 Nov 2025 10:09:56 +0800 Subject: [PATCH] feat(settings): add configurable hash chunk size --- py/services/settings_manager.py | 3 ++- py/utils/constants.py | 3 +++ py/utils/file_utils.py | 32 ++++++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/py/services/settings_manager.py b/py/services/settings_manager.py index 17f6d18c..3ba77183 100644 --- a/py/services/settings_manager.py +++ b/py/services/settings_manager.py @@ -8,7 +8,7 @@ from datetime import datetime, timezone from threading import Lock from typing import Any, Awaitable, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple -from ..utils.constants import DEFAULT_PRIORITY_TAG_CONFIG +from ..utils.constants import DEFAULT_HASH_CHUNK_SIZE_MB, DEFAULT_PRIORITY_TAG_CONFIG from ..utils.settings_paths import ensure_settings_file from ..utils.tag_priorities import ( PriorityTagEntry, @@ -29,6 +29,7 @@ CORE_USER_SETTING_KEYS: Tuple[str, ...] = ( DEFAULT_SETTINGS: Dict[str, Any] = { "civitai_api_key": "", "use_portable_settings": False, + "hash_chunk_size_mb": DEFAULT_HASH_CHUNK_SIZE_MB, "language": "en", "show_only_sfw": False, "enable_metadata_archive_db": False, diff --git a/py/utils/constants.py b/py/utils/constants.py index fb8f578d..37008537 100644 --- a/py/utils/constants.py +++ b/py/utils/constants.py @@ -55,6 +55,9 @@ CIVITAI_USER_MODEL_TYPES = [ 'checkpoint', ] +# Default chunk size in megabytes used for hashing large files. +DEFAULT_HASH_CHUNK_SIZE_MB = 4 + # Auto-organize settings AUTO_ORGANIZE_BATCH_SIZE = 50 # Process models in batches to avoid overwhelming the system diff --git a/py/utils/file_utils.py b/py/utils/file_utils.py index 68cfa036..f0532fb6 100644 --- a/py/utils/file_utils.py +++ b/py/utils/file_utils.py @@ -1,17 +1,41 @@ + +import hashlib import logging import os -import hashlib -from .constants import PREVIEW_EXTENSIONS, CARD_PREVIEW_WIDTH +from .constants import ( + CARD_PREVIEW_WIDTH, + DEFAULT_HASH_CHUNK_SIZE_MB, + PREVIEW_EXTENSIONS, +) from .exif_utils import ExifUtils +from ..services.settings_manager import get_settings_manager logger = logging.getLogger(__name__) + +def _get_hash_chunk_size_bytes() -> int: + """Return the chunk size used for hashing, in bytes.""" + + settings_manager = get_settings_manager() + chunk_size_mb = settings_manager.get("hash_chunk_size_mb", DEFAULT_HASH_CHUNK_SIZE_MB) + try: + chunk_size_value = float(chunk_size_mb) + except (TypeError, ValueError): + chunk_size_value = float(DEFAULT_HASH_CHUNK_SIZE_MB) + + if chunk_size_value <= 0: + chunk_size_value = float(DEFAULT_HASH_CHUNK_SIZE_MB) + + return max(1, int(chunk_size_value * 1024 * 1024)) + + async def calculate_sha256(file_path: str) -> str: """Calculate SHA256 hash of a file""" sha256_hash = hashlib.sha256() + chunk_size = _get_hash_chunk_size_bytes() with open(file_path, "rb") as f: - for byte_block in iter(lambda: f.read(128 * 1024), b""): + for byte_block in iter(lambda: f.read(chunk_size), b""): sha256_hash.update(byte_block) return sha256_hash.hexdigest() @@ -81,4 +105,4 @@ def get_preview_extension(preview_path: str) -> str: def normalize_path(path: str) -> str: """Normalize file path to use forward slashes""" - return path.replace(os.sep, "/") if path else path \ No newline at end of file + return path.replace(os.sep, "/") if path else path