From 98e1d168b0e11f7808fbc2e67267bf671f4d4a1b Mon Sep 17 00:00:00 2001 From: Will Miao Date: Wed, 3 Jun 2026 15:02:37 +0800 Subject: [PATCH] feat(utils): add AutoV2 and AutoV3 hash calculation functions --- py/utils/file_utils.py | 78 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/py/utils/file_utils.py b/py/utils/file_utils.py index 4c6d4317..73736358 100644 --- a/py/utils/file_utils.py +++ b/py/utils/file_utils.py @@ -1,7 +1,10 @@ import hashlib +import json import logging import os +import struct +from typing import Any from .constants import ( CARD_PREVIEW_WIDTH, @@ -31,7 +34,7 @@ def _get_hash_chunk_size_bytes() -> int: async def calculate_sha256(file_path: str) -> str: - """Calculate SHA256 hash of a file""" + """Calculate SHA256 hash of a file (full file content).""" sha256_hash = hashlib.sha256() chunk_size = _get_hash_chunk_size_bytes() with open(file_path, "rb") as f: @@ -39,6 +42,79 @@ async def calculate_sha256(file_path: str) -> str: sha256_hash.update(byte_block) return sha256_hash.hexdigest() + +def calculate_autov2(file_path: str) -> str: + """Calculate CivitAI AutoV2 hash. + + AutoV2 is the first 10 characters of the full file SHA256. + Used by CivitAI as a shortened file identifier. + + Reference: https://developer.civitai.com/site/reference/model-versions + """ + full_hash = hashlib.sha256() + chunk_size = _get_hash_chunk_size_bytes() + with open(file_path, "rb") as f: + for byte_block in iter(lambda: f.read(chunk_size), b""): + full_hash.update(byte_block) + return full_hash.hexdigest()[:10] + + +def read_safetensors_metadata(file_path: str) -> dict[str, Any]: + """Read the ``__metadata__`` dict from a safetensors file header. + + Safetensors file format: + - 8 bytes: header length (little-endian 64-bit) + - N bytes: UTF-8 JSON header + - The header JSON contains a ``__metadata__`` key holding arbitrary metadata. + + Returns an empty dict if the file is not a valid safetensors file or has no + metadata. + """ + try: + with open(file_path, "rb") as f: + header_len_bytes = f.read(8) + if len(header_len_bytes) < 8: + return {} + header_len = struct.unpack(" str | None: + """Calculate CivitAI AutoV3 hash from a safetensors file. + + AutoV3 is extracted from the safetensors file's embedded metadata, not + computed from the file bytes directly. The orchestrator reads the + ``sshs_model_hash`` (kohya-ss format) or ``modelspec.hash_sha256`` field + from the safetensors header and stores the first 12 characters. + + The embedded hash itself is the SHA256 of the file after skipping the + 8-byte header length + JSON header (a.k.a. the addnet hash / tensor-only + hash). + + Reference: + - CivitAI DB trigger: ``SUBSTRING(NEW.hash FROM 1 FOR 12)`` + - https://developer.civitai.com/site/reference/model-versions + + Returns ``None`` when no AutoV3 hash can be determined (e.g. the file is + not safetensors, or the metadata doesn't contain a recognised hash field). + """ + metadata = read_safetensors_metadata(file_path) + if not metadata: + return None + + embedded_hash = metadata.get("sshs_model_hash") or metadata.get("modelspec.hash_sha256") + if embedded_hash and isinstance(embedded_hash, str) and len(embedded_hash) >= 12: + return embedded_hash[:12] + + return None + + def find_preview_file(base_name: str, dir_path: str) -> str: """Find preview file for given base name in directory.