diff --git a/py/services/cache_entry_validator.py b/py/services/cache_entry_validator.py index 08148258..c4848856 100644 --- a/py/services/cache_entry_validator.py +++ b/py/services/cache_entry_validator.py @@ -125,16 +125,20 @@ class CacheEntryValidator: ) # Special validation: sha256 must not be empty for required field + # BUT allow empty sha256 when hash_status is pending (lazy hash calculation) sha256 = working_entry.get('sha256', '') + hash_status = working_entry.get('hash_status', 'completed') if not sha256 or (isinstance(sha256, str) and not sha256.strip()): - errors.append("Required field 'sha256' is empty") - # Cannot repair empty sha256 - entry is invalid - return ValidationResult( - is_valid=False, - repaired=repaired, - errors=errors, - entry=working_entry if auto_repair else None - ) + # Allow empty sha256 for lazy hash calculation (checkpoints) + if hash_status != 'pending': + errors.append("Required field 'sha256' is empty") + # Cannot repair empty sha256 - entry is invalid + return ValidationResult( + is_valid=False, + repaired=repaired, + errors=errors, + entry=working_entry if auto_repair else None + ) # Normalize sha256 to lowercase if needed if isinstance(sha256, str): diff --git a/py/services/use_cases/bulk_metadata_refresh_use_case.py b/py/services/use_cases/bulk_metadata_refresh_use_case.py index 51f1fb28..13b5d6b7 100644 --- a/py/services/use_cases/bulk_metadata_refresh_use_case.py +++ b/py/services/use_cases/bulk_metadata_refresh_use_case.py @@ -47,8 +47,7 @@ class BulkMetadataRefreshUseCase: to_process: Sequence[Dict[str, Any]] = [ model for model in cache.raw_data - if model.get("sha256") - and not model.get("skip_metadata_refresh", False) + if not model.get("skip_metadata_refresh", False) and not self._is_in_skip_path(model.get("folder", ""), skip_paths) and (not model.get("civitai") or not model["civitai"].get("id")) and not ( @@ -85,6 +84,36 @@ class BulkMetadataRefreshUseCase: return {"success": False, "message": "Operation cancelled", "processed": processed, "updated": success, "total": total_models} try: original_name = model.get("model_name") + + # Handle lazy hash calculation for models with pending hash status + sha256 = model.get("sha256", "") + hash_status = model.get("hash_status", "completed") + file_path = model.get("file_path") + + if not sha256 and hash_status == "pending" and file_path: + self._logger.info(f"Calculating pending hash for {file_path}") + # Check if scanner has calculate_hash_for_model method (CheckpointScanner) + calculate_hash_method = getattr(self._service.scanner, "calculate_hash_for_model", None) + if calculate_hash_method: + sha256 = await calculate_hash_method(file_path) + if sha256: + model["sha256"] = sha256 + model["hash_status"] = "completed" + else: + self._logger.error(f"Failed to calculate hash for {file_path}") + processed += 1 + continue + else: + self._logger.warning(f"Scanner does not support lazy hash calculation for {file_path}") + processed += 1 + continue + + # Skip models without valid hash + if not model.get("sha256"): + self._logger.warning(f"Skipping model without hash: {file_path}") + processed += 1 + continue + await MetadataManager.hydrate_model_data(model) result, _ = await self._metadata_sync.fetch_and_update_model( sha256=model["sha256"], diff --git a/tests/services/test_cache_entry_validator.py b/tests/services/test_cache_entry_validator.py index 0a36b606..58fc75ff 100644 --- a/tests/services/test_cache_entry_validator.py +++ b/tests/services/test_cache_entry_validator.py @@ -73,6 +73,46 @@ class TestCacheEntryValidator: assert result.repaired is False assert any('sha256' in error for error in result.errors) + def test_validate_empty_sha256_allowed_when_hash_status_pending(self): + """Test validation passes when sha256 is empty but hash_status is pending (lazy hash)""" + entry = { + 'file_path': '/models/test.safetensors', + 'sha256': '', # Empty string + 'hash_status': 'pending', # Lazy hash calculation + } + + result = CacheEntryValidator.validate(entry, auto_repair=False) + + assert result.is_valid is True + assert result.entry['sha256'] == '' + assert result.entry['hash_status'] == 'pending' + + def test_validate_empty_sha256_fails_when_hash_status_not_pending(self): + """Test validation fails when sha256 is empty and hash_status is not pending""" + entry = { + 'file_path': '/models/test.safetensors', + 'sha256': '', # Empty string + 'hash_status': 'completed', # Not pending + } + + result = CacheEntryValidator.validate(entry, auto_repair=False) + + assert result.is_valid is False + assert any('sha256' in error for error in result.errors) + + def test_validate_empty_sha256_fails_when_hash_status_missing(self): + """Test validation fails when sha256 is empty and hash_status is missing""" + entry = { + 'file_path': '/models/test.safetensors', + 'sha256': '', # Empty string + # hash_status missing (defaults to 'completed') + } + + result = CacheEntryValidator.validate(entry, auto_repair=False) + + assert result.is_valid is False + assert any('sha256' in error for error in result.errors) + def test_validate_empty_required_field_file_path(self): """Test validation fails when file_path is empty string""" entry = { diff --git a/tests/services/use_cases/test_bulk_metadata_refresh_use_case.py b/tests/services/use_cases/test_bulk_metadata_refresh_use_case.py new file mode 100644 index 00000000..b0feae76 --- /dev/null +++ b/tests/services/use_cases/test_bulk_metadata_refresh_use_case.py @@ -0,0 +1,404 @@ +"""Tests for BulkMetadataRefreshUseCase.""" + +from __future__ import annotations + +import pytest +from types import SimpleNamespace +from typing import Any, Dict +from unittest.mock import AsyncMock, MagicMock, patch + +from py.services.use_cases.bulk_metadata_refresh_use_case import ( + BulkMetadataRefreshUseCase, + MetadataRefreshProgressReporter, +) +from py.utils import metadata_manager + + +class MockProgressReporter: + """Mock progress reporter for testing.""" + + def __init__(self): + self.progress_calls = [] + + async def on_progress(self, payload: Dict[str, Any]) -> None: + self.progress_calls.append(payload) + + +@pytest.fixture +def mock_service(): + """Create a mock service with scanner.""" + scanner = MagicMock() + scanner.get_cached_data = AsyncMock() + scanner.reset_cancellation = MagicMock() + scanner.is_cancelled = MagicMock(return_value=False) + scanner.update_single_model_cache = AsyncMock(return_value=True) + scanner.calculate_hash_for_model = AsyncMock(return_value="calculated_hash_123") + + service = MagicMock() + service.scanner = scanner + service.model_type = "checkpoint" + + return service + + +@pytest.fixture +def mock_metadata_sync(): + """Create a mock metadata sync service.""" + sync = MagicMock() + sync.fetch_and_update_model = AsyncMock(return_value=(True, None)) + return sync + + +@pytest.fixture +def mock_settings(): + """Create mock settings service.""" + settings = MagicMock() + settings.get = MagicMock(return_value=False) + return settings + + +@pytest.fixture +def use_case(mock_service, mock_metadata_sync, mock_settings): + """Create a BulkMetadataRefreshUseCase instance.""" + return BulkMetadataRefreshUseCase( + service=mock_service, + metadata_sync=mock_metadata_sync, + settings_service=mock_settings, + ) + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_pending_hash_calculated_on_demand(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test that models with pending hash status get their hash calculated on demand.""" + mock_hydrate.return_value = None + + # Setup cache with a model that has pending hash + pending_model = { + "file_path": "/extra_ckpt/model.safetensors", + "sha256": "", # Empty hash + "hash_status": "pending", + "model_name": "Test Model", + "folder": "extra_ckpt", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[pending_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify hash was calculated + mock_service.scanner.calculate_hash_for_model.assert_called_once_with( + "/extra_ckpt/model.safetensors" + ) + + # Verify model hash was updated + assert pending_model["sha256"] == "calculated_hash_123" + assert pending_model["hash_status"] == "completed" + + # Verify metadata sync was called with the calculated hash + mock_metadata_sync.fetch_and_update_model.assert_called_once() + call_args = mock_metadata_sync.fetch_and_update_model.call_args[1] + assert call_args["sha256"] == "calculated_hash_123" + + assert result["success"] is True + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_skip_model_when_hash_calculation_fails(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test that models are skipped when hash calculation fails.""" + mock_hydrate.return_value = None + + # Setup model with pending hash + pending_model = { + "file_path": "/extra_ckpt/model.safetensors", + "sha256": "", + "hash_status": "pending", + "model_name": "Test Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + # Make hash calculation fail + mock_service.scanner.calculate_hash_for_model.return_value = None + + cache = SimpleNamespace(raw_data=[pending_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify hash was attempted + mock_service.scanner.calculate_hash_for_model.assert_called_once() + + # Verify metadata sync was NOT called (model skipped) + mock_metadata_sync.fetch_and_update_model.assert_not_called() + + # Verify result shows processed but no success + assert result["success"] is True + assert result["processed"] == 1 + assert result["updated"] == 0 + + +@pytest.mark.asyncio +async def test_skip_model_when_scanner_does_not_support_lazy_hash( + use_case, mock_service, mock_metadata_sync +): + """Test that models are skipped when scanner doesn't support lazy hash calculation.""" + # Setup model with pending hash + pending_model = { + "file_path": "/models/model.safetensors", + "sha256": "", + "hash_status": "pending", + "model_name": "Test Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + # Remove calculate_hash_for_model method (simulating LoRA scanner) + del mock_service.scanner.calculate_hash_for_model + + cache = SimpleNamespace(raw_data=[pending_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify metadata sync was NOT called + mock_metadata_sync.fetch_and_update_model.assert_not_called() + + assert result["success"] is True + assert result["processed"] == 1 + assert result["updated"] == 0 + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_normal_model_with_existing_hash_not_affected(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test that models with existing hash work normally.""" + mock_hydrate.return_value = None + + # Setup model with existing hash + existing_model = { + "file_path": "/models/model.safetensors", + "sha256": "existing_hash_abc", + "hash_status": "completed", + "model_name": "Test Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[existing_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify hash calculation was NOT called + assert not mock_service.scanner.calculate_hash_for_model.called + + # Verify metadata sync was called with existing hash + mock_metadata_sync.fetch_and_update_model.assert_called_once() + call_args = mock_metadata_sync.fetch_and_update_model.call_args[1] + assert call_args["sha256"] == "existing_hash_abc" + + assert result["success"] is True + assert result["updated"] == 1 + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_mixed_models_some_pending_some_existing(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test handling of mixed models: some with pending hash, some with existing hash.""" + mock_hydrate.return_value = None + + pending_model = { + "file_path": "/extra_ckpt/pending_model.safetensors", + "sha256": "", + "hash_status": "pending", + "model_name": "Pending Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + existing_model = { + "file_path": "/models/existing_model.safetensors", + "sha256": "existing_hash_xyz", + "hash_status": "completed", + "model_name": "Existing Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[pending_model, existing_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify hash was calculated only for pending model + mock_service.scanner.calculate_hash_for_model.assert_called_once_with( + "/extra_ckpt/pending_model.safetensors" + ) + + # Verify metadata sync was called for both + assert mock_metadata_sync.fetch_and_update_model.call_count == 2 + + assert result["success"] is True + assert result["processed"] == 2 + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_progress_callback_receives_updates(mock_hydrate, use_case, mock_service): + """Test that progress callback receives correct updates.""" + mock_hydrate.return_value = None + + model = { + "file_path": "/models/model.safetensors", + "sha256": "hash123", + "model_name": "Test Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + reporter = MockProgressReporter() + + # Execute + await use_case.execute(progress_callback=reporter) + + # Verify progress was reported + assert len(reporter.progress_calls) >= 2 + + # Check started status + started_calls = [c for c in reporter.progress_calls if c["status"] == "started"] + assert len(started_calls) == 1 + + # Check completed status + completed_calls = [c for c in reporter.progress_calls if c["status"] == "completed"] + assert len(completed_calls) == 1 + assert completed_calls[0]["processed"] == 1 + assert completed_calls[0]["success"] == 1 + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_respects_skip_metadata_refresh_flag(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test that models with skip_metadata_refresh=True are skipped.""" + mock_hydrate.return_value = None + + skip_model = { + "file_path": "/models/skip_model.safetensors", + "sha256": "hash123", + "model_name": "Skip Model", + "skip_metadata_refresh": True, + "civitai": {}, + } + + normal_model = { + "file_path": "/models/normal_model.safetensors", + "sha256": "hash456", + "model_name": "Normal Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[skip_model, normal_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify only normal model was processed + assert mock_metadata_sync.fetch_and_update_model.call_count == 1 + call_args = mock_metadata_sync.fetch_and_update_model.call_args[1] + assert call_args["file_path"] == "/models/normal_model.safetensors" + + assert result["processed"] == 1 + + +@pytest.mark.asyncio +@patch.object(metadata_manager.MetadataManager, "hydrate_model_data") +async def test_respects_skip_paths(mock_hydrate, use_case, mock_service, mock_metadata_sync): + """Test that models in skip paths are excluded.""" + mock_hydrate.return_value = None + + # Setup settings to skip certain paths + use_case._settings.get = MagicMock(side_effect=lambda key, default=None: { + "enable_metadata_archive_db": False, + "metadata_refresh_skip_paths": ["skip_folder"], + }.get(key, default)) + + skip_path_model = { + "file_path": "/models/skip_folder/model.safetensors", + "sha256": "hash123", + "model_name": "Skip Path Model", + "folder": "skip_folder", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + normal_model = { + "file_path": "/models/normal/model.safetensors", + "sha256": "hash456", + "model_name": "Normal Model", + "folder": "normal", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[skip_path_model, normal_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify only normal model was processed + assert mock_metadata_sync.fetch_and_update_model.call_count == 1 + call_args = mock_metadata_sync.fetch_and_update_model.call_args[1] + assert "normal" in call_args["file_path"] + + assert result["processed"] == 1 + + +@pytest.mark.asyncio +async def test_model_without_hash_skipped(use_case, mock_service, mock_metadata_sync): + """Test that models without hash (and not pending) are skipped.""" + no_hash_model = { + "file_path": "/models/no_hash_model.safetensors", + "sha256": "", # Empty but NOT pending + "hash_status": "completed", # Not pending + "model_name": "No Hash Model", + "civitai": {}, + "from_civitai": False, + "civitai_deleted": False, + } + + cache = SimpleNamespace(raw_data=[no_hash_model], resort=AsyncMock()) + mock_service.scanner.get_cached_data.return_value = cache + + # Execute + result = await use_case.execute() + + # Verify metadata sync was NOT called + mock_metadata_sync.fetch_and_update_model.assert_not_called() + + assert result["processed"] == 1 + assert result["updated"] == 0