From 613aa3b1c366deea7dbd185f7ba7c83f49f6632e Mon Sep 17 00:00:00 2001 From: pixelpaws Date: Mon, 27 Oct 2025 18:21:14 +0800 Subject: [PATCH] fix(example-images): preserve local previews on 404 --- py/utils/example_images_download_manager.py | 180 +++++++++++++----- py/utils/example_images_processor.py | 34 +++- ...t_example_images_download_manager_async.py | 136 ++++++++++++- 3 files changed, 293 insertions(+), 57 deletions(-) diff --git a/py/utils/example_images_download_manager.py b/py/utils/example_images_download_manager.py index 85a70b6c..0c33dc9e 100644 --- a/py/utils/example_images_download_manager.py +++ b/py/utils/example_images_download_manager.py @@ -5,8 +5,10 @@ import json import time import logging import os +import re import shutil -from typing import Any, Dict +import uuid +from typing import Any, Dict, Iterable, List, Set, Tuple from ..services.service_registry import ServiceRegistry from ..utils.example_images_paths import ( @@ -516,10 +518,12 @@ class DownloadManager: if civitai_payload.get('images'): images = civitai_payload.get('images', []) - success, is_stale = await ExampleImagesProcessor.download_model_images( + success, is_stale, failed_images = await ExampleImagesProcessor.download_model_images_with_tracking( model_hash, model_name, images, model_dir, optimize, downloader ) + failed_urls: Set[str] = set(failed_images) + # If metadata is stale, try to refresh it if is_stale and model_hash not in self._progress['refreshed_models']: await MetadataUpdater.refresh_model_metadata( @@ -536,20 +540,36 @@ class DownloadManager: if updated_civitai.get('images'): # Retry download with updated metadata updated_images = updated_civitai.get('images', []) - success, _ = await ExampleImagesProcessor.download_model_images( + success, _, additional_failed = await ExampleImagesProcessor.download_model_images_with_tracking( model_hash, model_name, updated_images, model_dir, optimize, downloader ) + failed_urls.update(additional_failed) + self._progress['refreshed_models'].add(model_hash) - # Mark as processed if successful, or as failed if unsuccessful after refresh - if success: + if failed_urls: + await self._remove_failed_images_from_metadata( + model_hash, + model_name, + model_dir, + failed_urls, + scanner, + ) + + if failed_urls: + self._progress['failed_models'].add(model_hash) + self._progress['processed_models'].add(model_hash) + logger.info( + "Removed %s failed example images for %s", len(failed_urls), model_name + ) + elif success: self._progress['processed_models'].add(model_hash) else: - # If we refreshed metadata and still failed, mark as permanently failed - if model_hash in self._progress['refreshed_models']: - self._progress['failed_models'].add(model_hash) - logger.info(f"Marking model {model_name} as failed after metadata refresh") + self._progress['failed_models'].add(model_hash) + logger.info( + "Example images download failed for %s despite metadata refresh", model_name + ) return True # Return True to indicate a remote download happened else: @@ -888,6 +908,8 @@ class DownloadManager: model_hash, model_name, images, model_dir, optimize, downloader ) + failed_urls: Set[str] = set(failed_images) + # If metadata is stale, try to refresh it if is_stale and model_hash not in self._progress['refreshed_models']: await MetadataUpdater.refresh_model_metadata( @@ -909,19 +931,18 @@ class DownloadManager: ) # Combine failed images from both attempts - failed_images.extend(additional_failed_images) + failed_urls.update(additional_failed_images) self._progress['refreshed_models'].add(model_hash) # For forced downloads, remove failed images from metadata - if failed_images: - # Create a copy of images excluding failed ones + if failed_urls: await self._remove_failed_images_from_metadata( - model_hash, model_name, failed_images, scanner + model_hash, model_name, model_dir, failed_urls, scanner ) # Mark as processed - if success or failed_images: # Mark as processed if we successfully downloaded some images or removed failed ones + if success or failed_urls: # Mark as processed if we successfully downloaded some images or removed failed ones self._progress['processed_models'].add(model_hash) return True # Return True to indicate a remote download happened @@ -938,49 +959,112 @@ class DownloadManager: self._progress['last_error'] = error_msg return False # Return False on exception - async def _remove_failed_images_from_metadata(self, model_hash, model_name, failed_images, scanner): - """Remove failed images from model metadata""" + async def _remove_failed_images_from_metadata( + self, + model_hash: str, + model_name: str, + model_dir: str, + failed_images: Iterable[str], + scanner, + ) -> None: + """Mark failed images in model metadata so they won't be retried.""" + + failed_set: Set[str] = {url for url in failed_images if url} + if not failed_set: + return + try: # Get current model data model_data = await MetadataUpdater.get_updated_model(model_hash, scanner) if not model_data: logger.warning(f"Could not find model data for {model_name} to remove failed images") return - - if not model_data.get('civitai', {}).get('images'): + + civitai_payload = model_data.get('civitai') or {} + current_images = civitai_payload.get('images') or [] + if not current_images: logger.warning(f"No images in metadata for {model_name}") return - - # Get current images - current_images = model_data['civitai']['images'] - - # Filter out failed images - updated_images = [img for img in current_images if img.get('url') not in failed_images] - - # If images were removed, update metadata - if len(updated_images) < len(current_images): - removed_count = len(current_images) - len(updated_images) - logger.info(f"Removing {removed_count} failed images from metadata for {model_name}") - - # Update the images list - model_data['civitai']['images'] = updated_images - - # Save metadata to file - file_path = model_data.get('file_path') - if file_path: - # Create a copy of model data without 'folder' field - model_copy = model_data.copy() - model_copy.pop('folder', None) - - # Write metadata to file - await MetadataManager.save_metadata(file_path, model_copy) - logger.info(f"Saved updated metadata for {model_name} after removing failed images") - - # Update the scanner cache + + updated = False + + for image in current_images: + image_url = image.get('url') + optimized_url = ( + ExampleImagesProcessor.get_civitai_optimized_url(image_url) + if image_url and 'civitai.com' in image_url + else None + ) + + if image_url not in failed_set and optimized_url not in failed_set: + continue + + if image.get('downloadFailed'): + continue + + image['downloadFailed'] = True + image.setdefault('downloadError', 'not_found') + logger.debug( + "Marked example image %s for %s as failed due to missing remote asset", + image_url, + model_name, + ) + updated = True + + if not updated: + return + + file_path = model_data.get('file_path') + if file_path: + model_copy = model_data.copy() + model_copy.pop('folder', None) + await MetadataManager.save_metadata(file_path, model_copy) + + try: await scanner.update_single_model_cache(file_path, file_path, model_data) - - except Exception as e: - logger.error(f"Error removing failed images from metadata for {model_name}: {e}", exc_info=True) + except AttributeError: + logger.debug("Scanner does not expose cache update for %s", model_name) + + except Exception as exc: # pragma: no cover - defensive logging + logger.error( + "Error removing failed images from metadata for %s: %s", model_name, exc, exc_info=True + ) + + def _renumber_example_image_files(self, model_dir: str) -> None: + if not model_dir or not os.path.isdir(model_dir): + return + + pattern = re.compile(r'^image_(\d+)(\.[^.]+)$', re.IGNORECASE) + matches: List[Tuple[int, str, str]] = [] + + for entry in os.listdir(model_dir): + match = pattern.match(entry) + if match: + matches.append((int(match.group(1)), entry, match.group(2))) + + if not matches: + return + + matches.sort(key=lambda item: item[0]) + staged_paths: List[Tuple[str, str]] = [] + + for _, original_name, extension in matches: + source_path = os.path.join(model_dir, original_name) + temp_name = f"tmp_{uuid.uuid4().hex}_{original_name}" + temp_path = os.path.join(model_dir, temp_name) + try: + os.rename(source_path, temp_path) + staged_paths.append((temp_path, extension)) + except OSError as exc: + logger.warning("Failed to stage rename for %s: %s", source_path, exc) + + for new_index, (temp_path, extension) in enumerate(staged_paths): + final_name = f"image_{new_index}{extension}" + final_path = os.path.join(model_dir, final_name) + try: + os.rename(temp_path, final_path) + except OSError as exc: + logger.warning("Failed to finalise rename for %s: %s", final_path, exc) async def _broadcast_progress( self, diff --git a/py/utils/example_images_processor.py b/py/utils/example_images_processor.py index 19e9e073..b979bdb7 100644 --- a/py/utils/example_images_processor.py +++ b/py/utils/example_images_processor.py @@ -85,6 +85,16 @@ class ExampleImagesProcessor: # Default fallback return '.jpg' + @staticmethod + def _is_not_found_error(error) -> bool: + """Return True when the downloader response represents a 404/Not Found.""" + + if not error: + return False + + message = str(error).lower() + return '404' in message or 'file not found' in message + @staticmethod async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader): """Download images for a single model @@ -98,7 +108,15 @@ class ExampleImagesProcessor: image_url = image.get('url') if not image_url: continue - + + if image.get('downloadFailed'): + logger.debug( + "Skipping example image %s for %s because it previously failed to download", + image_url, + model_name, + ) + continue + # Apply optimization for Civitai URLs if enabled original_url = image_url if optimize and 'civitai.com' in image_url: @@ -142,7 +160,7 @@ class ExampleImagesProcessor: with open(save_path, 'wb') as f: f.write(content) - elif "404" in str(content): + elif ExampleImagesProcessor._is_not_found_error(content): error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale" logger.warning(error_msg) model_success = False # Mark the model as failed due to 404 error @@ -173,7 +191,15 @@ class ExampleImagesProcessor: image_url = image.get('url') if not image_url: continue - + + if image.get('downloadFailed'): + logger.debug( + "Skipping example image %s for %s because it previously failed to download", + image_url, + model_name, + ) + continue + # Apply optimization for Civitai URLs if enabled original_url = image_url if optimize and 'civitai.com' in image_url: @@ -217,7 +243,7 @@ class ExampleImagesProcessor: with open(save_path, 'wb') as f: f.write(content) - elif "404" in str(content): + elif ExampleImagesProcessor._is_not_found_error(content): error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale" logger.warning(error_msg) model_success = False # Mark the model as failed due to 404 error diff --git a/tests/services/test_example_images_download_manager_async.py b/tests/services/test_example_images_download_manager_async.py index 6735a3c8..8ccbe48d 100644 --- a/tests/services/test_example_images_download_manager_async.py +++ b/tests/services/test_example_images_download_manager_async.py @@ -30,6 +30,14 @@ class StubScanner: async def get_cached_data(self): return self._cache + async def update_single_model_cache(self, _old_path, _new_path, metadata): + # Replace the cached entry with the updated metadata for assertions. + for index, model in enumerate(self._cache.raw_data): + if model.get("file_path") == metadata.get("file_path"): + self._cache.raw_data[index] = metadata + break + return True + def _patch_scanner(monkeypatch: pytest.MonkeyPatch, scanner: StubScanner) -> None: async def _get_lora_scanner(cls): @@ -156,7 +164,7 @@ async def test_pause_resume_blocks_processing( await first_release.wait() else: second_call_started.set() - return True, False + return True, False, [] async def fake_get_downloader(): class _Downloader: @@ -177,7 +185,7 @@ async def test_pause_resume_blocks_processing( ) monkeypatch.setattr( download_module.ExampleImagesProcessor, - "download_model_images", + "download_model_images_with_tracking", staticmethod(fake_download_model_images), ) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) @@ -280,7 +288,7 @@ async def test_legacy_folder_migrated_and_skipped( async def fake_download_model_images(*_args, **_kwargs): nonlocal download_called download_called = True - return True, False + return True, False, [] async def fake_get_downloader(): class _Downloader: @@ -296,7 +304,7 @@ async def test_legacy_folder_migrated_and_skipped( ) monkeypatch.setattr( download_module.ExampleImagesProcessor, - "download_model_images", + "download_model_images_with_tracking", staticmethod(fake_download_model_images), ) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) @@ -375,7 +383,7 @@ async def test_legacy_progress_file_migrates( ) monkeypatch.setattr( download_module.ExampleImagesProcessor, - "download_model_images", + "download_model_images_with_tracking", staticmethod(fake_download_model_images), ) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) @@ -475,6 +483,124 @@ async def test_download_remains_in_initial_library( assert not (library_b_root / ".download_progress.json").exists() assert not (library_b_root / model_hash).exists() +@pytest.mark.usefixtures("tmp_path") +async def test_not_found_example_images_are_cleaned( + monkeypatch: pytest.MonkeyPatch, + tmp_path, + settings_manager, +): + ws_manager = RecordingWebSocketManager() + manager = download_module.DownloadManager(ws_manager=ws_manager) + + images_root = tmp_path / "examples" + monkeypatch.setitem(settings_manager.settings, "example_images_path", str(images_root)) + + model_hash = "f" * 64 + model_path = tmp_path / "missing-model.safetensors" + model_path.write_text("data", encoding="utf-8") + + missing_url = "https://example.com/missing.png" + valid_url = "https://example.com/valid.png" + + model_metadata = { + "sha256": model_hash, + "model_name": "Missing Example", + "file_path": str(model_path), + "file_name": "missing-model.safetensors", + "civitai": { + "images": [ + {"url": missing_url}, + {"url": valid_url}, + ] + }, + } + + scanner = StubScanner([model_metadata.copy()]) + _patch_scanner(monkeypatch, scanner) + + model_dir = images_root / model_hash + model_dir.mkdir(parents=True, exist_ok=True) + (model_dir / "image_0.png").write_bytes(b"first") + (model_dir / "image_1.png").write_bytes(b"second") + + async def fake_process_local_examples(*_args, **_kwargs): + return False + + refresh_calls: list[str] = [] + + async def fake_refresh(model_hash_arg, *_args, **_kwargs): + refresh_calls.append(model_hash_arg) + return True + + async def fake_get_updated_model(model_hash_arg, _scanner): + assert model_hash_arg == model_hash + return model_metadata + + async def fake_save_metadata(_path, metadata): + model_metadata.update(metadata) + return True + + class DownloaderStub: + def __init__(self): + self.calls: list[str] = [] + + async def download_to_memory(self, url, *_args, **_kwargs): + self.calls.append(url) + if url == missing_url: + return False, "File not found", None + return True, b"\x89PNG\r\n\x1a\n", {"content-type": "image/png"} + + downloader = DownloaderStub() + + async def fake_get_downloader(): + return downloader + + monkeypatch.setattr( + download_module.ExampleImagesProcessor, + "process_local_examples", + staticmethod(fake_process_local_examples), + ) + monkeypatch.setattr( + download_module.MetadataUpdater, + "refresh_model_metadata", + staticmethod(fake_refresh), + ) + monkeypatch.setattr( + download_module.MetadataUpdater, + "get_updated_model", + staticmethod(fake_get_updated_model), + ) + monkeypatch.setattr( + download_module.MetadataManager, + "save_metadata", + staticmethod(fake_save_metadata), + ) + monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) + monkeypatch.setattr(download_module, "_model_directory_has_files", lambda _path: False) + + result = await manager.start_download({"model_types": ["lora"], "delay": 0, "optimize": False}) + assert result["success"] is True + + if manager._download_task is not None: + await asyncio.wait_for(manager._download_task, timeout=1) + + assert refresh_calls == [model_hash] + assert missing_url in downloader.calls + assert manager._progress["failed_models"] == {model_hash} + assert model_hash in manager._progress["processed_models"] + + remaining_images = model_metadata["civitai"]["images"] + assert remaining_images == [ + {"url": missing_url, "downloadFailed": True, "downloadError": "not_found"}, + {"url": valid_url}, + ] + + files = sorted(p.name for p in model_dir.iterdir()) + assert files == ["image_0.png", "image_1.png"] + assert (model_dir / "image_0.png").read_bytes() == b"first" + assert (model_dir / "image_1.png").read_bytes() == b"second" + + @pytest.fixture def settings_manager(): return get_settings_manager()