fix(example-images): preserve local previews on 404

This commit is contained in:
pixelpaws
2025-10-27 18:21:14 +08:00
parent a6b704d4b4
commit 613aa3b1c3
3 changed files with 293 additions and 57 deletions

View File

@@ -5,8 +5,10 @@ import json
import time
import logging
import os
import re
import shutil
from typing import Any, Dict
import uuid
from typing import Any, Dict, Iterable, List, Set, Tuple
from ..services.service_registry import ServiceRegistry
from ..utils.example_images_paths import (
@@ -516,10 +518,12 @@ class DownloadManager:
if civitai_payload.get('images'):
images = civitai_payload.get('images', [])
success, is_stale = await ExampleImagesProcessor.download_model_images(
success, is_stale, failed_images = await ExampleImagesProcessor.download_model_images_with_tracking(
model_hash, model_name, images, model_dir, optimize, downloader
)
failed_urls: Set[str] = set(failed_images)
# If metadata is stale, try to refresh it
if is_stale and model_hash not in self._progress['refreshed_models']:
await MetadataUpdater.refresh_model_metadata(
@@ -536,20 +540,36 @@ class DownloadManager:
if updated_civitai.get('images'):
# Retry download with updated metadata
updated_images = updated_civitai.get('images', [])
success, _ = await ExampleImagesProcessor.download_model_images(
success, _, additional_failed = await ExampleImagesProcessor.download_model_images_with_tracking(
model_hash, model_name, updated_images, model_dir, optimize, downloader
)
failed_urls.update(additional_failed)
self._progress['refreshed_models'].add(model_hash)
# Mark as processed if successful, or as failed if unsuccessful after refresh
if success:
if failed_urls:
await self._remove_failed_images_from_metadata(
model_hash,
model_name,
model_dir,
failed_urls,
scanner,
)
if failed_urls:
self._progress['failed_models'].add(model_hash)
self._progress['processed_models'].add(model_hash)
logger.info(
"Removed %s failed example images for %s", len(failed_urls), model_name
)
elif success:
self._progress['processed_models'].add(model_hash)
else:
# If we refreshed metadata and still failed, mark as permanently failed
if model_hash in self._progress['refreshed_models']:
self._progress['failed_models'].add(model_hash)
logger.info(f"Marking model {model_name} as failed after metadata refresh")
self._progress['failed_models'].add(model_hash)
logger.info(
"Example images download failed for %s despite metadata refresh", model_name
)
return True # Return True to indicate a remote download happened
else:
@@ -888,6 +908,8 @@ class DownloadManager:
model_hash, model_name, images, model_dir, optimize, downloader
)
failed_urls: Set[str] = set(failed_images)
# If metadata is stale, try to refresh it
if is_stale and model_hash not in self._progress['refreshed_models']:
await MetadataUpdater.refresh_model_metadata(
@@ -909,19 +931,18 @@ class DownloadManager:
)
# Combine failed images from both attempts
failed_images.extend(additional_failed_images)
failed_urls.update(additional_failed_images)
self._progress['refreshed_models'].add(model_hash)
# For forced downloads, remove failed images from metadata
if failed_images:
# Create a copy of images excluding failed ones
if failed_urls:
await self._remove_failed_images_from_metadata(
model_hash, model_name, failed_images, scanner
model_hash, model_name, model_dir, failed_urls, scanner
)
# Mark as processed
if success or failed_images: # Mark as processed if we successfully downloaded some images or removed failed ones
if success or failed_urls: # Mark as processed if we successfully downloaded some images or removed failed ones
self._progress['processed_models'].add(model_hash)
return True # Return True to indicate a remote download happened
@@ -938,49 +959,112 @@ class DownloadManager:
self._progress['last_error'] = error_msg
return False # Return False on exception
async def _remove_failed_images_from_metadata(self, model_hash, model_name, failed_images, scanner):
"""Remove failed images from model metadata"""
async def _remove_failed_images_from_metadata(
self,
model_hash: str,
model_name: str,
model_dir: str,
failed_images: Iterable[str],
scanner,
) -> None:
"""Mark failed images in model metadata so they won't be retried."""
failed_set: Set[str] = {url for url in failed_images if url}
if not failed_set:
return
try:
# Get current model data
model_data = await MetadataUpdater.get_updated_model(model_hash, scanner)
if not model_data:
logger.warning(f"Could not find model data for {model_name} to remove failed images")
return
if not model_data.get('civitai', {}).get('images'):
civitai_payload = model_data.get('civitai') or {}
current_images = civitai_payload.get('images') or []
if not current_images:
logger.warning(f"No images in metadata for {model_name}")
return
# Get current images
current_images = model_data['civitai']['images']
# Filter out failed images
updated_images = [img for img in current_images if img.get('url') not in failed_images]
# If images were removed, update metadata
if len(updated_images) < len(current_images):
removed_count = len(current_images) - len(updated_images)
logger.info(f"Removing {removed_count} failed images from metadata for {model_name}")
# Update the images list
model_data['civitai']['images'] = updated_images
# Save metadata to file
file_path = model_data.get('file_path')
if file_path:
# Create a copy of model data without 'folder' field
model_copy = model_data.copy()
model_copy.pop('folder', None)
# Write metadata to file
await MetadataManager.save_metadata(file_path, model_copy)
logger.info(f"Saved updated metadata for {model_name} after removing failed images")
# Update the scanner cache
updated = False
for image in current_images:
image_url = image.get('url')
optimized_url = (
ExampleImagesProcessor.get_civitai_optimized_url(image_url)
if image_url and 'civitai.com' in image_url
else None
)
if image_url not in failed_set and optimized_url not in failed_set:
continue
if image.get('downloadFailed'):
continue
image['downloadFailed'] = True
image.setdefault('downloadError', 'not_found')
logger.debug(
"Marked example image %s for %s as failed due to missing remote asset",
image_url,
model_name,
)
updated = True
if not updated:
return
file_path = model_data.get('file_path')
if file_path:
model_copy = model_data.copy()
model_copy.pop('folder', None)
await MetadataManager.save_metadata(file_path, model_copy)
try:
await scanner.update_single_model_cache(file_path, file_path, model_data)
except Exception as e:
logger.error(f"Error removing failed images from metadata for {model_name}: {e}", exc_info=True)
except AttributeError:
logger.debug("Scanner does not expose cache update for %s", model_name)
except Exception as exc: # pragma: no cover - defensive logging
logger.error(
"Error removing failed images from metadata for %s: %s", model_name, exc, exc_info=True
)
def _renumber_example_image_files(self, model_dir: str) -> None:
if not model_dir or not os.path.isdir(model_dir):
return
pattern = re.compile(r'^image_(\d+)(\.[^.]+)$', re.IGNORECASE)
matches: List[Tuple[int, str, str]] = []
for entry in os.listdir(model_dir):
match = pattern.match(entry)
if match:
matches.append((int(match.group(1)), entry, match.group(2)))
if not matches:
return
matches.sort(key=lambda item: item[0])
staged_paths: List[Tuple[str, str]] = []
for _, original_name, extension in matches:
source_path = os.path.join(model_dir, original_name)
temp_name = f"tmp_{uuid.uuid4().hex}_{original_name}"
temp_path = os.path.join(model_dir, temp_name)
try:
os.rename(source_path, temp_path)
staged_paths.append((temp_path, extension))
except OSError as exc:
logger.warning("Failed to stage rename for %s: %s", source_path, exc)
for new_index, (temp_path, extension) in enumerate(staged_paths):
final_name = f"image_{new_index}{extension}"
final_path = os.path.join(model_dir, final_name)
try:
os.rename(temp_path, final_path)
except OSError as exc:
logger.warning("Failed to finalise rename for %s: %s", final_path, exc)
async def _broadcast_progress(
self,

View File

@@ -85,6 +85,16 @@ class ExampleImagesProcessor:
# Default fallback
return '.jpg'
@staticmethod
def _is_not_found_error(error) -> bool:
"""Return True when the downloader response represents a 404/Not Found."""
if not error:
return False
message = str(error).lower()
return '404' in message or 'file not found' in message
@staticmethod
async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader):
"""Download images for a single model
@@ -98,7 +108,15 @@ class ExampleImagesProcessor:
image_url = image.get('url')
if not image_url:
continue
if image.get('downloadFailed'):
logger.debug(
"Skipping example image %s for %s because it previously failed to download",
image_url,
model_name,
)
continue
# Apply optimization for Civitai URLs if enabled
original_url = image_url
if optimize and 'civitai.com' in image_url:
@@ -142,7 +160,7 @@ class ExampleImagesProcessor:
with open(save_path, 'wb') as f:
f.write(content)
elif "404" in str(content):
elif ExampleImagesProcessor._is_not_found_error(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg)
model_success = False # Mark the model as failed due to 404 error
@@ -173,7 +191,15 @@ class ExampleImagesProcessor:
image_url = image.get('url')
if not image_url:
continue
if image.get('downloadFailed'):
logger.debug(
"Skipping example image %s for %s because it previously failed to download",
image_url,
model_name,
)
continue
# Apply optimization for Civitai URLs if enabled
original_url = image_url
if optimize and 'civitai.com' in image_url:
@@ -217,7 +243,7 @@ class ExampleImagesProcessor:
with open(save_path, 'wb') as f:
f.write(content)
elif "404" in str(content):
elif ExampleImagesProcessor._is_not_found_error(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg)
model_success = False # Mark the model as failed due to 404 error

View File

@@ -30,6 +30,14 @@ class StubScanner:
async def get_cached_data(self):
    """Return the stub's in-memory cache object unchanged."""
    return self._cache
async def update_single_model_cache(self, _old_path, _new_path, metadata):
    # Replace the cached entry with the updated metadata for assertions.
    # Matching is done on file_path; only the first match is replaced.
    target_path = metadata.get("file_path")
    for position, cached_model in enumerate(self._cache.raw_data):
        if cached_model.get("file_path") == target_path:
            self._cache.raw_data[position] = metadata
            break
    return True
def _patch_scanner(monkeypatch: pytest.MonkeyPatch, scanner: StubScanner) -> None:
async def _get_lora_scanner(cls):
@@ -156,7 +164,7 @@ async def test_pause_resume_blocks_processing(
await first_release.wait()
else:
second_call_started.set()
return True, False
return True, False, []
async def fake_get_downloader():
class _Downloader:
@@ -177,7 +185,7 @@ async def test_pause_resume_blocks_processing(
)
monkeypatch.setattr(
download_module.ExampleImagesProcessor,
"download_model_images",
"download_model_images_with_tracking",
staticmethod(fake_download_model_images),
)
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -280,7 +288,7 @@ async def test_legacy_folder_migrated_and_skipped(
async def fake_download_model_images(*_args, **_kwargs):
nonlocal download_called
download_called = True
return True, False
return True, False, []
async def fake_get_downloader():
class _Downloader:
@@ -296,7 +304,7 @@ async def test_legacy_folder_migrated_and_skipped(
)
monkeypatch.setattr(
download_module.ExampleImagesProcessor,
"download_model_images",
"download_model_images_with_tracking",
staticmethod(fake_download_model_images),
)
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -375,7 +383,7 @@ async def test_legacy_progress_file_migrates(
)
monkeypatch.setattr(
download_module.ExampleImagesProcessor,
"download_model_images",
"download_model_images_with_tracking",
staticmethod(fake_download_model_images),
)
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -475,6 +483,124 @@ async def test_download_remains_in_initial_library(
assert not (library_b_root / ".download_progress.json").exists()
assert not (library_b_root / model_hash).exists()
async def test_not_found_example_images_are_cleaned(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
    settings_manager,
):
    """A 404 on one example image marks that entry as failed in metadata
    while pre-existing local preview files and the other image entries
    are preserved untouched.

    Fix over previous revision: dropped the redundant
    ``@pytest.mark.usefixtures("tmp_path")`` decorator — ``tmp_path`` is
    already requested as a parameter, so the mark added nothing.
    """
    ws_manager = RecordingWebSocketManager()
    manager = download_module.DownloadManager(ws_manager=ws_manager)

    images_root = tmp_path / "examples"
    monkeypatch.setitem(settings_manager.settings, "example_images_path", str(images_root))

    model_hash = "f" * 64
    model_path = tmp_path / "missing-model.safetensors"
    model_path.write_text("data", encoding="utf-8")

    missing_url = "https://example.com/missing.png"
    valid_url = "https://example.com/valid.png"
    model_metadata = {
        "sha256": model_hash,
        "model_name": "Missing Example",
        "file_path": str(model_path),
        "file_name": "missing-model.safetensors",
        "civitai": {
            "images": [
                {"url": missing_url},
                {"url": valid_url},
            ]
        },
    }
    scanner = StubScanner([model_metadata.copy()])
    _patch_scanner(monkeypatch, scanner)

    # Pre-existing local previews that must survive the failed remote fetch.
    model_dir = images_root / model_hash
    model_dir.mkdir(parents=True, exist_ok=True)
    (model_dir / "image_0.png").write_bytes(b"first")
    (model_dir / "image_1.png").write_bytes(b"second")

    async def fake_process_local_examples(*_args, **_kwargs):
        # Force the remote-download path rather than local import.
        return False

    refresh_calls: list[str] = []

    async def fake_refresh(model_hash_arg, *_args, **_kwargs):
        refresh_calls.append(model_hash_arg)
        return True

    async def fake_get_updated_model(model_hash_arg, _scanner):
        assert model_hash_arg == model_hash
        return model_metadata

    async def fake_save_metadata(_path, metadata):
        # Mirror writes back into the dict so assertions can inspect them.
        model_metadata.update(metadata)
        return True

    class DownloaderStub:
        """Downloader double: 404s the missing URL, serves bytes otherwise."""

        def __init__(self):
            self.calls: list[str] = []

        async def download_to_memory(self, url, *_args, **_kwargs):
            self.calls.append(url)
            if url == missing_url:
                return False, "File not found", None
            return True, b"\x89PNG\r\n\x1a\n", {"content-type": "image/png"}

    downloader = DownloaderStub()

    async def fake_get_downloader():
        return downloader

    monkeypatch.setattr(
        download_module.ExampleImagesProcessor,
        "process_local_examples",
        staticmethod(fake_process_local_examples),
    )
    monkeypatch.setattr(
        download_module.MetadataUpdater,
        "refresh_model_metadata",
        staticmethod(fake_refresh),
    )
    monkeypatch.setattr(
        download_module.MetadataUpdater,
        "get_updated_model",
        staticmethod(fake_get_updated_model),
    )
    monkeypatch.setattr(
        download_module.MetadataManager,
        "save_metadata",
        staticmethod(fake_save_metadata),
    )
    monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
    monkeypatch.setattr(download_module, "_model_directory_has_files", lambda _path: False)

    result = await manager.start_download({"model_types": ["lora"], "delay": 0, "optimize": False})
    assert result["success"] is True
    if manager._download_task is not None:
        await asyncio.wait_for(manager._download_task, timeout=1)

    # The stale-metadata refresh ran exactly once and the bad URL was tried.
    assert refresh_calls == [model_hash]
    assert missing_url in downloader.calls

    # Model is flagged failed AND processed (so it won't be retried).
    assert manager._progress["failed_models"] == {model_hash}
    assert model_hash in manager._progress["processed_models"]

    # Only the missing entry is annotated; the valid entry is untouched.
    remaining_images = model_metadata["civitai"]["images"]
    assert remaining_images == [
        {"url": missing_url, "downloadFailed": True, "downloadError": "not_found"},
        {"url": valid_url},
    ]

    # Local preview files survived byte-for-byte.
    files = sorted(p.name for p in model_dir.iterdir())
    assert files == ["image_0.png", "image_1.png"]
    assert (model_dir / "image_0.png").read_bytes() == b"first"
    assert (model_dir / "image_1.png").read_bytes() == b"second"
@pytest.fixture
def settings_manager():
    # Exposes the application's settings manager so tests can patch
    # entries (e.g. example_images_path) via monkeypatch.setitem.
    return get_settings_manager()