Merge pull request #607 from willmiao/codex/fix-404-errors-for-example-image-links, see #590

Preserve local previews when CivitAI images 404
This commit is contained in:
pixelpaws
2025-10-27 19:09:41 +08:00
committed by GitHub
3 changed files with 293 additions and 57 deletions

View File

@@ -5,8 +5,10 @@ import json
import time import time
import logging import logging
import os import os
import re
import shutil import shutil
from typing import Any, Dict import uuid
from typing import Any, Dict, Iterable, List, Set, Tuple
from ..services.service_registry import ServiceRegistry from ..services.service_registry import ServiceRegistry
from ..utils.example_images_paths import ( from ..utils.example_images_paths import (
@@ -516,10 +518,12 @@ class DownloadManager:
if civitai_payload.get('images'): if civitai_payload.get('images'):
images = civitai_payload.get('images', []) images = civitai_payload.get('images', [])
success, is_stale = await ExampleImagesProcessor.download_model_images( success, is_stale, failed_images = await ExampleImagesProcessor.download_model_images_with_tracking(
model_hash, model_name, images, model_dir, optimize, downloader model_hash, model_name, images, model_dir, optimize, downloader
) )
failed_urls: Set[str] = set(failed_images)
# If metadata is stale, try to refresh it # If metadata is stale, try to refresh it
if is_stale and model_hash not in self._progress['refreshed_models']: if is_stale and model_hash not in self._progress['refreshed_models']:
await MetadataUpdater.refresh_model_metadata( await MetadataUpdater.refresh_model_metadata(
@@ -536,20 +540,36 @@ class DownloadManager:
if updated_civitai.get('images'): if updated_civitai.get('images'):
# Retry download with updated metadata # Retry download with updated metadata
updated_images = updated_civitai.get('images', []) updated_images = updated_civitai.get('images', [])
success, _ = await ExampleImagesProcessor.download_model_images( success, _, additional_failed = await ExampleImagesProcessor.download_model_images_with_tracking(
model_hash, model_name, updated_images, model_dir, optimize, downloader model_hash, model_name, updated_images, model_dir, optimize, downloader
) )
failed_urls.update(additional_failed)
self._progress['refreshed_models'].add(model_hash) self._progress['refreshed_models'].add(model_hash)
# Mark as processed if successful, or as failed if unsuccessful after refresh if failed_urls:
if success: await self._remove_failed_images_from_metadata(
model_hash,
model_name,
model_dir,
failed_urls,
scanner,
)
if failed_urls:
self._progress['failed_models'].add(model_hash)
self._progress['processed_models'].add(model_hash)
logger.info(
"Removed %s failed example images for %s", len(failed_urls), model_name
)
elif success:
self._progress['processed_models'].add(model_hash) self._progress['processed_models'].add(model_hash)
else: else:
# If we refreshed metadata and still failed, mark as permanently failed self._progress['failed_models'].add(model_hash)
if model_hash in self._progress['refreshed_models']: logger.info(
self._progress['failed_models'].add(model_hash) "Example images download failed for %s despite metadata refresh", model_name
logger.info(f"Marking model {model_name} as failed after metadata refresh") )
return True # Return True to indicate a remote download happened return True # Return True to indicate a remote download happened
else: else:
@@ -888,6 +908,8 @@ class DownloadManager:
model_hash, model_name, images, model_dir, optimize, downloader model_hash, model_name, images, model_dir, optimize, downloader
) )
failed_urls: Set[str] = set(failed_images)
# If metadata is stale, try to refresh it # If metadata is stale, try to refresh it
if is_stale and model_hash not in self._progress['refreshed_models']: if is_stale and model_hash not in self._progress['refreshed_models']:
await MetadataUpdater.refresh_model_metadata( await MetadataUpdater.refresh_model_metadata(
@@ -909,19 +931,18 @@ class DownloadManager:
) )
# Combine failed images from both attempts # Combine failed images from both attempts
failed_images.extend(additional_failed_images) failed_urls.update(additional_failed_images)
self._progress['refreshed_models'].add(model_hash) self._progress['refreshed_models'].add(model_hash)
# For forced downloads, remove failed images from metadata # For forced downloads, remove failed images from metadata
if failed_images: if failed_urls:
# Create a copy of images excluding failed ones
await self._remove_failed_images_from_metadata( await self._remove_failed_images_from_metadata(
model_hash, model_name, failed_images, scanner model_hash, model_name, model_dir, failed_urls, scanner
) )
# Mark as processed # Mark as processed
if success or failed_images: # Mark as processed if we successfully downloaded some images or removed failed ones if success or failed_urls: # Mark as processed if we successfully downloaded some images or removed failed ones
self._progress['processed_models'].add(model_hash) self._progress['processed_models'].add(model_hash)
return True # Return True to indicate a remote download happened return True # Return True to indicate a remote download happened
@@ -938,49 +959,112 @@ class DownloadManager:
self._progress['last_error'] = error_msg self._progress['last_error'] = error_msg
return False # Return False on exception return False # Return False on exception
async def _remove_failed_images_from_metadata(self, model_hash, model_name, failed_images, scanner): async def _remove_failed_images_from_metadata(
"""Remove failed images from model metadata""" self,
model_hash: str,
model_name: str,
model_dir: str,
failed_images: Iterable[str],
scanner,
) -> None:
"""Mark failed images in model metadata so they won't be retried."""
failed_set: Set[str] = {url for url in failed_images if url}
if not failed_set:
return
try: try:
# Get current model data # Get current model data
model_data = await MetadataUpdater.get_updated_model(model_hash, scanner) model_data = await MetadataUpdater.get_updated_model(model_hash, scanner)
if not model_data: if not model_data:
logger.warning(f"Could not find model data for {model_name} to remove failed images") logger.warning(f"Could not find model data for {model_name} to remove failed images")
return return
if not model_data.get('civitai', {}).get('images'): civitai_payload = model_data.get('civitai') or {}
current_images = civitai_payload.get('images') or []
if not current_images:
logger.warning(f"No images in metadata for {model_name}") logger.warning(f"No images in metadata for {model_name}")
return return
# Get current images updated = False
current_images = model_data['civitai']['images']
for image in current_images:
# Filter out failed images image_url = image.get('url')
updated_images = [img for img in current_images if img.get('url') not in failed_images] optimized_url = (
ExampleImagesProcessor.get_civitai_optimized_url(image_url)
# If images were removed, update metadata if image_url and 'civitai.com' in image_url
if len(updated_images) < len(current_images): else None
removed_count = len(current_images) - len(updated_images) )
logger.info(f"Removing {removed_count} failed images from metadata for {model_name}")
if image_url not in failed_set and optimized_url not in failed_set:
# Update the images list continue
model_data['civitai']['images'] = updated_images
if image.get('downloadFailed'):
# Save metadata to file continue
file_path = model_data.get('file_path')
if file_path: image['downloadFailed'] = True
# Create a copy of model data without 'folder' field image.setdefault('downloadError', 'not_found')
model_copy = model_data.copy() logger.debug(
model_copy.pop('folder', None) "Marked example image %s for %s as failed due to missing remote asset",
image_url,
# Write metadata to file model_name,
await MetadataManager.save_metadata(file_path, model_copy) )
logger.info(f"Saved updated metadata for {model_name} after removing failed images") updated = True
# Update the scanner cache if not updated:
return
file_path = model_data.get('file_path')
if file_path:
model_copy = model_data.copy()
model_copy.pop('folder', None)
await MetadataManager.save_metadata(file_path, model_copy)
try:
await scanner.update_single_model_cache(file_path, file_path, model_data) await scanner.update_single_model_cache(file_path, file_path, model_data)
except AttributeError:
except Exception as e: logger.debug("Scanner does not expose cache update for %s", model_name)
logger.error(f"Error removing failed images from metadata for {model_name}: {e}", exc_info=True)
except Exception as exc: # pragma: no cover - defensive logging
logger.error(
"Error removing failed images from metadata for %s: %s", model_name, exc, exc_info=True
)
def _renumber_example_image_files(self, model_dir: str) -> None:
if not model_dir or not os.path.isdir(model_dir):
return
pattern = re.compile(r'^image_(\d+)(\.[^.]+)$', re.IGNORECASE)
matches: List[Tuple[int, str, str]] = []
for entry in os.listdir(model_dir):
match = pattern.match(entry)
if match:
matches.append((int(match.group(1)), entry, match.group(2)))
if not matches:
return
matches.sort(key=lambda item: item[0])
staged_paths: List[Tuple[str, str]] = []
for _, original_name, extension in matches:
source_path = os.path.join(model_dir, original_name)
temp_name = f"tmp_{uuid.uuid4().hex}_{original_name}"
temp_path = os.path.join(model_dir, temp_name)
try:
os.rename(source_path, temp_path)
staged_paths.append((temp_path, extension))
except OSError as exc:
logger.warning("Failed to stage rename for %s: %s", source_path, exc)
for new_index, (temp_path, extension) in enumerate(staged_paths):
final_name = f"image_{new_index}{extension}"
final_path = os.path.join(model_dir, final_name)
try:
os.rename(temp_path, final_path)
except OSError as exc:
logger.warning("Failed to finalise rename for %s: %s", final_path, exc)
async def _broadcast_progress( async def _broadcast_progress(
self, self,

View File

@@ -85,6 +85,16 @@ class ExampleImagesProcessor:
# Default fallback # Default fallback
return '.jpg' return '.jpg'
@staticmethod
def _is_not_found_error(error) -> bool:
"""Return True when the downloader response represents a 404/Not Found."""
if not error:
return False
message = str(error).lower()
return '404' in message or 'file not found' in message
@staticmethod @staticmethod
async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader): async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader):
"""Download images for a single model """Download images for a single model
@@ -98,7 +108,15 @@ class ExampleImagesProcessor:
image_url = image.get('url') image_url = image.get('url')
if not image_url: if not image_url:
continue continue
if image.get('downloadFailed'):
logger.debug(
"Skipping example image %s for %s because it previously failed to download",
image_url,
model_name,
)
continue
# Apply optimization for Civitai URLs if enabled # Apply optimization for Civitai URLs if enabled
original_url = image_url original_url = image_url
if optimize and 'civitai.com' in image_url: if optimize and 'civitai.com' in image_url:
@@ -142,7 +160,7 @@ class ExampleImagesProcessor:
with open(save_path, 'wb') as f: with open(save_path, 'wb') as f:
f.write(content) f.write(content)
elif "404" in str(content): elif ExampleImagesProcessor._is_not_found_error(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale" error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg) logger.warning(error_msg)
model_success = False # Mark the model as failed due to 404 error model_success = False # Mark the model as failed due to 404 error
@@ -173,7 +191,15 @@ class ExampleImagesProcessor:
image_url = image.get('url') image_url = image.get('url')
if not image_url: if not image_url:
continue continue
if image.get('downloadFailed'):
logger.debug(
"Skipping example image %s for %s because it previously failed to download",
image_url,
model_name,
)
continue
# Apply optimization for Civitai URLs if enabled # Apply optimization for Civitai URLs if enabled
original_url = image_url original_url = image_url
if optimize and 'civitai.com' in image_url: if optimize and 'civitai.com' in image_url:
@@ -217,7 +243,7 @@ class ExampleImagesProcessor:
with open(save_path, 'wb') as f: with open(save_path, 'wb') as f:
f.write(content) f.write(content)
elif "404" in str(content): elif ExampleImagesProcessor._is_not_found_error(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale" error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg) logger.warning(error_msg)
model_success = False # Mark the model as failed due to 404 error model_success = False # Mark the model as failed due to 404 error

View File

@@ -30,6 +30,14 @@ class StubScanner:
async def get_cached_data(self): async def get_cached_data(self):
return self._cache return self._cache
async def update_single_model_cache(self, _old_path, _new_path, metadata):
    """Stub of the scanner's cache-update hook used by the tests.

    Swaps the cached entry whose ``file_path`` matches *metadata* so the
    tests can assert on the persisted state.  The path arguments are
    ignored by this stub (real scanners use them to relocate entries).
    Always reports success (returns True).
    """
    # Replace the cached entry with the updated metadata for assertions.
    for index, model in enumerate(self._cache.raw_data):
        if model.get("file_path") == metadata.get("file_path"):
            self._cache.raw_data[index] = metadata
            break
    return True
def _patch_scanner(monkeypatch: pytest.MonkeyPatch, scanner: StubScanner) -> None: def _patch_scanner(monkeypatch: pytest.MonkeyPatch, scanner: StubScanner) -> None:
async def _get_lora_scanner(cls): async def _get_lora_scanner(cls):
@@ -156,7 +164,7 @@ async def test_pause_resume_blocks_processing(
await first_release.wait() await first_release.wait()
else: else:
second_call_started.set() second_call_started.set()
return True, False return True, False, []
async def fake_get_downloader(): async def fake_get_downloader():
class _Downloader: class _Downloader:
@@ -177,7 +185,7 @@ async def test_pause_resume_blocks_processing(
) )
monkeypatch.setattr( monkeypatch.setattr(
download_module.ExampleImagesProcessor, download_module.ExampleImagesProcessor,
"download_model_images", "download_model_images_with_tracking",
staticmethod(fake_download_model_images), staticmethod(fake_download_model_images),
) )
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -280,7 +288,7 @@ async def test_legacy_folder_migrated_and_skipped(
async def fake_download_model_images(*_args, **_kwargs): async def fake_download_model_images(*_args, **_kwargs):
nonlocal download_called nonlocal download_called
download_called = True download_called = True
return True, False return True, False, []
async def fake_get_downloader(): async def fake_get_downloader():
class _Downloader: class _Downloader:
@@ -296,7 +304,7 @@ async def test_legacy_folder_migrated_and_skipped(
) )
monkeypatch.setattr( monkeypatch.setattr(
download_module.ExampleImagesProcessor, download_module.ExampleImagesProcessor,
"download_model_images", "download_model_images_with_tracking",
staticmethod(fake_download_model_images), staticmethod(fake_download_model_images),
) )
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -375,7 +383,7 @@ async def test_legacy_progress_file_migrates(
) )
monkeypatch.setattr( monkeypatch.setattr(
download_module.ExampleImagesProcessor, download_module.ExampleImagesProcessor,
"download_model_images", "download_model_images_with_tracking",
staticmethod(fake_download_model_images), staticmethod(fake_download_model_images),
) )
monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader) monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
@@ -475,6 +483,124 @@ async def test_download_remains_in_initial_library(
assert not (library_b_root / ".download_progress.json").exists() assert not (library_b_root / ".download_progress.json").exists()
assert not (library_b_root / model_hash).exists() assert not (library_b_root / model_hash).exists()
@pytest.mark.usefixtures("tmp_path")
async def test_not_found_example_images_are_cleaned(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
    settings_manager,
):
    """Images that 404 are marked failed in metadata; local previews survive.

    Drives a full download pass where one example-image URL returns
    "File not found": the entry must gain downloadFailed/downloadError
    markers, the model must land in both failed_models and processed_models,
    and pre-existing files on disk must remain byte-identical.
    """
    ws_manager = RecordingWebSocketManager()
    manager = download_module.DownloadManager(ws_manager=ws_manager)

    images_root = tmp_path / "examples"
    monkeypatch.setitem(settings_manager.settings, "example_images_path", str(images_root))

    model_hash = "f" * 64
    model_path = tmp_path / "missing-model.safetensors"
    model_path.write_text("data", encoding="utf-8")

    # One URL will 404 in the downloader stub below; the other succeeds.
    missing_url = "https://example.com/missing.png"
    valid_url = "https://example.com/valid.png"
    model_metadata = {
        "sha256": model_hash,
        "model_name": "Missing Example",
        "file_path": str(model_path),
        "file_name": "missing-model.safetensors",
        "civitai": {
            "images": [
                {"url": missing_url},
                {"url": valid_url},
            ]
        },
    }

    scanner = StubScanner([model_metadata.copy()])
    _patch_scanner(monkeypatch, scanner)

    # Pre-seed local previews that must be preserved by the cleanup.
    model_dir = images_root / model_hash
    model_dir.mkdir(parents=True, exist_ok=True)
    (model_dir / "image_0.png").write_bytes(b"first")
    (model_dir / "image_1.png").write_bytes(b"second")

    # Force the remote-download path by reporting no usable local examples.
    async def fake_process_local_examples(*_args, **_kwargs):
        return False

    refresh_calls: list[str] = []

    # Records metadata-refresh attempts so we can assert exactly one occurred.
    async def fake_refresh(model_hash_arg, *_args, **_kwargs):
        refresh_calls.append(model_hash_arg)
        return True

    async def fake_get_updated_model(model_hash_arg, _scanner):
        assert model_hash_arg == model_hash
        return model_metadata

    # Captures the saved metadata in-place so assertions below see it.
    async def fake_save_metadata(_path, metadata):
        model_metadata.update(metadata)
        return True

    class DownloaderStub:
        def __init__(self):
            self.calls: list[str] = []

        async def download_to_memory(self, url, *_args, **_kwargs):
            self.calls.append(url)
            if url == missing_url:
                # Mimics the downloader's not-found tuple: (ok, error, headers).
                return False, "File not found", None
            return True, b"\x89PNG\r\n\x1a\n", {"content-type": "image/png"}

    downloader = DownloaderStub()

    async def fake_get_downloader():
        return downloader

    monkeypatch.setattr(
        download_module.ExampleImagesProcessor,
        "process_local_examples",
        staticmethod(fake_process_local_examples),
    )
    monkeypatch.setattr(
        download_module.MetadataUpdater,
        "refresh_model_metadata",
        staticmethod(fake_refresh),
    )
    monkeypatch.setattr(
        download_module.MetadataUpdater,
        "get_updated_model",
        staticmethod(fake_get_updated_model),
    )
    monkeypatch.setattr(
        download_module.MetadataManager,
        "save_metadata",
        staticmethod(fake_save_metadata),
    )
    monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
    monkeypatch.setattr(download_module, "_model_directory_has_files", lambda _path: False)

    result = await manager.start_download({"model_types": ["lora"], "delay": 0, "optimize": False})
    assert result["success"] is True
    # start_download kicks off a background task; wait for it to settle.
    if manager._download_task is not None:
        await asyncio.wait_for(manager._download_task, timeout=1)

    assert refresh_calls == [model_hash]
    assert missing_url in downloader.calls
    # A model with any failed image counts as failed AND processed (no retry).
    assert manager._progress["failed_models"] == {model_hash}
    assert model_hash in manager._progress["processed_models"]

    # The failed entry stays in metadata but is flagged; the good one is untouched.
    remaining_images = model_metadata["civitai"]["images"]
    assert remaining_images == [
        {"url": missing_url, "downloadFailed": True, "downloadError": "not_found"},
        {"url": valid_url},
    ]

    # Pre-existing previews on disk must be byte-identical after the run.
    files = sorted(p.name for p in model_dir.iterdir())
    assert files == ["image_0.png", "image_1.png"]
    assert (model_dir / "image_0.png").read_bytes() == b"first"
    assert (model_dir / "image_1.png").read_bytes() == b"second"
@pytest.fixture @pytest.fixture
def settings_manager(): def settings_manager():
return get_settings_manager() return get_settings_manager()