fix: Reprocess example images with missing folders, improve error handling, and add new tests. Fixes #760

This commit is contained in:
Will Miao
2026-01-08 00:25:36 +08:00
parent eb30595d23
commit fa063ba1ce
3 changed files with 215 additions and 90 deletions

View File

@@ -71,6 +71,7 @@ class _DownloadProgress(dict):
             processed_models=set(),
             refreshed_models=set(),
             failed_models=set(),
+            reprocessed_models=set(),
         )

     def snapshot(self) -> dict:
@@ -80,6 +81,7 @@ class _DownloadProgress(dict):
         snapshot['processed_models'] = list(self['processed_models'])
         snapshot['refreshed_models'] = list(self['refreshed_models'])
         snapshot['failed_models'] = list(self['failed_models'])
+        snapshot['reprocessed_models'] = list(self.get('reprocessed_models', set()))
         return snapshot
@@ -404,6 +406,13 @@ class DownloadManager:
                 self._progress['total'],
             )
+            reprocessed = self._progress.get('reprocessed_models', set())
+            if reprocessed:
+                logger.info(
+                    "Detected %s models with missing or empty example image folders; reprocessing triggered for those models",
+                    len(reprocessed),
+                )
             await self._broadcast_progress(status=final_status)
         except Exception as e:
@@ -472,7 +481,14 @@ class DownloadManager:
if existing_files: if existing_files:
logger.debug(f"Skipping already processed model: {model_name}") logger.debug(f"Skipping already processed model: {model_name}")
return False return False
logger.info(f"Model {model_name} marked as processed but folder empty or missing, reprocessing")
logger.debug(
"Model %s (%s) marked as processed but folder empty or missing, reprocessing triggered",
model_name,
model_hash,
)
# Track that we are reprocessing this model for summary logging
self._progress['reprocessed_models'].add(model_hash)
# Remove from processed models since we need to reprocess # Remove from processed models since we need to reprocess
self._progress['processed_models'].discard(model_hash) self._progress['processed_models'].discard(model_hash)
@@ -584,11 +600,13 @@ class DownloadManager:
             return False  # Default return if no conditions met
         except Exception as e:
-            error_msg = f"Error processing model {model.get('model_name')}: {str(e)}"
+            error_msg = f"Error processing model {model.get('model_name')} ({model_hash}): {str(e)}"
             logger.error(error_msg, exc_info=True)
             self._progress['errors'].append(error_msg)
             self._progress['last_error'] = error_msg
-            return False  # Return False on exception
+            # Ensure model is marked as failed so we don't try again in this run
+            self._progress['failed_models'].add(model_hash)
+            return False

     def _save_progress(self, output_dir):
         """Save download progress to file."""

View File

@@ -753,11 +753,12 @@ export class ExampleImagesManager {
        // Clear any existing interval
        this.clearAutoDownload();

-       // Wait at least 30 seconds after page initialization before first check
+       // Wait at least 30 seconds after page initialization before first check, plus random jitter
        const timeSinceInit = Date.now() - this.pageInitTime;
-       const initialDelay = Math.max(60000 - timeSinceInit, 5000); // At least 5 seconds, up to 60 seconds
+       const jitter = Math.floor(Math.random() * 30000); // 0-30 seconds jitter to prevent thundering herd
+       const initialDelay = Math.max(60000 - timeSinceInit, 5000) + jitter;

-       console.log(`Setting up auto download with initial delay of ${initialDelay}ms`);
+       console.log(`Setting up auto download with initial delay of ${initialDelay}ms (including ${jitter}ms jitter)`);

        setTimeout(() => {
            // Do initial check
@@ -835,6 +836,11 @@ export class ExampleImagesManager {
                if (!data.success) {
                    console.warn('Auto download check failed:', data.error);
+                   // If already in progress, push back the next check to avoid hammering the API
+                   if (data.error && data.error.includes('already in progress')) {
+                       console.log('Download already in progress, backing off next check');
+                       this.lastAutoDownloadCheck = now + (5 * 60 * 1000); // Back off for 5 extra minutes
+                   }
                }
            } catch (error) {
                console.error('Auto download check error:', error);

View File

@@ -0,0 +1,101 @@
import asyncio
import json
import pytest
from pathlib import Path
from py.services.settings_manager import get_settings_manager
from py.utils import example_images_download_manager as download_module
class RecordingWebSocketManager:
    """Test double for the websocket manager: records instead of sending.

    Every payload handed to :meth:`broadcast` is appended to ``payloads``
    in call order, so a test can inspect exactly what would have been
    pushed to connected clients.
    """

    def __init__(self) -> None:
        # Captured broadcast payloads, oldest first.
        self.payloads: list[dict] = []

    async def broadcast(self, payload: dict) -> None:
        """Record *payload* rather than transmitting it anywhere."""
        self.payloads.append(payload)
class StubScanner:
    """Scanner double that serves a fixed, in-memory list of model dicts.

    Mirrors the real lora scanner's surface that the download manager
    touches: a ``raw_data`` attribute and an async ``get_cached_data``
    returning an object that itself exposes ``raw_data``.
    """

    def __init__(self, models: list[dict]) -> None:
        self.raw_data = models

    async def get_cached_data(self):
        """Return a minimal cache object wrapping the stored models."""
        # Anything with a .raw_data attribute satisfies the caller.
        class _Cache:
            pass

        cache = _Cache()
        cache.raw_data = self.raw_data
        return cache
@pytest.mark.asyncio
async def test_reprocessing_triggered_when_folder_missing(monkeypatch, tmp_path):
    """Regression test for issue #760.

    Scenario: a model is recorded as processed in the progress file, but its
    example-image folder is missing on disk. The DownloadManager must detect
    this, record the model in ``reprocessed_models``, re-download its images
    (stubbed here), and end with the model back in ``processed_models`` both
    in memory and in the persisted progress file.

    All network and scanner access is stubbed via monkeypatch; only the
    temporary filesystem under *tmp_path* is touched.
    """
    # Setup paths
    images_root = tmp_path / "examples"
    images_root.mkdir()
    # Point the settings at the temp library so the manager reads/writes there.
    settings_manager = get_settings_manager()
    monkeypatch.setitem(settings_manager.settings, "example_images_path", str(images_root))
    monkeypatch.setitem(settings_manager.settings, "libraries", {"default": {}})
    monkeypatch.setitem(settings_manager.settings, "active_library", "default")
    # SHA-256-length dummy hash; the manager keys its bookkeeping on this.
    model_hash = "f" * 64
    model_name = "Issue 760 Model"
    # Create a progress file where this model is already processed
    progress_file = images_root / ".download_progress.json"
    progress_file.write_text(json.dumps({
        "processed_models": [model_hash],
        "failed_models": []
    }))
    # But the model folder is missing! (repro condition)
    model_data = {
        "sha256": model_hash,
        "model_name": model_name,
        "file_path": str(tmp_path / "model.safetensors"),
        "file_name": "model.safetensors",
        "civitai": {"images": [{"url": "https://example.com/img.png"}]}
    }
    scanner = StubScanner([model_data])

    async def mock_get_lora_scanner():
        # Serve our single stub model instead of scanning the filesystem.
        return scanner

    monkeypatch.setattr(download_module.ServiceRegistry, "get_lora_scanner", mock_get_lora_scanner)

    # Mock downloader and processor to avoid actual network/file ops
    async def fake_get_downloader():
        class MockDownloader:
            async def download_to_memory(self, *args, **kwargs):
                # (success, payload bytes, response headers)
                return True, b"data", {"content-type": "image/png"}
        return MockDownloader()

    monkeypatch.setattr(download_module, "get_downloader", fake_get_downloader)
    process_called = False

    async def fake_process_local_examples(*args):
        nonlocal process_called
        process_called = True
        return False  # Fallback to remote

    monkeypatch.setattr(download_module.ExampleImagesProcessor, "process_local_examples", fake_process_local_examples)

    async def fake_download_model_images(*args):
        # Create the directory so it's "fixed"
        # NOTE(review): args[3] is assumed to be the model's output dir —
        # matches the real download_model_images_with_tracking signature.
        model_dir = args[3]
        Path(model_dir).mkdir(parents=True, exist_ok=True)
        (Path(model_dir) / "image_0.png").write_text("fixed")
        # (success, was_stale_metadata, failed image urls)
        return True, False, []

    monkeypatch.setattr(download_module.ExampleImagesProcessor, "download_model_images_with_tracking", fake_download_model_images)
    # Run the manager
    ws_manager = RecordingWebSocketManager()
    manager = download_module.DownloadManager(ws_manager=ws_manager)
    result = await manager.start_download({"model_types": ["lora"], "delay": 0})
    assert result["success"] is True
    # Wait for completion
    if manager._download_task:
        await asyncio.wait_for(manager._download_task, timeout=2)
    # Verify reprocessing was triggered
    assert model_hash in manager._progress["reprocessed_models"]
    assert model_hash in manager._progress["processed_models"]  # Should be back in processed
    # Verify the progress was saved (discarding reprocessed in memory, but summary logged)
    saved_progress = json.loads(progress_file.read_text())
    assert model_hash in saved_progress["processed_models"]