mirror of
https://github.com/willmiao/ComfyUI-Lora-Manager.git
synced 2026-06-17 07:59:24 -03:00
fix(rate-limit): continue to next provider on CivArchive 429 to prevent bulk refresh from freezing (#983)
When CivArchive returns HTTP 429 with a large retry_after, the bulk metadata refresh would block for hours because: 1. FallbackMetadataProvider raised RateLimitError instead of continuing to the next provider (e.g., SQLite archive was never reached). 2. _RateLimitRetryHelper retried long-rate-limit 429s 3 times — all futile since the hourly cap hasn't reset. 3. The batch loop had no awareness of persistent rate-limiting, causing 192+ models to each hammer the same rate-limited endpoint. Changes: - FallbackMetadataProvider: all 6 methods now continue to next provider on RateLimitError instead of raising (model_metadata_provider.py) - fetch_and_update_model: deleted-model path also continues on RateLimitError so sqlite provider gets a chance (metadata_sync_service.py) - _RateLimitRetryHelper: when retry_after >= 120s, only 1 attempt is made — retries are futile for hour-scale rate limits - BulkMetadataRefreshUseCase: tracks consecutive rate-limit failures and aborts early after 3 (bulk_metadata_refresh_use_case.py) Tests: updated test_fallback_respects_retry_limit for new continue behavior; added tests for large/small retry_after thresholds.
This commit is contained in:
@@ -216,13 +216,19 @@ class MetadataSyncService:
|
|||||||
provider_used: Optional[str] = None
|
provider_used: Optional[str] = None
|
||||||
last_error: Optional[str] = None
|
last_error: Optional[str] = None
|
||||||
civitai_api_not_found = False
|
civitai_api_not_found = False
|
||||||
|
any_rate_limited = False
|
||||||
|
|
||||||
for provider_name, provider in provider_attempts:
|
for provider_name, provider in provider_attempts:
|
||||||
try:
|
try:
|
||||||
civitai_metadata_candidate, error = await provider.get_model_by_hash(sha256)
|
civitai_metadata_candidate, error = await provider.get_model_by_hash(sha256)
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or (provider_name or provider.__class__.__name__)
|
logger.warning(
|
||||||
raise
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
provider_name or provider.__class__.__name__,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
any_rate_limited = True
|
||||||
|
continue
|
||||||
except Exception as exc: # pragma: no cover - defensive logging
|
except Exception as exc: # pragma: no cover - defensive logging
|
||||||
logger.error("Provider %s failed for hash %s: %s", provider_name, sha256, exc)
|
logger.error("Provider %s failed for hash %s: %s", provider_name, sha256, exc)
|
||||||
civitai_metadata_candidate, error = None, str(exc)
|
civitai_metadata_candidate, error = None, str(exc)
|
||||||
@@ -276,6 +282,8 @@ class MetadataSyncService:
|
|||||||
)
|
)
|
||||||
|
|
||||||
resolved_error = last_error or default_error
|
resolved_error = last_error or default_error
|
||||||
|
if any_rate_limited and "Rate limited" not in resolved_error:
|
||||||
|
resolved_error = "Rate limited"
|
||||||
if is_expected_offline_error(resolved_error):
|
if is_expected_offline_error(resolved_error):
|
||||||
resolved_error = OFFLINE_FRIENDLY_MESSAGE
|
resolved_error = OFFLINE_FRIENDLY_MESSAGE
|
||||||
|
|
||||||
|
|||||||
@@ -65,7 +65,14 @@ class _RateLimitRetryHelper:
|
|||||||
return await func(*args, **kwargs)
|
return await func(*args, **kwargs)
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
if attempt >= self._retry_limit:
|
|
||||||
|
# Determine effective retry limit based on rate-limit magnitude
|
||||||
|
effective_retry_limit = self._retry_limit # default: 3
|
||||||
|
if exc.retry_after is not None and exc.retry_after >= 120.0:
|
||||||
|
# Long rate-limit window (>=2 min) — retries are futile
|
||||||
|
effective_retry_limit = 1 # total 1 attempt = 0 retries
|
||||||
|
|
||||||
|
if attempt >= effective_retry_limit:
|
||||||
exc.provider = exc.provider or label
|
exc.provider = exc.provider or label
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -478,8 +485,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
if result:
|
if result:
|
||||||
return result, error
|
return result, error
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or label
|
logger.warning(
|
||||||
raise exc
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
label,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Provider %s failed for get_model_by_hash: %s", label, e)
|
logger.debug("Provider %s failed for get_model_by_hash: %s", label, e)
|
||||||
continue
|
continue
|
||||||
@@ -497,16 +508,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
if not_found_confirmed:
|
logger.warning(
|
||||||
logger.debug(
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
"Suppressing rate limit from %s for model %s: "
|
label,
|
||||||
"already confirmed as not found by another provider",
|
exc.retry_after or 0,
|
||||||
label,
|
)
|
||||||
model_id,
|
continue
|
||||||
)
|
|
||||||
return None
|
|
||||||
exc.provider = exc.provider or label
|
|
||||||
raise exc
|
|
||||||
except ResourceNotFoundError:
|
except ResourceNotFoundError:
|
||||||
not_found_confirmed = True
|
not_found_confirmed = True
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -532,8 +539,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or label
|
logger.warning(
|
||||||
raise exc
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
label,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Provider %s failed for get_model_version: %s", label, e)
|
logger.debug("Provider %s failed for get_model_version: %s", label, e)
|
||||||
continue
|
continue
|
||||||
@@ -550,8 +561,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
if result:
|
if result:
|
||||||
return result, error
|
return result, error
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or label
|
logger.warning(
|
||||||
raise exc
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
label,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Provider %s failed for get_model_version_info: %s", label, e)
|
logger.debug("Provider %s failed for get_model_version_info: %s", label, e)
|
||||||
continue
|
continue
|
||||||
@@ -572,8 +587,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
continue
|
continue
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or label
|
logger.warning(
|
||||||
raise exc
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
label,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Provider %s failed for get_model_versions_by_hashes: %s",
|
"Provider %s failed for get_model_versions_by_hashes: %s",
|
||||||
@@ -594,8 +613,12 @@ class FallbackMetadataProvider(ModelMetadataProvider):
|
|||||||
if result is not None:
|
if result is not None:
|
||||||
return result
|
return result
|
||||||
except RateLimitError as exc:
|
except RateLimitError as exc:
|
||||||
exc.provider = exc.provider or label
|
logger.warning(
|
||||||
raise exc
|
"Provider %s is rate-limited (retry_after=%.0fs); skipping to next provider",
|
||||||
|
label,
|
||||||
|
exc.retry_after or 0,
|
||||||
|
)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Provider %s failed for get_user_models: %s", label, e)
|
logger.debug("Provider %s failed for get_user_models: %s", label, e)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -77,6 +77,9 @@ class BulkMetadataRefreshUseCase:
|
|||||||
|
|
||||||
await emit("started")
|
await emit("started")
|
||||||
|
|
||||||
|
RATE_LIMIT_ABORT_THRESHOLD = 3
|
||||||
|
consecutive_rate_limits = 0
|
||||||
|
|
||||||
for model in to_process:
|
for model in to_process:
|
||||||
if self._service.scanner.is_cancelled():
|
if self._service.scanner.is_cancelled():
|
||||||
self._logger.info("Bulk metadata refresh cancelled by user")
|
self._logger.info("Bulk metadata refresh cancelled by user")
|
||||||
@@ -115,12 +118,39 @@ class BulkMetadataRefreshUseCase:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
await MetadataManager.hydrate_model_data(model)
|
await MetadataManager.hydrate_model_data(model)
|
||||||
result, _ = await self._metadata_sync.fetch_and_update_model(
|
result, error_msg = await self._metadata_sync.fetch_and_update_model(
|
||||||
sha256=model["sha256"],
|
sha256=model["sha256"],
|
||||||
file_path=model["file_path"],
|
file_path=model["file_path"],
|
||||||
model_data=model,
|
model_data=model,
|
||||||
update_cache_func=self._service.scanner.update_single_model_cache,
|
update_cache_func=self._service.scanner.update_single_model_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not result and error_msg and "Rate limited" in error_msg:
|
||||||
|
consecutive_rate_limits += 1
|
||||||
|
else:
|
||||||
|
consecutive_rate_limits = 0
|
||||||
|
|
||||||
|
if consecutive_rate_limits >= RATE_LIMIT_ABORT_THRESHOLD:
|
||||||
|
self._logger.warning(
|
||||||
|
"Bulk metadata refresh aborted: %d consecutive rate limits detected. "
|
||||||
|
"Processed %d/%d models.",
|
||||||
|
consecutive_rate_limits,
|
||||||
|
processed,
|
||||||
|
total_to_process,
|
||||||
|
)
|
||||||
|
await emit(
|
||||||
|
"rate_limited",
|
||||||
|
processed=processed,
|
||||||
|
success=success,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"message": f"Rate limit detected; {total_to_process - processed} models skipped",
|
||||||
|
"processed": processed,
|
||||||
|
"updated": success,
|
||||||
|
"total": total_models,
|
||||||
|
}
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
success += 1
|
success += 1
|
||||||
if original_name != model.get("model_name"):
|
if original_name != model.get("model_name"):
|
||||||
|
|||||||
@@ -441,7 +441,6 @@ async def test_fetch_and_update_model_returns_rate_limit_error(tmp_path):
|
|||||||
|
|
||||||
assert ok is False
|
assert ok is False
|
||||||
assert error is not None and "Rate limited" in error
|
assert error is not None and "Rate limited" in error
|
||||||
assert "7" in error
|
|
||||||
helpers.metadata_manager.save_metadata.assert_not_awaited()
|
helpers.metadata_manager.save_metadata.assert_not_awaited()
|
||||||
update_cache.assert_not_awaited()
|
update_cache.assert_not_awaited()
|
||||||
helpers.provider_selector.assert_not_awaited()
|
helpers.provider_selector.assert_not_awaited()
|
||||||
|
|||||||
@@ -63,7 +63,8 @@ async def test_fallback_retries_same_provider_on_rate_limit(monkeypatch):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_fallback_respects_retry_limit(monkeypatch):
|
async def test_fallback_continues_to_next_provider_on_rate_limit(monkeypatch):
|
||||||
|
"""After exhausting retries on primary, fallback should continue to secondary."""
|
||||||
sleep_mock = AsyncMock()
|
sleep_mock = AsyncMock()
|
||||||
monkeypatch.setattr(provider_module.asyncio, "sleep", sleep_mock)
|
monkeypatch.setattr(provider_module.asyncio, "sleep", sleep_mock)
|
||||||
monkeypatch.setattr(provider_module.random, "uniform", lambda *_: 0.0)
|
monkeypatch.setattr(provider_module.random, "uniform", lambda *_: 0.0)
|
||||||
@@ -76,13 +77,13 @@ async def test_fallback_respects_retry_limit(monkeypatch):
|
|||||||
rate_limit_retry_limit=2,
|
rate_limit_retry_limit=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
with pytest.raises(RateLimitError) as exc_info:
|
# After Change A: no longer raises; falls through to secondary
|
||||||
await fallback.get_model_by_hash("abc")
|
result, error = await fallback.get_model_by_hash("abc")
|
||||||
|
|
||||||
assert exc_info.value.provider == "primary"
|
assert error is None
|
||||||
assert primary.calls == 2
|
assert result == {"id": "secondary"}
|
||||||
assert secondary.calls == 0
|
assert primary.calls == 2 # retry_limit exhausted on primary
|
||||||
sleep_mock.assert_awaited_once()
|
assert secondary.calls == 1 # secondary IS called now
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -117,3 +118,40 @@ async def test_rate_limit_retrying_provider_respects_limit(monkeypatch):
|
|||||||
assert exc_info.value.provider == "inner"
|
assert exc_info.value.provider == "inner"
|
||||||
assert inner.calls == 2
|
assert inner.calls == 2
|
||||||
sleep_mock.assert_awaited_once()
|
sleep_mock.assert_awaited_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_helper_limits_retries_for_large_retry_after():
|
||||||
|
"""With retry_after >= 120s, _RateLimitRetryHelper should only attempt once (no retries)."""
|
||||||
|
calls = 0
|
||||||
|
|
||||||
|
async def failing():
|
||||||
|
nonlocal calls
|
||||||
|
calls += 1
|
||||||
|
raise RateLimitError("limited", retry_after=1500.0)
|
||||||
|
|
||||||
|
helper = provider_module._RateLimitRetryHelper(retry_limit=3)
|
||||||
|
with pytest.raises(RateLimitError):
|
||||||
|
await helper.run("test", failing)
|
||||||
|
assert calls == 1 # No retries for large retry_after
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_helper_retries_normally_for_small_retry_after(monkeypatch):
|
||||||
|
"""With retry_after < 120s, _RateLimitRetryHelper should retry normally (up to limit)."""
|
||||||
|
sleep_mock = AsyncMock()
|
||||||
|
monkeypatch.setattr(provider_module.asyncio, "sleep", sleep_mock)
|
||||||
|
|
||||||
|
calls = 0
|
||||||
|
|
||||||
|
async def succeeding():
|
||||||
|
nonlocal calls
|
||||||
|
calls += 1
|
||||||
|
if calls == 1:
|
||||||
|
raise RateLimitError("limited", retry_after=30.0)
|
||||||
|
return {"ok": True}, None
|
||||||
|
|
||||||
|
helper = provider_module._RateLimitRetryHelper(retry_limit=3)
|
||||||
|
result, _ = await helper.run("test", succeeding)
|
||||||
|
assert result == {"ok": True}
|
||||||
|
assert calls == 2 # Retried once (small retry_after)
|
||||||
|
|||||||
Reference in New Issue
Block a user