fix(api): treat transient server errors (524/5xx) as non-fatal in model updates (#935)

Teach CivitaiClient.get_model_versions() to recognise Cloudflare 524, generic
5xx, and connection-level errors as transient failures and return None
instead of raising RuntimeError, so a single upstream glitch does not
block the entire batch update or produce a scary traceback.

Also downgrade the generic except Exception log level in
ModelUpdateService._refresh_single_model() from error (with exc_info)
to warning (message only), since the full traceback is already logged
upstream in CivitaiClient.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Will Miao
2026-05-22 07:05:06 +08:00
parent 0e51851025
commit 15dfaed462
2 changed files with 31 additions and 2 deletions

View File

@@ -201,6 +201,29 @@ class CivitaiClient:
return _from_value(payload)
@staticmethod
def _is_transient_server_error(message: str) -> bool:
    """Return True when *message* describes a transient upstream failure.

    Recognises any HTTP 5xx status reported as ``status 5NN`` (which covers
    Cloudflare 524) and a fixed set of connection-level error phrases.
    Matching is case-insensitive.

    Args:
        message: Error text reported for a failed request.

    Returns:
        True if the text looks like a transient failure; False otherwise,
        including when the message is empty.
    """
    import re  # function-scope import keeps this block self-contained

    if not message:
        return False

    normalized = message.lower()

    # Require a complete three-digit 5xx code. A bare "status 5" substring
    # test would also match unrelated text such as "status 5 of 10" or
    # "status 5000", and would make a separate 524 check redundant.
    if re.search(r"status 5\d{2}(?!\d)", normalized):
        return True

    # Connection-level flakiness that is worth retrying later.
    transient_phrases = (
        "connection refused",
        "connection reset",
        "temporary failure",
        "name resolution",
        "connection closed",
    )
    return any(phrase in normalized for phrase in transient_phrases)
async def get_model_versions(self, model_id: str) -> Optional[Dict]:
"""Get all versions of a model with local availability info"""
try:
@@ -223,6 +246,13 @@ class CivitaiClient:
logger.info("Civitai request skipped: %s", OFFLINE_FRIENDLY_MESSAGE)
return None
if message:
if self._is_transient_server_error(message):
logger.info(
"Transient server error for model %s: %s",
model_id,
message,
)
return None
raise RuntimeError(message)
return None
except RateLimitError:

View File

@@ -1000,12 +1000,11 @@ class ModelUpdateService:
fallback_error_message = str(exc) or "resource not found"
mark_model_as_ignored = True
except Exception as exc: # pragma: no cover - defensive log
logger.error(
logger.warning(
"Failed to fetch versions for model %s (%s): %s",
model_id,
model_type,
exc,
exc_info=True,
)
fallback_error_message = str(exc)
if response is not None: