fix(network): add offline cooldown guard for remote metadata requests

This commit is contained in:
pixelpaws
2026-04-20 15:04:04 +08:00
parent 0ced53c059
commit 5a7f4dc88b
8 changed files with 364 additions and 14 deletions

View File

@@ -3,6 +3,11 @@ import copy
import logging
import os
from typing import Any, Optional, Dict, Tuple, List, Sequence
from .connectivity_guard import (
OFFLINE_FRIENDLY_MESSAGE,
is_expected_offline_error,
is_offline_cooldown_error,
)
from .model_metadata_provider import (
CivitaiModelMetadataProvider,
ModelMetadataProviderManager,
@@ -65,6 +70,8 @@ class CivitaiClient:
if result.provider is None:
result.provider = "civitai_api"
raise result
if not success and is_offline_cooldown_error(result):
return False, OFFLINE_FRIENDLY_MESSAGE
return success, result
@staticmethod
@@ -124,6 +131,8 @@ class CivitaiClient:
)
if not success:
message = str(version)
if is_expected_offline_error(message):
return None, OFFLINE_FRIENDLY_MESSAGE
if "not found" in message.lower():
return None, "Model not found"
@@ -164,6 +173,9 @@ class CivitaiClient:
return True
return False
except Exception as e:
if is_expected_offline_error(str(e)):
logger.debug("Preview download skipped due to offline state.")
return False
logger.error(f"Download Error: {str(e)}")
return False
@@ -207,6 +219,9 @@ class CivitaiClient:
message = self._extract_error_message(result)
if message and "not found" in message.lower():
raise ResourceNotFoundError(f"Resource not found for model {model_id}")
if is_expected_offline_error(message):
logger.info("Civitai request skipped: %s", OFFLINE_FRIENDLY_MESSAGE)
return None
if message:
raise RuntimeError(message)
return None
@@ -357,6 +372,8 @@ class CivitaiClient:
)
if success:
return data
if is_expected_offline_error(data):
return None
logger.warning(f"Failed to fetch model data for model {model_id}")
return None
@@ -371,6 +388,8 @@ class CivitaiClient:
)
if success:
return version
if is_expected_offline_error(version):
return None
logger.warning(f"Failed to fetch version by id {version_id}")
return None
@@ -386,6 +405,8 @@ class CivitaiClient:
)
if success:
return version
if is_expected_offline_error(version):
return None
logger.warning(f"Failed to fetch version by hash {model_hash}")
return None
@@ -473,6 +494,8 @@ class CivitaiClient:
return result, None
# Handle specific error cases
if is_expected_offline_error(result):
return None, OFFLINE_FRIENDLY_MESSAGE
if "not found" in str(result):
error_msg = f"Model not found"
logger.warning(f"Model version not found: {version_id} - {error_msg}")
@@ -507,6 +530,8 @@ class CivitaiClient:
success, result = await self._make_request("GET", url, use_auth=True)
if not success:
if is_expected_offline_error(result):
return None
logger.error(
"Failed to fetch image info for ID %s from civitai.red: %s",
image_id,
@@ -566,6 +591,9 @@ class CivitaiClient:
)
if not success:
if is_expected_offline_error(result):
logger.info("User model fetch skipped: %s", OFFLINE_FRIENDLY_MESSAGE)
return None
logger.error("Failed to fetch models for %s: %s", username, result)
return None

View File

@@ -0,0 +1,147 @@
"""In-memory connectivity guard to suppress repeated network retries when offline."""
from __future__ import annotations
import asyncio
import errno
import logging
import socket
from datetime import datetime, timedelta
from typing import Any
import aiohttp
logger = logging.getLogger(__name__)
OFFLINE_COOLDOWN_ERROR = "offline_cooldown"
OFFLINE_FRIENDLY_MESSAGE = "Network offline, will retry automatically later"
def is_offline_cooldown_error(value: Any) -> bool:
"""Return True when a response payload represents guard short-circuit."""
return isinstance(value, str) and value == OFFLINE_COOLDOWN_ERROR
def is_expected_offline_error(value: Any) -> bool:
"""Return True when payload is an expected offline-related result."""
if is_offline_cooldown_error(value):
return True
if not isinstance(value, str):
return False
normalized = value.lower()
return "network offline" in normalized or "offline" in normalized
class ConnectivityGuard:
"""Tracks network failures and gates outbound requests during cooldown."""
_instance: "ConnectivityGuard | None" = None
_instance_lock = asyncio.Lock()
@classmethod
async def get_instance(cls) -> "ConnectivityGuard":
async with cls._instance_lock:
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self) -> None:
if hasattr(self, "_initialized"):
return
self._initialized = True
self.online = True
self.failure_count = 0
self.cooldown_until: datetime | None = None
self.base_backoff_seconds = 30
self.max_backoff_seconds = 300
self.failure_threshold = 3
def _now(self) -> datetime:
return datetime.now()
def in_cooldown(self) -> bool:
if self.cooldown_until is None:
return False
return self._now() < self.cooldown_until
def cooldown_remaining_seconds(self) -> float:
if self.cooldown_until is None:
return 0.0
return max(0.0, (self.cooldown_until - self._now()).total_seconds())
def should_block_request(self) -> bool:
return self.in_cooldown()
def register_success(self) -> None:
was_offline = (not self.online) or self.cooldown_until is not None
self.online = True
self.failure_count = 0
self.cooldown_until = None
if was_offline:
logger.info("Connectivity restored; requests resumed.")
def register_network_failure(self, exc: Exception) -> None:
self.online = False
self.failure_count += 1
if self.failure_count < self.failure_threshold:
logger.debug(
"Network failure tracked (%d/%d): %s",
self.failure_count,
self.failure_threshold,
exc,
)
return
retry_step = self.failure_count - self.failure_threshold
backoff = min(
self.max_backoff_seconds,
self.base_backoff_seconds * (2**retry_step),
)
should_log_warning = not self.in_cooldown()
self.cooldown_until = self._now() + timedelta(seconds=backoff)
if should_log_warning:
logger.warning(
"Connectivity offline; enter cooldown for %ss after %d network failures.",
int(backoff),
self.failure_count,
)
else:
logger.debug(
"Cooldown still active; failure_count=%d, backoff=%ss.",
self.failure_count,
int(backoff),
)
@staticmethod
def is_network_unreachable_error(exc: Exception) -> bool:
"""Return whether the exception should count as connectivity failure."""
if isinstance(exc, asyncio.CancelledError):
return False
if isinstance(
exc,
(
asyncio.TimeoutError,
TimeoutError,
ConnectionRefusedError,
socket.gaierror,
aiohttp.ServerTimeoutError,
aiohttp.ConnectionTimeoutError,
aiohttp.ClientConnectorError,
aiohttp.ClientConnectionError,
),
):
return True
if isinstance(exc, OSError) and exc.errno in {
errno.ENETUNREACH,
errno.EHOSTUNREACH,
errno.ETIMEDOUT,
errno.ECONNREFUSED,
}:
return True
return False

View File

@@ -20,6 +20,10 @@ from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from typing import Optional, Dict, Tuple, Callable, Union, Awaitable
from ..services.settings_manager import get_settings_manager
from .connectivity_guard import (
OFFLINE_COOLDOWN_ERROR,
ConnectivityGuard,
)
from .errors import RateLimitError
logger = logging.getLogger(__name__)
@@ -797,6 +801,10 @@ class Downloader:
Returns:
Tuple[bool, Union[bytes, str], Optional[Dict]]: (success, content or error message, response headers if requested)
"""
guard = await ConnectivityGuard.get_instance()
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR, None
try:
session = await self.session
# Debug log for proxy mode at request time
@@ -819,6 +827,7 @@ class Downloader:
) as response:
if response.status == 200:
content = await response.read()
guard.register_success()
if return_headers:
return True, content, dict(response.headers)
else:
@@ -837,6 +846,12 @@ class Downloader:
return False, error_msg, None
except Exception as e:
if guard.is_network_unreachable_error(e):
guard.register_network_failure(e)
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR, None
logger.debug("Network unavailable during memory download: %s", e)
return False, str(e), None
logger.error(f"Error downloading to memory from {url}: {e}")
return False, str(e), None
@@ -857,6 +872,10 @@ class Downloader:
Returns:
Tuple[bool, Union[Dict, str]]: (success, headers dict or error message)
"""
guard = await ConnectivityGuard.get_instance()
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR
try:
session = await self.session
# Debug log for proxy mode at request time
@@ -878,11 +897,18 @@ class Downloader:
url, headers=headers, proxy=self.proxy_url
) as response:
if response.status == 200:
guard.register_success()
return True, dict(response.headers)
else:
return False, f"Head request failed with status {response.status}"
except Exception as e:
if guard.is_network_unreachable_error(e):
guard.register_network_failure(e)
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR
logger.debug("Network unavailable during header probe: %s", e)
return False, str(e)
logger.error(f"Error getting headers from {url}: {e}")
return False, str(e)
@@ -907,6 +933,10 @@ class Downloader:
Returns:
Tuple[bool, Union[Dict, str]]: (success, response data or error message)
"""
guard = await ConnectivityGuard.get_instance()
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR
try:
session = await self.session
# Debug log for proxy mode at request time
@@ -930,6 +960,7 @@ class Downloader:
method, url, headers=headers, **kwargs
) as response:
if response.status == 200:
guard.register_success()
# Try to parse as JSON, fall back to text
try:
data = await response.json()
@@ -960,6 +991,12 @@ class Downloader:
return False, f"Request failed with status {response.status}"
except Exception as e:
if guard.is_network_unreachable_error(e):
guard.register_network_failure(e)
if guard.should_block_request():
return False, OFFLINE_COOLDOWN_ERROR
logger.debug("Network unavailable for %s %s: %s", method, url, e)
return False, str(e)
logger.error(f"Error making {method} request to {url}: {e}")
return False, str(e)

View File

@@ -11,6 +11,7 @@ from typing import Any, Awaitable, Callable, Dict, Iterable, Optional
from ..services.settings_manager import SettingsManager
from ..utils.civitai_utils import resolve_license_payload
from ..utils.model_utils import determine_base_model
from .connectivity_guard import OFFLINE_FRIENDLY_MESSAGE, is_expected_offline_error
from .errors import RateLimitError
logger = logging.getLogger(__name__)
@@ -274,11 +275,18 @@ class MetadataSyncService:
else "No provider returned metadata"
)
resolved_error = last_error or default_error
if is_expected_offline_error(resolved_error):
resolved_error = OFFLINE_FRIENDLY_MESSAGE
error_msg = (
f"Error fetching metadata: {last_error or default_error} "
f"Error fetching metadata: {resolved_error} "
f"(model_name={model_data.get('model_name', '')})"
)
logger.error(error_msg)
if is_expected_offline_error(resolved_error):
logger.info(error_msg)
else:
logger.error(error_msg)
return False, error_msg
model_data["from_civitai"] = True
@@ -347,6 +355,9 @@ class MetadataSyncService:
return False, error_msg
except Exception as exc: # pragma: no cover - error path
error_msg = f"Error fetching metadata: {exc}"
if is_expected_offline_error(str(exc)):
logger.info(OFFLINE_FRIENDLY_MESSAGE)
return False, OFFLINE_FRIENDLY_MESSAGE
logger.error(error_msg, exc_info=True)
return False, error_msg