feat(metadata): add rate limit retry support to metadata providers

Add the RateLimitRetryingProvider and _RateLimitRetryHelper classes, which retry rate-limited requests with exponential backoff. Update the get_metadata_provider function to automatically wrap non-fallback providers with rate-limit handling. This improves reliability when external APIs return rate-limit errors, because requests are retried automatically with configurable delays and jitter.
This commit is contained in:
Will Miao
2025-11-07 09:18:59 +08:00
parent c3932538e1
commit 1bb5d0b072
4 changed files with 244 additions and 43 deletions

View File

@@ -0,0 +1,62 @@
from types import SimpleNamespace
from unittest.mock import AsyncMock
import pytest
from py.services import metadata_service
from py.services.model_metadata_provider import (
FallbackMetadataProvider,
ModelMetadataProvider,
RateLimitRetryingProvider,
)
class DummyProvider(ModelMetadataProvider):
    """Inert provider stub: every lookup comes back empty."""

    async def get_model_by_hash(self, model_hash: str):
        """Return a pair of ``None`` values for any hash."""
        return (None, None)

    async def get_model_versions(self, model_id: str):
        """Return ``None`` for any model id."""
        return None

    async def get_model_versions_bulk(self, model_ids):
        """Return ``None`` for any collection of model ids."""
        return None

    async def get_model_version(self, model_id: int = None, version_id: int = None):
        """Return ``None`` regardless of the ids supplied."""
        return None

    async def get_model_version_info(self, version_id: str):
        """Return a pair of ``None`` values for any version id."""
        return (None, None)

    async def get_user_models(self, username: str):
        """Return ``None`` for any username."""
        return None
@pytest.mark.asyncio
async def test_get_metadata_provider_wraps_non_fallback(monkeypatch):
    """A plain provider must come back wrapped in RateLimitRetryingProvider."""
    raw_provider = DummyProvider()

    def _lookup(_name=None):
        # Stand-in for the manager's provider lookup; always yields the stub.
        return raw_provider

    manager_stub = SimpleNamespace(_get_provider=_lookup)
    fake_get_instance = AsyncMock(return_value=manager_stub)
    monkeypatch.setattr(
        metadata_service.ModelMetadataProviderManager,
        "get_instance",
        fake_get_instance,
    )

    wrapped = await metadata_service.get_metadata_provider("dummy")

    # The result is a distinct wrapper object, not the provider itself.
    assert isinstance(wrapped, RateLimitRetryingProvider)
    assert wrapped is not raw_provider
@pytest.mark.asyncio
async def test_get_metadata_provider_returns_fallback_as_is(monkeypatch):
    """A FallbackMetadataProvider must be handed back unchanged (same object)."""
    fallback_provider = FallbackMetadataProvider([("dummy", DummyProvider())])

    def _lookup(_name=None):
        # Stand-in for the manager's provider lookup; always yields the fallback.
        return fallback_provider

    manager_stub = SimpleNamespace(_get_provider=_lookup)
    fake_get_instance = AsyncMock(return_value=manager_stub)
    monkeypatch.setattr(
        metadata_service.ModelMetadataProviderManager,
        "get_instance",
        fake_get_instance,
    )

    resolved = await metadata_service.get_metadata_provider()

    assert resolved is fallback_provider

View File

@@ -4,7 +4,10 @@ import pytest
from py.services import model_metadata_provider as provider_module
from py.services.errors import RateLimitError
from py.services.model_metadata_provider import FallbackMetadataProvider
from py.services.model_metadata_provider import (
FallbackMetadataProvider,
RateLimitRetryingProvider,
)
class RateLimitThenSuccessProvider:
@@ -80,3 +83,37 @@ async def test_fallback_respects_retry_limit(monkeypatch):
assert primary.calls == 2
assert secondary.calls == 0
sleep_mock.assert_awaited_once()
@pytest.mark.asyncio
async def test_rate_limit_retrying_provider_retries(monkeypatch):
    """The wrapper should call the inner provider twice and sleep once in between.

    NOTE(review): RateLimitThenSuccessProvider's body is not visible here;
    presumably it is rate limited on the first call and succeeds on the second.
    """

    def _no_jitter(*_args):
        # Pin random.uniform to zero so the outcome does not depend on jitter.
        return 0.0

    fake_sleep = AsyncMock()
    monkeypatch.setattr(provider_module.asyncio, "sleep", fake_sleep)
    monkeypatch.setattr(provider_module.random, "uniform", _no_jitter)

    wrapped_provider = RateLimitThenSuccessProvider()
    retrying = RateLimitRetryingProvider(
        wrapped_provider,
        label="inner",
        rate_limit_base_delay=0.1,
    )

    result, error = await retrying.get_model_by_hash("abc")

    assert error is None
    assert result == {"id": "ok"}
    assert wrapped_provider.calls == 2
    fake_sleep.assert_awaited_once()
@pytest.mark.asyncio
async def test_rate_limit_retrying_provider_respects_limit(monkeypatch):
    """With a retry limit of 2, the wrapper gives up and raises RateLimitError.

    The test expects two calls to the inner provider, one awaited sleep, and
    the raised error to carry the wrapper's label as its ``provider``.
    """

    def _no_jitter(*_args):
        # Pin random.uniform to zero so the outcome does not depend on jitter.
        return 0.0

    fake_sleep = AsyncMock()
    monkeypatch.setattr(provider_module.asyncio, "sleep", fake_sleep)
    monkeypatch.setattr(provider_module.random, "uniform", _no_jitter)

    wrapped_provider = AlwaysRateLimitedProvider()
    retrying = RateLimitRetryingProvider(
        wrapped_provider,
        label="inner",
        rate_limit_retry_limit=2,
    )

    with pytest.raises(RateLimitError) as raised:
        await retrying.get_model_by_hash("abc")

    assert raised.value.provider == "inner"
    assert wrapped_provider.calls == 2
    fake_sleep.assert_awaited_once()