perf(recipe): precompute image_id_map for O(1) CivitAI image existence checks

Build a civitai_image_id → recipe_id mapping once during cache
initialization instead of scanning all recipes on every
check_image_exists and import_from_url call.

- RecipeCache gains an image_id_map field populated by
  _build_image_id_map() during cache init
- check_image_exists and import_from_url duplicate detection
  now use the precomputed map (O(k) for k requested image IDs / O(1)
  per lookup, instead of an O(n) scan over all recipes)
- Map is persisted in SQLite cache_metadata for fast startup
- Incrementally updated on add/remove/bulk_remove paths
- Fix: conn.close() was called before the cache_metadata query, so the query ran on a dead connection
This commit is contained in:
Will Miao
2026-06-13 08:32:03 +08:00
parent 7cd6a53447
commit bef222c77d
7 changed files with 410 additions and 30 deletions

View File

@@ -46,6 +46,7 @@ class StubRecipeScanner:
self.last_paginated_params: Dict[str, Any] | None = None
self.lora_lookup: Dict[str, List[Dict[str, Any]]] = {}
self.checkpoint_lookup: Dict[str, List[Dict[str, Any]]] = {}
self.image_id_map_override: Dict[str, str] = {}
async def _noop_get_cached_data(force_refresh: bool = False) -> None: # noqa: ARG001 - signature mirrors real scanner
return None
@@ -56,7 +57,10 @@ class StubRecipeScanner:
)
async def get_cached_data(self, force_refresh: bool = False) -> SimpleNamespace:  # noqa: ARG002 - flag unused by stub
    """Return a namespace exposing the stub's cached recipes and image-id map.

    Args:
        force_refresh: Accepted for signature compatibility; ignored by the stub.

    Returns:
        A ``SimpleNamespace`` with ``raw_data`` (a list copy of
        ``self.cached_raw``) and ``image_id_map`` (a dict copy of
        ``self.image_id_map_override``, or ``{}`` when that attribute is absent).
    """
    # getattr() keeps the stub usable when image_id_map_override was never set.
    override = getattr(self, "image_id_map_override", {})
    # Copies are returned so callers cannot mutate the stub's own containers.
    return SimpleNamespace(
        raw_data=list(self.cached_raw),
        image_id_map=dict(override),
    )
async def get_paginated_data(self, **params: Any) -> Dict[str, Any]:
self.last_paginated_params = params
@@ -999,3 +1003,95 @@ async def test_batch_import_cancel_missing_id(monkeypatch, tmp_path: Path) -> No
payload = await response.json()
assert response.status == 400
assert payload["success"] is False
async def test_check_image_exists_uses_image_id_map(monkeypatch, tmp_path: Path) -> None:
    """check-image-exists must answer from the scanner's image_id_map.

    Two of the three requested IDs are present in the map; the remaining one
    must be reported as not in the library.
    """
    known_images = {
        "123": "recipe-alpha",
        "789": "recipe-gamma",
    }
    expected_entries = {
        "123": {"in_library": True, "recipe_id": "recipe-alpha"},
        "456": {"in_library": False, "recipe_id": None},
        "789": {"in_library": True, "recipe_id": "recipe-gamma"},
    }

    async with recipe_harness(monkeypatch, tmp_path) as harness:
        harness.scanner.image_id_map_override = known_images

        response = await harness.client.get(
            "/api/lm/recipes/check-image-exists",
            params={"image_ids": "123,456,789"},
        )
        payload = await response.json()

        assert response.status == 200
        assert payload["success"] is True
        # Checked per ID (not as one dict) so extra keys in "results" are tolerated.
        for image_id, entry in expected_entries.items():
            assert payload["results"][image_id] == entry
async def test_check_image_exists_handles_empty_input(monkeypatch, tmp_path: Path) -> None:
    """An empty image_ids query parameter must yield an empty results dict."""
    empty_query = {"image_ids": ""}

    async with recipe_harness(monkeypatch, tmp_path) as harness:
        response = await harness.client.get(
            "/api/lm/recipes/check-image-exists",
            params=empty_query,
        )
        body = await response.json()

        assert response.status == 200
        assert body["results"] == {}
async def test_import_from_url_detects_duplicate_via_image_id_map(
    monkeypatch, tmp_path: Path,
) -> None:
    """import-from-url must report already_exists for an image ID found in image_id_map."""
    duplicate_url = "https://civitai.com/images/99999"

    async with recipe_harness(monkeypatch, tmp_path) as harness:
        # The mapped recipe also lives in raw_data so its title can be returned.
        harness.scanner.cached_raw = [{"id": "existing-recipe", "title": "My Recipe"}]
        harness.scanner.image_id_map_override = {"99999": "existing-recipe"}

        response = await harness.client.get(
            "/api/lm/recipes/import-from-url",
            params={"image_url": duplicate_url},
        )
        body = await response.json()

        assert response.status == 200
        assert body["already_exists"] is True
        assert body["recipe_id"] == "existing-recipe"
        assert body["name"] == "My Recipe"
async def test_import_from_url_proceeds_when_image_id_not_in_map(
    monkeypatch, tmp_path: Path,
) -> None:
    """An image ID missing from image_id_map must not be reported as a duplicate."""
    civitai_image = {
        "id": 99999,
        "url": "https://image.civitai.com/x/y/original=true/sample.jpeg",
        "type": "image",
        "meta": {"prompt": "test"},
    }

    async with recipe_harness(monkeypatch, tmp_path) as harness:
        # The map only knows an unrelated image, so 99999 is not a duplicate.
        harness.scanner.image_id_map_override = {"111": "some-other-recipe"}
        harness.civitai.image_info["99999"] = civitai_image

        response = await harness.client.get(
            "/api/lm/recipes/import-from-url",
            params={"image_url": "https://civitai.com/images/99999"},
        )

        # Whether the import itself succeeds depends on downstream stubs;
        # the only requirement here is that already_exists is not reported.
        body = await response.json()
        assert body.get("already_exists") is not True

View File

@@ -1015,3 +1015,85 @@ async def test_get_paginated_data_sorting(recipe_scanner):
# Test Date ASC: Gamma (5), Alpha (10), Beta (20)
res = await scanner.get_paginated_data(page=1, page_size=10, sort_by="date:asc")
assert [i["id"] for i in res["items"]] == ["C", "A", "B"]
async def test_build_image_id_map_filters_correctly(recipe_scanner):
    """_build_image_id_map must only include recipes with a CivitAI image URL.

    Recipes whose source_path is a local file path, an empty string, or
    missing altogether must not appear in the resulting map.
    """
    scanner, _ = recipe_scanner

    from py.services.recipe_cache import RecipeCache

    recipes = [
        {"id": "r1", "source_path": "https://civitai.com/images/12345"},
        {"id": "r2", "source_path": "https://civitai.com/images/67890"},
        {"id": "r3", "source_path": "/home/user/local_image.png"},  # local file
        {"id": "r4", "source_path": ""},  # empty string
        {"id": "r5"},  # key absent
    ]
    scanner._cache = RecipeCache(
        raw_data=recipes,
        sorted_by_name=[],
        sorted_by_date=[],
    )

    image_id_map = scanner._build_image_id_map()

    assert image_id_map == {"12345": "r1", "67890": "r2"}

    # None of the non-CivitAI recipes may be referenced by the map.
    assert all(
        recipe_id not in image_id_map.values()
        for recipe_id in ("r3", "r4", "r5")
    )
async def test_add_recipe_updates_image_id_map(recipe_scanner):
    """add_recipe must register CivitAI-sourced recipes in image_id_map.

    A recipe whose source_path is a local file must not create an entry.
    """
    scanner, _ = recipe_scanner

    civitai_recipe = {
        "id": "civitai-recipe",
        "title": "CivitAI",
        "source_path": "https://civitai.com/images/55555",
    }
    local_recipe = {
        "id": "local-recipe",
        "title": "Local",
        "source_path": "/path/to/local.png",
    }

    await scanner.add_recipe(civitai_recipe)
    cache = await scanner.get_cached_data()
    assert cache.image_id_map.get("55555") == "civitai-recipe"

    # The same cache object is inspected again after the second add.
    await scanner.add_recipe(local_recipe)
    assert "local-recipe" not in cache.image_id_map.values()
async def test_remove_recipe_clears_image_id_map(recipe_scanner):
    """remove_recipe must drop the removed recipe's image ID and keep the others."""
    scanner, _ = recipe_scanner

    seed_recipes = (
        {
            "id": "recipe-a",
            "title": "A",
            "source_path": "https://civitai.com/images/111",
        },
        {
            "id": "recipe-b",
            "title": "B",
            "source_path": "https://civitai.com/images/222",
        },
    )
    for recipe in seed_recipes:
        await scanner.add_recipe(recipe)

    cache = await scanner.get_cached_data()
    assert "111" in cache.image_id_map
    assert cache.image_id_map["222"] == "recipe-b"

    await scanner.remove_recipe("recipe-a")

    # Only recipe-a's entry disappears; recipe-b's mapping is untouched.
    assert "111" not in cache.image_id_map
    assert cache.image_id_map["222"] == "recipe-b"

View File

@@ -465,3 +465,81 @@ class TestPersistentRecipeCache:
# Operations should complete
assert operation_counts["saves"] == 5
assert operation_counts["removes"] == 5
# -----------------------------------------------------------------------
# image_id_map persistence (Phase 1 improvement)
# -----------------------------------------------------------------------
def test_save_and_load_image_id_map_roundtrip(self, temp_db_path, sample_recipes):
    """An image_id_map given to save_cache() must come back unchanged from load_cache()."""
    expected_map = {
        "12345": "recipe-alpha",
        "67890": "recipe-beta",
    }
    store = PersistentRecipeCache(db_path=temp_db_path)

    store.save_cache(sample_recipes, image_id_map=expected_map)
    restored = store.load_cache()

    assert restored is not None
    assert restored.image_id_map == expected_map
def test_load_without_image_id_map_returns_empty_dict(self, temp_db_path, sample_recipes):
    """A cache saved with no image_id_map must load with image_id_map == {}."""
    store = PersistentRecipeCache(db_path=temp_db_path)

    # save_cache() is called with recipes only; no image_id_map argument.
    store.save_cache(sample_recipes)
    restored = store.load_cache()

    assert restored is not None
    assert restored.image_id_map == {}
def test_save_cache_without_image_id_map_does_not_corrupt_existing(
    self, temp_db_path, sample_recipes,
):
    """Overwriting cache without passing image_id_map must not leave stale data.

    The previous image_id_map entry in cache_metadata should be replaced with {}.
    """
    cache = PersistentRecipeCache(db_path=temp_db_path)
    cache.save_cache(sample_recipes, image_id_map={"123": "old-recipe"})

    # Overwrite without image_id_map
    cache.save_cache(sample_recipes)

    loaded = cache.load_cache()
    # Guard first so a failed load is reported as an assertion failure
    # instead of an AttributeError on None.
    assert loaded is not None
    assert loaded.image_id_map == {}
def test_image_id_map_survives_recipe_update(self, temp_db_path, sample_recipes):
    """Updating a single recipe must not drop the image_id_map metadata."""
    cache = PersistentRecipeCache(db_path=temp_db_path)
    cache.save_cache(sample_recipes, image_id_map={"123": "recipe-alpha"})

    # Update a copy so the shared sample_recipes fixture data is not mutated.
    updated = dict(sample_recipes[0])
    updated["title"] = "Updated"
    cache.update_recipe(updated)

    loaded = cache.load_cache()
    # Guard first so a failed load is reported as an assertion failure
    # instead of an AttributeError on None.
    assert loaded is not None
    assert loaded.image_id_map == {"123": "recipe-alpha"}
def test_save_image_id_map_persists_without_full_save(self, temp_db_path, sample_recipes):
    """save_image_id_map must update cache_metadata without rewriting all recipes."""
    cache = PersistentRecipeCache(db_path=temp_db_path)
    cache.save_cache(sample_recipes)

    cache.save_image_id_map({"555": "new-recipe", "666": "another-recipe"})

    loaded = cache.load_cache()
    # Guard first so a failed load is reported as an assertion failure
    # instead of an AttributeError on None.
    assert loaded is not None
    assert loaded.image_id_map == {"555": "new-recipe", "666": "another-recipe"}
def test_save_image_id_map_overwrites_previous(self, temp_db_path, sample_recipes):
    """Calling save_image_id_map twice must replace, not merge."""
    cache = PersistentRecipeCache(db_path=temp_db_path)
    cache.save_cache(sample_recipes, image_id_map={"111": "old"})

    cache.save_image_id_map({"222": "new-only"})

    loaded = cache.load_cache()
    # Guard first so a failed load is reported as an assertion failure
    # instead of an AttributeError on None.
    assert loaded is not None
    # The earlier "111" entry must be gone; only the latest map remains.
    assert loaded.image_id_map == {"222": "new-only"}