perf(recipe): precompute image_id_map for O(1) CivitAI image existence checks

Build a civitai_image_id → recipe_id mapping once during cache
initialization instead of scanning all recipes on every
check_image_exists and import_from_url call.

- RecipeCache gains an image_id_map field populated by
  _build_image_id_map() during cache init
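- check_image_exists and import_from_url duplicate detection
  now use the precomputed map (O(k) for k requested IDs / O(1) per
  lookup, vs O(n) per call); on a duplicate hit import_from_url still
  scans raw_data once to fetch the recipe title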
- Map is persisted in SQLite cache_metadata for fast startup
- Incrementally updated on add/remove/bulk_remove paths
- Fix: the cache_metadata query ran after conn.close() (dead connection); it now runs before the connection is closed
This commit is contained in:
Will Miao
2026-06-13 08:32:03 +08:00
parent 7cd6a53447
commit bef222c77d
7 changed files with 410 additions and 30 deletions

View File

@@ -1597,15 +1597,8 @@ class RecipeManagementHandler:
cache = await recipe_scanner.get_cached_data()
# Build lookup: image_id -> recipe_id from stored source_path
image_to_recipe = {}
for recipe in getattr(cache, "raw_data", []):
source = recipe.get("source_path")
if not source:
continue
image_id = extract_civitai_image_id(source)
if image_id and image_id not in image_to_recipe:
image_to_recipe[image_id] = recipe.get("id")
# Use precomputed image_id_map (built once at cache init)
image_to_recipe = getattr(cache, "image_id_map", {})
results = {}
for img_id in requested_ids:
@@ -1641,20 +1634,22 @@ class RecipeManagementHandler:
"Could not extract Civitai image ID from URL"
)
# Check for duplicate (fast, before acquiring semaphore), unless force
if not force:
cache = await recipe_scanner.get_cached_data()
for recipe in getattr(cache, "raw_data", []):
source = recipe.get("source_path")
if source:
existing_id = extract_civitai_image_id(source)
if existing_id == image_id:
return web.json_response({
"success": True,
"recipe_id": recipe.get("id"),
"name": recipe.get("title", ""),
"already_exists": True,
})
image_to_recipe = getattr(cache, "image_id_map", {})
existing_recipe_id = image_to_recipe.get(image_id)
if existing_recipe_id:
recipe_name = ""
for recipe in getattr(cache, "raw_data", []):
if str(recipe.get("id", "")) == existing_recipe_id:
recipe_name = recipe.get("title", "") or ""
break
return web.json_response({
"success": True,
"recipe_id": existing_recipe_id,
"name": recipe_name,
"already_exists": True,
})
async with self._import_semaphore:
return await self._do_import_from_url(image_url, recipe_scanner)

View File

@@ -12,7 +12,7 @@ import logging
import os
import sqlite3
import threading
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
@@ -26,6 +26,8 @@ class PersistedRecipeData:
raw_data: List[Dict]
file_stats: Dict[str, Tuple[float, int]] # json_path -> (mtime, size)
image_id_map: Dict[str, str] = field(default_factory=dict)
"""Precomputed mapping of civitai image_id → recipe_id."""
class PersistentRecipeCache:
@@ -116,6 +118,20 @@ class PersistentRecipeCache:
if not rows:
return None
# Restore precomputed image_id_map if available
image_id_map: Dict[str, str] = {}
try:
meta_row = conn.execute(
"SELECT value FROM cache_metadata WHERE key = ?",
("image_id_map",),
).fetchone()
if meta_row:
parsed = json.loads(meta_row["value"])
if isinstance(parsed, dict):
image_id_map = parsed
except Exception:
pass # missing or corrupt — rebuilt on next cache refresh
finally:
conn.close()
except FileNotFoundError:
@@ -138,14 +154,24 @@ class PersistentRecipeCache:
row["file_size"] or 0,
)
return PersistedRecipeData(raw_data=raw_data, file_stats=file_stats)
return PersistedRecipeData(
raw_data=raw_data,
file_stats=file_stats,
image_id_map=image_id_map,
)
def save_cache(self, recipes: List[Dict], json_paths: Optional[Dict[str, str]] = None) -> None:
def save_cache(
self,
recipes: List[Dict],
json_paths: Optional[Dict[str, str]] = None,
image_id_map: Optional[Dict[str, str]] = None,
) -> None:
"""Save all recipes to SQLite cache.
Args:
recipes: List of recipe dictionaries to persist.
json_paths: Optional mapping of recipe_id -> json_path for file stats.
image_id_map: Optional precomputed civitai image_id → recipe_id mapping.
"""
if not self.is_enabled():
return
@@ -186,6 +212,12 @@ class PersistentRecipeCache:
recipe_rows,
)
# Persist image_id_map for O(1) lookups on cache load
conn.execute(
"INSERT OR REPLACE INTO cache_metadata (key, value) VALUES (?, ?)",
("image_id_map", json.dumps(image_id_map or {})),
)
conn.commit()
logger.debug("Persisted %d recipes to cache", len(recipe_rows))
finally:
@@ -273,6 +305,29 @@ class PersistentRecipeCache:
except Exception as exc:
logger.debug("Failed to remove recipe %s from cache: %s", recipe_id, exc)
def save_image_id_map(self, image_id_map: Dict[str, str]) -> None:
    """Write only the image_id_map entry of cache_metadata.

    Lets callers refresh the persisted map after a mutation without
    rewriting every recipe row. The call is a no-op when the cache is
    disabled or the schema has not been initialized, and any failure is
    logged at debug level instead of being raised.

    Args:
        image_id_map: Mapping of civitai image_id to recipe_id; it is
            serialized to JSON and stored under the "image_id_map" key.
    """
    can_persist = self.is_enabled() and self._schema_initialized
    if not can_persist:
        return

    upsert_sql = "INSERT OR REPLACE INTO cache_metadata (key, value) VALUES (?, ?)"
    try:
        with self._db_lock:
            connection = self._connect()
            try:
                serialized = json.dumps(image_id_map)
                connection.execute(upsert_sql, ("image_id_map", serialized))
                connection.commit()
            finally:
                # Always release the connection, even if the write failed.
                connection.close()
    except Exception as error:
        logger.debug("Failed to persist image_id_map: %s", error)
def get_indexed_recipe_ids(self) -> Set[str]:
"""Return all recipe IDs in the cache.

View File

@@ -1,6 +1,6 @@
import asyncio
from typing import Iterable, List, Dict, Optional
from dataclasses import dataclass
from dataclasses import dataclass, field
from operator import itemgetter
from natsort import natsorted
@@ -14,6 +14,15 @@ class RecipeCache:
sorted_by_date: List[Dict]
folders: List[str] | None = None
folder_tree: Dict | None = None
image_id_map: Dict[str, str] = field(default_factory=dict)
"""Mapping of civitai image_id → recipe_id, precomputed at cache build time.
Built once during cache initialization (O(n)) so that
``check_image_exists`` and ``import_from_url`` duplicate checks
can look up image_id in O(1) instead of scanning all recipes.
Recipes imported from local files have no valid civitai image_id
and are naturally excluded from this map.
"""
def __post_init__(self):
self._lock = asyncio.Lock()

View File

@@ -20,6 +20,7 @@ from .metadata_service import get_default_metadata_provider
from .checkpoint_scanner import CheckpointScanner
from .settings_manager import get_settings_manager
from .recipes.errors import RecipeNotFoundError
from ..utils.civitai_utils import extract_civitai_image_id
from ..utils.utils import calculate_recipe_fingerprint, fuzzy_match
from natsort import natsorted
import sys
@@ -532,7 +533,21 @@ class RecipeScanner:
self._sort_cache_sync()
# Backfill source_path from JSON files if missing (schema migration)
if self._backfill_source_path_if_needed(recipes, json_paths):
self._persistent_cache.save_cache(recipes, json_paths)
self._cache.image_id_map = self._build_image_id_map()
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
else:
# Use persisted map, or rebuild if empty (e.g. first startup
# after deploying the image_id_map feature).
if persisted.image_id_map:
self._cache.image_id_map = dict(persisted.image_id_map)
else:
self._cache.image_id_map = self._build_image_id_map()
if self._cache.image_id_map:
self._persistent_cache.save_image_id_map(
self._cache.image_id_map
)
return self._cache
else:
# Partial update: some files changed
@@ -545,8 +560,11 @@ class RecipeScanner:
self._sort_cache_sync()
# Backfill source_path from JSON files if missing (schema migration)
self._backfill_source_path_if_needed(recipes, json_paths)
self._cache.image_id_map = self._build_image_id_map()
# Persist updated cache
self._persistent_cache.save_cache(recipes, json_paths)
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
return self._cache
# Fall back to full directory scan
@@ -558,9 +576,12 @@ class RecipeScanner:
self._cache.raw_data = recipes
self._update_folder_metadata(self._cache)
self._sort_cache_sync()
self._cache.image_id_map = self._build_image_id_map()
# Persist for next startup
self._persistent_cache.save_cache(recipes, json_paths)
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
return self._cache
except Exception as e:
@@ -832,6 +853,28 @@ class RecipeScanner:
except Exception as e:
logger.error(f"Error sorting recipe cache: {e}")
def _build_image_id_map(self) -> Dict[str, str]:
"""Build civitai image_id → recipe_id mapping from cached recipes.
Only recipes with a valid CivitAI image URL source_path produce an
entry. Recipes imported from local files are naturally excluded.
"""
mapping: Dict[str, str] = {}
if not self._cache:
return mapping
for recipe in getattr(self._cache, "raw_data", []):
if not isinstance(recipe, dict):
continue
source = recipe.get("source_path")
if not source:
continue
image_id = extract_civitai_image_id(source)
if image_id and image_id not in mapping:
recipe_id = recipe.get("id")
if recipe_id is not None:
mapping[image_id] = str(recipe_id)
return mapping
async def _wait_for_lora_scanner(self) -> None:
"""Ensure the LoRA scanner has initialized before recipe enrichment."""
@@ -1296,11 +1339,20 @@ class RecipeScanner:
# Update FTS index
self._update_fts_index_for_recipe(recipe_data, "add")
source = recipe_data.get("source_path")
if source:
image_id = extract_civitai_image_id(source)
if image_id:
recipe_id_value = recipe_data.get("id")
if recipe_id_value is not None:
cache.image_id_map[image_id] = str(recipe_id_value)
# Persist to SQLite cache
if self._persistent_cache:
recipe_id = str(recipe_data.get("id", ""))
json_path = self._json_path_map.get(recipe_id, "")
self._persistent_cache.update_recipe(recipe_data, json_path)
self._persistent_cache.save_image_id_map(cache.image_id_map)
async def remove_recipe(self, recipe_id: str) -> bool:
"""Remove a recipe from the cache by ID."""
@@ -1319,9 +1371,15 @@ class RecipeScanner:
# Update FTS index
self._update_fts_index_for_recipe(recipe_id, "remove")
# Remove any image_id entry pointing to this recipe
stale = [k for k, v in cache.image_id_map.items() if v == recipe_id]
for k in stale:
del cache.image_id_map[k]
# Remove from SQLite cache
if self._persistent_cache:
self._persistent_cache.remove_recipe(recipe_id)
self._persistent_cache.save_image_id_map(cache.image_id_map)
self._json_path_map.pop(recipe_id, None)
return True
@@ -1332,14 +1390,21 @@ class RecipeScanner:
cache = await self.get_cached_data()
removed = await cache.bulk_remove(recipe_ids, resort=False)
if removed:
removed_ids = {str(r.get("id", "")) for r in removed}
stale = [k for k, v in cache.image_id_map.items() if v in removed_ids]
for k in stale:
del cache.image_id_map[k]
self._schedule_resort()
# Update FTS index and persistent cache for each removed recipe
for recipe in removed:
recipe_id = str(recipe.get("id", ""))
self._update_fts_index_for_recipe(recipe_id, "remove")
if self._persistent_cache:
self._persistent_cache.remove_recipe(recipe_id)
self._json_path_map.pop(recipe_id, None)
if self._persistent_cache:
self._persistent_cache.save_image_id_map(cache.image_id_map)
return len(removed)
async def scan_all_recipes(self) -> List[Dict]:

View File

@@ -46,6 +46,7 @@ class StubRecipeScanner:
self.last_paginated_params: Dict[str, Any] | None = None
self.lora_lookup: Dict[str, List[Dict[str, Any]]] = {}
self.checkpoint_lookup: Dict[str, List[Dict[str, Any]]] = {}
self.image_id_map_override: Dict[str, str] = {}
async def _noop_get_cached_data(force_refresh: bool = False) -> None: # noqa: ARG001 - signature mirrors real scanner
return None
@@ -56,7 +57,10 @@ class StubRecipeScanner:
)
async def get_cached_data(self, force_refresh: bool = False) -> SimpleNamespace: # noqa: ARG002 - flag unused by stub
return SimpleNamespace(raw_data=list(self.cached_raw))
return SimpleNamespace(
raw_data=list(self.cached_raw),
image_id_map=dict(getattr(self, "image_id_map_override", {})),
)
async def get_paginated_data(self, **params: Any) -> Dict[str, Any]:
self.last_paginated_params = params
@@ -999,3 +1003,95 @@ async def test_batch_import_cancel_missing_id(monkeypatch, tmp_path: Path) -> No
payload = await response.json()
assert response.status == 400
assert payload["success"] is False
async def test_check_image_exists_uses_image_id_map(monkeypatch, tmp_path: Path) -> None:
    """The check-image-exists endpoint reports membership from image_id_map."""
    expected_results = {
        "123": {"in_library": True, "recipe_id": "recipe-alpha"},
        "456": {"in_library": False, "recipe_id": None},
        "789": {"in_library": True, "recipe_id": "recipe-gamma"},
    }
    async with recipe_harness(monkeypatch, tmp_path) as harness:
        # Only the map is seeded; cached_raw is left untouched by this test.
        harness.scanner.image_id_map_override = {
            "123": "recipe-alpha",
            "789": "recipe-gamma",
        }

        response = await harness.client.get(
            "/api/lm/recipes/check-image-exists",
            params={"image_ids": "123,456,789"},
        )
        payload = await response.json()

        assert response.status == 200
        assert payload["success"] is True
        for image_id, entry in expected_results.items():
            assert payload["results"][image_id] == entry
async def test_check_image_exists_handles_empty_input(monkeypatch, tmp_path: Path) -> None:
    """An empty image_ids query parameter yields an empty results dict."""
    async with recipe_harness(monkeypatch, tmp_path) as harness:
        query = {"image_ids": ""}
        response = await harness.client.get(
            "/api/lm/recipes/check-image-exists",
            params=query,
        )
        body = await response.json()

        assert response.status == 200
        assert body["results"] == {}
async def test_import_from_url_detects_duplicate_via_image_id_map(
    monkeypatch, tmp_path: Path,
) -> None:
    """An image id present in image_id_map short-circuits the import as already_exists."""
    async with recipe_harness(monkeypatch, tmp_path) as harness:
        scanner = harness.scanner
        # The raw entry supplies the title returned as "name".
        scanner.cached_raw = [
            {"id": "existing-recipe", "title": "My Recipe"},
        ]
        scanner.image_id_map_override = {
            "99999": "existing-recipe",
        }

        query = {"image_url": "https://civitai.com/images/99999"}
        response = await harness.client.get(
            "/api/lm/recipes/import-from-url",
            params=query,
        )
        body = await response.json()

        assert response.status == 200
        assert body["already_exists"] is True
        assert body["recipe_id"] == "existing-recipe"
        assert body["name"] == "My Recipe"
async def test_import_from_url_proceeds_when_image_id_not_in_map(
    monkeypatch, tmp_path: Path,
) -> None:
    """An image id missing from image_id_map must not be reported as already_exists."""
    async with recipe_harness(monkeypatch, tmp_path) as harness:
        # The map holds only an unrelated image id.
        harness.scanner.image_id_map_override = {
            "111": "some-other-recipe",
        }
        image_record = {
            "id": 99999,
            "url": "https://image.civitai.com/x/y/original=true/sample.jpeg",
            "type": "image",
            "meta": {"prompt": "test"},
        }
        harness.civitai.image_info["99999"] = image_record

        response = await harness.client.get(
            "/api/lm/recipes/import-from-url",
            params={"image_url": "https://civitai.com/images/99999"},
        )
        # Whether the import itself succeeds depends on downstream stubs;
        # the only requirement here is that the duplicate path was not taken.
        body = await response.json()
        assert body.get("already_exists") is not True

View File

@@ -1015,3 +1015,85 @@ async def test_get_paginated_data_sorting(recipe_scanner):
# Test Date ASC: Gamma (5), Alpha (10), Beta (20)
res = await scanner.get_paginated_data(page=1, page_size=10, sort_by="date:asc")
assert [i["id"] for i in res["items"]] == ["C", "A", "B"]
async def test_build_image_id_map_filters_correctly(recipe_scanner):
    """_build_image_id_map keeps only recipes whose source_path is a CivitAI image URL.

    Entries with a local file path, an empty source_path, or no
    source_path key at all must be absent from the result.
    """
    from py.services.recipe_cache import RecipeCache

    scanner, _ = recipe_scanner
    recipes = [
        {"id": "r1", "source_path": "https://civitai.com/images/12345"},
        {"id": "r2", "source_path": "https://civitai.com/images/67890"},
        {"id": "r3", "source_path": "/home/user/local_image.png"},
        {"id": "r4", "source_path": ""},
        {"id": "r5"},
    ]
    scanner._cache = RecipeCache(
        raw_data=recipes,
        sorted_by_name=[],
        sorted_by_date=[],
    )

    mapping = scanner._build_image_id_map()

    assert mapping == {
        "12345": "r1",
        "67890": "r2",
    }
    # r3 = local file path, r4 = empty string, r5 = missing key
    mapped_recipe_ids = mapping.values()
    for excluded_id in ("r3", "r4", "r5"):
        assert excluded_id not in mapped_recipe_ids
async def test_add_recipe_updates_image_id_map(recipe_scanner):
    """add_recipe registers CivitAI-sourced recipes in image_id_map and skips local ones."""
    scanner, _ = recipe_scanner

    civitai_recipe = {
        "id": "civitai-recipe",
        "title": "CivitAI",
        "source_path": "https://civitai.com/images/55555",
    }
    await scanner.add_recipe(civitai_recipe)
    cache = await scanner.get_cached_data()
    assert cache.image_id_map.get("55555") == "civitai-recipe"

    local_recipe = {
        "id": "local-recipe",
        "title": "Local",
        "source_path": "/path/to/local.png",
    }
    await scanner.add_recipe(local_recipe)
    # Checked against the cache object fetched before the second add.
    assert "local-recipe" not in cache.image_id_map.values()
async def test_remove_recipe_clears_image_id_map(recipe_scanner):
    """remove_recipe drops the removed recipe's image id and leaves other entries alone."""
    scanner, _ = recipe_scanner

    seed_recipes = (
        {
            "id": "recipe-a",
            "title": "A",
            "source_path": "https://civitai.com/images/111",
        },
        {
            "id": "recipe-b",
            "title": "B",
            "source_path": "https://civitai.com/images/222",
        },
    )
    for seed in seed_recipes:
        await scanner.add_recipe(seed)

    cache = await scanner.get_cached_data()
    assert "111" in cache.image_id_map
    assert cache.image_id_map["222"] == "recipe-b"

    await scanner.remove_recipe("recipe-a")

    assert "111" not in cache.image_id_map
    assert cache.image_id_map["222"] == "recipe-b"

View File

@@ -465,3 +465,81 @@ class TestPersistentRecipeCache:
# Operations should complete
assert operation_counts["saves"] == 5
assert operation_counts["removes"] == 5
# -----------------------------------------------------------------------
# image_id_map persistence (Phase 1 improvement)
# -----------------------------------------------------------------------
def test_save_and_load_image_id_map_roundtrip(self, temp_db_path, sample_recipes):
    """An image_id_map passed to save_cache() comes back unchanged from load_cache()."""
    expected_map = {
        "12345": "recipe-alpha",
        "67890": "recipe-beta",
    }
    store = PersistentRecipeCache(db_path=temp_db_path)
    store.save_cache(sample_recipes, image_id_map=expected_map)

    restored = store.load_cache()

    assert restored is not None
    assert restored.image_id_map == expected_map
def test_load_without_image_id_map_returns_empty_dict(self, temp_db_path, sample_recipes):
    """A cache saved without an image_id_map argument loads with an empty map."""
    store = PersistentRecipeCache(db_path=temp_db_path)
    # No image_id_map is supplied on save.
    store.save_cache(sample_recipes)

    restored = store.load_cache()

    assert restored is not None
    assert restored.image_id_map == {}
def test_save_cache_without_image_id_map_does_not_corrupt_existing(
    self, temp_db_path, sample_recipes,
):
    """A later save_cache() without image_id_map replaces the stored map with {}.

    The map written by the first save must not survive as stale data.
    """
    store = PersistentRecipeCache(db_path=temp_db_path)
    first_map = {"123": "old-recipe"}
    store.save_cache(sample_recipes, image_id_map=first_map)

    # Second full save omits image_id_map entirely.
    store.save_cache(sample_recipes)

    restored = store.load_cache()
    assert restored.image_id_map == {}
def test_image_id_map_survives_recipe_update(self, temp_db_path, sample_recipes):
    """update_recipe() on one recipe leaves the stored image_id_map intact."""
    store = PersistentRecipeCache(db_path=temp_db_path)
    store.save_cache(sample_recipes, image_id_map={"123": "recipe-alpha"})

    # Work on a copy so the shared fixture entry is not mutated.
    retitled = dict(sample_recipes[0])
    retitled["title"] = "Updated"
    store.update_recipe(retitled)

    restored = store.load_cache()
    assert restored.image_id_map == {"123": "recipe-alpha"}
def test_save_image_id_map_persists_without_full_save(self, temp_db_path, sample_recipes):
    """save_image_id_map() alone is enough for load_cache() to return the new map."""
    new_map = {"555": "new-recipe", "666": "another-recipe"}
    store = PersistentRecipeCache(db_path=temp_db_path)
    store.save_cache(sample_recipes)

    store.save_image_id_map({"555": "new-recipe", "666": "another-recipe"})

    restored = store.load_cache()
    assert restored.image_id_map == new_map
def test_save_image_id_map_overwrites_previous(self, temp_db_path, sample_recipes):
    """save_image_id_map() replaces the stored map; earlier keys are not merged in."""
    store = PersistentRecipeCache(db_path=temp_db_path)
    store.save_cache(sample_recipes, image_id_map={"111": "old"})

    replacement = {"222": "new-only"}
    store.save_image_id_map(replacement)

    restored = store.load_cache()
    assert restored.image_id_map == {"222": "new-only"}