perf(recipe): precompute image_id_map for O(1) CivitAI image existence checks

Build a civitai_image_id → recipe_id mapping once during cache
initialization instead of scanning all recipes on every
check_image_exists and import_from_url call.

- RecipeCache gains an image_id_map field populated by
  _build_image_id_map() during cache init
- check_image_exists and import_from_url duplicate detection
  now use the precomputed map (O(k) / O(1) vs O(n))
- Map is persisted in SQLite cache_metadata for fast startup
- Incrementally updated on add/remove/bulk_remove paths
- Fix: conn.close() previously ran before the cache_metadata query, so the
  query hit a dead connection; the connection is now closed after the query
This commit is contained in:
Will Miao
2026-06-13 08:32:03 +08:00
parent 7cd6a53447
commit bef222c77d
7 changed files with 410 additions and 30 deletions

View File

@@ -12,7 +12,7 @@ import logging
import os
import sqlite3
import threading
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
@@ -26,6 +26,8 @@ class PersistedRecipeData:
raw_data: List[Dict]
file_stats: Dict[str, Tuple[float, int]] # json_path -> (mtime, size)
image_id_map: Dict[str, str] = field(default_factory=dict)
"""Precomputed mapping of civitai image_id → recipe_id."""
class PersistentRecipeCache:
@@ -116,6 +118,20 @@ class PersistentRecipeCache:
if not rows:
return None
# Restore precomputed image_id_map if available
image_id_map: Dict[str, str] = {}
try:
meta_row = conn.execute(
"SELECT value FROM cache_metadata WHERE key = ?",
("image_id_map",),
).fetchone()
if meta_row:
parsed = json.loads(meta_row["value"])
if isinstance(parsed, dict):
image_id_map = parsed
except Exception:
pass # missing or corrupt — rebuilt on next cache refresh
finally:
conn.close()
except FileNotFoundError:
@@ -138,14 +154,24 @@ class PersistentRecipeCache:
row["file_size"] or 0,
)
return PersistedRecipeData(raw_data=raw_data, file_stats=file_stats)
return PersistedRecipeData(
raw_data=raw_data,
file_stats=file_stats,
image_id_map=image_id_map,
)
def save_cache(self, recipes: List[Dict], json_paths: Optional[Dict[str, str]] = None) -> None:
def save_cache(
self,
recipes: List[Dict],
json_paths: Optional[Dict[str, str]] = None,
image_id_map: Optional[Dict[str, str]] = None,
) -> None:
"""Save all recipes to SQLite cache.
Args:
recipes: List of recipe dictionaries to persist.
json_paths: Optional mapping of recipe_id -> json_path for file stats.
image_id_map: Optional precomputed civitai image_id → recipe_id mapping.
"""
if not self.is_enabled():
return
@@ -186,6 +212,12 @@ class PersistentRecipeCache:
recipe_rows,
)
# Persist image_id_map for O(1) lookups on cache load
conn.execute(
"INSERT OR REPLACE INTO cache_metadata (key, value) VALUES (?, ?)",
("image_id_map", json.dumps(image_id_map or {})),
)
conn.commit()
logger.debug("Persisted %d recipes to cache", len(recipe_rows))
finally:
@@ -273,6 +305,29 @@ class PersistentRecipeCache:
except Exception as exc:
logger.debug("Failed to remove recipe %s from cache: %s", recipe_id, exc)
def save_image_id_map(self, image_id_map: Dict[str, str]) -> None:
    """Write only the ``image_id_map`` row of ``cache_metadata``.

    Lets callers that mutate the in-memory map (``add_recipe`` /
    ``remove_recipe``) refresh the persisted copy without going through a
    full ``save_cache`` rewrite.

    Args:
        image_id_map: Mapping to serialize as JSON and store under the
            ``"image_id_map"`` key.

    The call is a no-op when the cache is disabled or its schema has not
    been initialized. Any exception raised while writing is logged at
    debug level and not propagated.
    """
    if not (self.is_enabled() and self._schema_initialized):
        return

    try:
        # Hold the DB lock for the whole connect / write / close sequence.
        with self._db_lock:
            db = self._connect()
            try:
                row = ("image_id_map", json.dumps(image_id_map))
                db.execute(
                    "INSERT OR REPLACE INTO cache_metadata (key, value) VALUES (?, ?)",
                    row,
                )
                db.commit()
            finally:
                # Always release the connection, even if the write failed.
                db.close()
    except Exception as err:
        logger.debug("Failed to persist image_id_map: %s", err)
def get_indexed_recipe_ids(self) -> Set[str]:
"""Return all recipe IDs in the cache.

View File

@@ -1,6 +1,6 @@
import asyncio
from typing import Iterable, List, Dict, Optional
from dataclasses import dataclass
from dataclasses import dataclass, field
from operator import itemgetter
from natsort import natsorted
@@ -14,6 +14,15 @@ class RecipeCache:
sorted_by_date: List[Dict]
folders: List[str] | None = None
folder_tree: Dict | None = None
image_id_map: Dict[str, str] = field(default_factory=dict)
"""Mapping of civitai image_id → recipe_id, precomputed at cache build time.
Built once during cache initialization (O(n)) so that
``check_image_exists`` and ``import_from_url`` duplicate checks
can look up image_id in O(1) instead of scanning all recipes.
Recipes imported from local files have no valid civitai image_id
and are naturally excluded from this map.
"""
def __post_init__(self):
self._lock = asyncio.Lock()

View File

@@ -20,6 +20,7 @@ from .metadata_service import get_default_metadata_provider
from .checkpoint_scanner import CheckpointScanner
from .settings_manager import get_settings_manager
from .recipes.errors import RecipeNotFoundError
from ..utils.civitai_utils import extract_civitai_image_id
from ..utils.utils import calculate_recipe_fingerprint, fuzzy_match
from natsort import natsorted
import sys
@@ -532,7 +533,21 @@ class RecipeScanner:
self._sort_cache_sync()
# Backfill source_path from JSON files if missing (schema migration)
if self._backfill_source_path_if_needed(recipes, json_paths):
self._persistent_cache.save_cache(recipes, json_paths)
self._cache.image_id_map = self._build_image_id_map()
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
else:
# Use persisted map, or rebuild if empty (e.g. first startup
# after deploying the image_id_map feature).
if persisted.image_id_map:
self._cache.image_id_map = dict(persisted.image_id_map)
else:
self._cache.image_id_map = self._build_image_id_map()
if self._cache.image_id_map:
self._persistent_cache.save_image_id_map(
self._cache.image_id_map
)
return self._cache
else:
# Partial update: some files changed
@@ -545,8 +560,11 @@ class RecipeScanner:
self._sort_cache_sync()
# Backfill source_path from JSON files if missing (schema migration)
self._backfill_source_path_if_needed(recipes, json_paths)
self._cache.image_id_map = self._build_image_id_map()
# Persist updated cache
self._persistent_cache.save_cache(recipes, json_paths)
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
return self._cache
# Fall back to full directory scan
@@ -558,9 +576,12 @@ class RecipeScanner:
self._cache.raw_data = recipes
self._update_folder_metadata(self._cache)
self._sort_cache_sync()
self._cache.image_id_map = self._build_image_id_map()
# Persist for next startup
self._persistent_cache.save_cache(recipes, json_paths)
self._persistent_cache.save_cache(
recipes, json_paths, self._cache.image_id_map
)
return self._cache
except Exception as e:
@@ -832,6 +853,28 @@ class RecipeScanner:
except Exception as e:
logger.error(f"Error sorting recipe cache: {e}")
def _build_image_id_map(self) -> Dict[str, str]:
    """Derive the image_id → recipe_id lookup from ``self._cache.raw_data``.

    A recipe contributes an entry only when it is a dict with a truthy
    ``source_path``, ``extract_civitai_image_id`` returns a truthy id for
    that path, and the recipe has a non-``None`` ``id``. When several
    recipes yield the same image id, the first one encountered wins.

    Returns:
        A new dict of image id to stringified recipe id; empty when there
        is no cache.
    """
    result: Dict[str, str] = {}
    if not self._cache:
        return result

    for entry in getattr(self._cache, "raw_data", []):
        source = entry.get("source_path") if isinstance(entry, dict) else None
        if not source:
            continue

        image_id = extract_civitai_image_id(source)
        # Skip sources without an id, and keep the first recipe seen per id.
        if not image_id or image_id in result:
            continue

        owner_id = entry.get("id")
        if owner_id is None:
            continue
        result[image_id] = str(owner_id)

    return result
async def _wait_for_lora_scanner(self) -> None:
"""Ensure the LoRA scanner has initialized before recipe enrichment."""
@@ -1296,11 +1339,20 @@ class RecipeScanner:
# Update FTS index
self._update_fts_index_for_recipe(recipe_data, "add")
source = recipe_data.get("source_path")
if source:
image_id = extract_civitai_image_id(source)
if image_id:
recipe_id_value = recipe_data.get("id")
if recipe_id_value is not None:
cache.image_id_map[image_id] = str(recipe_id_value)
# Persist to SQLite cache
if self._persistent_cache:
recipe_id = str(recipe_data.get("id", ""))
json_path = self._json_path_map.get(recipe_id, "")
self._persistent_cache.update_recipe(recipe_data, json_path)
self._persistent_cache.save_image_id_map(cache.image_id_map)
async def remove_recipe(self, recipe_id: str) -> bool:
"""Remove a recipe from the cache by ID."""
@@ -1319,9 +1371,15 @@ class RecipeScanner:
# Update FTS index
self._update_fts_index_for_recipe(recipe_id, "remove")
# Remove any image_id entry pointing to this recipe
stale = [k for k, v in cache.image_id_map.items() if v == recipe_id]
for k in stale:
del cache.image_id_map[k]
# Remove from SQLite cache
if self._persistent_cache:
self._persistent_cache.remove_recipe(recipe_id)
self._persistent_cache.save_image_id_map(cache.image_id_map)
self._json_path_map.pop(recipe_id, None)
return True
@@ -1332,14 +1390,21 @@ class RecipeScanner:
cache = await self.get_cached_data()
removed = await cache.bulk_remove(recipe_ids, resort=False)
if removed:
removed_ids = {str(r.get("id", "")) for r in removed}
stale = [k for k, v in cache.image_id_map.items() if v in removed_ids]
for k in stale:
del cache.image_id_map[k]
self._schedule_resort()
# Update FTS index and persistent cache for each removed recipe
for recipe in removed:
recipe_id = str(recipe.get("id", ""))
self._update_fts_index_for_recipe(recipe_id, "remove")
if self._persistent_cache:
self._persistent_cache.remove_recipe(recipe_id)
self._json_path_map.pop(recipe_id, None)
if self._persistent_cache:
self._persistent_cache.save_image_id_map(cache.image_id_map)
return len(removed)
async def scan_all_recipes(self) -> List[Dict]: