"""SQLite-based persistent cache for recipe metadata.

This module provides fast recipe cache persistence using SQLite, enabling
quick startup by loading from cache instead of walking directories and
parsing JSON files.
"""

from __future__ import annotations

import json
import logging
import os
import re
import sqlite3
import threading
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple

from ..utils.settings_paths import get_project_root, get_settings_dir

logger = logging.getLogger(__name__)


@dataclass
class PersistedRecipeData:
    """Lightweight structure returned by the persistent recipe cache."""

    raw_data: List[Dict]
    file_stats: Dict[str, Tuple[float, int]]  # json_path -> (mtime, size)


class PersistentRecipeCache:
    """Persist recipe metadata in SQLite for fast startup."""

    _DEFAULT_FILENAME = "recipe_cache.sqlite"
    # Column order must match the tuples built by _prepare_recipe_row()
    # and the table created in _initialize_schema().
    _RECIPE_COLUMNS: Tuple[str, ...] = (
        "recipe_id",
        "file_path",
        "json_path",
        "title",
        "folder",
        "base_model",
        "fingerprint",
        "created_date",
        "modified",
        "file_mtime",
        "file_size",
        "favorite",
        "repair_version",
        "preview_nsfw_level",
        "loras_json",
        "checkpoint_json",
        "gen_params_json",
        "tags_json",
    )
    # One instance per library name, guarded by _instance_lock.
    _instances: Dict[str, "PersistentRecipeCache"] = {}
    _instance_lock = threading.Lock()

    def __init__(self, library_name: str = "default", db_path: Optional[str] = None) -> None:
        """Create a cache bound to *library_name* (or an explicit db_path).

        Args:
            library_name: Logical library the cache belongs to.
            db_path: Optional explicit SQLite file path; when omitted the
                path is resolved from settings/environment.
        """
        self._library_name = library_name or "default"
        self._db_path = db_path or self._resolve_default_path(self._library_name)
        self._db_lock = threading.Lock()
        self._schema_initialized = False
        # Fix: compute the directory *before* the try block so the warning
        # below can never reference an unbound local (previously `directory`
        # was assigned inside the try and logged in the except handler).
        directory = os.path.dirname(self._db_path)
        try:
            if directory:
                os.makedirs(directory, exist_ok=True)
        except Exception as exc:
            logger.warning("Could not create recipe cache directory %s: %s", directory, exc)
        if self.is_enabled():
            self._initialize_schema()

    @classmethod
    def get_default(cls, library_name: Optional[str] = None) -> "PersistentRecipeCache":
        """Return (creating if needed) the shared instance for a library."""
        name = library_name or "default"
        with cls._instance_lock:
            if name not in cls._instances:
                cls._instances[name] = cls(name)
            return cls._instances[name]

    @classmethod
    def clear_instances(cls) -> None:
        """Clear all cached instances (useful for library switching)."""
        with cls._instance_lock:
            cls._instances.clear()

    def is_enabled(self) -> bool:
        """Return True unless persistence is disabled via environment flag."""
        return os.environ.get("LORA_MANAGER_DISABLE_PERSISTENT_CACHE", "0") != "1"

    def get_database_path(self) -> str:
        """Expose the resolved SQLite database path."""
        return self._db_path
+ """ + if not self.is_enabled(): + return None + if not self._schema_initialized: + self._initialize_schema() + if not self._schema_initialized: + return None + + try: + with self._db_lock: + conn = self._connect(readonly=True) + try: + # Load all recipes + columns_sql = ", ".join(self._RECIPE_COLUMNS) + rows = conn.execute(f"SELECT {columns_sql} FROM recipes").fetchall() + + if not rows: + return None + + finally: + conn.close() + except FileNotFoundError: + return None + except Exception as exc: + logger.warning("Failed to load persisted recipe cache: %s", exc) + return None + + raw_data: List[Dict] = [] + file_stats: Dict[str, Tuple[float, int]] = {} + + for row in rows: + recipe = self._row_to_recipe(row) + raw_data.append(recipe) + + json_path = row["json_path"] + if json_path: + file_stats[json_path] = ( + row["file_mtime"] or 0.0, + row["file_size"] or 0, + ) + + return PersistedRecipeData(raw_data=raw_data, file_stats=file_stats) + + def save_cache(self, recipes: List[Dict], json_paths: Optional[Dict[str, str]] = None) -> None: + """Save all recipes to SQLite cache. + + Args: + recipes: List of recipe dictionaries to persist. + json_paths: Optional mapping of recipe_id -> json_path for file stats. 
+ """ + if not self.is_enabled(): + return + if not self._schema_initialized: + self._initialize_schema() + if not self._schema_initialized: + return + + try: + with self._db_lock: + conn = self._connect() + try: + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("BEGIN") + + # Clear existing data + conn.execute("DELETE FROM recipes") + + # Prepare and insert all rows + recipe_rows = [] + for recipe in recipes: + recipe_id = str(recipe.get("id", "")) + if not recipe_id: + continue + + json_path = "" + if json_paths: + json_path = json_paths.get(recipe_id, "") + + row = self._prepare_recipe_row(recipe, json_path) + recipe_rows.append(row) + + if recipe_rows: + placeholders = ", ".join(["?"] * len(self._RECIPE_COLUMNS)) + columns = ", ".join(self._RECIPE_COLUMNS) + conn.executemany( + f"INSERT INTO recipes ({columns}) VALUES ({placeholders})", + recipe_rows, + ) + + conn.commit() + logger.debug("Persisted %d recipes to cache", len(recipe_rows)) + finally: + conn.close() + except Exception as exc: + logger.warning("Failed to persist recipe cache: %s", exc) + + def get_file_stats(self) -> Dict[str, Tuple[float, int]]: + """Return stored file stats for all cached recipes. + + Returns: + Dictionary mapping json_path -> (mtime, size). + """ + if not self.is_enabled() or not self._schema_initialized: + return {} + + try: + with self._db_lock: + conn = self._connect(readonly=True) + try: + rows = conn.execute( + "SELECT json_path, file_mtime, file_size FROM recipes WHERE json_path IS NOT NULL" + ).fetchall() + return { + row["json_path"]: (row["file_mtime"] or 0.0, row["file_size"] or 0) + for row in rows + if row["json_path"] + } + finally: + conn.close() + except Exception: + return {} + + def update_recipe(self, recipe: Dict, json_path: Optional[str] = None) -> None: + """Update or insert a single recipe in the cache. + + Args: + recipe: The recipe dictionary to persist. + json_path: Optional path to the recipe JSON file. 
+ """ + if not self.is_enabled() or not self._schema_initialized: + return + + recipe_id = str(recipe.get("id", "")) + if not recipe_id: + return + + try: + with self._db_lock: + conn = self._connect() + try: + row = self._prepare_recipe_row(recipe, json_path or "") + placeholders = ", ".join(["?"] * len(self._RECIPE_COLUMNS)) + columns = ", ".join(self._RECIPE_COLUMNS) + conn.execute( + f"INSERT OR REPLACE INTO recipes ({columns}) VALUES ({placeholders})", + row, + ) + conn.commit() + finally: + conn.close() + except Exception as exc: + logger.debug("Failed to update recipe %s in cache: %s", recipe_id, exc) + + def remove_recipe(self, recipe_id: str) -> None: + """Remove a recipe from the cache by ID. + + Args: + recipe_id: The ID of the recipe to remove. + """ + if not self.is_enabled() or not self._schema_initialized: + return + + if not recipe_id: + return + + try: + with self._db_lock: + conn = self._connect() + try: + conn.execute("DELETE FROM recipes WHERE recipe_id = ?", (str(recipe_id),)) + conn.commit() + finally: + conn.close() + except Exception as exc: + logger.debug("Failed to remove recipe %s from cache: %s", recipe_id, exc) + + def get_indexed_recipe_ids(self) -> Set[str]: + """Return all recipe IDs in the cache. + + Returns: + Set of recipe ID strings. 
+ """ + if not self.is_enabled() or not self._schema_initialized: + return set() + + try: + with self._db_lock: + conn = self._connect(readonly=True) + try: + rows = conn.execute("SELECT recipe_id FROM recipes").fetchall() + return {row["recipe_id"] for row in rows if row["recipe_id"]} + finally: + conn.close() + except Exception: + return set() + + def get_recipe_count(self) -> int: + """Return the number of recipes in the cache.""" + if not self.is_enabled() or not self._schema_initialized: + return 0 + + try: + with self._db_lock: + conn = self._connect(readonly=True) + try: + result = conn.execute("SELECT COUNT(*) FROM recipes").fetchone() + return result[0] if result else 0 + finally: + conn.close() + except Exception: + return 0 + + # Internal helpers + + def _resolve_default_path(self, library_name: str) -> str: + override = os.environ.get("LORA_MANAGER_RECIPE_CACHE_DB") + if override: + return override + try: + settings_dir = get_settings_dir(create=True) + except Exception as exc: + logger.warning("Falling back to project directory for recipe cache: %s", exc) + settings_dir = get_project_root() + safe_name = re.sub(r"[^A-Za-z0-9_.-]", "_", library_name or "default") + if safe_name.lower() in ("default", ""): + legacy_path = os.path.join(settings_dir, self._DEFAULT_FILENAME) + if os.path.exists(legacy_path): + return legacy_path + return os.path.join(settings_dir, "recipe_cache", f"{safe_name}.sqlite") + + def _initialize_schema(self) -> None: + with self._db_lock: + if self._schema_initialized: + return + try: + with self._connect() as conn: + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys = ON") + conn.executescript( + """ + CREATE TABLE IF NOT EXISTS recipes ( + recipe_id TEXT PRIMARY KEY, + file_path TEXT, + json_path TEXT, + title TEXT, + folder TEXT, + base_model TEXT, + fingerprint TEXT, + created_date REAL, + modified REAL, + file_mtime REAL, + file_size INTEGER, + favorite INTEGER DEFAULT 0, + repair_version INTEGER 
DEFAULT 0, + preview_nsfw_level INTEGER DEFAULT 0, + loras_json TEXT, + checkpoint_json TEXT, + gen_params_json TEXT, + tags_json TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_recipes_json_path ON recipes(json_path); + CREATE INDEX IF NOT EXISTS idx_recipes_fingerprint ON recipes(fingerprint); + + CREATE TABLE IF NOT EXISTS cache_metadata ( + key TEXT PRIMARY KEY, + value TEXT + ); + """ + ) + conn.commit() + self._schema_initialized = True + except Exception as exc: + logger.warning("Failed to initialize persistent recipe cache schema: %s", exc) + + def _connect(self, readonly: bool = False) -> sqlite3.Connection: + uri = False + path = self._db_path + if readonly: + if not os.path.exists(path): + raise FileNotFoundError(path) + path = f"file:{path}?mode=ro" + uri = True + conn = sqlite3.connect(path, check_same_thread=False, uri=uri, detect_types=sqlite3.PARSE_DECLTYPES) + conn.row_factory = sqlite3.Row + return conn + + def _prepare_recipe_row(self, recipe: Dict, json_path: str) -> Tuple: + """Convert a recipe dict to a row tuple for SQLite insertion.""" + loras = recipe.get("loras") + loras_json = json.dumps(loras) if loras else None + + checkpoint = recipe.get("checkpoint") + checkpoint_json = json.dumps(checkpoint) if checkpoint else None + + gen_params = recipe.get("gen_params") + gen_params_json = json.dumps(gen_params) if gen_params else None + + tags = recipe.get("tags") + tags_json = json.dumps(tags) if tags else None + + # Get file stats if json_path exists + file_mtime = 0.0 + file_size = 0 + if json_path and os.path.exists(json_path): + try: + stat = os.stat(json_path) + file_mtime = stat.st_mtime + file_size = stat.st_size + except OSError: + pass + + return ( + str(recipe.get("id", "")), + recipe.get("file_path"), + json_path, + recipe.get("title"), + recipe.get("folder"), + recipe.get("base_model"), + recipe.get("fingerprint"), + float(recipe.get("created_date") or 0.0), + float(recipe.get("modified") or 0.0), + file_mtime, + file_size, + 1 if 
recipe.get("favorite") else 0, + int(recipe.get("repair_version") or 0), + int(recipe.get("preview_nsfw_level") or 0), + loras_json, + checkpoint_json, + gen_params_json, + tags_json, + ) + + def _row_to_recipe(self, row: sqlite3.Row) -> Dict: + """Convert a SQLite row to a recipe dictionary.""" + loras = [] + if row["loras_json"]: + try: + loras = json.loads(row["loras_json"]) + except json.JSONDecodeError: + pass + + checkpoint = None + if row["checkpoint_json"]: + try: + checkpoint = json.loads(row["checkpoint_json"]) + except json.JSONDecodeError: + pass + + gen_params = {} + if row["gen_params_json"]: + try: + gen_params = json.loads(row["gen_params_json"]) + except json.JSONDecodeError: + pass + + tags = [] + if row["tags_json"]: + try: + tags = json.loads(row["tags_json"]) + except json.JSONDecodeError: + pass + + recipe = { + "id": row["recipe_id"], + "file_path": row["file_path"] or "", + "title": row["title"] or "", + "folder": row["folder"] or "", + "base_model": row["base_model"] or "", + "fingerprint": row["fingerprint"] or "", + "created_date": row["created_date"] or 0.0, + "modified": row["modified"] or 0.0, + "favorite": bool(row["favorite"]), + "repair_version": row["repair_version"] or 0, + "preview_nsfw_level": row["preview_nsfw_level"] or 0, + "loras": loras, + "gen_params": gen_params, + } + + if tags: + recipe["tags"] = tags + + if checkpoint: + recipe["checkpoint"] = checkpoint + + return recipe + + +def get_persistent_recipe_cache() -> PersistentRecipeCache: + """Get the default persistent recipe cache instance for the active library.""" + from .settings_manager import get_settings_manager + + library_name = get_settings_manager().get_active_library_name() + return PersistentRecipeCache.get_default(library_name) diff --git a/py/services/recipe_fts_index.py b/py/services/recipe_fts_index.py index a97e2fa9..c49bb5dc 100644 --- a/py/services/recipe_fts_index.py +++ b/py/services/recipe_fts_index.py @@ -403,6 +403,78 @@ class RecipeFTSIndex: 
def get_indexed_recipe_ids(self) -> Set[str]:
    """Return all recipe IDs currently in the index.

    Returns:
        Set of recipe ID strings.
    """
    if not self._schema_initialized:
        self.initialize()
    if not self._schema_initialized:
        return set()

    try:
        with self._lock:
            conn = self._connect(readonly=True)
            try:
                found = conn.execute("SELECT recipe_id FROM recipe_fts").fetchall()
                return {entry[0] for entry in found if entry[0]}
            finally:
                conn.close()
    except FileNotFoundError:
        # No database file yet -> nothing indexed.
        return set()
    except Exception as exc:
        logger.debug("Failed to get indexed recipe IDs: %s", exc)
        return set()


def validate_index(self, recipe_count: int, recipe_ids: Set[str]) -> bool:
    """Check if the FTS index matches the expected recipes.

    This method validates whether the existing FTS index can be reused
    without a full rebuild. It checks:
    1. The index has been initialized
    2. The count matches
    3. The recipe IDs match

    Args:
        recipe_count: Expected number of recipes.
        recipe_ids: Expected set of recipe IDs.

    Returns:
        True if the index is valid and can be reused, False otherwise.
    """
    if not self._schema_initialized:
        self.initialize()
    if not self._schema_initialized:
        return False

    try:
        stored_count = self.get_indexed_count()
        if stored_count != recipe_count:
            logger.debug(
                "FTS index count mismatch: indexed=%d, expected=%d",
                stored_count, recipe_count,
            )
            return False

        stored_ids = self.get_indexed_recipe_ids()
        if stored_ids == recipe_ids:
            return True

        absent = recipe_ids - stored_ids
        surplus = stored_ids - recipe_ids
        if absent:
            logger.debug("FTS index missing %d recipe IDs", len(absent))
        if surplus:
            logger.debug("FTS index has %d extra recipe IDs", len(surplus))
        return False
    except Exception as exc:
        logger.debug("FTS index validation failed: %s", exc)
        return False
def _initialize_recipe_cache_sync(self):
    """Synchronous recipe cache initialization for thread pool execution.

    Uses the persistent cache for fast startup when available:
    1. Try to load from the persistent SQLite cache
    2. Reconcile with the filesystem (mtime/size change detection)
    3. Fall back to a full directory scan on cache miss
    4. Persist results for the next startup

    Returns:
        The populated recipe cache (or None if the cache was never created).
    """
    # Fix: ``loop`` was previously assigned inside ``try`` but closed in
    # ``finally`` -- if new_event_loop() raised, the finally clause hit a
    # NameError that masked the original exception.
    loop = None
    try:
        # Dedicated event loop for this worker thread; presumably some
        # downstream helpers expect a loop to be set -- TODO confirm and
        # drop if unused.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        if self._persistent_cache is None:
            self._persistent_cache = get_persistent_recipe_cache()

        recipes_dir = self.recipes_dir
        if not recipes_dir or not os.path.exists(recipes_dir):
            logger.warning(f"Recipes directory not found: {recipes_dir}")
            return self._cache

        # Fast path: reuse the persisted SQLite snapshot when possible.
        persisted = self._persistent_cache.load_cache()
        if persisted:
            recipes, changed, json_paths = self._reconcile_recipe_cache(persisted, recipes_dir)
            self._json_path_map = json_paths

            if not changed:
                logger.info("Recipe cache hit: loaded %d recipes from persistent cache", len(recipes))
            else:
                logger.info("Recipe cache partial hit: reconciled %d recipes with filesystem", len(recipes))

            self._cache.raw_data = recipes
            self._update_folder_metadata(self._cache)
            self._sort_cache_sync()
            if changed:
                # Persist the reconciled view for the next startup.
                self._persistent_cache.save_cache(recipes, json_paths)
            return self._cache

        # Slow path: cache miss -> full directory walk.
        logger.info("Recipe cache miss: performing full directory scan")
        recipes, json_paths = self._full_directory_scan_sync(recipes_dir)
        self._json_path_map = json_paths

        self._cache.raw_data = recipes
        self._update_folder_metadata(self._cache)
        self._sort_cache_sync()

        # Persist for next startup.
        self._persistent_cache.save_cache(recipes, json_paths)

        return self._cache
    except Exception as e:
        logger.error(f"Error in thread-based recipe cache initialization: {e}")
        import traceback
        traceback.print_exc(file=sys.stderr)
        return self._cache if hasattr(self, '_cache') else None
    finally:
        # Clean up the event loop (guarded: creation itself may have failed).
        if loop is not None:
            loop.close()
+ """ + from .persistent_recipe_cache import PersistedRecipeData + + recipes: List[Dict] = [] + json_paths: Dict[str, str] = {} + changed = False + + # Build set of current recipe files + current_files: Dict[str, Tuple[float, int]] = {} + for root, _, files in os.walk(recipes_dir): + for file in files: + if file.lower().endswith('.recipe.json'): + file_path = os.path.join(root, file) + try: + stat = os.stat(file_path) + current_files[file_path] = (stat.st_mtime, stat.st_size) + except OSError: + continue + + # Build lookup of persisted recipes by json_path + persisted_by_path: Dict[str, Dict] = {} + for recipe in persisted.raw_data: + recipe_id = str(recipe.get('id', '')) + if recipe_id: + # Find the json_path from file_stats + for json_path, (mtime, size) in persisted.file_stats.items(): + if os.path.basename(json_path).startswith(recipe_id): + persisted_by_path[json_path] = recipe + break + + # Also index by recipe ID for faster lookups + persisted_by_id: Dict[str, Dict] = { + str(r.get('id', '')): r for r in persisted.raw_data if r.get('id') + } + + # Process current files + for file_path, (current_mtime, current_size) in current_files.items(): + cached_stats = persisted.file_stats.get(file_path) + + if cached_stats: + cached_mtime, cached_size = cached_stats + # Check if file is unchanged + if abs(current_mtime - cached_mtime) < 1.0 and current_size == cached_size: + # Use cached data + cached_recipe = persisted_by_path.get(file_path) + if cached_recipe: + recipe_id = str(cached_recipe.get('id', '')) + # Track folder from file path + cached_recipe['folder'] = cached_recipe.get('folder') or self._calculate_folder(file_path) + recipes.append(cached_recipe) + json_paths[recipe_id] = file_path + continue + + # File is new or changed - need to re-read + changed = True + recipe_data = self._load_recipe_file_sync(file_path) + if recipe_data: + recipe_id = str(recipe_data.get('id', '')) + recipes.append(recipe_data) + json_paths[recipe_id] = file_path + + # Check for 
deleted files + for json_path in persisted.file_stats.keys(): + if json_path not in current_files: + changed = True + logger.debug("Recipe file deleted: %s", json_path) + + return recipes, changed, json_paths + + def _full_directory_scan_sync(self, recipes_dir: str) -> Tuple[List[Dict], Dict[str, str]]: + """Perform a full synchronous directory scan for recipes. + + Args: + recipes_dir: Path to the recipes directory. + + Returns: + Tuple of (recipes list, json_paths dict). + """ + recipes: List[Dict] = [] + json_paths: Dict[str, str] = {} + + # Get all recipe JSON files + recipe_files = [] + for root, _, files in os.walk(recipes_dir): + for file in files: + if file.lower().endswith('.recipe.json'): + recipe_files.append(os.path.join(root, file)) + + # Process each recipe file + for recipe_path in recipe_files: + recipe_data = self._load_recipe_file_sync(recipe_path) + if recipe_data: + recipe_id = str(recipe_data.get('id', '')) + recipes.append(recipe_data) + json_paths[recipe_id] = recipe_path + + return recipes, json_paths + + def _load_recipe_file_sync(self, recipe_path: str) -> Optional[Dict]: + """Load a single recipe file synchronously. + + Args: + recipe_path: Path to the recipe JSON file. + + Returns: + Recipe dictionary if valid, None otherwise. 
+ """ + try: + with open(recipe_path, 'r', encoding='utf-8') as f: + recipe_data = json.load(f) + + # Validate recipe data + if not recipe_data or not isinstance(recipe_data, dict): + logger.warning(f"Invalid recipe data in {recipe_path}") + return None + + # Ensure required fields exist + required_fields = ['id', 'file_path', 'title'] + if not all(field in recipe_data for field in required_fields): + logger.warning(f"Missing required fields in {recipe_path}") + return None + + # Ensure the image file exists and prioritize local siblings + image_path = recipe_data.get('file_path') + path_updated = False + if image_path: + recipe_dir = os.path.dirname(recipe_path) + image_filename = os.path.basename(image_path) + local_sibling_path = os.path.normpath(os.path.join(recipe_dir, image_filename)) + + # If local sibling exists and stored path is different, prefer local + if os.path.exists(local_sibling_path) and os.path.normpath(image_path) != local_sibling_path: + recipe_data['file_path'] = local_sibling_path + path_updated = True + logger.info(f"Updated recipe image path to local sibling: {local_sibling_path}") + elif not os.path.exists(image_path): + logger.warning(f"Recipe image not found and no local sibling: {image_path}") + + if path_updated: + try: + with open(recipe_path, 'w', encoding='utf-8') as f: + json.dump(recipe_data, f, indent=4, ensure_ascii=False) + except Exception as e: + logger.warning(f"Failed to persist repair for {recipe_path}: {e}") + + # Track folder placement relative to recipes directory + recipe_data['folder'] = recipe_data.get('folder') or self._calculate_folder(recipe_path) + + # Ensure loras array exists + if 'loras' not in recipe_data: + recipe_data['loras'] = [] + + # Ensure gen_params exists + if 'gen_params' not in recipe_data: + recipe_data['gen_params'] = {} + + return recipe_data + except Exception as e: + logger.error(f"Error loading recipe file {recipe_path}: {e}") + import traceback + traceback.print_exc(file=sys.stderr) + return 
None + + def _sort_cache_sync(self) -> None: + """Sort cache data synchronously.""" + try: + # Sort by name + self._cache.sorted_by_name = natsorted( + self._cache.raw_data, + key=lambda x: x.get('title', '').lower() + ) + + # Sort by date (modified or created) + self._cache.sorted_by_date = sorted( + self._cache.raw_data, + key=lambda x: (x.get('modified', x.get('created_date', 0)), x.get('file_path', '')), + reverse=True + ) + except Exception as e: + logger.error(f"Error sorting recipe cache: {e}") + async def _wait_for_lora_scanner(self) -> None: """Ensure the LoRA scanner has initialized before recipe enrichment.""" @@ -570,7 +735,10 @@ class RecipeScanner: self._post_scan_task = loop.create_task(_run_enrichment(), name="recipe_cache_enrichment") def _schedule_fts_index_build(self) -> None: - """Build FTS index in background without blocking.""" + """Build FTS index in background without blocking. + + Validates existing index first and reuses it if valid. + """ if self._fts_index_task and not self._fts_index_task.done(): return # Already running @@ -587,7 +755,25 @@ class RecipeScanner: try: self._fts_index = RecipeFTSIndex() - # Run in thread pool (SQLite is blocking) + # Check if existing index is valid + recipe_ids = {str(r.get('id', '')) for r in self._cache.raw_data if r.get('id')} + recipe_count = len(self._cache.raw_data) + + # Run validation in thread pool + is_valid = await loop.run_in_executor( + None, + self._fts_index.validate_index, + recipe_count, + recipe_ids + ) + + if is_valid: + logger.info("FTS index validated, reusing existing index with %d recipes", recipe_count) + self._fts_index._ready.set() + return + + # Only rebuild if validation fails + logger.info("FTS index invalid or outdated, rebuilding...") await loop.run_in_executor( None, self._fts_index.build_index, @@ -875,6 +1061,12 @@ class RecipeScanner: # Update FTS index self._update_fts_index_for_recipe(recipe_data, 'add') + # Persist to SQLite cache + if self._persistent_cache: + 
recipe_id = str(recipe_data.get('id', '')) + json_path = self._json_path_map.get(recipe_id, '') + self._persistent_cache.update_recipe(recipe_data, json_path) + async def remove_recipe(self, recipe_id: str) -> bool: """Remove a recipe from the cache by ID.""" @@ -891,6 +1083,12 @@ class RecipeScanner: # Update FTS index self._update_fts_index_for_recipe(recipe_id, 'remove') + + # Remove from SQLite cache + if self._persistent_cache: + self._persistent_cache.remove_recipe(recipe_id) + self._json_path_map.pop(recipe_id, None) + return True async def bulk_remove(self, recipe_ids: Iterable[str]) -> int: @@ -900,9 +1098,13 @@ class RecipeScanner: removed = await cache.bulk_remove(recipe_ids, resort=False) if removed: self._schedule_resort() - # Update FTS index for each removed recipe - for recipe_id in (str(r.get('id', '')) for r in removed): + # Update FTS index and persistent cache for each removed recipe + for recipe in removed: + recipe_id = str(recipe.get('id', '')) self._update_fts_index_for_recipe(recipe_id, 'remove') + if self._persistent_cache: + self._persistent_cache.remove_recipe(recipe_id) + self._json_path_map.pop(recipe_id, None) return len(removed) async def scan_all_recipes(self) -> List[Dict]: @@ -1695,11 +1897,11 @@ class RecipeScanner: async def update_recipe_metadata(self, recipe_id: str, metadata: dict) -> bool: """Update recipe metadata (like title and tags) in both file system and cache - + Args: recipe_id: The ID of the recipe to update metadata: Dictionary containing metadata fields to update (title, tags, etc.) 
- + Returns: bool: True if successful, False otherwise """ @@ -1707,16 +1909,16 @@ class RecipeScanner: recipe_json_path = await self.get_recipe_json_path(recipe_id) if not recipe_json_path or not os.path.exists(recipe_json_path): return False - + try: # Load existing recipe data with open(recipe_json_path, 'r', encoding='utf-8') as f: recipe_data = json.load(f) - + # Update fields for key, value in metadata.items(): recipe_data[key] = value - + # Save updated recipe with open(recipe_json_path, 'w', encoding='utf-8') as f: json.dump(recipe_data, f, indent=4, ensure_ascii=False) @@ -1729,6 +1931,11 @@ class RecipeScanner: # Update FTS index self._update_fts_index_for_recipe(recipe_data, 'update') + # Update persistent SQLite cache + if self._persistent_cache: + self._persistent_cache.update_recipe(recipe_data, recipe_json_path) + self._json_path_map[recipe_id] = recipe_json_path + # If the recipe has an image, update its EXIF metadata from ..utils.exif_utils import ExifUtils image_path = recipe_data.get('file_path') @@ -1800,6 +2007,11 @@ class RecipeScanner: # Update FTS index self._update_fts_index_for_recipe(recipe_data, 'update') + # Update persistent SQLite cache + if self._persistent_cache: + self._persistent_cache.update_recipe(recipe_data, recipe_json_path) + self._json_path_map[recipe_id] = recipe_json_path + updated_lora = dict(lora_entry) if target_lora is not None: preview_url = target_lora.get('preview_url') @@ -1923,26 +2135,31 @@ class RecipeScanner: if not recipes_to_update: return 0, 0 - # Persist changes to disk + # Persist changes to disk and SQLite cache async with self._mutation_lock: for recipe in recipes_to_update: - recipe_id = recipe.get('id') + recipe_id = str(recipe.get('id', '')) if not recipe_id: continue - + recipe_path = os.path.join(self.recipes_dir, f"{recipe_id}.recipe.json") try: self._write_recipe_file(recipe_path, recipe) file_updated_count += 1 logger.info(f"Updated file_name in recipe {recipe_path}: -> {new_file_name}") + + # 
Update persistent SQLite cache + if self._persistent_cache: + self._persistent_cache.update_recipe(recipe, recipe_path) + self._json_path_map[recipe_id] = recipe_path except Exception as e: logger.error(f"Error updating recipe file {recipe_path}: {e}") - + # We don't necessarily need to resort because LoRA file_name isn't a sort key, # but we might want to schedule a resort if we're paranoid or if searching relies on sorted state. # Given it's a rename of a dependency, search results might change if searching by LoRA name. self._schedule_resort() - + return file_updated_count, cache_updated_count async def find_recipes_by_fingerprint(self, fingerprint: str) -> list: diff --git a/tests/test_persistent_recipe_cache.py b/tests/test_persistent_recipe_cache.py new file mode 100644 index 00000000..669ecfa4 --- /dev/null +++ b/tests/test_persistent_recipe_cache.py @@ -0,0 +1,257 @@ +"""Tests for PersistentRecipeCache.""" + +import json +import os +import tempfile +from typing import Dict, List + +import pytest + +from py.services.persistent_recipe_cache import PersistentRecipeCache, PersistedRecipeData + + +@pytest.fixture +def temp_db_path(): + """Create a temporary database path.""" + with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f: + path = f.name + yield path + # Cleanup + if os.path.exists(path): + os.unlink(path) + # Also clean up WAL files + for suffix in ["-wal", "-shm"]: + wal_path = path + suffix + if os.path.exists(wal_path): + os.unlink(wal_path) + + +@pytest.fixture +def sample_recipes() -> List[Dict]: + """Create sample recipe data.""" + return [ + { + "id": "recipe-001", + "file_path": "/path/to/image1.png", + "title": "Test Recipe 1", + "folder": "folder1", + "base_model": "SD1.5", + "fingerprint": "abc123", + "created_date": 1700000000.0, + "modified": 1700000100.0, + "favorite": True, + "repair_version": 3, + "preview_nsfw_level": 1, + "loras": [ + {"hash": "hash1", "file_name": "lora1", "strength": 0.8}, + {"hash": "hash2", 
"file_name": "lora2", "strength": 1.0}, + ], + "checkpoint": {"name": "model.safetensors", "hash": "cphash"}, + "gen_params": {"prompt": "test prompt", "negative_prompt": "bad"}, + "tags": ["tag1", "tag2"], + }, + { + "id": "recipe-002", + "file_path": "/path/to/image2.png", + "title": "Test Recipe 2", + "folder": "", + "base_model": "SDXL", + "fingerprint": "def456", + "created_date": 1700000200.0, + "modified": 1700000300.0, + "favorite": False, + "repair_version": 2, + "preview_nsfw_level": 0, + "loras": [{"hash": "hash3", "file_name": "lora3", "strength": 0.5}], + "gen_params": {"prompt": "another prompt"}, + "tags": [], + }, + ] + + +class TestPersistentRecipeCache: + """Tests for PersistentRecipeCache class.""" + + def test_init_creates_db(self, temp_db_path): + """Test that initialization creates the database.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + assert cache.is_enabled() + assert os.path.exists(temp_db_path) + + def test_save_and_load_roundtrip(self, temp_db_path, sample_recipes): + """Test save and load cycle preserves data.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + + # Save recipes + json_paths = { + "recipe-001": "/path/to/recipe-001.recipe.json", + "recipe-002": "/path/to/recipe-002.recipe.json", + } + cache.save_cache(sample_recipes, json_paths) + + # Load recipes + loaded = cache.load_cache() + assert loaded is not None + assert len(loaded.raw_data) == 2 + + # Verify first recipe + r1 = next(r for r in loaded.raw_data if r["id"] == "recipe-001") + assert r1["title"] == "Test Recipe 1" + assert r1["folder"] == "folder1" + assert r1["base_model"] == "SD1.5" + assert r1["fingerprint"] == "abc123" + assert r1["favorite"] is True + assert r1["repair_version"] == 3 + assert len(r1["loras"]) == 2 + assert r1["loras"][0]["hash"] == "hash1" + assert r1["checkpoint"]["name"] == "model.safetensors" + assert r1["gen_params"]["prompt"] == "test prompt" + assert r1["tags"] == ["tag1", "tag2"] + + # Verify second recipe + r2 = 
next(r for r in loaded.raw_data if r["id"] == "recipe-002") + assert r2["title"] == "Test Recipe 2" + assert r2["folder"] == "" + assert r2["favorite"] is False + + def test_empty_cache_returns_none(self, temp_db_path): + """Test that loading empty cache returns None.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + loaded = cache.load_cache() + assert loaded is None + + def test_update_single_recipe(self, temp_db_path, sample_recipes): + """Test updating a single recipe.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + cache.save_cache(sample_recipes) + + # Update a recipe + updated_recipe = dict(sample_recipes[0]) + updated_recipe["title"] = "Updated Title" + updated_recipe["favorite"] = False + cache.update_recipe(updated_recipe, "/path/to/recipe-001.recipe.json") + + # Load and verify + loaded = cache.load_cache() + r1 = next(r for r in loaded.raw_data if r["id"] == "recipe-001") + assert r1["title"] == "Updated Title" + assert r1["favorite"] is False + + def test_remove_recipe(self, temp_db_path, sample_recipes): + """Test removing a recipe.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + cache.save_cache(sample_recipes) + + # Remove a recipe + cache.remove_recipe("recipe-001") + + # Load and verify + loaded = cache.load_cache() + assert len(loaded.raw_data) == 1 + assert loaded.raw_data[0]["id"] == "recipe-002" + + def test_get_indexed_recipe_ids(self, temp_db_path, sample_recipes): + """Test getting all indexed recipe IDs.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + cache.save_cache(sample_recipes) + + ids = cache.get_indexed_recipe_ids() + assert ids == {"recipe-001", "recipe-002"} + + def test_get_recipe_count(self, temp_db_path, sample_recipes): + """Test getting recipe count.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + assert cache.get_recipe_count() == 0 + + cache.save_cache(sample_recipes) + assert cache.get_recipe_count() == 2 + + cache.remove_recipe("recipe-001") + assert 
cache.get_recipe_count() == 1 + + def test_file_stats(self, temp_db_path, sample_recipes): + """Test file stats tracking.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + + json_paths = { + "recipe-001": "/path/to/recipe-001.recipe.json", + "recipe-002": "/path/to/recipe-002.recipe.json", + } + cache.save_cache(sample_recipes, json_paths) + + stats = cache.get_file_stats() + # File stats will be (0.0, 0) since files don't exist + assert len(stats) == 2 + + def test_disabled_cache(self, temp_db_path, sample_recipes, monkeypatch): + """Test that disabled cache returns None.""" + monkeypatch.setenv("LORA_MANAGER_DISABLE_PERSISTENT_CACHE", "1") + + cache = PersistentRecipeCache(db_path=temp_db_path) + assert not cache.is_enabled() + cache.save_cache(sample_recipes) + assert cache.load_cache() is None + + def test_invalid_recipe_skipped(self, temp_db_path): + """Test that recipes without ID are skipped.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + + recipes = [ + {"title": "No ID recipe"}, # Missing ID + {"id": "valid-001", "title": "Valid recipe"}, + ] + cache.save_cache(recipes) + + loaded = cache.load_cache() + assert len(loaded.raw_data) == 1 + assert loaded.raw_data[0]["id"] == "valid-001" + + def test_get_default_singleton(self, monkeypatch): + """Test singleton behavior.""" + # Use temp directory + with tempfile.TemporaryDirectory() as tmpdir: + monkeypatch.setenv("LORA_MANAGER_RECIPE_CACHE_DB", os.path.join(tmpdir, "test.sqlite")) + + PersistentRecipeCache.clear_instances() + cache1 = PersistentRecipeCache.get_default("test_lib") + cache2 = PersistentRecipeCache.get_default("test_lib") + assert cache1 is cache2 + + cache3 = PersistentRecipeCache.get_default("other_lib") + assert cache1 is not cache3 + + PersistentRecipeCache.clear_instances() + + def test_loras_json_handling(self, temp_db_path): + """Test that complex loras data is preserved.""" + cache = PersistentRecipeCache(db_path=temp_db_path) + + recipes = [ + { + "id": 
"complex-001", + "title": "Complex Loras", + "loras": [ + { + "hash": "abc123", + "file_name": "test_lora", + "strength": 0.75, + "modelVersionId": 12345, + "modelName": "Test Model", + "isDeleted": False, + }, + { + "hash": "def456", + "file_name": "another_lora", + "strength": 1.0, + "clip_strength": 0.8, + }, + ], + } + ] + cache.save_cache(recipes) + + loaded = cache.load_cache() + loras = loaded.raw_data[0]["loras"] + assert len(loras) == 2 + assert loras[0]["modelVersionId"] == 12345 + assert loras[1]["clip_strength"] == 0.8 diff --git a/tests/test_recipe_fts_index_validation.py b/tests/test_recipe_fts_index_validation.py new file mode 100644 index 00000000..5add49ba --- /dev/null +++ b/tests/test_recipe_fts_index_validation.py @@ -0,0 +1,183 @@ +"""Tests for RecipeFTSIndex validation methods.""" + +import os +import tempfile +from typing import Dict, List + +import pytest + +from py.services.recipe_fts_index import RecipeFTSIndex + + +@pytest.fixture +def temp_db_path(): + """Create a temporary database path.""" + with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f: + path = f.name + yield path + # Cleanup + if os.path.exists(path): + os.unlink(path) + for suffix in ["-wal", "-shm"]: + wal_path = path + suffix + if os.path.exists(wal_path): + os.unlink(wal_path) + + +@pytest.fixture +def sample_recipes() -> List[Dict]: + """Create sample recipe data for FTS indexing.""" + return [ + { + "id": "recipe-001", + "title": "Anime Character Portrait", + "tags": ["anime", "portrait", "character"], + "loras": [ + {"file_name": "anime_style", "modelName": "Anime Style LoRA"}, + {"file_name": "character_v2", "modelName": "Character Design V2"}, + ], + "gen_params": { + "prompt": "masterpiece, best quality, 1girl", + "negative_prompt": "bad quality, worst quality", + }, + }, + { + "id": "recipe-002", + "title": "Landscape Photography", + "tags": ["landscape", "photography", "nature"], + "loras": [ + {"file_name": "landscape_lora", "modelName": 
"Landscape Enhancement"}, + ], + "gen_params": { + "prompt": "beautiful landscape, mountains, sunset", + "negative_prompt": "ugly, blurry", + }, + }, + { + "id": "recipe-003", + "title": "Fantasy Art Scene", + "tags": ["fantasy", "art"], + "loras": [], + "gen_params": { + "prompt": "fantasy world, dragons, magic", + }, + }, + ] + + +class TestFTSIndexValidation: + """Tests for FTS index validation methods.""" + + def test_validate_index_empty_returns_false(self, temp_db_path): + """Test that validation fails on empty index.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.initialize() + + # Empty index should not validate against non-empty recipe set + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is False + + def test_validate_index_count_mismatch(self, temp_db_path, sample_recipes): + """Test validation fails when counts don't match.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + # Validate with wrong count + result = fts.validate_index(5, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is False + + def test_validate_index_id_mismatch(self, temp_db_path, sample_recipes): + """Test validation fails when IDs don't match.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + # Validate with wrong IDs + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-999"}) + assert result is False + + def test_validate_index_success(self, temp_db_path, sample_recipes): + """Test successful validation.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + # Validate with correct count and IDs + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is True + + def test_get_indexed_recipe_ids(self, temp_db_path, sample_recipes): + """Test getting indexed recipe IDs.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + ids = 
fts.get_indexed_recipe_ids() + assert ids == {"recipe-001", "recipe-002", "recipe-003"} + + def test_get_indexed_recipe_ids_empty(self, temp_db_path): + """Test getting IDs from empty index.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.initialize() + + ids = fts.get_indexed_recipe_ids() + assert ids == set() + + def test_validate_after_add_recipe(self, temp_db_path, sample_recipes): + """Test validation after adding a recipe.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes[:2]) # Only first 2 + + # Validation should fail with all 3 IDs + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is False + + # Add third recipe + fts.add_recipe(sample_recipes[2]) + + # Now validation should pass + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is True + + def test_validate_after_remove_recipe(self, temp_db_path, sample_recipes): + """Test validation after removing a recipe.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + # Remove a recipe + fts.remove_recipe("recipe-002") + + # Validation should fail with original 3 IDs + result = fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + assert result is False + + # Validation should pass with 2 remaining IDs + result = fts.validate_index(2, {"recipe-001", "recipe-003"}) + assert result is True + + def test_validate_index_uninitialized(self, temp_db_path): + """Test validation on uninitialized index.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + # Don't call initialize + + # Should initialize automatically and return False for non-empty set + result = fts.validate_index(1, {"recipe-001"}) + assert result is False + + def test_indexed_count_after_clear(self, temp_db_path, sample_recipes): + """Test count after clearing index.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + assert fts.get_indexed_count() == 3 + + 
fts.clear() + assert fts.get_indexed_count() == 0 + + def test_search_still_works_after_validation(self, temp_db_path, sample_recipes): + """Test that search works correctly after validation.""" + fts = RecipeFTSIndex(db_path=temp_db_path) + fts.build_index(sample_recipes) + + # Validate (which checks state) + fts.validate_index(3, {"recipe-001", "recipe-002", "recipe-003"}) + + # Search should still work + results = fts.search("anime") + assert "recipe-001" in results