From 86118d0654cf0d7ef060547353ef2cb52c832d5f Mon Sep 17 00:00:00 2001 From: Will Miao Date: Tue, 12 May 2026 20:39:09 +0800 Subject: [PATCH] fix(recipes): persist source_path in SQLite cache and eliminate source_url redundancy - Add source_path column to PersistentRecipeCache SQLite schema with migration for existing databases (ALTER TABLE ADD COLUMN) - Backfill source_path from recipe JSON files on first startup after migration to avoid requiring manual cache rebuild - Remove all source_url recipe field references (import_remote_recipe, import_from_url, check_image_exists, enrichment, batch_import) and consolidate on source_path as the single source of truth - Add civitai.green to supported Civitai page hosts - Register check-image-exists and import-from-url recipe endpoints --- py/recipes/enrichment.py | 6 +- py/routes/handlers/recipe_handlers.py | 174 ++++++++++++++++++++++++- py/routes/recipe_route_registrar.py | 4 + py/services/batch_import_service.py | 2 +- py/services/persistent_recipe_cache.py | 11 ++ py/services/recipe_scanner.py | 33 +++++ py/utils/civitai_utils.py | 2 +- 7 files changed, 221 insertions(+), 11 deletions(-) diff --git a/py/recipes/enrichment.py b/py/recipes/enrichment.py index 908548ab..2df0c8e4 100644 --- a/py/recipes/enrichment.py +++ b/py/recipes/enrichment.py @@ -36,14 +36,14 @@ class RecipeEnricher: civitai_meta = None model_version_id = None - source_url = recipe.get("source_url") or recipe.get("source_path", "") + source_path = recipe.get("source_path", "") # Check if it's a Civitai image URL - image_id = extract_civitai_image_id(str(source_url)) + image_id = extract_civitai_image_id(str(source_path)) if image_id: try: image_info = await civitai_client.get_image_info( - image_id, source_url=str(source_url) + image_id, source_url=str(source_path) ) if image_info: # Handle nested meta often found in Civitai API responses diff --git a/py/routes/handlers/recipe_handlers.py b/py/routes/handlers/recipe_handlers.py index 40f703a3..2a7b778b 100644 --- a/py/routes/handlers/recipe_handlers.py +++ b/py/routes/handlers/recipe_handlers.py @@ -93,6 +93,8 @@ class RecipeHandlerSet: "cancel_batch_import": self.batch_import.cancel_batch_import, "start_directory_import": self.batch_import.start_directory_import, "browse_directory": self.batch_import.browse_directory, + "check_image_exists": self.management.check_image_exists, + "import_from_url": self.management.import_from_url, } @@ -541,7 +543,7 @@ class RecipeQueryHandler: ) response_data.append( { - "type": "source_url", + "type": "source_path", "fingerprint": url, "count": len(recipes), "recipes": recipes, @@ -772,13 +774,9 @@ class RecipeManagementHandler: "base_model": params.get("base_model", "") or "", "loras": lora_entries, "gen_params": gen_params_request or {}, - "source_url": image_url, + "source_path": params.get("source_path") or image_url, } - source_path = params.get("source_path") - if source_path: - metadata["source_path"] = source_path - # Checkpoint handling if checkpoint_entry: metadata["checkpoint"] = checkpoint_entry @@ -1289,6 +1287,170 @@ class RecipeManagementHandler: return "" + async def check_image_exists(self, request: web.Request) -> web.Response: + try: + await self._ensure_dependencies_ready() + recipe_scanner = self._recipe_scanner_getter() + if recipe_scanner is None: + raise RuntimeError("Recipe scanner unavailable") + + image_ids_raw = request.query.get("image_ids", "") + if not image_ids_raw: + return web.json_response({"success": True, "results": {}}) + + requested_ids = set() + for raw in image_ids_raw.split(","): + stripped = raw.strip() + if stripped and stripped.isdigit(): + requested_ids.add(stripped) + + if not requested_ids: + return web.json_response({"success": True, "results": {}}) + + cache = await recipe_scanner.get_cached_data() + + # Build lookup: image_id -> recipe_id from stored source_path + image_to_recipe = {} + for recipe in getattr(cache, "raw_data", []): + source = recipe.get("source_path") + if not source: + continue + image_id = extract_civitai_image_id(source) + if image_id and image_id not in image_to_recipe: + image_to_recipe[image_id] = recipe.get("id") + + results = {} + for img_id in requested_ids: + recipe_id = image_to_recipe.get(img_id) + results[img_id] = { + "in_library": recipe_id is not None, + "recipe_id": recipe_id, + } + + return web.json_response({"success": True, "results": results}) + except Exception as exc: + self._logger.error( + "Error checking image existence: %s", exc, exc_info=True + ) + return web.json_response({"error": str(exc)}, status=500) + + async def import_from_url(self, request: web.Request) -> web.Response: + try: + await self._ensure_dependencies_ready() + recipe_scanner = self._recipe_scanner_getter() + if recipe_scanner is None: + raise RuntimeError("Recipe scanner unavailable") + + image_url = request.query.get("image_url") + if not image_url: + raise RecipeValidationError("Missing required field: image_url") + + image_id = extract_civitai_image_id(image_url) + if not image_id: + raise RecipeValidationError( + "Could not extract Civitai image ID from URL" + ) + + # Check for duplicate + cache = await recipe_scanner.get_cached_data() + for recipe in getattr(cache, "raw_data", []): + source = recipe.get("source_path") + if source: + existing_id = extract_civitai_image_id(source) + if existing_id == image_id: + return web.json_response({ + "success": True, + "recipe_id": recipe.get("id"), + "name": recipe.get("title", ""), + "already_exists": True, + }) + + # Download image and extract metadata + image_bytes, extension, civitai_meta = ( + await self._download_remote_media(image_url) + ) + + # Extract embedded EXIF metadata + embedded_gen_params = {} + try: + with tempfile.NamedTemporaryFile( + suffix=extension, delete=False + ) as temp_img: + temp_img.write(image_bytes) + temp_img_path = temp_img.name + + try: + raw_embedded = ExifUtils.extract_image_metadata(temp_img_path) + if raw_embedded: + parser = ( + self._analysis_service._recipe_parser_factory.create_parser( + raw_embedded + ) + ) + if parser: + parsed_embedded = await parser.parse_metadata( + raw_embedded, recipe_scanner=recipe_scanner + ) + if parsed_embedded and "gen_params" in parsed_embedded: + embedded_gen_params = parsed_embedded["gen_params"] + finally: + if os.path.exists(temp_img_path): + os.unlink(temp_img_path) + except Exception as exc: + self._logger.warning( + "Failed to extract embedded metadata: %s", exc + ) + + # Build metadata + metadata: Dict[str, Any] = { + "base_model": "", + "loras": [], + "gen_params": embedded_gen_params or {}, + "source_path": image_url, + } + + # Enrich via Civitai API + civitai_client = self._civitai_client_getter() + await RecipeEnricher.enrich_recipe( + recipe=metadata, + civitai_client=civitai_client, + request_params={}, + ) + + # Auto-generate name from prompt or fallback + prompt = ( + metadata.get("gen_params", {}).get("prompt") + or metadata.get("gen_params", {}).get("positivePrompt") + or "" + ) + if prompt: + name = " ".join(str(prompt).split()[:10]) + else: + name = f"Civitai Image {image_id}" + + # Parse tags from params if available + tags = self._parse_tags(request.query.get("tags")) + + result = await self._persistence_service.save_recipe( + recipe_scanner=recipe_scanner, + image_bytes=image_bytes, + image_base64=None, + name=name, + tags=tags, + metadata=metadata, + extension=extension, + ) + return web.json_response(result.payload, status=result.status) + except RecipeValidationError as exc: + return web.json_response({"error": str(exc)}, status=400) + except RecipeDownloadError as exc: + return web.json_response({"error": str(exc)}, status=400) + except Exception as exc: + self._logger.error( + "Error importing recipe from URL: %s", exc, exc_info=True + ) + return web.json_response({"error": str(exc)}, status=500) + class RecipeAnalysisHandler: """Analyze images to extract recipe metadata.""" diff --git a/py/routes/recipe_route_registrar.py b/py/routes/recipe_route_registrar.py index 95aedee5..b4c4bf05 100644 --- a/py/routes/recipe_route_registrar.py +++ b/py/routes/recipe_route_registrar.py @@ -70,6 +70,10 @@ ROUTE_DEFINITIONS: tuple[RouteDefinition, ...] = ( "POST", "/api/lm/recipes/batch-import/directory", "start_directory_import" ), RouteDefinition("POST", "/api/lm/recipes/browse-directory", "browse_directory"), + RouteDefinition( + "GET", "/api/lm/recipes/check-image-exists", "check_image_exists" + ), + RouteDefinition("GET", "/api/lm/recipes/import-from-url", "import_from_url"), ) diff --git a/py/services/batch_import_service.py b/py/services/batch_import_service.py index d501094e..976c8490 100644 --- a/py/services/batch_import_service.py +++ b/py/services/batch_import_service.py @@ -224,7 +224,7 @@ class BatchImportService: return False for recipe in getattr(cache, "raw_data", []): - source_path = recipe.get("source_path") or recipe.get("source_url") + source_path = recipe.get("source_path") if source_path and source_path == source: return True return False diff --git a/py/services/persistent_recipe_cache.py b/py/services/persistent_recipe_cache.py index 438ff7ae..10f1dc7a 100644 --- a/py/services/persistent_recipe_cache.py +++ b/py/services/persistent_recipe_cache.py @@ -38,6 +38,7 @@ class PersistentRecipeCache: "json_path", "title", "folder", + "source_path", "base_model", "fingerprint", "created_date", @@ -334,6 +335,7 @@ class PersistentRecipeCache: json_path TEXT, title TEXT, folder TEXT, + source_path TEXT, base_model TEXT, fingerprint TEXT, created_date REAL, @@ -358,6 +360,13 @@ class PersistentRecipeCache: ); """ ) + # Migration: add source_path column to existing databases + try: + conn.execute( + "ALTER TABLE recipes ADD COLUMN source_path TEXT" + ) + except Exception: + pass # column already exists conn.commit() self._schema_initialized = True except Exception as exc: @@ -406,6 +415,7 @@ class PersistentRecipeCache: json_path, recipe.get("title"), recipe.get("folder"), + recipe.get("source_path"), recipe.get("base_model"), recipe.get("fingerprint"), float(recipe.get("created_date") or 0.0), @@ -456,6 +466,7 @@ class PersistentRecipeCache: "file_path": row["file_path"] or "", "title": row["title"] or "", "folder": row["folder"] or "", + "source_path": row["source_path"] or "", "base_model": row["base_model"] or "", "fingerprint": row["fingerprint"] or "", "created_date": row["created_date"] or 0.0, diff --git a/py/services/recipe_scanner.py b/py/services/recipe_scanner.py index 3797e5d9..ceeb8732 100644 --- a/py/services/recipe_scanner.py +++ b/py/services/recipe_scanner.py @@ -504,6 +504,9 @@ class RecipeScanner: self._cache.raw_data = recipes self._update_folder_metadata(self._cache) self._sort_cache_sync() + # Backfill source_path from JSON files if missing (schema migration) + if self._backfill_source_path_if_needed(recipes, json_paths): + self._persistent_cache.save_cache(recipes, json_paths) return self._cache else: # Partial update: some files changed @@ -514,6 +517,8 @@ class RecipeScanner: self._cache.raw_data = recipes self._update_folder_metadata(self._cache) self._sort_cache_sync() + # Backfill source_path from JSON files if missing (schema migration) + self._backfill_source_path_if_needed(recipes, json_paths) # Persist updated cache self._persistent_cache.save_cache(recipes, json_paths) return self._cache @@ -642,6 +647,34 @@ class RecipeScanner: return recipes, changed, json_paths + def _backfill_source_path_if_needed( + self, + recipes: List[Dict], + json_paths: Dict[str, str], + ) -> bool: + """Backfill source_path from recipe JSON files if missing from cache. + + Returns True if any recipes were updated (caller should persist cache). + """ + updated = False + for recipe in recipes: + if recipe.get("source_path"): + continue + recipe_id = str(recipe.get("id", "")) + json_path = json_paths.get(recipe_id) + if not json_path or not os.path.exists(json_path): + continue + try: + with open(json_path, "r", encoding="utf-8") as f: + json_data = json.load(f) + file_source_path = json_data.get("source_path") + if file_source_path: + recipe["source_path"] = file_source_path + updated = True + except Exception: + pass + return updated + def _full_directory_scan_sync( self, recipes_dir: str ) -> Tuple[List[Dict], Dict[str, str]]: diff --git a/py/utils/civitai_utils.py b/py/utils/civitai_utils.py index 7c2c9d11..de711404 100644 --- a/py/utils/civitai_utils.py +++ b/py/utils/civitai_utils.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Iterable, Mapping, Sequence from urllib.parse import parse_qs, urlparse, urlunparse -_SUPPORTED_CIVITAI_PAGE_HOSTS = frozenset({"civitai.com", "civitai.red"}) +_SUPPORTED_CIVITAI_PAGE_HOSTS = frozenset({"civitai.com", "civitai.red", "civitai.green"}) DEFAULT_CIVITAI_PAGE_HOST = "civitai.com" _DEFAULT_ALLOW_COMMERCIAL_USE: Sequence[str] = ("Sell",) _LICENSE_DEFAULTS: Dict[str, Any] = {