From 86118d0654cf0d7ef060547353ef2cb52c832d5f Mon Sep 17 00:00:00 2001
From: Will Miao <willmiao.dev@gmail.com>
Date: Tue, 12 May 2026 20:39:09 +0800
Subject: [PATCH] fix(recipes): persist source_path in SQLite cache and
 eliminate source_url redundancy

- Add source_path column to PersistentRecipeCache SQLite schema with
  migration for existing databases (ALTER TABLE ADD COLUMN)
- Backfill missing source_path values from recipe JSON files whenever the
  persistent cache is loaded, so migrated databases need no manual rebuild
- Remove all source_url recipe field references (import_remote_recipe,
  import_from_url, check_image_exists, enrichment, batch_import)
  and consolidate on source_path as the single source of truth
- Add civitai.green to supported Civitai page hosts
- Register check-image-exists and import-from-url recipe endpoints
---
 py/recipes/enrichment.py               |   6 +-
 py/routes/handlers/recipe_handlers.py  | 174 ++++++++++++++++++++++++-
 py/routes/recipe_route_registrar.py    |   4 +
 py/services/batch_import_service.py    |   2 +-
 py/services/persistent_recipe_cache.py |  11 ++
 py/services/recipe_scanner.py          |  33 +++++
 py/utils/civitai_utils.py              |   2 +-
 7 files changed, 221 insertions(+), 11 deletions(-)

diff --git a/py/recipes/enrichment.py b/py/recipes/enrichment.py
index 908548ab..2df0c8e4 100644
--- a/py/recipes/enrichment.py
+++ b/py/recipes/enrichment.py
@@ -36,14 +36,14 @@ class RecipeEnricher:
         civitai_meta = None
         model_version_id = None
         
-        source_url = recipe.get("source_url") or recipe.get("source_path", "")
+        source_path = recipe.get("source_path", "")
         
         # Check if it's a Civitai image URL
-        image_id = extract_civitai_image_id(str(source_url))
+        image_id = extract_civitai_image_id(str(source_path))
         if image_id:
             try:
                 image_info = await civitai_client.get_image_info(
-                    image_id, source_url=str(source_url)
+                    image_id, source_url=str(source_path)
                 )
                 if image_info:
                     # Handle nested meta often found in Civitai API responses
diff --git a/py/routes/handlers/recipe_handlers.py b/py/routes/handlers/recipe_handlers.py
index 40f703a3..2a7b778b 100644
--- a/py/routes/handlers/recipe_handlers.py
+++ b/py/routes/handlers/recipe_handlers.py
@@ -93,6 +93,8 @@ class RecipeHandlerSet:
             "cancel_batch_import": self.batch_import.cancel_batch_import,
             "start_directory_import": self.batch_import.start_directory_import,
             "browse_directory": self.batch_import.browse_directory,
+            "check_image_exists": self.management.check_image_exists,
+            "import_from_url": self.management.import_from_url,
         }
 
 
@@ -541,7 +543,7 @@ class RecipeQueryHandler:
                     )
                     response_data.append(
                         {
-                            "type": "source_url",
+                            "type": "source_path",
                             "fingerprint": url,
                             "count": len(recipes),
                             "recipes": recipes,
@@ -772,13 +774,9 @@ class RecipeManagementHandler:
                 "base_model": params.get("base_model", "") or "",
                 "loras": lora_entries,
                 "gen_params": gen_params_request or {},
-                "source_url": image_url,
+                "source_path": params.get("source_path") or image_url,
             }
 
-            source_path = params.get("source_path")
-            if source_path:
-                metadata["source_path"] = source_path
-
             # Checkpoint handling
             if checkpoint_entry:
                 metadata["checkpoint"] = checkpoint_entry
@@ -1289,6 +1287,170 @@ class RecipeManagementHandler:
 
         return ""
 
+    async def check_image_exists(self, request: web.Request) -> web.Response:
+        """Report which Civitai image IDs already have a recipe in the library.
+
+        Reads the comma-separated ``image_ids`` query parameter and responds
+        with ``results`` mapping each numeric ID to ``in_library`` and
+        ``recipe_id``. Any failure is logged and returned as HTTP 500.
+        """
+        try:
+            await self._ensure_dependencies_ready()
+            recipe_scanner = self._recipe_scanner_getter()
+            if recipe_scanner is None:
+                raise RuntimeError("Recipe scanner unavailable")
+
+            # Non-numeric entries are dropped, so blank input yields empty results.
+            raw_ids = request.query.get("image_ids", "").split(",")
+            requested_ids = {part.strip() for part in raw_ids if part.strip().isdigit()}
+            if not requested_ids:
+                return web.json_response({"success": True, "results": {}})
+
+            cache = await recipe_scanner.get_cached_data()
+
+            # Lookup image_id -> recipe_id; the first recipe seen for an image wins.
+            # str() on both sides mirrors the enrichment path and matches string IDs.
+            image_to_recipe = {}
+            for recipe in getattr(cache, "raw_data", []):
+                source = recipe.get("source_path")
+                if not source:
+                    continue
+                image_id = extract_civitai_image_id(str(source))
+                if image_id:
+                    image_to_recipe.setdefault(str(image_id), recipe.get("id"))
+
+            results = {}
+            for img_id in requested_ids:
+                recipe_id = image_to_recipe.get(img_id)
+                results[img_id] = {
+                    "in_library": recipe_id is not None,
+                    "recipe_id": recipe_id,
+                }
+
+            return web.json_response({"success": True, "results": results})
+        except Exception as exc:
+            self._logger.error(
+                "Error checking image existence: %s", exc, exc_info=True
+            )
+            return web.json_response({"error": str(exc)}, status=500)
+
+    async def import_from_url(self, request: web.Request) -> web.Response:
+        """Create a recipe from a Civitai image URL (``image_url`` query parameter).
+
+        Responds with the existing recipe (``already_exists``) when a stored
+        ``source_path`` yields the same image ID; otherwise downloads the image,
+        seeds gen_params from embedded metadata, runs RecipeEnricher and saves a
+        new recipe. Validation/download errors return HTTP 400, others HTTP 500.
+        """
+        try:
+            await self._ensure_dependencies_ready()
+            recipe_scanner = self._recipe_scanner_getter()
+            if recipe_scanner is None:
+                raise RuntimeError("Recipe scanner unavailable")
+
+            image_url = request.query.get("image_url")
+            if not image_url:
+                raise RecipeValidationError("Missing required field: image_url")
+
+            image_id = extract_civitai_image_id(image_url)
+            if not image_id:
+                raise RecipeValidationError(
+                    "Could not extract Civitai image ID from URL"
+                )
+
+            # Duplicate check: reuse the recipe already imported from this image.
+            cache = await recipe_scanner.get_cached_data()
+            for recipe in getattr(cache, "raw_data", []):
+                source = recipe.get("source_path")
+                # str() mirrors the enrichment path for non-string source_path values.
+                if source and extract_civitai_image_id(str(source)) == image_id:
+                    return web.json_response({
+                        "success": True,
+                        "recipe_id": recipe.get("id"),
+                        "name": recipe.get("title", ""),
+                        "already_exists": True,
+                    })
+
+            # Download the image; the third tuple element is not used here.
+            image_bytes, extension, _civitai_meta = (
+                await self._download_remote_media(image_url)
+            )
+
+            # Embedded-metadata extraction is best effort: failures are only logged.
+            embedded_gen_params = {}
+            try:
+                temp_img_path = None
+                try:
+                    with tempfile.NamedTemporaryFile(
+                        suffix=extension, delete=False
+                    ) as temp_img:
+                        # Record the path before writing so a failed write still
+                        # gets its temp file removed below.
+                        temp_img_path = temp_img.name
+                        temp_img.write(image_bytes)
+
+                    raw_embedded = ExifUtils.extract_image_metadata(temp_img_path)
+                    if raw_embedded:
+                        parser = (
+                            self._analysis_service._recipe_parser_factory.create_parser(
+                                raw_embedded
+                            )
+                        )
+                        if parser:
+                            parsed_embedded = await parser.parse_metadata(
+                                raw_embedded, recipe_scanner=recipe_scanner
+                            )
+                            if parsed_embedded and "gen_params" in parsed_embedded:
+                                embedded_gen_params = parsed_embedded["gen_params"]
+                finally:
+                    if temp_img_path and os.path.exists(temp_img_path):
+                        os.unlink(temp_img_path)
+            except Exception as exc:
+                self._logger.warning("Failed to extract embedded metadata: %s", exc)
+
+            # Build metadata
+            metadata: Dict[str, Any] = {
+                "base_model": "",
+                "loras": [],
+                "gen_params": embedded_gen_params or {},
+                "source_path": image_url,
+            }
+
+            # Enrich via Civitai API
+            civitai_client = self._civitai_client_getter()
+            await RecipeEnricher.enrich_recipe(
+                recipe=metadata,
+                civitai_client=civitai_client,
+                request_params={},
+            )
+
+            # Name: first ten words of the prompt. A missing or whitespace-only
+            # prompt falls back to the image ID so the name is never empty.
+            gen_params = metadata.get("gen_params") or {}
+            prompt = gen_params.get("prompt") or gen_params.get("positivePrompt") or ""
+            name = " ".join(str(prompt).split()[:10]) or f"Civitai Image {image_id}"
+
+            # Parse tags from params if available
+            tags = self._parse_tags(request.query.get("tags"))
+
+            result = await self._persistence_service.save_recipe(
+                recipe_scanner=recipe_scanner,
+                image_bytes=image_bytes,
+                image_base64=None,
+                name=name,
+                tags=tags,
+                metadata=metadata,
+                extension=extension,
+            )
+            return web.json_response(result.payload, status=result.status)
+        except (RecipeValidationError, RecipeDownloadError) as exc:
+            return web.json_response({"error": str(exc)}, status=400)
+        except Exception as exc:
+            self._logger.error(
+                "Error importing recipe from URL: %s", exc, exc_info=True
+            )
+            return web.json_response({"error": str(exc)}, status=500)
+
 
 class RecipeAnalysisHandler:
     """Analyze images to extract recipe metadata."""
diff --git a/py/routes/recipe_route_registrar.py b/py/routes/recipe_route_registrar.py
index 95aedee5..b4c4bf05 100644
--- a/py/routes/recipe_route_registrar.py
+++ b/py/routes/recipe_route_registrar.py
@@ -70,6 +70,10 @@ ROUTE_DEFINITIONS: tuple[RouteDefinition, ...] = (
         "POST", "/api/lm/recipes/batch-import/directory", "start_directory_import"
     ),
     RouteDefinition("POST", "/api/lm/recipes/browse-directory", "browse_directory"),
+    RouteDefinition(
+        "GET", "/api/lm/recipes/check-image-exists", "check_image_exists"
+    ),
+    RouteDefinition("GET", "/api/lm/recipes/import-from-url", "import_from_url"),
 )
 
 
diff --git a/py/services/batch_import_service.py b/py/services/batch_import_service.py
index d501094e..976c8490 100644
--- a/py/services/batch_import_service.py
+++ b/py/services/batch_import_service.py
@@ -224,7 +224,7 @@ class BatchImportService:
                 return False
 
             for recipe in getattr(cache, "raw_data", []):
-                source_path = recipe.get("source_path") or recipe.get("source_url")
+                source_path = recipe.get("source_path")
                 if source_path and source_path == source:
                     return True
             return False
diff --git a/py/services/persistent_recipe_cache.py b/py/services/persistent_recipe_cache.py
index 438ff7ae..10f1dc7a 100644
--- a/py/services/persistent_recipe_cache.py
+++ b/py/services/persistent_recipe_cache.py
@@ -38,6 +38,7 @@ class PersistentRecipeCache:
         "json_path",
         "title",
         "folder",
+        "source_path",
         "base_model",
         "fingerprint",
         "created_date",
@@ -334,6 +335,7 @@ class PersistentRecipeCache:
                             json_path TEXT,
                             title TEXT,
                             folder TEXT,
+                            source_path TEXT,
                             base_model TEXT,
                             fingerprint TEXT,
                             created_date REAL,
@@ -358,6 +360,13 @@ class PersistentRecipeCache:
                         );
                         """
                     )
+                    # Migration: add source_path column to existing databases
+                    try:
+                        conn.execute(
+                            "ALTER TABLE recipes ADD COLUMN source_path TEXT"
+                        )
+                    except Exception:
+                        pass  # column already exists
                     conn.commit()
                 self._schema_initialized = True
             except Exception as exc:
@@ -406,6 +415,7 @@ class PersistentRecipeCache:
             json_path,
             recipe.get("title"),
             recipe.get("folder"),
+            recipe.get("source_path"),
             recipe.get("base_model"),
             recipe.get("fingerprint"),
             float(recipe.get("created_date") or 0.0),
@@ -456,6 +466,7 @@ class PersistentRecipeCache:
             "file_path": row["file_path"] or "",
             "title": row["title"] or "",
             "folder": row["folder"] or "",
+            "source_path": row["source_path"] or "",
             "base_model": row["base_model"] or "",
             "fingerprint": row["fingerprint"] or "",
             "created_date": row["created_date"] or 0.0,
diff --git a/py/services/recipe_scanner.py b/py/services/recipe_scanner.py
index 3797e5d9..ceeb8732 100644
--- a/py/services/recipe_scanner.py
+++ b/py/services/recipe_scanner.py
@@ -504,6 +504,9 @@ class RecipeScanner:
                     self._cache.raw_data = recipes
                     self._update_folder_metadata(self._cache)
                     self._sort_cache_sync()
+                    # Backfill source_path from JSON files if missing (schema migration)
+                    if self._backfill_source_path_if_needed(recipes, json_paths):
+                        self._persistent_cache.save_cache(recipes, json_paths)
                     return self._cache
                 else:
                     # Partial update: some files changed
@@ -514,6 +517,8 @@ class RecipeScanner:
                     self._cache.raw_data = recipes
                     self._update_folder_metadata(self._cache)
                     self._sort_cache_sync()
+                    # Backfill source_path from JSON files if missing (schema migration)
+                    self._backfill_source_path_if_needed(recipes, json_paths)
                     # Persist updated cache
                     self._persistent_cache.save_cache(recipes, json_paths)
                     return self._cache
@@ -642,6 +647,34 @@ class RecipeScanner:
 
         return recipes, changed, json_paths
 
+    def _backfill_source_path_if_needed(
+        self, recipes: List[Dict], json_paths: Dict[str, str]
+    ) -> bool:
+        """Copy missing ``source_path`` values from recipe JSON files into cache.
+
+        Best effort: unreadable or malformed files are logged and skipped.
+        Returns True if any recipes were updated (caller should persist cache).
+        """
+        import logging  # function-scope so the block needs no new file import
+
+        log = logging.getLogger(__name__)
+        updated = False
+        for recipe in recipes:
+            json_path = json_paths.get(str(recipe.get("id", "")))
+            if recipe.get("source_path") or not json_path:
+                continue
+            try:
+                with open(json_path, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+            except (OSError, ValueError) as exc:  # missing, unreadable or bad JSON
+                log.warning("Skipping source_path backfill for %s: %s", json_path, exc)
+                continue
+            found = data.get("source_path") if isinstance(data, dict) else None
+            if found:
+                recipe["source_path"] = found
+                updated = True
+        return updated
+
     def _full_directory_scan_sync(
         self, recipes_dir: str
     ) -> Tuple[List[Dict], Dict[str, str]]:
diff --git a/py/utils/civitai_utils.py b/py/utils/civitai_utils.py
index 7c2c9d11..de711404 100644
--- a/py/utils/civitai_utils.py
+++ b/py/utils/civitai_utils.py
@@ -7,7 +7,7 @@ from typing import Any, Dict, Iterable, Mapping, Sequence
 from urllib.parse import parse_qs, urlparse, urlunparse
 
 
-_SUPPORTED_CIVITAI_PAGE_HOSTS = frozenset({"civitai.com", "civitai.red"})
+_SUPPORTED_CIVITAI_PAGE_HOSTS = frozenset({"civitai.com", "civitai.red", "civitai.green"})
 DEFAULT_CIVITAI_PAGE_HOST = "civitai.com"
 _DEFAULT_ALLOW_COMMERCIAL_USE: Sequence[str] = ("Sell",)
 _LICENSE_DEFAULTS: Dict[str, Any] = {