fix(recipes): detect duplicates for remote imports using modelVersionId and Civitai URL, #750

- Use modelVersionId as fallback for all loras in fingerprint calculation (not just deleted)
- Add URL-based duplicate detection using source_path field
- Combine both fingerprint and URL-based duplicate detection in API response
- Fix _download_remote_media return type and unbound variable issue
This commit is contained in:
Will Miao
2026-02-03 21:31:17 +08:00
parent 990a3527e4
commit c12aefa82a
5 changed files with 271 additions and 8 deletions

View File

@@ -412,10 +412,11 @@ class RecipeQueryHandler:
if recipe_scanner is None:
raise RuntimeError("Recipe scanner unavailable")
duplicate_groups = await recipe_scanner.find_all_duplicate_recipes()
fingerprint_groups = await recipe_scanner.find_all_duplicate_recipes()
url_groups = await recipe_scanner.find_duplicate_recipes_by_source()
response_data = []
for fingerprint, recipe_ids in duplicate_groups.items():
for fingerprint, recipe_ids in fingerprint_groups.items():
if len(recipe_ids) <= 1:
continue
@@ -439,12 +440,44 @@ class RecipeQueryHandler:
recipes.sort(key=lambda entry: entry.get("modified", 0), reverse=True)
response_data.append(
{
"type": "fingerprint",
"fingerprint": fingerprint,
"count": len(recipes),
"recipes": recipes,
}
)
for url, recipe_ids in url_groups.items():
if len(recipe_ids) <= 1:
continue
recipes = []
for recipe_id in recipe_ids:
recipe = await recipe_scanner.get_recipe_by_id(recipe_id)
if recipe:
recipes.append(
{
"id": recipe.get("id"),
"title": recipe.get("title"),
"file_url": recipe.get("file_url")
or self._format_recipe_file_url(recipe.get("file_path", "")),
"modified": recipe.get("modified"),
"created_date": recipe.get("created_date"),
"lora_count": len(recipe.get("loras", [])),
}
)
if len(recipes) >= 2:
recipes.sort(key=lambda entry: entry.get("modified", 0), reverse=True)
response_data.append(
{
"type": "source_url",
"fingerprint": url,
"count": len(recipes),
"recipes": recipes,
}
)
response_data.sort(key=lambda entry: entry["count"], reverse=True)
return web.json_response({"success": True, "duplicate_groups": response_data})
except Exception as exc:
@@ -1021,7 +1054,7 @@ class RecipeManagementHandler:
"exclude": False,
}
async def _download_remote_media(self, image_url: str) -> tuple[bytes, str]:
async def _download_remote_media(self, image_url: str) -> tuple[bytes, str, Any]:
civitai_client = self._civitai_client_getter()
downloader = await self._downloader_factory()
temp_path = None
@@ -1029,6 +1062,7 @@ class RecipeManagementHandler:
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_path = temp_file.name
download_url = image_url
image_info = None
civitai_match = re.match(r"https://civitai\.com/images/(\d+)", image_url)
if civitai_match:
if civitai_client is None:

View File

@@ -2231,3 +2231,26 @@ class RecipeScanner:
duplicate_groups = {k: v for k, v in fingerprint_groups.items() if len(v) > 1}
return duplicate_groups
async def find_duplicate_recipes_by_source(self) -> dict:
"""Find all recipe duplicates based on source_path (Civitai image URLs)
Returns:
Dictionary where keys are source URLs and values are lists of recipe IDs
"""
cache = await self.get_cached_data()
url_groups = {}
for recipe in cache.raw_data:
source_url = recipe.get('source_path', '').strip()
if not source_url:
continue
if source_url not in url_groups:
url_groups[source_url] = []
url_groups[source_url].append(recipe.get('id'))
duplicate_groups = {k: v for k, v in url_groups.items() if len(v) > 1}
return duplicate_groups

View File

@@ -138,19 +138,15 @@ def calculate_recipe_fingerprint(loras):
if not loras:
return ""
# Filter valid entries and extract hash and strength
valid_loras = []
for lora in loras:
# Skip excluded loras
if lora.get("exclude", False):
continue
# Get the hash - use modelVersionId as fallback if hash is empty
hash_value = lora.get("hash", "").lower()
if not hash_value and lora.get("isDeleted", False) and lora.get("modelVersionId"):
if not hash_value and lora.get("modelVersionId"):
hash_value = str(lora.get("modelVersionId"))
# Skip entries without a valid hash
if not hash_value:
continue