fix(recipes): detect duplicates for remote imports using modelVersionId and Civitai URL, #750

- Use modelVersionId as fallback for all loras in fingerprint calculation (not just deleted)
- Add URL-based duplicate detection using source_path field
- Combine both fingerprint and URL-based duplicate detection in API response
- Fix _download_remote_media return type and unbound variable issue
This commit is contained in:
Will Miao
2026-02-03 21:31:17 +08:00
parent 990a3527e4
commit c12aefa82a
5 changed files with 271 additions and 8 deletions

View File

@@ -0,0 +1,110 @@
"""Test for duplicate detection by source URL."""
import pytest
from unittest.mock import AsyncMock, MagicMock
@pytest.mark.asyncio
async def test_find_duplicate_recipes_by_source():
    """Recipes sharing one source URL are reported as a single duplicate group.

    The cache holds two recipes with the same Civitai image URL, one recipe
    with a different URL and one with an empty ``source_path``; only the
    shared URL is expected in the result, mapped to both recipe ids.
    """
    from py.services.recipe_scanner import RecipeScanner

    shared_url = 'https://civitai.com/images/119165946'
    first_id = '8705c972-ef08-47f3-8ac3-9ac3b8ff4c0b'
    second_id = '52e636ce-ea9f-4f64-a6a9-c704bd715889'

    fake_cache = MagicMock()
    fake_cache.raw_data = [
        {'id': first_id, 'source_path': shared_url, 'title': 'Recipe 1'},
        {'id': second_id, 'source_path': shared_url, 'title': 'Recipe 2'},
        {
            'id': '00000000-0000-0000-0000-000000000001',
            'source_path': 'https://civitai.com/images/999999999',
            'title': 'Recipe 3',
        },
        {
            'id': '00000000-0000-0000-0000-000000000002',
            'source_path': '',
            'title': 'Recipe 4 (no source)',
        },
    ]

    # A spec'd mock plays the role of ``self``; only the async cache accessor
    # is stubbed so the method under test reads the fixture above.
    stub_scanner = MagicMock(spec=RecipeScanner)
    stub_scanner.get_cached_data = AsyncMock(return_value=fake_cache)

    # Run the real implementation, unbound, against the stubbed scanner.
    groups = await RecipeScanner.find_duplicate_recipes_by_source(stub_scanner)

    assert len(groups) == 1
    assert shared_url in groups
    matched_ids = groups[shared_url]
    assert len(matched_ids) == 2
    assert first_id in matched_ids
    assert second_id in matched_ids
@pytest.mark.asyncio
async def test_find_duplicate_recipes_by_source_empty():
    """An empty result is expected when no two recipes share a source URL.

    The cache holds one recipe with a URL and one with an empty
    ``source_path``, so there is nothing to group.
    """
    from py.services.recipe_scanner import RecipeScanner

    fake_cache = MagicMock()
    fake_cache.raw_data = [
        {
            'id': '8705c972-ef08-47f3-8ac3-9ac3b8ff4c0b',
            'source_path': 'https://civitai.com/images/119165946',
            'title': 'Recipe 1',
        },
        {
            'id': '00000000-0000-0000-0000-000000000002',
            'source_path': '',
            'title': 'Recipe 2 (no source)',
        },
    ]

    # A spec'd mock plays the role of ``self``; only the async cache accessor
    # is stubbed so the method under test reads the fixture above.
    stub_scanner = MagicMock(spec=RecipeScanner)
    stub_scanner.get_cached_data = AsyncMock(return_value=fake_cache)

    # Run the real implementation, unbound, against the stubbed scanner.
    groups = await RecipeScanner.find_duplicate_recipes_by_source(stub_scanner)

    assert len(groups) == 0
@pytest.mark.asyncio
async def test_find_duplicate_recipes_by_source_trimming_whitespace():
    """Source URLs differing only by surrounding spaces land in one group.

    The second recipe stores the same URL padded with a leading and a
    trailing space; the result is expected to be keyed by the unpadded URL
    and to contain both recipes.
    """
    from py.services.recipe_scanner import RecipeScanner

    clean_url = 'https://civitai.com/images/119165946'

    fake_cache = MagicMock()
    fake_cache.raw_data = [
        {
            'id': '8705c972-ef08-47f3-8ac3-9ac3b8ff4c0b',
            'source_path': clean_url,
            'title': 'Recipe 1',
        },
        {
            'id': '52e636ce-ea9f-4f64-a6a9-c704bd715889',
            'source_path': ' https://civitai.com/images/119165946 ',
            'title': 'Recipe 2',
        },
    ]

    # A spec'd mock plays the role of ``self``; only the async cache accessor
    # is stubbed so the method under test reads the fixture above.
    stub_scanner = MagicMock(spec=RecipeScanner)
    stub_scanner.get_cached_data = AsyncMock(return_value=fake_cache)

    # Run the real implementation, unbound, against the stubbed scanner.
    groups = await RecipeScanner.find_duplicate_recipes_by_source(stub_scanner)

    assert len(groups) == 1
    assert clean_url in groups
    assert len(groups[clean_url]) == 2

View File

@@ -0,0 +1,100 @@
"""Test for modelVersionId fallback in fingerprint calculation."""
import pytest
from py.utils.utils import calculate_recipe_fingerprint
def test_calculate_fingerprint_with_model_version_id_fallback():
    """A non-deleted lora with an empty hash is fingerprinted by modelVersionId."""
    hashless_lora = {
        "hash": "",
        "strength": 1.0,
        "modelVersionId": 2639467,
        "isDeleted": False,
        "exclude": False,
    }

    result = calculate_recipe_fingerprint([hashless_lora])

    # The version id takes the place of the missing hash in "<id>:<strength>".
    assert result == "2639467:1.0"
def test_calculate_fingerprint_with_multiple_model_version_ids():
    """Several hashless loras each contribute a modelVersionId-based entry.

    The expected string lists ``1234567`` before ``2639467`` even though the
    input order is the reverse, with entries joined by ``|``.
    """
    stronger_lora = {
        "hash": "",
        "strength": 1.0,
        "modelVersionId": 2639467,
        "isDeleted": False,
        "exclude": False,
    }
    weaker_lora = {
        "hash": "",
        "strength": 0.8,
        "modelVersionId": 1234567,
        "isDeleted": False,
        "exclude": False,
    }

    result = calculate_recipe_fingerprint([stronger_lora, weaker_lora])

    assert result == "1234567:0.8|2639467:1.0"
def test_calculate_fingerprint_with_deleted_lora():
    """A lora flagged ``isDeleted`` still contributes via its modelVersionId."""
    deleted_lora = {
        "hash": "",
        "strength": 1.0,
        "modelVersionId": 2639467,
        "isDeleted": True,
        "exclude": False,
    }

    result = calculate_recipe_fingerprint([deleted_lora])

    assert result == "2639467:1.0"
def test_calculate_fingerprint_with_excluded_lora():
    """A lora flagged ``exclude`` is left out even when it has a modelVersionId."""
    excluded_lora = {
        "hash": "",
        "strength": 1.0,
        "modelVersionId": 2639467,
        "isDeleted": False,
        "exclude": True,
    }

    result = calculate_recipe_fingerprint([excluded_lora])

    # The only lora is excluded, so the fingerprint is expected to be empty.
    assert result == ""
def test_calculate_fingerprint_prefers_hash_over_version_id():
    """When both a hash and a modelVersionId are present, the hash is used."""
    hashed_lora = {
        "hash": "abc123",
        "strength": 1.0,
        "modelVersionId": 2639467,
        "isDeleted": False,
        "exclude": False,
    }

    result = calculate_recipe_fingerprint([hashed_lora])

    assert result == "abc123:1.0"
def test_calculate_fingerprint_without_hash_or_version_id():
    """A lora with an empty hash and a modelVersionId of 0 is skipped."""
    unidentified_lora = {
        "hash": "",
        "strength": 1.0,
        "modelVersionId": 0,
        "isDeleted": False,
        "exclude": False,
    }

    result = calculate_recipe_fingerprint([unidentified_lora])

    # The only lora has no usable identifier, so the fingerprint is expected
    # to be empty.
    assert result == ""