Refactor search functionality in Lora and Recipe scanners to utilize fuzzy matching

- Introduced a new fuzzy_match utility function for improved search accuracy across Lora and Recipe scanners. - Updated search logic in LoraScanner and RecipeScanner to leverage fuzzy matching for titles, tags, and filenames, enhancing user experience. - Removed deprecated search methods to streamline the codebase and improve maintainability. - Adjusted API routes to ensure compatibility with the new search options, including recursive search handling.
2026-05-06 16:36:45 -03:00 · 2025-03-20 16:55:51 +08:00
parent 19ff2ebfe1
commit 607ab35cce
7 changed files with 98 additions and 106 deletions
--- a/py/services/lora_scanner.py
+++ b/py/services/lora_scanner.py
@@ -9,10 +9,10 @@ from operator import itemgetter
 from ..config import config
 from ..utils.file_utils import load_metadata, get_file_info
 from .lora_cache import LoraCache
-from difflib import SequenceMatcher
 from .lora_hash_index import LoraHashIndex
 from .settings_manager import settings
 from ..utils.constants import NSFW_LEVELS
+from ..utils.utils import fuzzy_match
 import sys

 logger = logging.getLogger(__name__)
@@ -132,45 +132,9 @@ class LoraScanner:
                folders=[]
            )

-    def fuzzy_match(self, text: str, pattern: str, threshold: float = 0.7) -> bool:
-        """
-        Check if text matches pattern using fuzzy matching.
-        Returns True if similarity ratio is above threshold.
-        """
-        if not pattern or not text:
-            return False
-        
-        # Convert both to lowercase for case-insensitive matching
-        text = text.lower()
-        pattern = pattern.lower()
-        
-        # Split pattern into words
-        search_words = pattern.split()
-        
-        # Check each word
-        for word in search_words:
-            # First check if word is a substring (faster)
-            if word in text:
-                continue
-            
-            # If not found as substring, try fuzzy matching
-            # Check if any part of the text matches this word
-            found_match = False
-            for text_part in text.split():
-                ratio = SequenceMatcher(None, text_part, word).ratio()
-                if ratio >= threshold:
-                    found_match = True
-                    break
-                
-            if not found_match:
-                return False
-        
-        # All words found either as substrings or fuzzy matches
-        return True
-
    async def get_paginated_data(self, page: int, page_size: int, sort_by: str = 'name', 
                               folder: str = None, search: str = None, fuzzy: bool = False,
-                               recursive: bool = False, base_models: list = None, tags: list = None,
+                               base_models: list = None, tags: list = None,
                               search_options: dict = None) -> Dict:
        """Get paginated and filtered lora data
        
@@ -181,10 +145,9 @@ class LoraScanner:
            folder: Filter by folder path
            search: Search term
            fuzzy: Use fuzzy matching for search
-            recursive: Include subfolders when folder filter is applied
            base_models: List of base models to filter by
            tags: List of tags to filter by
-            search_options: Dictionary with search options (filename, modelname, tags)
+            search_options: Dictionary with search options (filename, modelname, tags, recursive)
        """
        cache = await self.get_cached_data()

@@ -193,7 +156,8 @@ class LoraScanner:
            search_options = {
                'filename': True,
                'modelname': True,
-                'tags': False
+                'tags': False,
+                'recursive': False
            }

        # Get the base data set
@@ -208,7 +172,7 @@ class LoraScanner:
        
        # Apply folder filtering
        if folder is not None:
-            if recursive:
+            if search_options.get('recursive', False):
                # Recursive mode: match all paths starting with this folder
                filtered_data = [
                    item for item in filtered_data 
@@ -237,16 +201,47 @@ class LoraScanner:
        
        # Apply search filtering
        if search:
-            if fuzzy:
-                filtered_data = [
-                    item for item in filtered_data 
-                    if self._fuzzy_search_match(item, search, search_options)
-                ]
-            else:
-                filtered_data = [
-                    item for item in filtered_data 
-                    if self._exact_search_match(item, search, search_options)
-                ]
+            search_results = []
+            for item in filtered_data:
+                # Check filename if enabled
+                if search_options.get('filename', True):
+                    if fuzzy:
+                        if fuzzy_match(item.get('file_name', ''), search):
+                            search_results.append(item)
+                            continue
+                    else:
+                        if search.lower() in item.get('file_name', '').lower():
+                            search_results.append(item)
+                            continue
+                            
+                # Check model name if enabled
+                if search_options.get('modelname', True):
+                    if fuzzy:
+                        if fuzzy_match(item.get('model_name', ''), search):
+                            search_results.append(item)
+                            continue
+                    else:
+                        if search.lower() in item.get('model_name', '').lower():
+                            search_results.append(item)
+                            continue
+                            
+                # Check tags if enabled
+                if search_options.get('tags', False) and item.get('tags'):
+                    found_tag = False
+                    for tag in item['tags']:
+                        if fuzzy:
+                            if fuzzy_match(tag, search):
+                                found_tag = True
+                                break
+                        else:
+                            if search.lower() in tag.lower():
+                                found_tag = True
+                                break
+                    if found_tag:
+                        search_results.append(item)
+                        continue
+                        
+            filtered_data = search_results

        # Calculate pagination
        total_items = len(filtered_data)
@@ -263,44 +258,6 @@ class LoraScanner:
        
        return result

-    def _fuzzy_search_match(self, item: Dict, search: str, search_options: Dict) -> bool:
-        """Check if an item matches the search term using fuzzy matching with search options"""
-        # Check filename if enabled
-        if search_options.get('filename', True) and self.fuzzy_match(item.get('file_name', ''), search):
-            return True
-            
-        # Check model name if enabled
-        if search_options.get('modelname', True) and self.fuzzy_match(item.get('model_name', ''), search):
-            return True
-            
-        # Check tags if enabled
-        if search_options.get('tags', False) and item.get('tags'):
-            for tag in item['tags']:
-                if self.fuzzy_match(tag, search):
-                    return True
-                    
-        return False
-
-    def _exact_search_match(self, item: Dict, search: str, search_options: Dict) -> bool:
-        """Check if an item matches the search term using exact matching with search options"""
-        search = search.lower()
-        
-        # Check filename if enabled
-        if search_options.get('filename', True) and search in item.get('file_name', '').lower():
-            return True
-            
-        # Check model name if enabled
-        if search_options.get('modelname', True) and search in item.get('model_name', '').lower():
-            return True
-            
-        # Check tags if enabled
-        if search_options.get('tags', False) and item.get('tags'):
-            for tag in item['tags']:
-                if search in tag.lower():
-                    return True
-                    
-        return False
-
    def invalidate_cache(self):
        """Invalidate the current cache"""
        self._cache = None