perf(recipe_scanner): eliminate event loop blocking during cache rebuild

Refactor force_refresh path to use thread pool execution instead of blocking
the event loop shared with ComfyUI. Key changes:

- Fix 1: Route force_refresh through _initialize_recipe_cache_sync() in thread pool
- Fix 2: Add GIL release points (time.sleep(0)) every 100 files in sync loops
- Fix 3: Move RecipeCache.resort() to thread pool via run_in_executor
- Fix 4: Persist cache automatically after force_refresh
- Fix 5: Increase yield frequency in _enrich_cache_metadata (every recipe)

This eliminates the ~5 minute freeze when rebuilding 30K recipe cache.

Fixes performance issue where ComfyUI became unresponsive during recipe
scanning due to shared Python event loop blocking.
This commit is contained in:
Will Miao
2026-03-04 15:10:46 +08:00
parent 1ed503a6b5
commit 4d8113464c
3 changed files with 982 additions and 634 deletions

View File

@@ -4,6 +4,7 @@ from dataclasses import dataclass
from operator import itemgetter
from natsort import natsorted
@dataclass
class RecipeCache:
"""Cache structure for Recipe data"""
@@ -21,11 +22,18 @@ class RecipeCache:
self.folder_tree = self.folder_tree or {}
async def resort(self, name_only: bool = False):
"""Resort all cached data views"""
"""Resort all cached data views in a thread pool to avoid blocking the event loop."""
async with self._lock:
self._resort_locked(name_only=name_only)
loop = asyncio.get_event_loop()
await loop.run_in_executor(
None,
self._resort_locked,
name_only,
)
async def update_recipe_metadata(self, recipe_id: str, metadata: Dict, *, resort: bool = True) -> bool:
async def update_recipe_metadata(
self, recipe_id: str, metadata: Dict, *, resort: bool = True
) -> bool:
"""Update metadata for a specific recipe in all cached data
Args:
@@ -37,7 +45,7 @@ class RecipeCache:
"""
async with self._lock:
for item in self.raw_data:
if str(item.get('id')) == str(recipe_id):
if str(item.get("id")) == str(recipe_id):
item.update(metadata)
if resort:
self._resort_locked()
@@ -52,7 +60,9 @@ class RecipeCache:
if resort:
self._resort_locked()
async def remove_recipe(self, recipe_id: str, *, resort: bool = False) -> Optional[Dict]:
async def remove_recipe(
self, recipe_id: str, *, resort: bool = False
) -> Optional[Dict]:
"""Remove a recipe from the cache by ID.
Args:
@@ -64,14 +74,16 @@ class RecipeCache:
async with self._lock:
for index, recipe in enumerate(self.raw_data):
if str(recipe.get('id')) == str(recipe_id):
if str(recipe.get("id")) == str(recipe_id):
removed = self.raw_data.pop(index)
if resort:
self._resort_locked()
return removed
return None
async def bulk_remove(self, recipe_ids: Iterable[str], *, resort: bool = False) -> List[Dict]:
async def bulk_remove(
self, recipe_ids: Iterable[str], *, resort: bool = False
) -> List[Dict]:
"""Remove multiple recipes from the cache."""
id_set = {str(recipe_id) for recipe_id in recipe_ids}
@@ -79,21 +91,25 @@ class RecipeCache:
return []
async with self._lock:
removed = [item for item in self.raw_data if str(item.get('id')) in id_set]
removed = [item for item in self.raw_data if str(item.get("id")) in id_set]
if not removed:
return []
self.raw_data = [item for item in self.raw_data if str(item.get('id')) not in id_set]
self.raw_data = [
item for item in self.raw_data if str(item.get("id")) not in id_set
]
if resort:
self._resort_locked()
return removed
async def replace_recipe(self, recipe_id: str, new_data: Dict, *, resort: bool = False) -> bool:
async def replace_recipe(
self, recipe_id: str, new_data: Dict, *, resort: bool = False
) -> bool:
"""Replace cached data for a recipe."""
async with self._lock:
for index, recipe in enumerate(self.raw_data):
if str(recipe.get('id')) == str(recipe_id):
if str(recipe.get("id")) == str(recipe_id):
self.raw_data[index] = new_data
if resort:
self._resort_locked()
@@ -105,7 +121,7 @@ class RecipeCache:
async with self._lock:
for recipe in self.raw_data:
if str(recipe.get('id')) == str(recipe_id):
if str(recipe.get("id")) == str(recipe_id):
return dict(recipe)
return None
@@ -115,16 +131,13 @@ class RecipeCache:
async with self._lock:
return [dict(item) for item in self.raw_data]
def _resort_locked(self, *, name_only: bool = False) -> None:
def _resort_locked(self, name_only: bool = False) -> None:
"""Sort cached views. Caller must hold ``_lock``."""
self.sorted_by_name = natsorted(
self.raw_data,
key=lambda x: x.get('title', '').lower()
self.raw_data, key=lambda x: x.get("title", "").lower()
)
if not name_only:
self.sorted_by_date = sorted(
self.raw_data,
key=itemgetter('created_date', 'file_path'),
reverse=True
)
self.raw_data, key=itemgetter("created_date", "file_path"), reverse=True
)

File diff suppressed because it is too large Load Diff