checkpoint

2026-06-20 17:32:05 -03:00 · 2025-04-10 09:08:36 +08:00
parent 64c9e4aeca
commit 8fdfb68741
12 changed files with 1397 additions and 484 deletions
--- a/py/services/lora_scanner.py
+++ b/py/services/lora_scanner.py
@@ -4,13 +4,11 @@ import logging
 import asyncio
 import shutil
 import time
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Set

 from ..utils.models import LoraMetadata
 from ..config import config
-from ..utils.file_utils import load_metadata, get_file_info, normalize_path, find_preview_file, save_metadata
-from ..utils.lora_metadata import extract_lora_metadata
-from .lora_cache import LoraCache
+from .model_scanner import ModelScanner
 from .lora_hash_index import LoraHashIndex
 from .settings_manager import settings
 from ..utils.constants import NSFW_LEVELS
@@ -19,7 +17,7 @@ import sys

 logger = logging.getLogger(__name__)

-class LoraScanner:
+class LoraScanner(ModelScanner):
    """Service for scanning and managing LoRA files"""
    
    _instance = None
@@ -31,20 +29,20 @@ class LoraScanner:
        return cls._instance
    
    def __init__(self):
-        # 确保初始化只执行一次
+        # Ensure initialization happens only once
        if not hasattr(self, '_initialized'):
-            self._cache: Optional[LoraCache] = None
-            self._hash_index = LoraHashIndex()
-            self._initialization_lock = asyncio.Lock()
-            self._initialization_task: Optional[asyncio.Task] = None
+            # Define supported file extensions
+            file_extensions = {'.safetensors'}
+            
+            # Initialize parent class
+            super().__init__(
+                model_type="lora",
+                model_class=LoraMetadata, 
+                file_extensions=file_extensions,
+                hash_index=LoraHashIndex()
+            )
            self._initialized = True
-            self.file_monitor = None  # Add this line
-            self._tags_count = {}  # Add a dictionary to store tag counts
-
-    def set_file_monitor(self, monitor):
-        """Set file monitor instance"""
-        self.file_monitor = monitor
-
+    
    @classmethod
    async def get_instance(cls):
        """Get singleton instance with async support"""
@@ -52,89 +50,74 @@ class LoraScanner:
            if cls._instance is None:
                cls._instance = cls()
            return cls._instance
-
-    async def  get_cached_data(self, force_refresh: bool = False) -> LoraCache:
-        """Get cached LoRA data, refresh if needed"""
-        async with self._initialization_lock:
+    
+    def get_model_roots(self) -> List[str]:
+        """Return the root directories scanned for LoRA files (config.loras_roots)"""
+        return config.loras_roots
+        
+    async def scan_all_models(self) -> List[Dict]:
+        """Scan all LoRA directories and return metadata"""
+        all_loras = []
+        
+        # Create scan tasks for each directory
+        scan_tasks = []
+        for lora_root in self.get_model_roots():
+            task = asyncio.create_task(self._scan_directory(lora_root))
+            scan_tasks.append(task)
            
-            # 如果缓存未初始化但需要响应请求，返回空缓存
-            if self._cache is None and not force_refresh:
-                return LoraCache(
-                    raw_data=[],
-                    sorted_by_name=[],
-                    sorted_by_date=[],
-                    folders=[]
-                )
-
-            # 如果正在初始化，等待完成
-            if self._initialization_task and not self._initialization_task.done():
-                try:
-                    await self._initialization_task
-                except Exception as e:
-                    logger.error(f"Cache initialization failed: {e}")
-                    self._initialization_task = None
-            
-            if (self._cache is None or force_refresh):
+        # Wait for all tasks to complete
+        for task in scan_tasks:
+            try:
+                loras = await task
+                all_loras.extend(loras)
+            except Exception as e:
+                logger.error(f"Error scanning directory: {e}")
                
-                # 创建新的初始化任务
-                if not self._initialization_task or self._initialization_task.done():
-                    self._initialization_task = asyncio.create_task(self._initialize_cache())
+        return all_loras
+    
+    async def _scan_directory(self, root_path: str) -> List[Dict]:
+        """Scan a single directory for LoRA files"""
+        loras = []
+        original_root = root_path  # Save original root path
+        
+        async def scan_recursive(path: str, visited_paths: Set[str]):
+            """Recursively scan path; visited_paths holds the resolved paths already scanned, to avoid circular symlinks"""
+            try:
+                real_path = os.path.realpath(path)  # resolve symlinks so a cycle is recognised by its target
+                if real_path in visited_paths:
+                    logger.debug(f"Skipping already visited path: {path}")
+                    return
+                visited_paths.add(real_path)
                
-                try:
-                    await self._initialization_task
-                except Exception as e:
-                    logger.error(f"Cache initialization failed: {e}")
-                    # 如果缓存已存在，继续使用旧缓存
-                    if self._cache is None:
-                        raise  # 如果没有缓存，则抛出异常
-            
-            return self._cache
+                suffixes = tuple(self.file_extensions)  # str.endswith accepts a tuple of suffixes
+                with os.scandir(path) as it:
+                    for entry in list(it):  # materialize the entries first; the loop body awaits
+                        try:
+                            if entry.is_file(follow_symlinks=True) and entry.name.endswith(suffixes):
+                                # Use original path instead of real path
+                                file_path = entry.path.replace(os.sep, "/")
+                                await self._process_single_file(file_path, original_root, loras)
+                                await asyncio.sleep(0)  # yield to the event loop between files
+                            elif entry.is_dir(follow_symlinks=True):
+                                # For directories, continue scanning with original path
+                                await scan_recursive(entry.path, visited_paths)
+                        except Exception as e:
+                            logger.error(f"Error processing entry {entry.path}: {e}")
+            except Exception as e:
+                logger.error(f"Error scanning {path}: {e}")

-    async def _initialize_cache(self) -> None:
-        """Initialize or refresh the cache"""
+        await scan_recursive(root_path, set())
+        return loras
+
+    async def _process_single_file(self, file_path: str, root_path: str, loras: list):
+        """Process a single file and add to results list"""
        try:
-            start_time = time.time()
-            # Clear existing hash index
-            self._hash_index.clear()
-            
-            # Clear existing tags count
-            self._tags_count = {}
-            
-            # Scan for new data
-            raw_data = await self.scan_all_loras()
-            
-            # Build hash index and tags count
-            for lora_data in raw_data:
-                if 'sha256' in lora_data and 'file_path' in lora_data:
-                    self._hash_index.add_entry(lora_data['sha256'].lower(), lora_data['file_path'])
-                
-                # Count tags
-                if 'tags' in lora_data and lora_data['tags']:
-                    for tag in lora_data['tags']:
-                        self._tags_count[tag] = self._tags_count.get(tag, 0) + 1
-            
-            # Update cache
-            self._cache = LoraCache(
-                raw_data=raw_data,
-                sorted_by_name=[],
-                sorted_by_date=[],
-                folders=[]
-            )
-            
-            # Call resort_cache to create sorted views
-            await self._cache.resort()
-
-            self._initialization_task = None
-            logger.info(f"LoRA Manager: Cache initialization completed in {time.time() - start_time:.2f} seconds, found {len(raw_data)} loras")
+            result = await self._process_model_file(file_path, root_path)
+            if result:
+                loras.append(result)
        except Exception as e:
-            logger.error(f"LoRA Manager: Error initializing cache: {e}")
-            self._cache = LoraCache(
-                raw_data=[],
-                sorted_by_name=[],
-                sorted_by_date=[],
-                folders=[]
-            )
-
+            logger.error(f"Error processing {file_path}: {e}")
+    
    async def get_paginated_data(self, page: int, page_size: int, sort_by: str = 'name', 
                               folder: str = None, search: str = None, fuzzy_search: bool = False,
                               base_models: list = None, tags: list = None,
@@ -280,240 +263,14 @@ class LoraScanner:
        
        return result

-    def invalidate_cache(self):
-        """Invalidate the current cache"""
-        self._cache = None
-
-    async def scan_all_loras(self) -> List[Dict]:
-        """Scan all LoRA directories and return metadata"""
-        all_loras = []
-        
-        # 分目录异步扫描
-        scan_tasks = []
-        for loras_root in config.loras_roots:
-            task = asyncio.create_task(self._scan_directory(loras_root))
-            scan_tasks.append(task)
-            
-        for task in scan_tasks:
-            try:
-                loras = await task
-                all_loras.extend(loras)
-            except Exception as e:
-                logger.error(f"Error scanning directory: {e}")
-                
-        return all_loras
-
-    async def _scan_directory(self, root_path: str) -> List[Dict]:
-        """Scan a single directory for LoRA files"""
-        loras = []
-        original_root = root_path  # 保存原始根路径
-        
-        async def scan_recursive(path: str, visited_paths: set):
-            """递归扫描目录，避免循环链接"""
-            try:
-                real_path = os.path.realpath(path)
-                if real_path in visited_paths:
-                    logger.debug(f"Skipping already visited path: {path}")
-                    return
-                visited_paths.add(real_path)
-                
-                with os.scandir(path) as it:
-                    entries = list(it)
-                    for entry in entries:
-                        try:
-                            if entry.is_file(follow_symlinks=True) and entry.name.endswith('.safetensors'):
-                                # 使用原始路径而不是真实路径
-                                file_path = entry.path.replace(os.sep, "/")
-                                await self._process_single_file(file_path, original_root, loras)
-                                await asyncio.sleep(0)
-                            elif entry.is_dir(follow_symlinks=True):
-                                # 对于目录，使用原始路径继续扫描
-                                await scan_recursive(entry.path, visited_paths)
-                        except Exception as e:
-                            logger.error(f"Error processing entry {entry.path}: {e}")
-            except Exception as e:
-                logger.error(f"Error scanning {path}: {e}")
-
-        await scan_recursive(root_path, set())
-        return loras
-
-    async def _process_single_file(self, file_path: str, root_path: str, loras: list):
-        """处理单个文件并添加到结果列表"""
-        try:
-            result = await self._process_lora_file(file_path, root_path)
-            if result:
-                loras.append(result)
-        except Exception as e:
-            logger.error(f"Error processing {file_path}: {e}")
-
-    async def _process_lora_file(self, file_path: str, root_path: str) -> Dict:
-        """Process a single LoRA file and return its metadata"""
-        # Try loading existing metadata
-        metadata = await load_metadata(file_path)
-        
-        if metadata is None:
-            # Try to find and use .civitai.info file first
-            civitai_info_path = f"{os.path.splitext(file_path)[0]}.civitai.info"
-            if os.path.exists(civitai_info_path):
-                try:
-                    with open(civitai_info_path, 'r', encoding='utf-8') as f:
-                        version_info = json.load(f)
-                    
-                    file_info = next((f for f in version_info.get('files', []) if f.get('primary')), None)
-                    if file_info:
-                        # Create a minimal file_info with the required fields
-                        file_name = os.path.splitext(os.path.basename(file_path))[0]
-                        file_info['name'] = file_name
-                    
-                        # Use from_civitai_info to create metadata
-                        metadata = LoraMetadata.from_civitai_info(version_info, file_info, file_path)
-                        metadata.preview_url = find_preview_file(file_name, os.path.dirname(file_path))
-                        await save_metadata(file_path, metadata)
-                        logger.debug(f"Created metadata from .civitai.info for {file_path}")
-                except Exception as e:
-                    logger.error(f"Error creating metadata from .civitai.info for {file_path}: {e}")
-            
-            # If still no metadata, create new metadata using get_file_info
-            if metadata is None:
-                metadata = await get_file_info(file_path)
-        
-        # Convert to dict and add folder info
-        lora_data = metadata.to_dict()
-        # Try to fetch missing metadata from Civitai if needed
-        await self._fetch_missing_metadata(file_path, lora_data)
-        rel_path = os.path.relpath(file_path, root_path)
-        folder = os.path.dirname(rel_path)
-        lora_data['folder'] = folder.replace(os.path.sep, '/')
-        
-        return lora_data
-            
-    async def _fetch_missing_metadata(self, file_path: str, lora_data: Dict) -> None:
-        """Fetch missing description and tags from Civitai if needed
-        
-        Args:
-            file_path: Path to the lora file
-            lora_data: Lora metadata dictionary to update
-        """
-        try:
-            # Skip if already marked as deleted on Civitai
-            if lora_data.get('civitai_deleted', False):
-                logger.debug(f"Skipping metadata fetch for {file_path}: marked as deleted on Civitai")
-                return
-
-            # Check if we need to fetch additional metadata from Civitai
-            needs_metadata_update = False
-            model_id = None
-            
-            # Check if we have Civitai model ID but missing metadata
-            if lora_data.get('civitai'):
-                # Try to get model ID directly from the correct location
-                model_id = lora_data['civitai'].get('modelId')
-                
-                if model_id:
-                    model_id = str(model_id)
-                    # Check if tags are missing or empty
-                    tags_missing = not lora_data.get('tags') or len(lora_data.get('tags', [])) == 0
-                    
-                    # Check if description is missing or empty
-                    desc_missing = not lora_data.get('modelDescription') or lora_data.get('modelDescription') in (None, "")
-                    
-                    needs_metadata_update = tags_missing or desc_missing
-            
-            # Fetch missing metadata if needed
-            if needs_metadata_update and model_id:
-                logger.debug(f"Fetching missing metadata for {file_path} with model ID {model_id}")
-                from ..services.civitai_client import CivitaiClient
-                client = CivitaiClient()
-                
-                # Get metadata and status code
-                model_metadata, status_code = await client.get_model_metadata(model_id)
-                await client.close()
-                
-                # Handle 404 status (model deleted from Civitai)
-                if status_code == 404:
-                    logger.warning(f"Model {model_id} appears to be deleted from Civitai (404 response)")
-                    # Mark as deleted to avoid future API calls
-                    lora_data['civitai_deleted'] = True
-                    
-                    # Save the updated metadata back to file
-                    metadata_path = os.path.splitext(file_path)[0] + '.metadata.json'
-                    with open(metadata_path, 'w', encoding='utf-8') as f:
-                        json.dump(lora_data, f, indent=2, ensure_ascii=False)
-                
-                # Process valid metadata if available
-                elif model_metadata:
-                    logger.debug(f"Updating metadata for {file_path} with model ID {model_id}")
-                    
-                    # Update tags if they were missing
-                    if model_metadata.get('tags') and (not lora_data.get('tags') or len(lora_data.get('tags', [])) == 0):
-                        lora_data['tags'] = model_metadata['tags']
-                    
-                    # Update description if it was missing
-                    if model_metadata.get('description') and (not lora_data.get('modelDescription') or lora_data.get('modelDescription') in (None, "")):
-                        lora_data['modelDescription'] = model_metadata['description']
-                    
-                    # Save the updated metadata back to file
-                    metadata_path = os.path.splitext(file_path)[0] + '.metadata.json'
-                    with open(metadata_path, 'w', encoding='utf-8') as f:
-                        json.dump(lora_data, f, indent=2, ensure_ascii=False)
-        except Exception as e:
-            logger.error(f"Failed to update metadata from Civitai for {file_path}: {e}")
-
-    async def update_preview_in_cache(self, file_path: str, preview_url: str) -> bool:
-        """Update preview URL in cache for a specific lora
-        
-        Args:
-            file_path: The file path of the lora to update
-            preview_url: The new preview URL
-            
-        Returns:
-            bool: True if the update was successful, False if cache doesn't exist or lora wasn't found
-        """
-        if self._cache is None:
-            return False
-
-        return await self._cache.update_preview_url(file_path, preview_url)
-
-    async def scan_single_lora(self, file_path: str) -> Optional[Dict]:
-        """Scan a single LoRA file and return its metadata"""
-        try:
-            if not os.path.exists(os.path.realpath(file_path)):
-                return None
-                
-            # 获取基本文件信息
-            metadata = await get_file_info(file_path)
-            if not metadata:
-                return None
-                
-            folder = self._calculate_folder(file_path)
-                    
-            # 确保 folder 字段存在
-            metadata_dict = metadata.to_dict()
-            metadata_dict['folder'] = folder or ''
-            
-            return metadata_dict
-            
-        except Exception as e:
-            logger.error(f"Error scanning {file_path}: {e}")
-            return None
-    
-    def _calculate_folder(self, file_path: str) -> str:
-        """Calculate the folder path for a LoRA file"""
-        # 使用原始路径计算相对路径
-        for root in config.loras_roots:
-            if file_path.startswith(root):
-                rel_path = os.path.relpath(file_path, root)
-                return os.path.dirname(rel_path).replace(os.path.sep, '/')
-        return ''
-
    async def move_model(self, source_path: str, target_path: str) -> bool:
        """Move a model and its associated files to a new location"""
        try:
-            # 保持原始路径格式
+            # Keep original path format
            source_path = source_path.replace(os.sep, '/')
            target_path = target_path.replace(os.sep, '/')
            
-            # 其余代码保持不变
+            # Rest of the code remains unchanged
            base_name = os.path.splitext(os.path.basename(source_path))[0]
            source_dir = os.path.dirname(source_path)
            
@@ -521,7 +278,7 @@ class LoraScanner:
            
            target_lora = os.path.join(target_path, f"{base_name}.safetensors").replace(os.sep, '/')

-            # 使用真实路径进行文件操作
+            # Use real paths for file operations
            real_source = os.path.realpath(source_path)
            real_target = os.path.realpath(target_lora)
            
@@ -537,7 +294,7 @@ class LoraScanner:
                    file_size
                )
            
-            # 使用真实路径进行文件操作
+            # Use real paths for file operations
            shutil.move(real_source, real_target)
            
            # Move associated files
@@ -648,7 +405,7 @@ class LoraScanner:
        except Exception as e:
            logger.error(f"Error updating metadata paths: {e}", exc_info=True)

-    # Add new methods for hash index functionality
+    # Lora-specific hash index functionality
    def has_lora_hash(self, sha256: str) -> bool:
        """Check if a LoRA with given hash exists"""
        return self._hash_index.has_hash(sha256.lower())
@@ -681,16 +438,8 @@ class LoraScanner:
        
        return None

-    # Add new method to get top tags
    async def get_top_tags(self, limit: int = 20) -> List[Dict[str, any]]:
-        """Get top tags sorted by count
-        
-        Args:
-            limit: Maximum number of tags to return
-            
-        Returns:
-            List of dictionaries with tag name and count, sorted by count
-        """
+        """Get top tags sorted by count"""
        # Make sure cache is initialized
        await self.get_cached_data()
        
@@ -705,14 +454,7 @@ class LoraScanner:
        return sorted_tags[:limit]
        
    async def get_base_models(self, limit: int = 20) -> List[Dict[str, any]]:
-        """Get base models used in loras sorted by frequency
-        
-        Args:
-            limit: Maximum number of base models to return
-            
-        Returns:
-            List of dictionaries with base model name and count, sorted by count
-        """
+        """Get base models used in loras sorted by frequency"""
        # Make sure cache is initialized
        cache = await self.get_cached_data()