Add tag filtering checkpoint

2026-05-06 16:36:45 -03:00 · 2025-03-10 13:18:56 +08:00
parent 0069f84630
commit 721bef3ff8
15 changed files with 482 additions and 50 deletions
--- a/py/services/civitai_client.py
+++ b/py/services/civitai_client.py
@@ -163,41 +163,52 @@ class CivitaiClient:
            logger.error(f"Error fetching model version info: {e}")
            return None

-    async def get_model_description(self, model_id: str) -> Optional[str]:
-        """Fetch the model description from Civitai API
+    async def get_model_metadata(self, model_id: str) -> Optional[Dict]:
+        """Fetch a model's description and tags from the Civitai API
        
        Args:
            model_id: The Civitai model ID
            
        Returns:
-            Optional[str]: The model description HTML or None if not found
+            Optional[Dict]: Dict with "description" and "tags" keys, or None on a non-200 response, an error, or when both are empty
        """
        try:
            session = await self.session
            headers = self._get_request_headers()
            url = f"{self.base_url}/models/{model_id}"
            
-            logger.info(f"Fetching model description from {url}")
+            logger.info(f"Fetching model metadata from {url}")
            
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
-                    logger.warning(f"Failed to fetch model description: Status {response.status}")
+                    logger.warning(f"Failed to fetch model metadata: Status {response.status}")
                    return None
                
                data = await response.json()
-                description = data.get('description')
                
-                if description:
-                    logger.info(f"Successfully retrieved description for model {model_id}")
-                    return description
+                # Keep only the two fields this method exposes; keys absent from the response default to "" / []
+                metadata = {
+                    "description": data.get("description", ""),
+                    "tags": data.get("tags", [])
+                }
+                
+                if metadata["description"] or metadata["tags"]:
+                    logger.info(f"Successfully retrieved metadata for model {model_id}")
+                    return metadata
                else:
-                    logger.warning(f"No description found for model {model_id}")
+                    logger.warning(f"No metadata found for model {model_id}")
                    return None
                
        except Exception as e:
-            logger.error(f"Error fetching model description: {e}", exc_info=True)
+            logger.error(f"Error fetching model metadata: {e}", exc_info=True)
            return None

+    # Legacy wrapper over get_model_metadata; like the old method it never returns ""
+    async def get_model_description(self, model_id: str) -> Optional[str]:
+        """Return the model description HTML, or None if it is empty or the fetch failed"""
+        metadata = await self.get_model_metadata(model_id)
+        return (metadata.get("description") or None) if metadata else None
+
    async def close(self):
        """Close the session if it exists"""
        if self._session is not None:
--- a/py/services/file_monitor.py
+++ b/py/services/file_monitor.py
@@ -98,6 +98,10 @@ class LoraFileHandler(FileSystemEventHandler):
                        # Scan new file
                        lora_data = await self.scanner.scan_single_lora(file_path)
                        if lora_data:
+                            # Update tags count
+                            for tag in lora_data.get('tags', []):
+                                self.scanner._tags_count[tag] = self.scanner._tags_count.get(tag, 0) + 1
+                            
                            cache.raw_data.append(lora_data)
                            new_folders.add(lora_data['folder'])
                            # Update hash index
@@ -109,6 +113,16 @@ class LoraFileHandler(FileSystemEventHandler):
                            needs_resort = True
                            
                    elif action == 'remove':
+                        # Find the lora to remove so we can update tags count
+                        lora_to_remove = next((item for item in cache.raw_data if item['file_path'] == file_path), None)
+                        if lora_to_remove:
+                            # Update tags count by reducing counts
+                            for tag in lora_to_remove.get('tags', []):
+                                if tag in self.scanner._tags_count:
+                                    self.scanner._tags_count[tag] = max(0, self.scanner._tags_count[tag] - 1)
+                                    if self.scanner._tags_count[tag] == 0:
+                                        del self.scanner._tags_count[tag]
+                        
                        # Remove from cache and hash index
                        logger.info(f"Removing {file_path} from cache")
                        self.scanner._hash_index.remove_by_path(file_path)
--- a/py/services/lora_scanner.py
+++ b/py/services/lora_scanner.py
@@ -34,6 +34,7 @@ class LoraScanner:
            self._initialization_task: Optional[asyncio.Task] = None
            self._initialized = True
            self.file_monitor = None  # Add this line
+            self._tags_count = {}  # Add a dictionary to store tag counts

    def set_file_monitor(self, monitor):
        """Set file monitor instance"""
@@ -90,13 +91,21 @@ class LoraScanner:
            # Clear existing hash index
            self._hash_index.clear()
            
+            # Clear existing tags count
+            self._tags_count = {}
+            
            # Scan for new data
            raw_data = await self.scan_all_loras()
            
-            # Build hash index
+            # Build hash index and tags count
            for lora_data in raw_data:
                if 'sha256' in lora_data and 'file_path' in lora_data:
                    self._hash_index.add_entry(lora_data['sha256'], lora_data['file_path'])
+                
+                # Count tags
+                if 'tags' in lora_data and lora_data['tags']:
+                    for tag in lora_data['tags']:
+                        self._tags_count[tag] = self._tags_count.get(tag, 0) + 1
            
            # Update cache
            self._cache = LoraCache(
@@ -158,7 +167,7 @@ class LoraScanner:

    async def get_paginated_data(self, page: int, page_size: int, sort_by: str = 'name', 
                               folder: str = None, search: str = None, fuzzy: bool = False,
-                               recursive: bool = False, base_models: list = None):
+                               recursive: bool = False, base_models: list = None, tags: list = None) -> Dict:
        """Get paginated and filtered lora data
        
        Args:
@@ -170,6 +179,7 @@ class LoraScanner:
            fuzzy: Use fuzzy matching for search
            recursive: Include subfolders when folder filter is applied
            base_models: List of base models to filter by
+            tags: List of tags to filter by
        """
        cache = await self.get_cached_data()

@@ -198,6 +208,13 @@ class LoraScanner:
                if item.get('base_model') in base_models
            ]
        
+        # Apply tag filtering
+        if tags and len(tags) > 0:
+            filtered_data = [
+                item for item in filtered_data
+                if any(tag in item.get('tags', []) for tag in tags)
+            ]
+        
        # 应用搜索过滤
        if search:
            if fuzzy:
@@ -311,12 +328,67 @@ class LoraScanner:
        
        # Convert to dict and add folder info
        lora_data = metadata.to_dict()
+        # Try to fetch missing metadata from Civitai if needed
+        await self._fetch_missing_metadata(file_path, lora_data)
        rel_path = os.path.relpath(file_path, root_path)
        folder = os.path.dirname(rel_path)
        lora_data['folder'] = folder.replace(os.path.sep, '/')
        
        return lora_data

+    async def _fetch_missing_metadata(self, file_path: str, lora_data: Dict) -> None:
+        """Fill in missing tags/description from Civitai and persist them.
+
+        Best-effort: any failure is logged and swallowed, so the caller
+        never sees an exception from the Civitai request or the disk write.
+
+        Args:
+            file_path: Path to the lora file
+            lora_data: Lora metadata dictionary, updated in place
+        """
+        try:
+            # Only loras carrying a Civitai model ID can be looked up
+            model_id = (lora_data.get('civitai') or {}).get('modelId')
+            if not model_id:
+                return
+            model_id = str(model_id)
+
+            # Empty values ([] / "" / None) count as missing
+            tags_missing = not lora_data.get('tags')
+            desc_missing = not lora_data.get('modelDescription')
+            if not (tags_missing or desc_missing):
+                return
+
+            logger.info(f"Fetching missing metadata for {file_path} with model ID {model_id}")
+            # Function-scoped import (presumably avoids a circular import - confirm)
+            from ..services.civitai_client import CivitaiClient
+            client = CivitaiClient()
+            try:
+                model_metadata = await client.get_model_metadata(model_id)
+            finally:
+                # Always release the client, even if the request raised
+                await client.close()
+
+            if not model_metadata:
+                return
+
+            updated = False
+            if tags_missing and model_metadata.get('tags'):
+                lora_data['tags'] = model_metadata['tags']
+                updated = True
+            if desc_missing and model_metadata.get('description'):
+                lora_data['modelDescription'] = model_metadata['description']
+                updated = True
+
+            # Skip the disk write when Civitai had nothing new to offer
+            if updated:
+                logger.info(f"Updating metadata for {file_path} with model ID {model_id}")
+                metadata_path = os.path.splitext(file_path)[0] + '.metadata.json'
+                with open(metadata_path, 'w', encoding='utf-8') as f:
+                    json.dump(lora_data, f, indent=2, ensure_ascii=False)
+        except Exception as e:
+            logger.error(f"Failed to update metadata from Civitai for {file_path}: {e}")
+
    async def update_preview_in_cache(self, file_path: str, preview_url: str) -> bool:
        """Update preview URL in cache for a specific lora
        
@@ -427,6 +499,15 @@ class LoraScanner:
    async def update_single_lora_cache(self, original_path: str, new_path: str, metadata: Dict) -> bool:
        cache = await self.get_cached_data()
        
+        # Find the existing item to remove its tags from count
+        existing_item = next((item for item in cache.raw_data if item['file_path'] == original_path), None)
+        if existing_item and 'tags' in existing_item:
+            for tag in existing_item.get('tags', []):
+                if tag in self._tags_count:
+                    self._tags_count[tag] = max(0, self._tags_count[tag] - 1)
+                    if self._tags_count[tag] == 0:
+                        del self._tags_count[tag]
+        
        # Remove old path from hash index if exists
        self._hash_index.remove_by_path(original_path)
        
@@ -460,6 +541,11 @@ class LoraScanner:
            # Update folders list
            all_folders = set(item['folder'] for item in cache.raw_data)
            cache.folders = sorted(list(all_folders), key=lambda x: x.lower())
+            
+            # Update tags count with the new/updated tags
+            if 'tags' in metadata:
+                for tag in metadata.get('tags', []):
+                    self._tags_count[tag] = self._tags_count.get(tag, 0) + 1
        
        # Resort cache
        await cache.resort()
@@ -505,3 +591,26 @@ class LoraScanner:
        """Get hash for a LoRA by its file path"""
        return self._hash_index.get_hash(file_path)

+    # Add new method to get top tags
+    async def get_top_tags(self, limit: int = 20) -> List[Dict[str, object]]:
+        """Return the most frequently used tags, most common first.
+
+        Args:
+            limit: Maximum number of tags to return
+
+        Returns:
+            List of {"tag": name, "count": occurrences} dictionaries, sorted
+            by count in descending order (ties keep their insertion order)
+        """
+        # Make sure the cache is initialized before reading self._tags_count
+        await self.get_cached_data()
+
+        # sorted() is stable, so equally frequent tags keep dict order
+        ranked = sorted(
+            self._tags_count.items(),
+            key=lambda entry: entry[1],
+            reverse=True,
+        )
+
+        return [{"tag": tag, "count": count} for tag, count in ranked[:limit]]
+