Refactor DownloadManager and LoraFileHandler for improved file monitoring

- Simplified the path handling in DownloadManager by directly adding normalized paths to the ignore list.
- Updated LoraFileHandler to utilize a set for ignore paths, enhancing performance and clarity.
- Implemented debouncing for modified file events to prevent duplicate processing and improve efficiency.
- Enhanced the handling of file creation, modification, and deletion events for .safetensors files, ensuring accurate processing and logging.
- Adjusted cache operations to streamline the addition and removal of files based on real paths.
This commit is contained in:
Will Miao
2025-04-06 22:27:55 +08:00
parent 0b55f61fac
commit f2d36f5be9
2 changed files with 156 additions and 163 deletions

View File

@@ -75,17 +75,10 @@ class DownloadManager:
file_size = file_info.get('sizeKB', 0) * 1024 file_size = file_info.get('sizeKB', 0) * 1024
# 4. 通知文件监控系统 - 使用规范化路径和文件大小 # 4. 通知文件监控系统 - 使用规范化路径和文件大小
if self.file_monitor and self.file_monitor.handler: self.file_monitor.handler.add_ignore_path(
# Add both the normalized path and potential alternative paths save_path.replace(os.sep, '/'),
normalized_path = save_path.replace(os.sep, '/') file_size
self.file_monitor.handler.add_ignore_path(normalized_path, file_size) )
# Also add the path with file extension variations (.safetensors)
if not normalized_path.endswith('.safetensors'):
safetensors_path = os.path.splitext(normalized_path)[0] + '.safetensors'
self.file_monitor.handler.add_ignore_path(safetensors_path, file_size)
logger.debug(f"Added download path to ignore list: {normalized_path} (size: {file_size} bytes)")
# 5. 准备元数据 # 5. 准备元数据
metadata = LoraMetadata.from_civitai_info(version_info, file_info, save_path) metadata = LoraMetadata.from_civitai_info(version_info, file_info, save_path)

View File

@@ -2,9 +2,10 @@ from operator import itemgetter
import os import os
import logging import logging
import asyncio import asyncio
import time
from watchdog.observers import Observer from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileCreatedEvent, FileDeletedEvent from watchdog.events import FileSystemEventHandler
from typing import List from typing import List, Dict, Set
from threading import Lock from threading import Lock
from .lora_scanner import LoraScanner from .lora_scanner import LoraScanner
from ..config import config from ..config import config
@@ -20,139 +21,174 @@ class LoraFileHandler(FileSystemEventHandler):
self.pending_changes = set() # 待处理的变更 self.pending_changes = set() # 待处理的变更
self.lock = Lock() # 线程安全锁 self.lock = Lock() # 线程安全锁
self.update_task = None # 异步更新任务 self.update_task = None # 异步更新任务
self._ignore_paths = {} # Change to dictionary to store expiration times self._ignore_paths = set() # Add ignore paths set
self._min_ignore_timeout = 5 # minimum timeout in seconds self._min_ignore_timeout = 5 # minimum timeout in seconds
self._download_speed = 1024 * 1024 # assume 1MB/s as base speed self._download_speed = 1024 * 1024 # assume 1MB/s as base speed
# Track modified files with timestamps for debouncing
self.modified_files: Dict[str, float] = {}
self.debounce_timer = None
self.debounce_delay = 3.0 # seconds to wait after last modification
# Track files that are already scheduled for processing
self.scheduled_files: Set[str] = set()
def _should_ignore(self, path: str) -> bool: def _should_ignore(self, path: str) -> bool:
"""Check if path should be ignored""" """Check if path should be ignored"""
real_path = os.path.realpath(path) # Resolve any symbolic links real_path = os.path.realpath(path) # Resolve any symbolic links
normalized_path = real_path.replace(os.sep, '/') return real_path.replace(os.sep, '/') in self._ignore_paths
# Also check with backslashes for Windows compatibility
alt_path = real_path.replace('/', '\\')
# 使用传入的事件循环而不是尝试获取当前线程的事件循环
current_time = self.loop.time()
# Check if path is in ignore list and not expired
if normalized_path in self._ignore_paths and self._ignore_paths[normalized_path] > current_time:
return True
# Also check alternative path format
if alt_path in self._ignore_paths and self._ignore_paths[alt_path] > current_time:
return True
return False
def add_ignore_path(self, path: str, file_size: int = 0):
    """Register *path* so its imminent filesystem CREATE event is skipped.

    The path is stored symlink-resolved with forward slashes (the same
    normalization _should_ignore() applies). A short fixed timeout is
    enough here: we only need to suppress the CREATE event fired by our
    own download, after which the entry is discarded automatically.

    Args:
        path: Path of the file being downloaded.
        file_size: Retained for backward interface compatibility; no
            longer used to scale the timeout.
    """
    real_path = os.path.realpath(path)  # resolve any symbolic links
    self._ignore_paths.add(real_path.replace(os.sep, '/'))

    # Short timeout (e.g. 5 seconds) is sufficient to ignore the CREATE event
    timeout = 5
    # set.discard never raises, so the delayed cleanup is safe even if
    # the entry was already removed.
    self.loop.call_later(
        timeout,
        self._ignore_paths.discard,
        real_path.replace(os.sep, '/')
    )
def _remove_ignore_path(self, path: str):
"""Remove path from ignore list after timeout"""
if path in self._ignore_paths:
del self._ignore_paths[path]
logger.debug(f"Removed ignore path: {path}")
# Also remove alternative path format
alt_path = path.replace('/', '\\')
if alt_path in self._ignore_paths:
del self._ignore_paths[alt_path]
def on_created(self, event):
    """Handle filesystem CREATE events.

    Schedules a cache 'add' for new .safetensors files, deduplicating
    through self.scheduled_files so that the MODIFY events emitted
    while the file is still being written do not trigger a second scan.
    """
    if event.is_directory:
        return

    # Handle safetensors files directly
    if event.src_path.endswith('.safetensors'):
        if self._should_ignore(event.src_path):
            return

        # We'll process this file directly and ignore subsequent modifications
        # to prevent duplicate processing
        normalized_path = os.path.realpath(event.src_path).replace(os.sep, '/')
        if normalized_path not in self.scheduled_files:
            logger.info(f"LoRA file created: {event.src_path}")
            self.scheduled_files.add(normalized_path)
            self._schedule_update('add', event.src_path)

            # Ignore modifications for a short period after creation;
            # this helps avoid duplicate processing.
            # NOTE(review): this runs on the watchdog observer thread, but
            # loop.call_later is documented as not thread-safe — consider
            # routing through loop.call_soon_threadsafe. TODO confirm.
            self.loop.call_later(
                self.debounce_delay * 2,
                self.scheduled_files.discard,
                normalized_path
            )

    # For browser downloads, we'll catch them when they're renamed to .safetensors
def on_modified(self, event):
    """Handle filesystem MODIFY events with debouncing.

    Each modification of a .safetensors file refreshes its timestamp in
    self.modified_files and (re)arms a single debounce timer; the file
    is only processed once no modification has arrived for
    self.debounce_delay seconds, so in-progress writes are not scanned.
    """
    if event.is_directory:
        return

    # Only process safetensors files
    if event.src_path.endswith('.safetensors'):
        if self._should_ignore(event.src_path):
            return

        normalized_path = os.path.realpath(event.src_path).replace(os.sep, '/')

        # Skip if this file is already scheduled for processing
        if normalized_path in self.scheduled_files:
            return

        # Update the timestamp for this file
        self.modified_files[normalized_path] = time.time()

        # Cancel any existing timer so only the latest event counts
        if self.debounce_timer:
            self.debounce_timer.cancel()

        # Set a new timer to process modified files after debounce period.
        # NOTE(review): invoked from the watchdog observer thread;
        # loop.call_later / timer.cancel are not thread-safe per asyncio
        # docs — consider loop.call_soon_threadsafe. TODO confirm.
        self.debounce_timer = self.loop.call_later(
            self.debounce_delay,
            self.loop.call_soon_threadsafe,
            self._process_modified_files
        )
def _process_modified_files(self):
    """Process files that have been modified after the debounce period.

    Files whose last modification is at least self.debounce_delay
    seconds old are scheduled as 'add' updates (unless already in
    self.scheduled_files) and dropped from self.modified_files; newer
    entries stay pending for a later debounce pass.
    """
    current_time = time.time()
    files_to_process = []

    # Find files that haven't been modified for debounce_delay seconds
    for file_path, last_modified in list(self.modified_files.items()):
        if current_time - last_modified >= self.debounce_delay:
            # Only process if not already scheduled
            if file_path not in self.scheduled_files:
                files_to_process.append(file_path)
                self.scheduled_files.add(file_path)
                # Auto-remove from scheduled list after reasonable time
                self.loop.call_later(
                    self.debounce_delay * 2,
                    self.scheduled_files.discard,
                    file_path
                )
            # Stable (or already-scheduled) entries are dropped either way
            del self.modified_files[file_path]

    # Process stable files
    for file_path in files_to_process:
        logger.info(f"Processing modified LoRA file: {file_path}")
        self._schedule_update('add', file_path)
def on_deleted(self, event):
    """Handle filesystem DELETE events for .safetensors files.

    Clears any pending scheduled-add entry for the path and queues a
    cache 'remove' update; paths in the ignore set are skipped.
    """
    if event.is_directory or not event.src_path.endswith('.safetensors'):
        return
    if self._should_ignore(event.src_path):
        return

    # Remove from scheduled files if present
    normalized_path = os.path.realpath(event.src_path).replace(os.sep, '/')
    self.scheduled_files.discard(normalized_path)

    logger.info(f"LoRA file deleted: {event.src_path}")
    self._schedule_update('remove', event.src_path)
def on_moved(self, event):
    """Handle file move/rename events.

    A move into a .safetensors name is treated as a creation (e.g. a
    browser download renamed from a temp file); a move away from a
    .safetensors name is treated as a deletion. Both directions honor
    the ignore set and the scheduled_files dedup set.
    """
    # If destination is a safetensors file, treat it as a new file
    if event.dest_path.endswith('.safetensors'):
        if self._should_ignore(event.dest_path):
            return

        normalized_path = os.path.realpath(event.dest_path).replace(os.sep, '/')
        # Only process if not already scheduled
        if normalized_path not in self.scheduled_files:
            logger.info(f"LoRA file renamed/moved to: {event.dest_path}")
            self.scheduled_files.add(normalized_path)
            self._schedule_update('add', event.dest_path)
            # Auto-remove from scheduled list after reasonable time
            self.loop.call_later(
                self.debounce_delay * 2,
                self.scheduled_files.discard,
                normalized_path
            )

    # If source was a safetensors file, treat it as deleted
    if event.src_path.endswith('.safetensors'):
        if self._should_ignore(event.src_path):
            return
        normalized_path = os.path.realpath(event.src_path).replace(os.sep, '/')
        self.scheduled_files.discard(normalized_path)

        logger.info(f"LoRA file moved/renamed from: {event.src_path}")
        self._schedule_update('remove', event.src_path)
def _schedule_update(self, action: str, file_path: str):  # file_path is a real path
    """Queue a cache update for later batch processing.

    Maps the real filesystem path to its link-path form via config,
    normalizes separators, and records the (action, path) pair under
    the lock; then wakes the event loop to create the update task.
    """
    with self.lock:
        # Map the real path to the link path using the helper in config
        mapped_path = config.map_path_to_link(file_path)
        normalized_path = mapped_path.replace(os.sep, '/')
        self.pending_changes.add((action, normalized_path))
    # call_soon_threadsafe: this handler runs on the watchdog thread
    self.loop.call_soon_threadsafe(self._create_update_task)
@@ -161,8 +197,8 @@ class LoraFileHandler(FileSystemEventHandler):
if self.update_task is None or self.update_task.done(): if self.update_task is None or self.update_task.done():
self.update_task = asyncio.create_task(self._process_changes()) self.update_task = asyncio.create_task(self._process_changes())
async def _process_changes(self, delay: float = 5.0): async def _process_changes(self, delay: float = 2.0):
"""Process pending changes with debouncing - increased delay to allow downloads to complete""" """Process pending changes with debouncing"""
await asyncio.sleep(delay) await asyncio.sleep(delay)
try: try:
@@ -175,40 +211,21 @@ class LoraFileHandler(FileSystemEventHandler):
logger.info(f"Processing {len(changes)} file changes") logger.info(f"Processing {len(changes)} file changes")
# First collect all actions by file path to handle contradicting events
actions_by_path = {}
for action, file_path in changes:
# For the same file path, 'add' takes precedence over 'remove'
if file_path not in actions_by_path or action == 'add':
actions_by_path[file_path] = action
# Process the final actions
cache = await self.scanner.get_cached_data() cache = await self.scanner.get_cached_data()
needs_resort = False needs_resort = False
new_folders = set() new_folders = set()
for file_path, action in actions_by_path.items(): for action, file_path in changes:
try: try:
# For 'add' actions, verify the file still exists and is complete
if action == 'add': if action == 'add':
# Use the original real path from the event for file system checks # Check if file already exists in cache
real_path = file_path existing = next((item for item in cache.raw_data if item['file_path'] == file_path), None)
if existing:
if not os.path.exists(real_path): logger.info(f"File {file_path} already in cache, skipping")
logger.warning(f"Skipping add for non-existent file: {real_path}")
continue continue
file_size = os.path.getsize(real_path) # Scan new file
if file_size == 0: lora_data = await self.scanner.scan_single_lora(file_path)
logger.warning(f"Skipping add for empty file: {real_path}")
continue
# Map the real path to link path for the cache after confirming file exists
mapped_path = config.map_path_to_link(real_path)
normalized_path = mapped_path.replace(os.sep, '/')
# Scan new file with the mapped path
lora_data = await self.scanner.scan_single_lora(normalized_path)
if lora_data: if lora_data:
# Update tags count # Update tags count
for tag in lora_data.get('tags', []): for tag in lora_data.get('tags', []):
@@ -223,27 +240,10 @@ class LoraFileHandler(FileSystemEventHandler):
lora_data['file_path'] lora_data['file_path']
) )
needs_resort = True needs_resort = True
logger.info(f"Added LoRA to cache: {normalized_path}")
# Remove from ignore list now that it's been successfully processed
# This allows delete events to be processed immediately
real_path_normalized = os.path.realpath(real_path).replace(os.sep, '/')
alt_path = real_path_normalized.replace('/', '\\')
if real_path_normalized in self._ignore_paths:
logger.debug(f"Removing successfully processed file from ignore list: {real_path_normalized}")
del self._ignore_paths[real_path_normalized]
if alt_path in self._ignore_paths:
del self._ignore_paths[alt_path]
elif action == 'remove': elif action == 'remove':
# Map the path for removal operations
mapped_path = config.map_path_to_link(file_path)
normalized_path = mapped_path.replace(os.sep, '/')
# Find the lora to remove so we can update tags count # Find the lora to remove so we can update tags count
lora_to_remove = next((item for item in cache.raw_data if item['file_path'] == normalized_path), None) lora_to_remove = next((item for item in cache.raw_data if item['file_path'] == file_path), None)
if lora_to_remove: if lora_to_remove:
# Update tags count by reducing counts # Update tags count by reducing counts
for tag in lora_to_remove.get('tags', []): for tag in lora_to_remove.get('tags', []):
@@ -253,16 +253,16 @@ class LoraFileHandler(FileSystemEventHandler):
del self.scanner._tags_count[tag] del self.scanner._tags_count[tag]
# Remove from cache and hash index # Remove from cache and hash index
logger.info(f"Removing {normalized_path} from cache") logger.info(f"Removing {file_path} from cache")
self.scanner._hash_index.remove_by_path(normalized_path) self.scanner._hash_index.remove_by_path(file_path)
cache.raw_data = [ cache.raw_data = [
item for item in cache.raw_data item for item in cache.raw_data
if item['file_path'] != normalized_path if item['file_path'] != file_path
] ]
needs_resort = True needs_resort = True
except Exception as e: except Exception as e:
logger.error(f"Error processing {action} for {file_path}: {e}", exc_info=True) logger.error(f"Error processing {action} for {file_path}: {e}")
if needs_resort: if needs_resort:
await cache.resort() await cache.resort()
@@ -272,7 +272,7 @@ class LoraFileHandler(FileSystemEventHandler):
cache.folders = sorted(list(all_folders), key=lambda x: x.lower()) cache.folders = sorted(list(all_folders), key=lambda x: x.lower())
except Exception as e: except Exception as e:
logger.error(f"Error in process_changes: {e}", exc_info=True) logger.error(f"Error in process_changes: {e}")
class LoraFileMonitor: class LoraFileMonitor: