fix: improve duplicate filename handling and logging in ModelScanner and ModelHashIndex

This commit is contained in:
Will Miao
2025-08-11 17:13:21 +08:00
parent 96517cbdef
commit c9289ed2dc
2 changed files with 40 additions and 13 deletions

View File

@@ -31,29 +31,34 @@ class ModelHashIndex:
if file_path not in self._duplicate_hashes.get(sha256, []): if file_path not in self._duplicate_hashes.get(sha256, []):
self._duplicate_hashes.setdefault(sha256, []).append(file_path) self._duplicate_hashes.setdefault(sha256, []).append(file_path)
# Track duplicates by filename # Track duplicates by filename - FIXED LOGIC
if filename in self._filename_to_hash: if filename in self._filename_to_hash:
old_hash = self._filename_to_hash[filename] existing_hash = self._filename_to_hash[filename]
if old_hash != sha256: # Different models with the same name existing_path = self._hash_to_path.get(existing_hash)
old_path = self._hash_to_path.get(old_hash)
if old_path: # If this is a different file with the same filename
if filename not in self._duplicate_filenames: if existing_path and existing_path != file_path:
self._duplicate_filenames[filename] = [old_path] # Initialize duplicates tracking if needed
if file_path not in self._duplicate_filenames.get(filename, []): if filename not in self._duplicate_filenames:
self._duplicate_filenames.setdefault(filename, []).append(file_path) self._duplicate_filenames[filename] = [existing_path]
# Add current file to duplicates if not already present
if file_path not in self._duplicate_filenames[filename]:
self._duplicate_filenames[filename].append(file_path)
# Remove old path mapping if hash exists # Remove old path mapping if hash exists
if sha256 in self._hash_to_path: if sha256 in self._hash_to_path:
old_path = self._hash_to_path[sha256] old_path = self._hash_to_path[sha256]
old_filename = self._get_filename_from_path(old_path) old_filename = self._get_filename_from_path(old_path)
if old_filename in self._filename_to_hash: if old_filename in self._filename_to_hash and self._filename_to_hash[old_filename] == sha256:
del self._filename_to_hash[old_filename] del self._filename_to_hash[old_filename]
# Remove old hash mapping if filename exists # Remove old hash mapping if filename exists and points to different hash
if filename in self._filename_to_hash: if filename in self._filename_to_hash:
old_hash = self._filename_to_hash[filename] old_hash = self._filename_to_hash[filename]
if old_hash in self._hash_to_path: if old_hash != sha256 and old_hash in self._hash_to_path:
del self._hash_to_path[old_hash] # Don't delete the old hash mapping, just update filename mapping
pass
# Add new mappings # Add new mappings
self._hash_to_path[sha256] = file_path self._hash_to_path[sha256] = file_path

View File

@@ -302,6 +302,13 @@ class ModelScanner:
for tag in model_data['tags']: for tag in model_data['tags']:
self._tags_count[tag] = self._tags_count.get(tag, 0) + 1 self._tags_count[tag] = self._tags_count.get(tag, 0) + 1
# Log duplicate filename warnings after building the index
duplicate_filenames = self._hash_index.get_duplicate_filenames()
if duplicate_filenames:
logger.warning(f"Found {len(duplicate_filenames)} filename(s) with duplicates during {self.model_type} cache build:")
for filename, paths in duplicate_filenames.items():
logger.warning(f"  Duplicate filename '{filename}': {paths}")
# Update cache # Update cache
self._cache.raw_data = raw_data self._cache.raw_data = raw_data
loop.run_until_complete(self._cache.resort()) loop.run_until_complete(self._cache.resort())
@@ -367,6 +374,13 @@ class ModelScanner:
for tag in model_data['tags']: for tag in model_data['tags']:
self._tags_count[tag] = self._tags_count.get(tag, 0) + 1 self._tags_count[tag] = self._tags_count.get(tag, 0) + 1
# Log duplicate filename warnings after building the index
duplicate_filenames = self._hash_index.get_duplicate_filenames()
if duplicate_filenames:
logger.warning(f"Found {len(duplicate_filenames)} filename(s) with duplicates during {self.model_type} cache build:")
for filename, paths in duplicate_filenames.items():
logger.warning(f"  Duplicate filename '{filename}': {paths}")
# Update cache # Update cache
self._cache = ModelCache( self._cache = ModelCache(
raw_data=raw_data, raw_data=raw_data,
@@ -670,6 +684,14 @@ class ModelScanner:
if model_data.get('exclude', False): if model_data.get('exclude', False):
self._excluded_models.append(model_data['file_path']) self._excluded_models.append(model_data['file_path'])
return None return None
# Check for duplicate filename before adding to hash index
filename = os.path.splitext(os.path.basename(file_path))[0]
existing_hash = self._hash_index.get_hash_by_filename(filename)
if existing_hash and existing_hash != model_data.get('sha256', '').lower():
existing_path = self._hash_index.get_path(existing_hash)
if existing_path and existing_path != file_path:
logger.warning(f"Duplicate filename detected: '{filename}' - files: '{existing_path}' and '{file_path}'")
await self._fetch_missing_metadata(file_path, model_data) await self._fetch_missing_metadata(file_path, model_data)
rel_path = os.path.relpath(file_path, root_path) rel_path = os.path.relpath(file_path, root_path)