Merge branch 'main' into fix-symlink

This commit is contained in:
pixelpaws
2026-01-26 17:29:31 +08:00
committed by GitHub
17 changed files with 224282 additions and 100 deletions

View File

@@ -0,0 +1,69 @@
# Danbooru/E621 Tag Categories Reference
Reference for the category values used in the `danbooru_e621_merged.csv` tag file.
## Category Value Mapping
### Danbooru Categories
| Value | Description |
|-------|-------------|
| 0 | General |
| 1 | Artist |
| 2 | *(unused)* |
| 3 | Copyright |
| 4 | Character |
| 5 | Meta |
### e621 Categories
| Value | Description |
|-------|-------------|
| 6 | *(unused)* |
| 7 | General |
| 8 | Artist |
| 9 | Contributor |
| 10 | Copyright |
| 11 | Character |
| 12 | Species |
| 13 | *(unused)* |
| 14 | Meta |
| 15 | Lore |
## Danbooru Category Colors
| Description | Normal Color | Hover Color |
|-------------|--------------|-------------|
| General | #009be6 | #4bb4ff |
| Artist | #ff8a8b | #ffc3c3 |
| Copyright | #c797ff | #ddc9fb |
| Character | #35c64a | #93e49a |
| Meta | #ead084 | #f7e7c3 |
## CSV Column Structure
Each row in the merged CSV file contains 4 columns:
| Column | Description | Example |
|--------|-------------|---------|
| 1 | Tag name | `1girl`, `highres`, `solo` |
| 2 | Category value (0-15) | `0`, `5`, `7` |
| 3 | Post count | `6008644`, `5256195` |
| 4 | Aliases (comma-separated, quoted) | `"1girls,sole_female"`, empty string |
### Sample Data
```
1girl,0,6008644,"1girls,sole_female"
highres,5,5256195,"high_res,high_resolution,hires"
solo,0,5000954,"alone,female_solo,single,solo_female"
long_hair,0,4350743,"/lh,longhair"
mammal,12,3437444,"cetancodont,cetancodontamorph,feralmammal"
anthro,7,3381927,"adult_anthro,anhtro,antho,anthro_horse"
skirt,0,1557883,
```
## Source
- [PR #312: Add danbooru_e621_merged.csv](https://github.com/DominikDoom/a1111-sd-webui-tagcomplete/pull/312)
- [DraconicDragon/dbr-e621-lists-archive](https://github.com/DraconicDragon/dbr-e621-lists-archive)

View File

@@ -9,6 +9,7 @@ import json
import urllib.parse
import time
from .utils.cache_paths import CacheType, get_cache_file_path, get_legacy_cache_paths
from .utils.settings_paths import ensure_settings_file, get_settings_dir, load_settings_template
# Use an environment variable to control standalone mode
@@ -241,9 +242,8 @@ class Config:
return os.path.normpath(path).replace(os.sep, '/')
def _get_symlink_cache_path(self) -> Path:
cache_dir = Path(get_settings_dir(create=True)) / "cache"
cache_dir.mkdir(parents=True, exist_ok=True)
return cache_dir / "symlink_map.json"
canonical_path = get_cache_file_path(CacheType.SYMLINK, create_dir=True)
return Path(canonical_path)
def _symlink_roots(self) -> List[str]:
roots: List[str] = []
@@ -322,14 +322,28 @@ class Config:
def _load_persisted_cache_into_mappings(self) -> bool:
"""Load the symlink cache and store its fingerprint for comparison."""
cache_path = self._get_symlink_cache_path()
if not cache_path.exists():
return False
try:
with cache_path.open("r", encoding="utf-8") as handle:
payload = json.load(handle)
except Exception as exc:
logger.info("Failed to load symlink cache %s: %s", cache_path, exc)
# Check canonical path first, then legacy paths for migration
paths_to_check = [cache_path]
legacy_paths = get_legacy_cache_paths(CacheType.SYMLINK)
paths_to_check.extend(Path(p) for p in legacy_paths if p != str(cache_path))
loaded_path = None
payload = None
for check_path in paths_to_check:
if not check_path.exists():
continue
try:
with check_path.open("r", encoding="utf-8") as handle:
payload = json.load(handle)
loaded_path = check_path
break
except Exception as exc:
logger.info("Failed to load symlink cache %s: %s", check_path, exc)
continue
if payload is None:
return False
if not isinstance(payload, dict):
@@ -349,7 +363,37 @@ class Config:
normalized_mappings[self._normalize_path(target)] = self._normalize_path(link)
self._path_mappings = normalized_mappings
logger.info("Symlink cache loaded with %d mappings", len(self._path_mappings))
# Log migration if loaded from legacy path
if loaded_path is not None and loaded_path != cache_path:
logger.info(
"Symlink cache migrated from %s (will save to %s)",
loaded_path,
cache_path,
)
try:
if loaded_path.exists():
loaded_path.unlink()
logger.info("Cleaned up legacy symlink cache: %s", loaded_path)
try:
parent_dir = loaded_path.parent
if parent_dir.name == "cache" and not any(parent_dir.iterdir()):
parent_dir.rmdir()
logger.info("Removed empty legacy cache directory: %s", parent_dir)
except Exception:
pass
except Exception as exc:
logger.warning(
"Failed to cleanup legacy symlink cache %s: %s",
loaded_path,
exc,
)
else:
logger.info("Symlink cache loaded with %d mappings", len(self._path_mappings))
return True
def _validate_cached_mappings(self) -> bool:

View File

@@ -17,7 +17,7 @@ class PromptLM:
"text": (
"AUTOCOMPLETE_TEXT_PROMPT",
{
"placeholder": "Enter prompt...",
"placeholder": "Enter prompt... /char, /artist for quick tag search",
"tooltip": "The text to be encoded.",
},
),

View File

@@ -1231,12 +1231,31 @@ class CustomWordsHandler:
return web.json_response({"error": str(exc)}, status=500)
async def search_custom_words(self, request: web.Request) -> web.Response:
"""Search custom words with autocomplete."""
"""Search custom words with autocomplete.
Query parameters:
search: The search term to match against.
limit: Maximum number of results to return (default: 20).
category: Optional category filter. Can be:
- A category name (e.g., "character", "artist", "general")
- Comma-separated category IDs (e.g., "4,11" for character)
enriched: If "true", return enriched results with category and post_count
even without category filtering.
"""
try:
search_term = request.query.get("search", "")
limit = int(request.query.get("limit", "20"))
category_param = request.query.get("category", "")
enriched_param = request.query.get("enriched", "").lower() == "true"
results = self._service.search_words(search_term, limit)
# Parse category parameter
categories = None
if category_param:
categories = self._parse_category_param(category_param)
results = self._service.search_words(
search_term, limit, categories=categories, enriched=enriched_param
)
return web.json_response({
"success": True,
@@ -1246,6 +1265,37 @@ class CustomWordsHandler:
logger.error("Error searching custom words: %s", exc, exc_info=True)
return web.json_response({"error": str(exc)}, status=500)
def _parse_category_param(self, param: str) -> list[int] | None:
    """Translate the ``category`` query parameter into category IDs.

    Args:
        param: Either a category name present in ``CATEGORY_NAME_TO_IDS``
            (compared after stripping and lower-casing) or a
            comma-separated list of integer IDs. Empty parts are ignored.

    Returns:
        A new list of category IDs, or None when the parameter is blank,
        contains no IDs, or any part is not an integer.
    """
    from ...services.tag_fts_index import CATEGORY_NAME_TO_IDS

    param = param.strip().lower()
    if not param:
        return None

    # Category names take precedence over numeric parsing.
    named_ids = CATEGORY_NAME_TO_IDS.get(param)
    if named_ids is not None:
        # Return a copy so callers can never mutate the shared module-level table.
        return list(named_ids)

    try:
        category_ids = [int(part) for part in param.split(",") if part.strip()]
    except ValueError:
        logger.debug("Invalid category parameter: %s", param)
        return None
    return category_ids or None
class NodeRegistryHandler:
def __init__(

View File

@@ -2,6 +2,9 @@
This service provides functionality to parse CSV-formatted custom words,
search them with priority-based ranking, and manage storage.
It also integrates with TagFTSIndex to search the Danbooru/e621 tag database
for comprehensive autocomplete suggestions with category filtering.
"""
from __future__ import annotations
@@ -10,7 +13,7 @@ import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, Union
logger = logging.getLogger(__name__)
@@ -35,6 +38,7 @@ class CustomWordsService:
- Parses CSV format: word[,priority] or word[,alias][,priority]
- Searches words with priority-based ranking
- Caches parsed words for performance
- Integrates with TagFTSIndex for Danbooru/e621 tag search
"""
_instance: Optional[CustomWordsService] = None
@@ -51,6 +55,7 @@ class CustomWordsService:
self._words_cache: Dict[str, WordEntry] = {}
self._file_path: Optional[Path] = None
self._tag_index: Optional[Any] = None # Lazy-loaded TagFTSIndex
self._initialized = True
self._determine_file_path()
@@ -98,6 +103,17 @@ class CustomWordsService:
"""Get the current file path for custom words."""
return self._file_path
def _get_tag_index(self):
"""Get or create the TagFTSIndex instance (lazy initialization)."""
if self._tag_index is None:
try:
from .tag_fts_index import get_tag_fts_index
self._tag_index = get_tag_fts_index()
except Exception as e:
logger.warning(f"Failed to initialize TagFTSIndex: {e}")
self._tag_index = None
return self._tag_index
def load_words(self) -> Dict[str, WordEntry]:
"""Load and parse words from the custom words file.
@@ -160,10 +176,20 @@ class CustomWordsService:
return words
def search_words(self, search_term: str, limit: int = 20) -> List[str]:
def search_words(
self,
search_term: str,
limit: int = 20,
categories: Optional[List[int]] = None,
enriched: bool = False
) -> Union[List[str], List[Dict[str, Any]]]:
"""Search custom words with priority-based ranking.
Matching priority:
When categories are provided or enriched is True, uses TagFTSIndex to search
the Danbooru/e621 tag database and returns enriched results with category
and post_count.
Matching priority (for custom words):
1. Words with priority (sorted by priority descending)
2. Prefix matches (word starts with search term)
3. Include matches (word contains search term)
@@ -171,10 +197,29 @@ class CustomWordsService:
Args:
search_term: The search term to match against.
limit: Maximum number of results to return.
categories: Optional list of category IDs to filter by.
When provided, searches TagFTSIndex instead of custom words.
enriched: If True, return enriched results even without category filtering.
Returns:
List of matching word texts.
List of matching word texts (when categories is None and enriched is False), or
List of dicts with tag_name, category, post_count (when categories is provided
or enriched is True).
"""
# Use TagFTSIndex when categories are specified or when explicitly requested
tag_index = self._get_tag_index()
if tag_index is not None:
# Search the tag database
results = tag_index.search(search_term, categories=categories, limit=limit)
if results:
# If categories were specified or enriched requested, return enriched results
if categories is not None or enriched:
return results
# Otherwise, convert to simple string list for backward compatibility
return [r["tag_name"] for r in results]
# Fall through to custom words if no tag results
# Fall back to custom words search
words = self._words_cache if self._words_cache else self.load_words()
if not search_term:
@@ -212,14 +257,18 @@ class CustomWordsService:
# Combine results: 20% top priority + all prefix matches + rest of priority + all include
top_priority_count = max(1, limit // 5)
results = (
text_results = (
[entry.text for entry, _ in priority_matches[:top_priority_count]]
+ [entry.text for entry, _ in prefix_matches]
+ [entry.text for entry, _ in priority_matches[top_priority_count:]]
+ [entry.text for entry, _ in include_matches]
)
return results[:limit]
# If categories were requested but tag index failed, return empty enriched format
if categories is not None:
return [{"tag_name": t, "category": 0, "post_count": 0} for t in text_results[:limit]]
return text_results[:limit]
def save_words(self, content: str) -> bool:
"""Save custom words content to file.

View File

@@ -1,13 +1,12 @@
import json
import logging
import os
import re
import sqlite3
import threading
from dataclasses import dataclass
from typing import Dict, List, Mapping, Optional, Sequence, Tuple
from ..utils.settings_paths import get_project_root, get_settings_dir
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
logger = logging.getLogger(__name__)
@@ -404,20 +403,12 @@ class PersistentModelCache:
# Internal helpers -------------------------------------------------
def _resolve_default_path(self, library_name: str) -> str:
override = os.environ.get("LORA_MANAGER_CACHE_DB")
if override:
return override
try:
settings_dir = get_settings_dir(create=True)
except Exception as exc: # pragma: no cover - defensive guard
logger.warning("Falling back to project directory for cache: %s", exc)
settings_dir = get_project_root()
safe_name = re.sub(r"[^A-Za-z0-9_.-]", "_", library_name or "default")
if safe_name.lower() in ("default", ""):
legacy_path = os.path.join(settings_dir, self._DEFAULT_FILENAME)
if os.path.exists(legacy_path):
return legacy_path
return os.path.join(settings_dir, "model_cache", f"{safe_name}.sqlite")
env_override = os.environ.get("LORA_MANAGER_CACHE_DB")
return resolve_cache_path_with_migration(
CacheType.MODEL,
library_name=library_name,
env_override=env_override,
)
def _initialize_schema(self) -> None:
with self._db_lock:

View File

@@ -10,13 +10,12 @@ from __future__ import annotations
import json
import logging
import os
import re
import sqlite3
import threading
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple
from ..utils.settings_paths import get_project_root, get_settings_dir
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
logger = logging.getLogger(__name__)
@@ -312,20 +311,12 @@ class PersistentRecipeCache:
# Internal helpers
def _resolve_default_path(self, library_name: str) -> str:
override = os.environ.get("LORA_MANAGER_RECIPE_CACHE_DB")
if override:
return override
try:
settings_dir = get_settings_dir(create=True)
except Exception as exc:
logger.warning("Falling back to project directory for recipe cache: %s", exc)
settings_dir = get_project_root()
safe_name = re.sub(r"[^A-Za-z0-9_.-]", "_", library_name or "default")
if safe_name.lower() in ("default", ""):
legacy_path = os.path.join(settings_dir, self._DEFAULT_FILENAME)
if os.path.exists(legacy_path):
return legacy_path
return os.path.join(settings_dir, "recipe_cache", f"{safe_name}.sqlite")
env_override = os.environ.get("LORA_MANAGER_RECIPE_CACHE_DB")
return resolve_cache_path_with_migration(
CacheType.RECIPE,
library_name=library_name,
env_override=env_override,
)
def _initialize_schema(self) -> None:
with self._db_lock:

View File

@@ -15,7 +15,7 @@ import threading
import time
from typing import Any, Dict, List, Optional, Set
from ..utils.settings_paths import get_settings_dir
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
logger = logging.getLogger(__name__)
@@ -67,17 +67,11 @@ class RecipeFTSIndex:
def _resolve_default_path(self) -> str:
"""Resolve the default database path."""
override = os.environ.get("LORA_MANAGER_RECIPE_FTS_DB")
if override:
return override
try:
settings_dir = get_settings_dir(create=True)
except Exception as exc:
logger.warning("Falling back to current directory for FTS index: %s", exc)
settings_dir = "."
return os.path.join(settings_dir, self._DEFAULT_FILENAME)
env_override = os.environ.get("LORA_MANAGER_RECIPE_FTS_DB")
return resolve_cache_path_with_migration(
CacheType.RECIPE_FTS,
env_override=env_override,
)
def get_database_path(self) -> str:
"""Return the resolved database path."""

View File

@@ -0,0 +1,498 @@
"""SQLite FTS5-based full-text search index for tags.
This module provides fast tag search using SQLite's FTS5 extension,
enabling sub-100ms search times for 221k+ Danbooru/e621 tags.
"""
from __future__ import annotations
import csv
import logging
import os
import re
import sqlite3
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional, Set
from ..utils.cache_paths import CacheType, resolve_cache_path_with_migration
logger = logging.getLogger(__name__)
# Category definitions for Danbooru and e621.
# Maps each category value found in the merged CSV to its display name.
CATEGORY_NAMES = {
    # Danbooru categories
    0: "general",
    1: "artist",
    3: "copyright",
    4: "character",
    5: "meta",
    # e621 categories
    7: "general",
    8: "artist",
    9: "contributor",
    10: "copyright",
    11: "character",
    12: "species",
    14: "meta",
    15: "lore",
}

# Map category names to their IDs (for filtering).
# A name shared by both sites lists the Danbooru ID first, then the e621 ID.
CATEGORY_NAME_TO_IDS = {
    "general": [0, 7],
    "artist": [1, 8],
    "contributor": [9],
    "copyright": [3, 10],
    "character": [4, 11],
    "meta": [5, 14],
    "species": [12],
    "lore": [15],
}
class TagFTSIndex:
"""SQLite FTS5-based full-text search index for tags.
Provides fast prefix-based search across the Danbooru/e621 tag database.
Supports category-based filtering and returns enriched results with
post counts and category information.
"""
_DEFAULT_FILENAME = "tag_fts.sqlite"
_CSV_FILENAME = "danbooru_e621_merged.csv"
def __init__(self, db_path: Optional[str] = None, csv_path: Optional[str] = None) -> None:
"""Initialize the FTS index.
Args:
db_path: Optional path to the SQLite database file.
If not provided, uses the default location in settings directory.
csv_path: Optional path to the CSV file containing tag data.
If not provided, looks in the refs/ directory.
"""
self._db_path = db_path or self._resolve_default_db_path()
self._csv_path = csv_path or self._resolve_default_csv_path()
self._lock = threading.Lock()
self._ready = threading.Event()
self._indexing_in_progress = False
self._schema_initialized = False
self._warned_not_ready = False
# Ensure directory exists
try:
directory = os.path.dirname(self._db_path)
if directory:
os.makedirs(directory, exist_ok=True)
except Exception as exc:
logger.warning("Could not create FTS index directory %s: %s", directory, exc)
def _resolve_default_db_path(self) -> str:
    """Ask the shared cache-path resolver for the tag index database path.

    The value of the ``LORA_MANAGER_TAG_FTS_DB`` environment variable (or
    None when unset) is forwarded to the resolver as ``env_override``.
    """
    return resolve_cache_path_with_migration(
        CacheType.TAG_FTS,
        env_override=os.environ.get("LORA_MANAGER_TAG_FTS_DB"),
    )
def _resolve_default_csv_path(self) -> str:
"""Resolve the default CSV file path."""
# Look for the CSV in the refs/ directory relative to the package
package_dir = Path(__file__).parent.parent.parent
csv_path = package_dir / "refs" / self._CSV_FILENAME
return str(csv_path)
def get_database_path(self) -> str:
    """Expose the SQLite file path this index was configured with."""
    database_path = self._db_path
    return database_path
def get_csv_path(self) -> str:
    """Expose the CSV file path this index was configured with."""
    source_csv = self._csv_path
    return source_csv
def is_ready(self) -> bool:
    """Report whether the ready event has been set, i.e. queries may run."""
    ready_event = self._ready
    return ready_event.is_set()
def is_indexing(self) -> bool:
    """Report the current value of the indexing-in-progress flag."""
    in_progress = self._indexing_in_progress
    return in_progress
def initialize(self) -> None:
    """Create the tables and indexes of the tag index, at most once.

    The flag is checked before and again after acquiring the lock, so the
    schema statements are skipped once a previous call has succeeded.
    Failures are logged and leave ``_schema_initialized`` False, so a later
    call tries again.
    """
    if self._schema_initialized:
        return

    with self._lock:
        # Re-check under the lock in case another caller finished meanwhile.
        if self._schema_initialized:
            return

        try:
            connection = self._connect()
            try:
                connection.execute("PRAGMA journal_mode=WAL")
                connection.executescript("""
                    -- FTS5 virtual table for full-text search
                    CREATE VIRTUAL TABLE IF NOT EXISTS tag_fts USING fts5(
                    tag_name,
                    tokenize='unicode61 remove_diacritics 2'
                    );
                    -- Tags table with metadata
                    CREATE TABLE IF NOT EXISTS tags (
                    rowid INTEGER PRIMARY KEY,
                    tag_name TEXT UNIQUE NOT NULL,
                    category INTEGER NOT NULL DEFAULT 0,
                    post_count INTEGER NOT NULL DEFAULT 0
                    );
                    -- Indexes for efficient filtering
                    CREATE INDEX IF NOT EXISTS idx_tags_category ON tags(category);
                    CREATE INDEX IF NOT EXISTS idx_tags_post_count ON tags(post_count DESC);
                    -- Index version tracking
                    CREATE TABLE IF NOT EXISTS fts_metadata (
                    key TEXT PRIMARY KEY,
                    value TEXT
                    );
                """)
                connection.commit()
                self._schema_initialized = True
                logger.debug("Tag FTS index schema initialized at %s", self._db_path)
            finally:
                connection.close()
        except Exception as exc:
            logger.error("Failed to initialize tag FTS schema: %s", exc)
def build_index(self) -> None:
    """Rebuild the tag index from the CSV file.

    Existing rows in ``tag_fts`` and ``tags`` are deleted and the CSV is
    re-read, all before a single commit. Rows with fewer than three columns
    or an empty tag name are skipped; a non-integer category or post count
    is stored as 0. The ready event is set only after a successful commit;
    on any failure the error is logged, nothing is committed, and the index
    stays not ready.

    The call is a no-op (with a warning) when indexing is already in
    progress or the CSV file does not exist.
    """
    if self._indexing_in_progress:
        logger.warning("Tag FTS indexing already in progress, skipping")
        return

    if not os.path.exists(self._csv_path):
        logger.warning("CSV file not found at %s, cannot build tag index", self._csv_path)
        return

    def _as_int(raw: str) -> int:
        """Convert a CSV field to int, falling back to 0."""
        try:
            return int(raw)
        except ValueError:
            return 0

    self._indexing_in_progress = True
    self._ready.clear()
    start_time = time.time()

    try:
        self.initialize()
        if not self._schema_initialized:
            logger.error("Cannot build tag FTS index: schema not initialized")
            return

        with self._lock:
            conn = self._connect()
            try:
                conn.execute("BEGIN")

                # Clear existing data
                conn.execute("DELETE FROM tag_fts")
                conn.execute("DELETE FROM tags")

                # Parse CSV and insert in batches
                batch_size = 500
                rows = []

                # newline="" lets the csv module handle line endings itself,
                # which matters for quoted fields containing newlines.
                with open(self._csv_path, "r", encoding="utf-8", newline="") as f:
                    for row in csv.reader(f):
                        if len(row) < 3:
                            continue

                        tag_name = row[0].strip()
                        if not tag_name:
                            continue

                        rows.append((tag_name, _as_int(row[1]), _as_int(row[2])))

                        if len(rows) >= batch_size:
                            self._insert_batch(conn, rows)
                            rows = []

                # Insert remaining rows
                if rows:
                    self._insert_batch(conn, rows)

                # Count what was actually stored: duplicate tag names in the
                # CSV are ignored on insert, so counting parsed rows over-reports.
                total_inserted = conn.execute("SELECT COUNT(*) FROM tags").fetchone()[0]

                # Update metadata
                conn.execute(
                    "INSERT OR REPLACE INTO fts_metadata (key, value) VALUES (?, ?)",
                    ("last_build_time", str(time.time())),
                )
                conn.execute(
                    "INSERT OR REPLACE INTO fts_metadata (key, value) VALUES (?, ?)",
                    ("tag_count", str(total_inserted)),
                )

                conn.commit()
                elapsed = time.time() - start_time
                logger.info("Tag FTS index built: %d tags indexed in %.2fs", total_inserted, elapsed)
            finally:
                conn.close()

            # Reached only when the build committed without raising.
            self._ready.set()

    except Exception as exc:
        logger.error("Failed to build tag FTS index: %s", exc, exc_info=True)
    finally:
        self._indexing_in_progress = False
def _insert_batch(self, conn: sqlite3.Connection, rows: List[tuple]) -> None:
"""Insert a batch of rows into the database."""
# Insert into tags table
conn.executemany(
"INSERT OR IGNORE INTO tags (tag_name, category, post_count) VALUES (?, ?, ?)",
rows
)
# Get rowids and insert into FTS table
tag_names = [row[0] for row in rows]
placeholders = ",".join("?" * len(tag_names))
cursor = conn.execute(
f"SELECT rowid, tag_name FROM tags WHERE tag_name IN ({placeholders})",
tag_names
)
fts_rows = [(tag_name,) for rowid, tag_name in cursor.fetchall()]
if fts_rows:
conn.executemany("INSERT INTO tag_fts (tag_name) VALUES (?)", fts_rows)
def ensure_ready(self) -> bool:
    """Make the index queryable, reusing stored data or building it.

    If the schema is initialized and the ``tags`` table already has rows,
    the index is marked ready without rebuilding. Otherwise
    ``build_index()`` is invoked.

    Returns:
        True if the index is ready, False otherwise.
    """
    if self.is_ready():
        return True

    self.initialize()
    # Only ask for the count when the schema exists.
    stored_tags = self.get_indexed_count() if self._schema_initialized else 0
    if stored_tags > 0:
        self._ready.set()
        logger.debug("Tag FTS index already populated with %d tags", stored_tags)
        return True

    self.build_index()
    return self.is_ready()
def search(
    self,
    query: str,
    categories: Optional[List[int]] = None,
    limit: int = 20
) -> List[Dict]:
    """Search tags using FTS5 with prefix matching.

    Results are ordered by post count, highest first.

    Args:
        query: The search query string. Blank queries return no results.
        categories: Optional category IDs to filter by. None or an empty
            sequence means no category filter.
        limit: Maximum number of results to return. Zero or a negative
            value returns no results.

    Returns:
        List of dictionaries with tag_name, category, and post_count.
        An empty list is also returned when the index is not ready or the
        database query fails (the failure is logged at debug level).
    """
    # Ensure index is ready (lazy initialization)
    if not self.ensure_ready():
        if not self._warned_not_ready:
            logger.debug("Tag FTS index not ready, returning empty results")
            self._warned_not_ready = True
        return []

    if not query or not query.strip():
        return []

    fts_query = self._build_fts_query(query)
    if not fts_query:
        return []

    try:
        # SQLite treats a negative LIMIT as "no upper bound"; never let a
        # caller-supplied value turn into a full-table dump.
        if limit <= 0:
            return []

        params: list = [fts_query]
        category_clause = ""
        if categories:
            placeholders = ",".join("?" * len(categories))
            category_clause = f"AND t.category IN ({placeholders})"
            params.extend(categories)
        params.append(limit)

        sql = f"""
            SELECT t.tag_name, t.category, t.post_count
            FROM tags t
            WHERE t.tag_name IN (
                SELECT tag_name FROM tag_fts WHERE tag_fts MATCH ?
            )
            {category_clause}
            ORDER BY t.post_count DESC
            LIMIT ?
        """

        with self._lock:
            conn = self._connect(readonly=True)
            try:
                cursor = conn.execute(sql, params)
                return [
                    {"tag_name": row[0], "category": row[1], "post_count": row[2]}
                    for row in cursor.fetchall()
                ]
            finally:
                conn.close()
    except Exception as exc:
        logger.debug("Tag FTS search error for query '%s': %s", query, exc)
        return []
def get_indexed_count(self) -> int:
    """Count the rows in the ``tags`` table.

    Returns:
        The row count, or 0 when the schema has not been initialized or the
        database cannot be read.
    """
    if not self._schema_initialized:
        return 0

    try:
        with self._lock:
            connection = self._connect(readonly=True)
            try:
                first_row = connection.execute("SELECT COUNT(*) FROM tags").fetchone()
            finally:
                connection.close()
        if first_row:
            return first_row[0]
        return 0
    except Exception:
        # Any read problem is reported as "nothing indexed".
        return 0
def clear(self) -> bool:
    """Delete every row from ``tag_fts`` and ``tags`` and mark the index not ready.

    Returns:
        True if successful, False otherwise (the error is logged).
    """
    try:
        with self._lock:
            connection = self._connect()
            try:
                for statement in ("DELETE FROM tag_fts", "DELETE FROM tags"):
                    connection.execute(statement)
                connection.commit()
                # The index is empty now, so queries must wait for a rebuild.
                self._ready.clear()
                return True
            finally:
                connection.close()
    except Exception as exc:
        logger.error("Failed to clear tag FTS index: %s", exc)
        return False
# Internal helpers
def _connect(self, readonly: bool = False) -> sqlite3.Connection:
"""Create a database connection."""
uri = False
path = self._db_path
if readonly:
if not os.path.exists(path):
raise FileNotFoundError(path)
path = f"file:{path}?mode=ro"
uri = True
conn = sqlite3.connect(path, check_same_thread=False, uri=uri)
conn.row_factory = sqlite3.Row
return conn
def _build_fts_query(self, query: str) -> str:
"""Build an FTS5 query string with prefix matching.
Args:
query: The user's search query.
Returns:
FTS5 query string.
"""
# Split query into words and clean them
words = query.lower().split()
if not words:
return ""
# Escape and add prefix wildcard to each word
prefix_terms = []
for word in words:
escaped = self._escape_fts_query(word)
if escaped:
# Add prefix wildcard for substring-like matching
prefix_terms.append(f"{escaped}*")
if not prefix_terms:
return ""
# Combine terms with implicit AND (all words must match)
return " ".join(prefix_terms)
def _escape_fts_query(self, text: str) -> str:
"""Escape special FTS5 characters.
FTS5 special characters: " ( ) * : ^ -
We keep * for prefix matching but escape others.
"""
if not text:
return ""
# Replace FTS5 special characters with space
special = ['"', "(", ")", "*", ":", "^", "-", "{", "}", "[", "]"]
result = text
for char in special:
result = result.replace(char, " ")
# Collapse multiple spaces and strip
result = re.sub(r"\s+", " ", result).strip()
return result
# Module-wide singleton and the lock that guards its creation.
_tag_fts_index: Optional[TagFTSIndex] = None
_tag_fts_lock = threading.Lock()


def get_tag_fts_index() -> TagFTSIndex:
    """Return the module-wide TagFTSIndex, creating it on the first call."""
    global _tag_fts_index
    if _tag_fts_index is not None:
        return _tag_fts_index

    with _tag_fts_lock:
        # Check again under the lock before constructing the instance.
        if _tag_fts_index is None:
            _tag_fts_index = TagFTSIndex()
    return _tag_fts_index
__all__ = [
"TagFTSIndex",
"get_tag_fts_index",
"CATEGORY_NAMES",
"CATEGORY_NAME_TO_IDS",
]

421
py/utils/cache_paths.py Normal file
View File

@@ -0,0 +1,421 @@
"""Centralized cache path resolution with automatic migration support.
This module provides a unified interface for resolving cache file paths,
with automatic migration from legacy locations to the new organized
cache directory structure.
Target structure:
{settings_dir}/
└── cache/
├── symlink/
│ └── symlink_map.json
├── model/
│ └── {library_name}.sqlite
├── recipe/
│ └── {library_name}.sqlite
└── fts/
├── recipe_fts.sqlite
└── tag_fts.sqlite
"""
from __future__ import annotations
import logging
import os
import re
import shutil
from enum import Enum
from typing import List, Optional
from .settings_paths import get_project_root, get_settings_dir
logger = logging.getLogger(__name__)
class CacheType(Enum):
    """Enumerates the kinds of cache file whose paths this module resolves."""

    # SQLite caches named after a library.
    MODEL = "model"
    RECIPE = "recipe"

    # Full-text search databases.
    RECIPE_FTS = "recipe_fts"
    TAG_FTS = "tag_fts"

    # JSON symlink map.
    SYMLINK = "symlink"
# Directory (relative to the cache base directory) used by each cache type.
# Both FTS databases share the "fts" directory.
_CACHE_SUBDIRS = {
    CacheType.MODEL: "model",
    CacheType.RECIPE: "recipe",
    CacheType.RECIPE_FTS: "fts",
    CacheType.TAG_FTS: "fts",
    CacheType.SYMLINK: "symlink",
}

# File name template for each cache type; "{library_name}" is filled in
# with the sanitized library name when the path is built.
_CACHE_FILENAMES = {
    CacheType.MODEL: "{library_name}.sqlite",
    CacheType.RECIPE: "{library_name}.sqlite",
    CacheType.RECIPE_FTS: "recipe_fts.sqlite",
    CacheType.TAG_FTS: "tag_fts.sqlite",
    CacheType.SYMLINK: "symlink_map.json",
}
def get_cache_base_dir(create: bool = True) -> str:
    """Return the path of the ``cache`` directory inside the settings directory.

    Args:
        create: Forwarded to ``get_settings_dir``; when True the ``cache``
            directory itself is also created if missing.

    Returns:
        The path ``{settings_dir}/cache``.
    """
    base_dir = os.path.join(get_settings_dir(create=create), "cache")
    if create:
        os.makedirs(base_dir, exist_ok=True)
    return base_dir
def _sanitize_library_name(library_name: Optional[str]) -> str:
"""Sanitize a library name for use in filenames.
Args:
library_name: The library name to sanitize.
Returns:
A sanitized version safe for use in filenames.
"""
name = library_name or "default"
return re.sub(r"[^A-Za-z0-9_.-]", "_", name)
def get_cache_file_path(
    cache_type: CacheType,
    library_name: Optional[str] = None,
    create_dir: bool = True,
) -> str:
    """Build the canonical path of a cache file.

    Args:
        cache_type: The type of cache file.
        library_name: Library name substituted into the file name template;
            only the MODEL and RECIPE templates contain the placeholder.
        create_dir: Whether to create the containing directories when they
            do not exist.

    Returns:
        The cache base directory joined with the type's subdirectory and
        file name.
    """
    target_dir = os.path.join(
        get_cache_base_dir(create=create_dir),
        _CACHE_SUBDIRS[cache_type],
    )
    if create_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Templates without the placeholder simply ignore the keyword argument.
    file_name = _CACHE_FILENAMES[cache_type].format(
        library_name=_sanitize_library_name(library_name)
    )
    return os.path.join(target_dir, file_name)
def get_legacy_cache_paths(
    cache_type: CacheType,
    library_name: Optional[str] = None,
) -> List[str]:
    """List the pre-migration locations where a cache file may still live.

    Paths are returned in the order they should be checked. The settings
    directory is looked up without creating it; if that lookup raises, the
    project root is used as the base instead.

    Args:
        cache_type: The type of cache file.
        library_name: The library name (only used for MODEL and RECIPE types).

    Returns:
        Candidate legacy paths; the files are not checked for existence.
    """
    try:
        base_dir = get_settings_dir(create=False)
    except Exception:
        base_dir = get_project_root()

    safe_name = _sanitize_library_name(library_name)

    # MODEL and RECIPE share one layout: a per-library file in a legacy
    # directory, plus a root-level single file for the "default" library.
    per_library_layouts = {
        CacheType.MODEL: ("model_cache", "model_cache.sqlite"),
        CacheType.RECIPE: ("recipe_cache", "recipe_cache.sqlite"),
    }
    if cache_type in per_library_layouts:
        legacy_dir, root_file = per_library_layouts[cache_type]
        candidates = [os.path.join(base_dir, legacy_dir, f"{safe_name}.sqlite")]
        if safe_name.lower() in ("default", ""):
            candidates.append(os.path.join(base_dir, root_file))
        return candidates

    # FTS databases used to sit directly in the settings directory.
    if cache_type == CacheType.RECIPE_FTS:
        return [os.path.join(base_dir, "recipe_fts.sqlite")]
    if cache_type == CacheType.TAG_FTS:
        return [os.path.join(base_dir, "tag_fts.sqlite")]

    # The symlink map used to sit in cache/ without a subdirectory.
    if cache_type == CacheType.SYMLINK:
        return [os.path.join(base_dir, "cache", "symlink_map.json")]

    return []
def _cleanup_legacy_file_after_migration(
    legacy_path: str,
    canonical_path: str,
) -> bool:
    """Delete a migrated legacy file once the copy has been sanity-checked.

    The legacy file is only removed when the canonical file exists and both
    files have the same size. Any error is logged as a warning and reported
    as a failed cleanup rather than raised.

    Args:
        legacy_path: The legacy file path to remove.
        canonical_path: The canonical path where the file was copied to.

    Returns:
        True if the legacy file was removed, False otherwise.
    """
    try:
        if os.path.exists(canonical_path):
            legacy_bytes = os.path.getsize(legacy_path)
            canonical_bytes = os.path.getsize(canonical_path)
            if legacy_bytes == canonical_bytes:
                os.remove(legacy_path)
                logger.info("Cleaned up legacy cache file: %s", legacy_path)
                # The containing legacy folder may now be empty; tidy it up too.
                _cleanup_empty_legacy_directories(legacy_path)
                return True

            # Sizes differ, so the copy cannot be trusted; keep the original.
            logger.warning(
                "Skipping cleanup of %s: file size mismatch (legacy=%d, canonical=%d)",
                legacy_path,
                legacy_bytes,
                canonical_bytes,
            )
            return False

        logger.warning(
            "Skipping cleanup of %s: canonical file not found at %s",
            legacy_path,
            canonical_path,
        )
        return False
    except Exception as exc:
        logger.warning(
            "Failed to cleanup legacy cache file %s: %s",
            legacy_path,
            exc,
        )
        return False
def _cleanup_empty_legacy_directories(legacy_path: str) -> None:
    """Remove empty legacy cache folders found among the ancestors of a file.

    Every ancestor directory of ``legacy_path`` is visited; one is removed
    only when it is named ``model_cache`` or ``recipe_cache`` and is empty.
    Failures are never raised to the caller.

    Args:
        legacy_path: The legacy file path whose parent directories should be cleaned.
    """
    removable_names = ("model_cache", "recipe_cache")
    try:
        directory = os.path.dirname(legacy_path)
        while directory:
            is_empty_legacy_dir = (
                os.path.basename(directory) in removable_names
                and os.path.isdir(directory)
                and not os.listdir(directory)
            )
            if is_empty_legacy_dir:
                try:
                    os.rmdir(directory)
                    logger.info("Removed empty legacy directory: %s", directory)
                except Exception:
                    # Best effort: a directory that cannot be removed is left in place.
                    pass

            # Step up one level; dirname() returns its input at the filesystem root.
            previous, directory = directory, os.path.dirname(directory)
            if directory == previous:
                break
    except Exception as exc:
        logger.debug("Failed to cleanup empty legacy directories: %s", exc)
def resolve_cache_path_with_migration(
    cache_type: CacheType,
    library_name: Optional[str] = None,
    env_override: Optional[str] = None,
) -> str:
    """Resolve the cache file path, migrating from legacy locations if needed.

    Migration is lazy: when nothing exists at the canonical location, the
    legacy locations are tried in order and the first one that can be copied
    is moved into place. The copy is staged next to the destination and then
    renamed onto it, so a failed or interrupted copy never leaves a truncated
    file at the canonical path (which a later call would otherwise accept as
    a valid cache). After a successful migration the legacy file is cleaned up.

    Args:
        cache_type: The type of cache file.
        library_name: The library name (only used for MODEL and RECIPE types).
        env_override: Optional environment variable value that overrides all
            path resolution. When set, returns this path directly without
            any migration.

    Returns:
        The resolved path to use for the cache file. The file itself may not
        exist yet when no legacy file could be migrated.
    """
    # Environment override bypasses all migration logic
    if env_override:
        return env_override

    canonical_path = get_cache_file_path(cache_type, library_name, create_dir=True)

    # If file already exists at canonical location, use it
    if os.path.exists(canonical_path):
        return canonical_path

    # Stage in the destination directory so os.replace() stays on one filesystem;
    # the pid keeps concurrent processes from sharing a staging file.
    staging_path = f"{canonical_path}.{os.getpid()}.migrating"

    # NOTE(review): only the main file is copied. If legacy SQLite caches can have
    # -wal/-shm sidecar files, those are not migrated here - confirm this is acceptable.
    for legacy_path in get_legacy_cache_paths(cache_type, library_name):
        if not os.path.exists(legacy_path):
            continue

        try:
            shutil.copy2(legacy_path, staging_path)
            os.replace(staging_path, canonical_path)
        except Exception as exc:
            logger.warning(
                "Failed to migrate %s cache from %s: %s",
                cache_type.value,
                legacy_path,
                exc,
            )
            # Drop any partial staging file so it cannot be mistaken for a cache.
            try:
                os.remove(staging_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_exc:
                logger.debug(
                    "Failed to remove staging file %s: %s",
                    staging_path,
                    cleanup_exc,
                )
            continue

        logger.info(
            "Migrated %s cache from %s to %s",
            cache_type.value,
            legacy_path,
            canonical_path,
        )
        _cleanup_legacy_file_after_migration(legacy_path, canonical_path)
        return canonical_path

    # No legacy file found; return canonical path (will be created fresh)
    return canonical_path
def get_legacy_cache_files_for_cleanup() -> List[str]:
    """Collect legacy cache files whose canonical counterpart already exists.

    MODEL and RECIPE caches are checked for the "default" library and for
    every ``*.sqlite`` file found in their legacy directory; every other
    cache type is checked once without a library name.

    Returns:
        A list of legacy file paths that are safe to remove. Empty when the
        settings directory cannot be resolved.
    """
    removable: List[str] = []
    try:
        settings_dir = get_settings_dir(create=False)
    except Exception:
        return removable

    sqlite_suffix = ".sqlite"
    for cache_type in CacheType:
        if cache_type not in (CacheType.MODEL, CacheType.RECIPE):
            # Single-file caches have no per-library variants.
            _check_legacy_for_cleanup(cache_type, None, removable)
            continue

        # Per-library caches: start with the default library ...
        _check_legacy_for_cleanup(cache_type, "default", removable)

        # ... then every library that left a file in the legacy directory.
        if cache_type == CacheType.MODEL:
            legacy_dir = os.path.join(settings_dir, "model_cache")
        else:
            legacy_dir = os.path.join(settings_dir, "recipe_cache")
        if not os.path.isdir(legacy_dir):
            continue
        try:
            for entry in os.listdir(legacy_dir):
                if entry.endswith(sqlite_suffix):
                    stem = entry[: -len(sqlite_suffix)]
                    _check_legacy_for_cleanup(cache_type, stem, removable)
        except Exception:
            # Best effort: an unreadable legacy directory is skipped.
            pass

    return removable
def _check_legacy_for_cleanup(
    cache_type: CacheType,
    library_name: Optional[str],
    files_to_remove: List[str],
) -> None:
    """Append existing legacy files for one cache to ``files_to_remove``.

    Nothing is added unless the canonical cache file already exists. Paths
    already present in the list are not added again.

    Args:
        cache_type: The type of cache file.
        library_name: The library name (only used for MODEL and RECIPE types).
        files_to_remove: List to append removable files to (mutated in place).
    """
    canonical_path = get_cache_file_path(cache_type, library_name, create_dir=False)
    if os.path.exists(canonical_path):
        for candidate in get_legacy_cache_paths(cache_type, library_name):
            if not os.path.exists(candidate):
                continue
            if candidate in files_to_remove:
                continue
            files_to_remove.append(candidate)
def cleanup_legacy_cache_files(dry_run: bool = True) -> List[str]:
    """Remove legacy cache files that have been migrated.

    After the files are removed, the ``model_cache`` and ``recipe_cache``
    legacy directories are removed as well when they are empty. Each
    directory is handled independently, so a failure on one does not prevent
    the other from being cleaned up. The whole operation is best effort:
    failures are logged, never raised.

    Args:
        dry_run: If True, only return the list of files that would be removed
            without actually removing them.

    Returns:
        A list of files that were (or would be) removed.
    """
    files = get_legacy_cache_files_for_cleanup()
    if dry_run or not files:
        return files

    removed: List[str] = []
    for file_path in files:
        try:
            os.remove(file_path)
        except Exception as exc:
            logger.warning("Failed to remove legacy cache file %s: %s", file_path, exc)
        else:
            removed.append(file_path)
            logger.info("Removed legacy cache file: %s", file_path)

    # Try to remove empty legacy directories
    try:
        settings_dir = get_settings_dir(create=False)
    except Exception as exc:
        logger.debug("Skipping legacy directory cleanup: %s", exc)
        return removed

    for legacy_dir_name in ("model_cache", "recipe_cache"):
        legacy_dir = os.path.join(settings_dir, legacy_dir_name)
        try:
            if os.path.isdir(legacy_dir) and not os.listdir(legacy_dir):
                os.rmdir(legacy_dir)
                logger.info("Removed empty legacy directory: %s", legacy_dir)
        except Exception as exc:
            # Keep going: the other legacy directory may still be removable.
            logger.debug("Failed to remove legacy directory %s: %s", legacy_dir, exc)

    return removed

221787
refs/danbooru_e621_merged.csv Normal file

File diff suppressed because one or more lines are too long

View File

@@ -4,6 +4,7 @@ import os
import pytest
from py import config as config_module
from py.utils import cache_paths as cache_paths_module
def _normalize(path: str) -> str:
@@ -28,9 +29,14 @@ def _setup_paths(monkeypatch: pytest.MonkeyPatch, tmp_path):
}
return mapping.get(kind, [])
def fake_get_settings_dir(create: bool = True) -> str:
return str(settings_dir)
monkeypatch.setattr(config_module.folder_paths, "get_folder_paths", fake_get_folder_paths)
monkeypatch.setattr(config_module, "standalone_mode", True)
monkeypatch.setattr(config_module, "get_settings_dir", lambda create=True: str(settings_dir))
monkeypatch.setattr(config_module, "get_settings_dir", fake_get_settings_dir)
# Also patch cache_paths module which has its own import of get_settings_dir
monkeypatch.setattr(cache_paths_module, "get_settings_dir", fake_get_settings_dir)
return loras_dir, settings_dir
@@ -57,7 +63,7 @@ def test_symlink_scan_skips_file_links(monkeypatch: pytest.MonkeyPatch, tmp_path
normalized_file_real = _normalize(os.path.realpath(file_target))
assert normalized_file_real not in cfg._path_mappings
cache_path = settings_dir / "cache" / "symlink_map.json"
cache_path = settings_dir / "cache" / "symlink" / "symlink_map.json"
assert cache_path.exists()
@@ -71,7 +77,7 @@ def test_symlink_cache_reuses_previous_scan(monkeypatch: pytest.MonkeyPatch, tmp
first_cfg = config_module.Config()
cached_mappings = dict(first_cfg._path_mappings)
cache_path = settings_dir / "cache" / "symlink_map.json"
cache_path = settings_dir / "cache" / "symlink" / "symlink_map.json"
assert cache_path.exists()
def fail_scan(self):
@@ -97,7 +103,7 @@ def test_symlink_cache_survives_noise_mtime(monkeypatch: pytest.MonkeyPatch, tmp
noise_file = recipes_dir / "touchme.txt"
first_cfg = config_module.Config()
cache_path = settings_dir / "cache" / "symlink_map.json"
cache_path = settings_dir / "cache" / "symlink" / "symlink_map.json"
assert cache_path.exists()
# Update a noisy path to bump parent directory mtime
@@ -159,9 +165,14 @@ def test_symlink_roots_are_preserved(monkeypatch: pytest.MonkeyPatch, tmp_path):
}
return mapping.get(kind, [])
def fake_get_settings_dir(create: bool = True) -> str:
return str(settings_dir)
monkeypatch.setattr(config_module.folder_paths, "get_folder_paths", fake_get_folder_paths)
monkeypatch.setattr(config_module, "standalone_mode", True)
monkeypatch.setattr(config_module, "get_settings_dir", lambda create=True: str(settings_dir))
monkeypatch.setattr(config_module, "get_settings_dir", fake_get_settings_dir)
# Also patch cache_paths module which has its own import of get_settings_dir
monkeypatch.setattr(cache_paths_module, "get_settings_dir", fake_get_settings_dir)
cfg = config_module.Config()
@@ -169,7 +180,7 @@ def test_symlink_roots_are_preserved(monkeypatch: pytest.MonkeyPatch, tmp_path):
normalized_link = _normalize(str(loras_link))
assert cfg._path_mappings[normalized_real] == normalized_link
cache_path = settings_dir / "cache" / "symlink_map.json"
cache_path = settings_dir / "cache" / "symlink" / "symlink_map.json"
payload = json.loads(cache_path.read_text(encoding="utf-8"))
assert payload["path_mappings"][normalized_real] == normalized_link
@@ -271,3 +282,60 @@ def test_retargeted_deep_symlink_triggers_rescan(monkeypatch: pytest.MonkeyPatch
second_cfg = config_module.Config()
assert _normalize(str(target_v2)) in second_cfg._path_mappings
assert _normalize(str(target_v1)) not in second_cfg._path_mappings
def test_legacy_symlink_cache_automatic_cleanup(monkeypatch: pytest.MonkeyPatch, tmp_path):
    """A symlink cache at the old location is migrated and then deleted when Config starts."""
    settings_dir = tmp_path / "settings"

    # Model roots that Config will scan.
    roots = {name: tmp_path / name for name in ("loras", "checkpoints", "embeddings")}
    for root in roots.values():
        root.mkdir()
    folder_listing = {name: [str(root)] for name, root in roots.items()}
    folder_listing["unet"] = []

    def fake_get_folder_paths(kind: str):
        # Hand out a fresh list per call so callers cannot mutate the table.
        return list(folder_listing.get(kind, []))

    def fake_get_settings_dir(create: bool = True) -> str:
        return str(settings_dir)

    monkeypatch.setattr(config_module.folder_paths, "get_folder_paths", fake_get_folder_paths)
    monkeypatch.setattr(config_module, "standalone_mode", True)
    # Both modules hold their own reference to get_settings_dir, so patch each one.
    for module in (config_module, cache_paths_module):
        monkeypatch.setattr(module, "get_settings_dir", fake_get_settings_dir)

    # Seed a cache file at the pre-migration location.
    old_location = settings_dir / "cache" / "symlink_map.json"
    old_location.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "fingerprint": {"roots": []},
        "path_mappings": {
            "/legacy/target": "/legacy/link"
        }
    }
    old_location.write_text(json.dumps(payload), encoding="utf-8")
    assert old_location.exists()

    # Creating Config is expected to trigger migration and automatic cleanup.
    cfg = config_module.Config()

    # The cache now lives in its own subdirectory ...
    new_location = settings_dir / "cache" / "symlink" / "symlink_map.json"
    assert new_location.exists()

    # ... and the old copy is gone.
    assert not old_location.exists()

    # Config should still expose usable mappings.
    assert isinstance(cfg._path_mappings, dict)

View File

@@ -37,7 +37,7 @@ def test_portable_settings_use_project_root(tmp_path, monkeypatch):
monkeypatch.delenv("LORA_MANAGER_CACHE_DB", raising=False)
cache = cache_module.PersistentModelCache(library_name="portable_lib")
expected_cache_path = tmp_path / "model_cache" / "portable_lib.sqlite"
expected_cache_path = tmp_path / "cache" / "model" / "portable_lib.sqlite"
assert cache.get_database_path() == str(expected_cache_path)
assert expected_cache_path.parent.is_dir()

262
tests/test_tag_fts_index.py Normal file
View File

@@ -0,0 +1,262 @@
"""Tests for TagFTSIndex functionality."""
import os
import tempfile
from typing import List
import pytest
from py.services.tag_fts_index import (
TagFTSIndex,
CATEGORY_NAMES,
CATEGORY_NAME_TO_IDS,
)
@pytest.fixture
def temp_db_path():
    """Yield the path of a throwaway ``.sqlite`` file and delete it afterwards.

    Teardown also removes ``-wal`` and ``-shm`` files next to it if present.
    """
    with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as handle:
        db_path = handle.name

    yield db_path

    # Teardown: the database file first, then its sidecar files.
    for leftover in (db_path, db_path + "-wal", db_path + "-shm"):
        if os.path.exists(leftover):
            os.unlink(leftover)
@pytest.fixture
def temp_csv_path():
    """Yield the path of a temporary tag CSV with ten sample rows, then delete it."""
    # Same layout as danbooru_e621_merged.csv: tag_name,category,post_count,aliases
    sample_rows = (
        '1girl,0,6008644,"1girls,sole_female"\n',
        'highres,5,5256195,"high_res,high_resolution,hires"\n',
        'solo,0,5000954,"alone,female_solo,single"\n',
        'hatsune_miku,4,500000,"miku"\n',
        'konpaku_youmu,4,150000,"youmu"\n',
        'artist_request,1,100000,""\n',
        'touhou,3,300000,"touhou_project"\n',
        'mammal,12,3437444,"cetancodont"\n',
        'anthro,7,3381927,"anthropomorphic"\n',
        'hi_res,14,3116617,"high_res"\n',
    )
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, encoding="utf-8") as handle:
        handle.writelines(sample_rows)
        csv_path = handle.name

    yield csv_path

    # Teardown
    if os.path.exists(csv_path):
        os.unlink(csv_path)
class TestTagFTSIndexBasic:
    """Construction, schema setup and simple accessors of TagFTSIndex."""

    def test_initialize_creates_tables(self, temp_db_path, temp_csv_path):
        """initialize() marks the schema as initialized."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)
        index.initialize()

        assert index._schema_initialized is True

    def test_get_database_path(self, temp_db_path, temp_csv_path):
        """get_database_path() echoes the path given to the constructor."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)

        assert index.get_database_path() == temp_db_path

    def test_get_csv_path(self, temp_db_path, temp_csv_path):
        """get_csv_path() echoes the path given to the constructor."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)

        assert index.get_csv_path() == temp_csv_path

    def test_is_ready_initially_false(self, temp_db_path, temp_csv_path):
        """A freshly constructed index is not ready until it has been built."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)

        assert index.is_ready() is False
class TestTagFTSIndexBuild:
    """Building the index from a CSV file."""

    def test_build_index_from_csv(self, temp_db_path, temp_csv_path):
        """All ten rows of the sample CSV end up in the index."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)
        index.build_index()

        assert index.is_ready() is True
        assert index.get_indexed_count() == 10

    def test_build_index_nonexistent_csv(self, temp_db_path):
        """A missing CSV leaves the index empty and not ready instead of raising."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path="/nonexistent/path.csv")
        index.build_index()

        assert index.is_ready() is False
        assert index.get_indexed_count() == 0

    def test_ensure_ready_builds_index(self, temp_db_path, temp_csv_path):
        """ensure_ready() builds the index on demand and reports success."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)

        # Nothing has been built yet.
        assert index.is_ready() is False

        # ensure_ready should build the index.
        became_ready = index.ensure_ready()

        assert became_ready is True
        assert index.is_ready() is True
class TestTagFTSIndexSearch:
    """Search behaviour against an index built from the sample CSV."""

    @pytest.fixture
    def populated_fts(self, temp_db_path, temp_csv_path):
        """Return a TagFTSIndex whose index has already been built."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)
        index.build_index()
        return index

    def test_search_basic(self, populated_fts):
        """An exact tag name is found."""
        hits = populated_fts.search("1girl")

        assert len(hits) >= 1
        assert any(hit["tag_name"] == "1girl" for hit in hits)

    def test_search_prefix(self, populated_fts):
        """A leading fragment of a tag name is enough to find it."""
        hits = populated_fts.search("hatsu")

        assert len(hits) >= 1
        assert any(hit["tag_name"] == "hatsune_miku" for hit in hits)

    def test_search_returns_enriched_results(self, populated_fts):
        """Each hit carries tag_name, category and post_count."""
        hits = populated_fts.search("miku")

        assert len(hits) >= 1
        top_hit = hits[0]
        for field in ("tag_name", "category", "post_count"):
            assert field in top_hit
        assert top_hit["tag_name"] == "hatsune_miku"
        assert top_hit["category"] == 4  # Character category
        assert top_hit["post_count"] == 500000

    def test_search_with_category_filter(self, populated_fts):
        """Restricting to the character categories still finds a character tag."""
        character_ids = [4, 11]
        hits = populated_fts.search("konpaku", categories=character_ids)

        assert len(hits) >= 1
        assert all(hit["category"] in character_ids for hit in hits)

    def test_search_with_category_filter_excludes_others(self, populated_fts):
        """Tags from other categories never leak through the category filter."""
        general_ids = [0, 7]
        # "hi" could match "highres" (category 5) or "hi_res" (category 14),
        # but neither belongs to the general categories requested here.
        hits = populated_fts.search("hi", categories=general_ids)

        assert all(hit["category"] in general_ids for hit in hits)

    def test_search_empty_query_returns_empty(self, populated_fts):
        """An empty query yields no results."""
        assert populated_fts.search("") == []

    def test_search_no_matches_returns_empty(self, populated_fts):
        """A query that matches nothing yields an empty list."""
        assert populated_fts.search("zzzznonexistent") == []

    def test_search_results_sorted_by_post_count(self, populated_fts):
        """Hits come back ordered by post_count, highest first."""
        hits = populated_fts.search("1girl", limit=10)

        counts = [hit["post_count"] for hit in hits]
        assert counts == sorted(counts, reverse=True)

    def test_search_limit(self, populated_fts):
        """The limit argument caps the number of hits."""
        hits = populated_fts.search("girl", limit=1)

        assert len(hits) <= 1
class TestTagFTSIndexClear:
    """Clearing a previously built index."""

    def test_clear_removes_all_data(self, temp_db_path, temp_csv_path):
        """After clear() the index is empty and no longer ready."""
        index = TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)
        index.build_index()
        # Precondition: there is something to clear.
        assert index.get_indexed_count() > 0

        index.clear()

        assert index.get_indexed_count() == 0
        assert index.is_ready() is False
class TestCategoryMappings:
    """Sanity checks for the category lookup tables."""

    def test_category_names_complete(self):
        """Every category id in use has an entry in CATEGORY_NAMES."""
        for category_id in (0, 1, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15):
            assert category_id in CATEGORY_NAMES

    def test_category_name_to_ids_complete(self):
        """Every category name maps to a non-empty list of ids."""
        for name in ("general", "artist", "copyright", "character", "meta", "species", "lore"):
            assert name in CATEGORY_NAME_TO_IDS
            ids = CATEGORY_NAME_TO_IDS[name]
            assert isinstance(ids, list)
            assert len(ids) > 0

    def test_category_name_to_ids_includes_both_platforms(self):
        """Names shared by both sites list the Danbooru id and the e621 id."""
        # General: Danbooru (0) and e621 (7)
        general_ids = CATEGORY_NAME_TO_IDS["general"]
        assert 0 in general_ids
        assert 7 in general_ids

        # Character: Danbooru (4) and e621 (11)
        character_ids = CATEGORY_NAME_TO_IDS["character"]
        assert 4 in character_ids
        assert 11 in character_ids
class TestFTSQueryBuilding:
    """Translation of user input into an FTS query string."""

    @pytest.fixture
    def fts_instance(self, temp_db_path, temp_csv_path):
        """Return an index instance; no index is built."""
        return TagFTSIndex(db_path=temp_db_path, csv_path=temp_csv_path)

    def test_build_fts_query_simple(self, fts_instance):
        """A single word becomes a prefix term."""
        assert fts_instance._build_fts_query("test") == "test*"

    def test_build_fts_query_multiple_words(self, fts_instance):
        """Each word becomes its own prefix term."""
        assert fts_instance._build_fts_query("test query") == "test* query*"

    def test_build_fts_query_escapes_special_chars(self, fts_instance):
        """A colon in the input never reaches the generated query."""
        built = fts_instance._build_fts_query("test:query")

        assert ":" not in built

    def test_build_fts_query_empty_returns_empty(self, fts_instance):
        """Empty input produces an empty query string."""
        assert fts_instance._build_fts_query("") == ""

View File

@@ -0,0 +1,529 @@
"""Unit tests for the cache_paths module."""
import os
import shutil
import tempfile
from pathlib import Path
import pytest
from py.utils.cache_paths import (
CacheType,
cleanup_legacy_cache_files,
get_cache_base_dir,
get_cache_file_path,
get_legacy_cache_files_for_cleanup,
get_legacy_cache_paths,
resolve_cache_path_with_migration,
)
class TestCacheType:
    """Tests for the CacheType enum."""

    def test_enum_values(self):
        """Each member keeps its expected string value."""
        expected_values = (
            (CacheType.MODEL, "model"),
            (CacheType.RECIPE, "recipe"),
            (CacheType.RECIPE_FTS, "recipe_fts"),
            (CacheType.TAG_FTS, "tag_fts"),
            (CacheType.SYMLINK, "symlink"),
        )
        for member, value in expected_values:
            assert member.value == value
class TestGetCacheBaseDir:
    """Tests for get_cache_base_dir function."""

    def test_returns_cache_subdirectory(self, tmp_path, monkeypatch):
        """The base directory is named "cache" and exists after creation.

        The settings directory is redirected to ``tmp_path`` so the test does
        not create directories in the real settings location.
        """
        settings_dir = tmp_path / "settings"
        settings_dir.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(settings_dir)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)

        cache_dir = get_cache_base_dir(create=True)

        assert cache_dir.endswith("cache")
        assert os.path.isdir(cache_dir)

    def test_creates_directory_when_requested(self, tmp_path, monkeypatch):
        """With create=True the cache directory is created under the settings dir."""
        settings_dir = tmp_path / "settings"
        settings_dir.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(settings_dir)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)

        cache_dir = get_cache_base_dir(create=True)

        assert os.path.isdir(cache_dir)
        assert cache_dir == str(settings_dir / "cache")
class TestGetCacheFilePath:
    """Tests for get_cache_file_path function."""

    @pytest.fixture
    def settings_dir(self, tmp_path, monkeypatch):
        """Create an isolated settings directory and point cache_paths at it."""
        directory = tmp_path / "settings"
        directory.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(directory)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)
        return directory

    def test_model_cache_path(self, settings_dir):
        """Model caches live in cache/model/<library>.sqlite and the folder is created."""
        resolved = get_cache_file_path(CacheType.MODEL, "my_library", create_dir=True)

        expected = settings_dir / "cache" / "model" / "my_library.sqlite"
        assert resolved == str(expected)
        assert os.path.isdir(expected.parent)

    def test_recipe_cache_path(self, settings_dir):
        """Recipe caches live in cache/recipe/<library>.sqlite."""
        resolved = get_cache_file_path(CacheType.RECIPE, "default", create_dir=True)

        assert resolved == str(settings_dir / "cache" / "recipe" / "default.sqlite")

    def test_recipe_fts_path(self, settings_dir):
        """The recipe FTS index lives in cache/fts/recipe_fts.sqlite."""
        resolved = get_cache_file_path(CacheType.RECIPE_FTS, create_dir=True)

        assert resolved == str(settings_dir / "cache" / "fts" / "recipe_fts.sqlite")

    def test_tag_fts_path(self, settings_dir):
        """The tag FTS index lives in cache/fts/tag_fts.sqlite."""
        resolved = get_cache_file_path(CacheType.TAG_FTS, create_dir=True)

        assert resolved == str(settings_dir / "cache" / "fts" / "tag_fts.sqlite")

    def test_symlink_path(self, settings_dir):
        """The symlink map lives in cache/symlink/symlink_map.json."""
        resolved = get_cache_file_path(CacheType.SYMLINK, create_dir=True)

        assert resolved == str(settings_dir / "cache" / "symlink" / "symlink_map.json")

    def test_sanitizes_library_name(self, settings_dir):
        """Path separators and colons in a library name are replaced with underscores."""
        resolved = get_cache_file_path(CacheType.MODEL, "my/bad:name", create_dir=True)

        assert "my_bad_name" in resolved

    def test_none_library_name_defaults_to_default(self, settings_dir):
        """A missing library name resolves to the "default" library file."""
        resolved = get_cache_file_path(CacheType.MODEL, None, create_dir=True)

        assert "default.sqlite" in resolved
class TestGetLegacyCachePaths:
    """Tests for get_legacy_cache_paths function."""

    @pytest.fixture
    def settings_dir(self, tmp_path, monkeypatch):
        """Create an isolated settings directory and point cache_paths at it."""
        directory = tmp_path / "settings"
        directory.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(directory)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)
        return directory

    def test_model_legacy_paths_for_default(self, settings_dir):
        """The default library has a per-library and a root-level legacy path."""
        candidates = get_legacy_cache_paths(CacheType.MODEL, "default")

        assert len(candidates) == 2
        assert str(settings_dir / "model_cache" / "default.sqlite") in candidates
        assert str(settings_dir / "model_cache.sqlite") in candidates

    def test_model_legacy_paths_for_named_library(self, settings_dir):
        """A named library only has the per-library legacy path."""
        candidates = get_legacy_cache_paths(CacheType.MODEL, "my_library")

        assert len(candidates) == 1
        assert str(settings_dir / "model_cache" / "my_library.sqlite") in candidates

    def test_recipe_legacy_paths(self, settings_dir):
        """Recipe caches mirror the model layout for the default library."""
        candidates = get_legacy_cache_paths(CacheType.RECIPE, "default")

        assert len(candidates) == 2
        assert str(settings_dir / "recipe_cache" / "default.sqlite") in candidates
        assert str(settings_dir / "recipe_cache.sqlite") in candidates

    def test_recipe_fts_legacy_path(self, settings_dir):
        """The recipe FTS index used to sit at the settings root."""
        candidates = get_legacy_cache_paths(CacheType.RECIPE_FTS)

        assert len(candidates) == 1
        assert str(settings_dir / "recipe_fts.sqlite") in candidates

    def test_tag_fts_legacy_path(self, settings_dir):
        """The tag FTS index used to sit at the settings root."""
        candidates = get_legacy_cache_paths(CacheType.TAG_FTS)

        assert len(candidates) == 1
        assert str(settings_dir / "tag_fts.sqlite") in candidates

    def test_symlink_legacy_path(self, settings_dir):
        """The symlink map used to sit directly inside cache/."""
        candidates = get_legacy_cache_paths(CacheType.SYMLINK)

        assert len(candidates) == 1
        assert str(settings_dir / "cache" / "symlink_map.json") in candidates
class TestResolveCachePathWithMigration:
    """Tests for resolve_cache_path_with_migration function."""

    @pytest.fixture
    def settings_dir(self, tmp_path, monkeypatch):
        """Create an isolated settings directory and point cache_paths at it."""
        directory = tmp_path / "settings"
        directory.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(directory)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)
        return directory

    def test_returns_env_override_when_set(self, settings_dir):
        """An override path is returned as given."""
        override_path = "/custom/path/cache.sqlite"

        resolved = resolve_cache_path_with_migration(
            CacheType.MODEL,
            library_name="default",
            env_override=override_path,
        )

        assert resolved == override_path

    def test_returns_canonical_path_when_exists(self, settings_dir):
        """An existing canonical file is used as-is."""
        canonical = settings_dir / "cache" / "model" / "default.sqlite"
        canonical.parent.mkdir(parents=True)
        canonical.write_text("existing")

        resolved = resolve_cache_path_with_migration(CacheType.MODEL, "default")

        assert resolved == str(canonical)

    def test_migrates_from_legacy_root_level_cache(self, settings_dir):
        """A root-level legacy cache is copied to the canonical path and then deleted."""
        legacy_file = settings_dir / "model_cache.sqlite"
        legacy_file.write_text("legacy data")

        resolved = resolve_cache_path_with_migration(CacheType.MODEL, "default")

        canonical = settings_dir / "cache" / "model" / "default.sqlite"
        # The canonical path is returned ...
        assert resolved == str(canonical)
        # ... it holds the migrated content ...
        assert canonical.exists()
        assert canonical.read_text() == "legacy data"
        # ... and the legacy file has been cleaned up automatically.
        assert not legacy_file.exists()

    def test_migrates_from_legacy_per_library_cache(self, settings_dir):
        """A per-library legacy cache is migrated and its empty folder removed."""
        legacy_folder = settings_dir / "model_cache"
        legacy_folder.mkdir()
        legacy_file = legacy_folder / "my_library.sqlite"
        legacy_file.write_text("legacy library data")

        resolved = resolve_cache_path_with_migration(CacheType.MODEL, "my_library")

        canonical = settings_dir / "cache" / "model" / "my_library.sqlite"
        assert resolved == str(canonical)
        assert canonical.exists()
        assert canonical.read_text() == "legacy library data"
        # The legacy file and its now-empty directory are both gone.
        assert not legacy_file.exists()
        assert not legacy_folder.exists()

    def test_prefers_per_library_over_root_for_migration(self, settings_dir):
        """When both legacy files exist, the per-library one is migrated."""
        root_level = settings_dir / "model_cache.sqlite"
        root_level.write_text("root legacy")
        legacy_folder = settings_dir / "model_cache"
        legacy_folder.mkdir()
        per_library = legacy_folder / "default.sqlite"
        per_library.write_text("library legacy")

        resolved = resolve_cache_path_with_migration(CacheType.MODEL, "default")

        canonical = settings_dir / "cache" / "model" / "default.sqlite"
        assert resolved == str(canonical)
        # The per-library path comes first in the legacy list, so its content is used.
        assert canonical.read_text() == "library legacy"

    def test_returns_canonical_path_when_no_legacy_exists(self, settings_dir):
        """Without any legacy file, only the target directory is prepared."""
        resolved = resolve_cache_path_with_migration(CacheType.MODEL, "new_library")

        canonical = settings_dir / "cache" / "model" / "new_library.sqlite"
        assert resolved == str(canonical)
        # The directory is created, but the cache file is not.
        assert canonical.parent.exists()
        assert not canonical.exists()
class TestLegacyCacheCleanup:
    """Tests for legacy cache cleanup functions."""

    @staticmethod
    def _seed_canonical_and_legacy(tmp_path, monkeypatch):
        """Create a settings dir with a canonical cache and a legacy root cache.

        Patches ``py.utils.cache_paths.get_settings_dir`` to return the
        temporary directory and returns the path of the legacy file.
        """
        root = tmp_path / "settings"
        root.mkdir()
        monkeypatch.setattr(
            "py.utils.cache_paths.get_settings_dir",
            lambda create=True: str(root),
        )

        current = root / "cache" / "model" / "default.sqlite"
        current.parent.mkdir(parents=True)
        current.write_text("canonical")

        stale = root / "model_cache.sqlite"
        stale.write_text("legacy")
        return stale

    def test_get_legacy_cache_files_for_cleanup(self, tmp_path, monkeypatch):
        """The legacy root cache is listed as a cleanup candidate."""
        stale = self._seed_canonical_and_legacy(tmp_path, monkeypatch)

        candidates = get_legacy_cache_files_for_cleanup()

        assert str(stale) in candidates

    def test_cleanup_legacy_cache_files_dry_run(self, tmp_path, monkeypatch):
        """A dry run reports the legacy file but leaves it on disk."""
        stale = self._seed_canonical_and_legacy(tmp_path, monkeypatch)

        reported = cleanup_legacy_cache_files(dry_run=True)

        assert str(stale) in reported
        # File should still exist (dry run)
        assert stale.exists()

    def test_cleanup_legacy_cache_files_actual(self, tmp_path, monkeypatch):
        """A real run reports the legacy file and deletes it."""
        stale = self._seed_canonical_and_legacy(tmp_path, monkeypatch)

        reported = cleanup_legacy_cache_files(dry_run=False)

        assert str(stale) in reported
        # File should be deleted
        assert not stale.exists()
class TestAutomaticCleanup:
    """Tests for automatic cleanup during migration."""

    def test_automatic_cleanup_on_migration(self, tmp_path, monkeypatch):
        """Test that legacy files are automatically cleaned up after migration."""
        settings_dir = tmp_path / "settings"
        settings_dir.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(settings_dir)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)

        # Create a legacy cache file
        legacy_dir = settings_dir / "model_cache"
        legacy_dir.mkdir()
        legacy_file = legacy_dir / "default.sqlite"
        legacy_file.write_text("test data")

        # Verify legacy file exists
        assert legacy_file.exists()

        # Trigger migration (this should auto-cleanup)
        resolved_path = resolve_cache_path_with_migration(CacheType.MODEL, "default")

        # Verify canonical file exists
        canonical_path = settings_dir / "cache" / "model" / "default.sqlite"
        assert resolved_path == str(canonical_path)
        assert canonical_path.exists()
        assert canonical_path.read_text() == "test data"

        # Verify legacy file was cleaned up
        assert not legacy_file.exists()

        # Verify empty directory was cleaned up
        assert not legacy_dir.exists()

    def test_automatic_cleanup_with_verification(self, tmp_path, monkeypatch):
        """Test that migrated content is intact once the legacy file is removed."""
        settings_dir = tmp_path / "settings"
        settings_dir.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(settings_dir)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)

        # Create legacy cache
        legacy_dir = settings_dir / "recipe_cache"
        legacy_dir.mkdir()
        legacy_file = legacy_dir / "my_library.sqlite"
        legacy_file.write_text("data")

        # Trigger migration
        resolved_path = resolve_cache_path_with_migration(CacheType.RECIPE, "my_library")
        canonical_path = settings_dir / "cache" / "recipe" / "my_library.sqlite"

        # The returned path must be the canonical one (previously unchecked).
        assert resolved_path == str(canonical_path)

        # After migration only the canonical copy remains; auto-cleanup has
        # already removed the legacy file.
        assert canonical_path.exists()
        assert not legacy_file.exists()

        # File content should match (integrity check)
        assert canonical_path.read_text() == "data"

        # Directory should be cleaned up
        assert not legacy_dir.exists()

    def test_automatic_cleanup_multiple_cache_types(self, tmp_path, monkeypatch):
        """Test automatic cleanup for different cache types."""
        settings_dir = tmp_path / "settings"
        settings_dir.mkdir()

        def fake_get_settings_dir(create: bool = True) -> str:
            return str(settings_dir)

        monkeypatch.setattr("py.utils.cache_paths.get_settings_dir", fake_get_settings_dir)

        # Test RECIPE_FTS migration
        legacy_fts = settings_dir / "recipe_fts.sqlite"
        legacy_fts.write_text("fts data")
        resolved_fts = resolve_cache_path_with_migration(CacheType.RECIPE_FTS)
        canonical_fts = settings_dir / "cache" / "fts" / "recipe_fts.sqlite"
        assert resolved_fts == str(canonical_fts)
        assert canonical_fts.exists()
        # An empty or truncated canonical file must not pass as a migration.
        assert canonical_fts.read_text() == "fts data"
        assert not legacy_fts.exists()

        # Test TAG_FTS migration
        legacy_tag = settings_dir / "tag_fts.sqlite"
        legacy_tag.write_text("tag data")
        resolved_tag = resolve_cache_path_with_migration(CacheType.TAG_FTS)
        canonical_tag = settings_dir / "cache" / "fts" / "tag_fts.sqlite"
        assert resolved_tag == str(canonical_tag)
        assert canonical_tag.exists()
        assert canonical_tag.read_text() == "tag data"
        assert not legacy_tag.exists()

View File

@@ -3,6 +3,46 @@ import { app } from "../../scripts/app.js";
import { TextAreaCaretHelper } from "./textarea_caret_helper.js";
import { getPromptCustomWordsAutocompletePreference } from "./settings.js";
// Command definitions for category filtering.
// Keys are the slash commands typed in the prompt. Each value carries the
// `label` shown in the command dropdown plus either:
//   - `categories`: tag category ids that are joined with "," and sent as the
//     `category` query parameter of the custom-words search, or
//   - `type: 'embedding'`: search the embeddings endpoint instead.
// Category ids pair the Danbooru and e621 values for the same concept
// (e.g. Character = 4 on Danbooru, 11 on e621); Species (12) and Lore (15)
// exist only in the e621 range.
// Aliases ('/char', '/emb') share a label with their long form; the command
// dropdown lists only the first command seen for each label.
const TAG_COMMANDS = {
'/character': { categories: [4, 11], label: 'Character' },
'/char': { categories: [4, 11], label: 'Character' },
'/artist': { categories: [1, 8], label: 'Artist' },
'/general': { categories: [0, 7], label: 'General' },
'/copyright': { categories: [3, 10], label: 'Copyright' },
'/meta': { categories: [5, 14], label: 'Meta' },
'/species': { categories: [12], label: 'Species' },
'/lore': { categories: [15], label: 'Lore' },
'/emb': { type: 'embedding', label: 'Embeddings' },
'/embedding': { type: 'embedding', label: 'Embeddings' },
};
// Category display information, keyed by the tag's category value.
// `bg` is the badge background, `text` the badge text color and `label` the
// badge caption. Keys 0-5 are Danbooru categories, 7-15 are e621 categories;
// equivalent categories in both ranges reuse the same colors and label.
// Values 2, 6 and 13 are unused in the tag data. Value 9 (e621 Contributor)
// has no entry here, so tags in that category are rendered without a badge.
const CATEGORY_INFO = {
0: { bg: 'rgba(0, 155, 230, 0.2)', text: '#4bb4ff', label: 'General' },
1: { bg: 'rgba(255, 138, 139, 0.2)', text: '#ffc3c3', label: 'Artist' },
3: { bg: 'rgba(199, 151, 255, 0.2)', text: '#ddc9fb', label: 'Copyright' },
4: { bg: 'rgba(53, 198, 74, 0.2)', text: '#93e49a', label: 'Character' },
5: { bg: 'rgba(234, 208, 132, 0.2)', text: '#f7e7c3', label: 'Meta' },
7: { bg: 'rgba(0, 155, 230, 0.2)', text: '#4bb4ff', label: 'General' },
8: { bg: 'rgba(255, 138, 139, 0.2)', text: '#ffc3c3', label: 'Artist' },
10: { bg: 'rgba(199, 151, 255, 0.2)', text: '#ddc9fb', label: 'Copyright' },
11: { bg: 'rgba(53, 198, 74, 0.2)', text: '#93e49a', label: 'Character' },
12: { bg: 'rgba(237, 137, 54, 0.2)', text: '#f6ad55', label: 'Species' },
14: { bg: 'rgba(234, 208, 132, 0.2)', text: '#f7e7c3', label: 'Meta' },
15: { bg: 'rgba(72, 187, 120, 0.2)', text: '#68d391', label: 'Lore' },
};
/**
 * Format a post count with a K/M suffix, keeping at most one decimal place.
 *
 * Examples: 999 -> "999", 1500 -> "1.5K", 6008644 -> "6M".
 *
 * @param {number} count - Post count to format
 * @returns {string} Compact representation of the count
 */
function formatPostCount(count) {
    // One decimal place, with a trailing ".0" dropped ("6.0" -> "6").
    const scale = (divisor) => (count / divisor).toFixed(1).replace(/\.0$/, '');

    if (count >= 1000000) {
        return scale(1000000) + 'M';
    }
    if (count >= 1000) {
        const thousands = scale(1000);
        // Counts just below one million round up to "1000"; promote them to
        // the next unit instead of emitting "1000K".
        return thousands === '1000' ? '1M' : thousands + 'K';
    }
    return count.toString();
}
function parseUsageTipNumber(value) {
if (typeof value === 'number' && Number.isFinite(value)) {
return value;
@@ -145,7 +185,7 @@ const MODEL_BEHAVIORS = {
async getInsertText(_instance, relativePath) {
const { directories, fileName } = splitRelativePath(relativePath);
const trimmedName = removeGeneralExtension(fileName);
const folder = directories.length ? `${directories.join('\\')}\\` : '';
const folder = directories.length ? `${directories.join('/')}/` : '';
return `embedding:${folder}${trimmedName}, `;
},
},
@@ -170,7 +210,7 @@ const MODEL_BEHAVIORS = {
instance.showPreviewForItem(relativePath, itemElement);
},
hidePreview(instance) {
if (!instance.previewTooltip || instance.searchType !== 'embeddings') {
if (!instance.previewTooltip) {
return;
}
instance.previewTooltip.hide();
@@ -185,10 +225,10 @@ const MODEL_BEHAVIORS = {
const rawSearchTerm = instance.getSearchTerm(instance.inputElement.value);
const match = rawSearchTerm.match(/^emb:(.*)$/i);
if (match) {
if (match || instance.searchType === 'embeddings') {
const { directories, fileName } = splitRelativePath(relativePath);
const trimmedName = removeGeneralExtension(fileName);
const folder = directories.length ? `${directories.join('\\')}\\` : '';
const folder = directories.length ? `${directories.join('/')}/` : '';
return `embedding:${folder}${trimmedName}, `;
} else {
return `${relativePath}, `;
@@ -224,6 +264,10 @@ class AutoComplete {
this.previewTooltipPromise = null;
this.searchType = null;
// Command mode state
this.activeCommand = null; // Current active command (e.g., { categories: [4, 11], label: 'Character' })
this.showingCommands = false; // Whether showing command list dropdown
// Initialize TextAreaCaretHelper
this.helper = new TextAreaCaretHelper(inputElement, () => app.canvas.ds.scale);
@@ -425,11 +469,43 @@ class AutoComplete {
endpoint = '/lm/embeddings/relative-paths';
searchTerm = (match[1] || '').trim();
this.searchType = 'embeddings';
this.activeCommand = null;
this.showingCommands = false;
} else if (getPromptCustomWordsAutocompletePreference()) {
// Setting enabled - allow custom words search
endpoint = '/lm/custom-words/search';
searchTerm = rawSearchTerm;
this.searchType = 'custom_words';
// Setting enabled - check for command mode
const commandResult = this._parseCommandInput(rawSearchTerm);
if (commandResult.showCommands) {
// Show command list dropdown
this.showingCommands = true;
this.activeCommand = null;
this.searchType = 'commands';
this._showCommandList(commandResult.commandFilter);
return;
} else if (commandResult.command) {
// Command is active, use filtered search
this.showingCommands = false;
this.activeCommand = commandResult.command;
searchTerm = commandResult.searchTerm;
if (commandResult.command.type === 'embedding') {
// /emb or /embedding command
endpoint = '/lm/embeddings/relative-paths';
this.searchType = 'embeddings';
} else {
// Category filter command
const categories = commandResult.command.categories.join(',');
endpoint = `/lm/custom-words/search?category=${categories}`;
this.searchType = 'custom_words';
}
} else {
// No command - regular custom words search with enriched results
this.showingCommands = false;
this.activeCommand = null;
endpoint = '/lm/custom-words/search?enriched=true';
searchTerm = rawSearchTerm;
this.searchType = 'custom_words';
}
} else {
// Setting disabled - no autocomplete for non-emb: terms
this.hide();
@@ -501,24 +577,220 @@ class AutoComplete {
this.hide();
}
}
/**
* Parse command input to detect command mode
* @param {string} rawInput - Raw input text
* @returns {Object} - { showCommands, commandFilter, command, searchTerm }
*/
_parseCommandInput(rawInput) {
const trimmed = rawInput.trim();
// Check if input starts with "/"
if (!trimmed.startsWith('/')) {
return { showCommands: false, command: null, searchTerm: trimmed };
}
// Split into potential command and search term
const spaceIndex = trimmed.indexOf(' ');
if (spaceIndex === -1) {
// Still typing command (e.g., "/cha")
const partialCommand = trimmed.toLowerCase();
// Check for exact command match
if (TAG_COMMANDS[partialCommand]) {
return {
showCommands: false,
command: TAG_COMMANDS[partialCommand],
searchTerm: '',
};
}
// Show command suggestions
return {
showCommands: true,
commandFilter: partialCommand.slice(1), // Remove leading "/"
command: null,
searchTerm: '',
};
}
// Command with search term (e.g., "/char miku")
const commandPart = trimmed.slice(0, spaceIndex).toLowerCase();
const searchPart = trimmed.slice(spaceIndex + 1).trim();
if (TAG_COMMANDS[commandPart]) {
return {
showCommands: false,
command: TAG_COMMANDS[commandPart],
searchTerm: searchPart,
};
}
// Unknown command, treat as regular search
return { showCommands: false, command: null, searchTerm: trimmed };
}
/**
* Show the command list dropdown
* @param {string} filter - Optional filter for commands
*/
_showCommandList(filter = '') {
const filterLower = filter.toLowerCase();
// Get unique commands (avoid duplicates like /char and /character)
const seenLabels = new Set();
const commands = [];
for (const [cmd, info] of Object.entries(TAG_COMMANDS)) {
if (seenLabels.has(info.label)) continue;
if (!filter || cmd.slice(1).startsWith(filterLower)) {
seenLabels.add(info.label);
commands.push({ command: cmd, ...info });
}
}
if (commands.length === 0) {
this.hide();
return;
}
this.items = commands;
this._renderCommandList();
this.show();
}
/**
* Render the command list dropdown
*/
_renderCommandList() {
this.dropdown.innerHTML = '';
this.selectedIndex = -1;
this.items.forEach((item, index) => {
const itemEl = document.createElement('div');
itemEl.className = 'comfy-autocomplete-item comfy-autocomplete-command';
const cmdSpan = document.createElement('span');
cmdSpan.className = 'lm-autocomplete-command-name';
cmdSpan.textContent = item.command;
const labelSpan = document.createElement('span');
labelSpan.className = 'lm-autocomplete-command-label';
labelSpan.textContent = item.label;
itemEl.appendChild(cmdSpan);
itemEl.appendChild(labelSpan);
itemEl.style.cssText = `
padding: 8px 12px;
cursor: pointer;
color: rgba(226, 232, 240, 0.8);
border-bottom: 1px solid rgba(226, 232, 240, 0.1);
transition: all 0.2s ease;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
display: flex;
justify-content: space-between;
align-items: center;
gap: 12px;
`;
itemEl.addEventListener('mouseenter', () => {
this.selectItem(index);
});
itemEl.addEventListener('click', () => {
this._insertCommand(item.command);
});
this.dropdown.appendChild(itemEl);
});
// Remove border from last item
if (this.dropdown.lastChild) {
this.dropdown.lastChild.style.borderBottom = 'none';
}
// Auto-select first item
if (this.items.length > 0) {
setTimeout(() => this.selectItem(0), 100);
}
}
/**
* Insert a command into the input
* @param {string} command - The command to insert (e.g., "/char")
*/
_insertCommand(command) {
const currentValue = this.inputElement.value;
const caretPos = this.getCaretPosition();
// Find the start of the current command being typed
const beforeCursor = currentValue.substring(0, caretPos);
const segments = beforeCursor.split(/[,\>]+/);
const lastSegment = segments[segments.length - 1];
const commandStartPos = caretPos - lastSegment.length;
// Insert command with trailing space
const insertText = command + ' ';
const newValue = currentValue.substring(0, commandStartPos) + insertText + currentValue.substring(caretPos);
const newCaretPos = commandStartPos + insertText.length;
this.inputElement.value = newValue;
// Trigger input event
const event = new Event('input', { bubbles: true });
this.inputElement.dispatchEvent(event);
this.hide();
// Focus and position cursor
this.inputElement.focus();
this.inputElement.setSelectionRange(newCaretPos, newCaretPos);
}
render() {
this.dropdown.innerHTML = '';
this.selectedIndex = -1;
// Early return if no items to prevent empty dropdown
if (!this.items || this.items.length === 0) {
return;
}
this.items.forEach((relativePath, index) => {
// Check if items are enriched (have tag_name, category, post_count)
const isEnriched = this.items[0] && typeof this.items[0] === 'object' && 'tag_name' in this.items[0];
this.items.forEach((itemData, index) => {
const item = document.createElement('div');
item.className = 'comfy-autocomplete-item';
// Create highlighted content
const highlightedContent = this.highlightMatch(relativePath, this.currentSearchTerm);
item.innerHTML = highlightedContent;
// Get the display text and path for insertion
const displayText = isEnriched ? itemData.tag_name : itemData;
const insertPath = isEnriched ? itemData.tag_name : itemData;
if (isEnriched) {
// Render enriched item with category badge and post count
this._renderEnrichedItem(item, itemData, this.currentSearchTerm);
} else {
// Create highlighted content for simple items, wrapped in a span
// to prevent flex layout from breaking up the text
const nameSpan = document.createElement('span');
nameSpan.className = 'lm-autocomplete-name';
nameSpan.innerHTML = this.highlightMatch(displayText, this.currentSearchTerm);
nameSpan.style.cssText = `
flex: 1;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
`;
item.appendChild(nameSpan);
}
// Apply item styles with new color scheme
item.style.cssText = `
padding: 8px 12px;
@@ -530,37 +802,102 @@ class AutoComplete {
overflow: hidden;
text-overflow: ellipsis;
position: relative;
display: flex;
justify-content: space-between;
align-items: center;
gap: 8px;
`;
// Hover and selection handlers
item.addEventListener('mouseenter', () => {
this.selectItem(index);
});
item.addEventListener('mouseleave', () => {
this.hidePreview();
});
// Click handler
item.addEventListener('click', () => {
this.insertSelection(relativePath);
this.insertSelection(insertPath);
});
this.dropdown.appendChild(item);
});
// Remove border from last item
if (this.dropdown.lastChild) {
this.dropdown.lastChild.style.borderBottom = 'none';
}
// Auto-select the first item with a small delay
if (this.items.length > 0) {
setTimeout(() => {
this.selectItem(0);
}, 100); // 50ms delay
this.selectItem(0);
}, 100);
}
}
/**
* Render an enriched autocomplete item with category badge and post count
* @param {HTMLElement} itemEl - The item element to populate
* @param {Object} itemData - The enriched item data { tag_name, category, post_count }
* @param {string} searchTerm - The current search term for highlighting
*/
_renderEnrichedItem(itemEl, itemData, searchTerm) {
// Create name span with highlighted match
const nameSpan = document.createElement('span');
nameSpan.className = 'lm-autocomplete-name';
nameSpan.innerHTML = this.highlightMatch(itemData.tag_name, searchTerm);
nameSpan.style.cssText = `
flex: 1;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
`;
// Create meta container for count and badge
const metaSpan = document.createElement('span');
metaSpan.className = 'lm-autocomplete-meta';
metaSpan.style.cssText = `
display: flex;
align-items: center;
gap: 8px;
flex-shrink: 0;
`;
// Add post count
if (itemData.post_count > 0) {
const countSpan = document.createElement('span');
countSpan.className = 'lm-autocomplete-count';
countSpan.textContent = formatPostCount(itemData.post_count);
countSpan.style.cssText = `
font-size: 11px;
color: rgba(226, 232, 240, 0.5);
`;
metaSpan.appendChild(countSpan);
}
// Add category badge
const categoryInfo = CATEGORY_INFO[itemData.category];
if (categoryInfo) {
const badgeSpan = document.createElement('span');
badgeSpan.className = 'lm-autocomplete-category';
badgeSpan.textContent = categoryInfo.label;
badgeSpan.style.cssText = `
font-size: 10px;
padding: 2px 6px;
border-radius: 10px;
background: ${categoryInfo.bg};
color: ${categoryInfo.text};
white-space: nowrap;
`;
metaSpan.appendChild(badgeSpan);
}
itemEl.appendChild(nameSpan);
itemEl.appendChild(metaSpan);
}
highlightMatch(text, searchTerm) {
const { include } = parseSearchTokens(searchTerm);
@@ -655,10 +992,11 @@ class AutoComplete {
this.dropdown.style.display = 'none';
this.isVisible = false;
this.selectedIndex = -1;
this.showingCommands = false;
// Hide preview tooltip
this.hidePreview();
// Clear selection styles from all items
const items = this.dropdown.querySelectorAll('.comfy-autocomplete-item');
items.forEach(item => {
@@ -715,7 +1053,17 @@ class AutoComplete {
case 'Enter':
e.preventDefault();
if (this.selectedIndex >= 0 && this.selectedIndex < this.items.length) {
this.insertSelection(this.items[this.selectedIndex]);
if (this.showingCommands) {
// Insert command
this._insertCommand(this.items[this.selectedIndex].command);
} else {
// Insert selection (handle enriched items)
const selectedItem = this.items[this.selectedIndex];
const insertPath = typeof selectedItem === 'object' && 'tag_name' in selectedItem
? selectedItem.tag_name
: selectedItem;
this.insertSelection(insertPath);
}
}
break;

View File

@@ -575,3 +575,84 @@ body.lm-lora-reordering * {
border-top: 1px solid rgba(255, 255, 255, 0.05);
margin: 6px 0;
}
/* Autocomplete styling: layout of one suggestion row.
   The name column grows and may shrink (min-width: 0) so long tag names are
   clipped, while the meta column (count + badge) keeps its natural width. */
.lm-autocomplete-name {
flex: 1;
min-width: 0;
overflow: hidden;
text-overflow: ellipsis;
}
/* Right-hand container holding the post count and the category badge. */
.lm-autocomplete-meta {
display: flex;
align-items: center;
gap: 8px;
flex-shrink: 0;
}
/* Dimmed, smaller post count text. */
.lm-autocomplete-count {
font-size: 11px;
color: rgba(226, 232, 240, 0.5);
}
/* Base pill shape of the category badge; colors come from the modifiers
   below or from inline styles. */
.lm-autocomplete-category {
font-size: 10px;
padding: 2px 6px;
border-radius: 10px;
white-space: nowrap;
}
/* Category-specific badge colors (one modifier class per category label).
   NOTE(review): the autocomplete script appears to set badge colors inline
   and to assign only the base class - confirm these modifiers are applied
   somewhere before relying on them. */
.lm-autocomplete-category--general {
background: rgba(0, 155, 230, 0.2);
color: #4bb4ff;
}
.lm-autocomplete-category--artist {
background: rgba(255, 138, 139, 0.2);
color: #ffc3c3;
}
.lm-autocomplete-category--copyright {
background: rgba(199, 151, 255, 0.2);
color: #ddc9fb;
}
.lm-autocomplete-category--character {
background: rgba(53, 198, 74, 0.2);
color: #93e49a;
}
.lm-autocomplete-category--meta {
background: rgba(234, 208, 132, 0.2);
color: #f7e7c3;
}
.lm-autocomplete-category--species {
background: rgba(237, 137, 54, 0.2);
color: #f6ad55;
}
.lm-autocomplete-category--lore {
background: rgba(72, 187, 120, 0.2);
color: #68d391;
}
/* Command list styling: a row shows the command name on the left and its
   label on the right. */
.comfy-autocomplete-command {
display: flex;
justify-content: space-between;
align-items: center;
gap: 12px;
}
/* Command name (e.g. "/char") in a monospace face. */
.lm-autocomplete-command-name {
font-family: 'Consolas', 'Monaco', monospace;
color: rgba(66, 153, 225, 0.9);
}
/* Human-readable label of the command, dimmed. */
.lm-autocomplete-command-label {
font-size: 12px;
color: rgba(226, 232, 240, 0.6);
}