refactor(autocomplete): remove old CSV fallback, use TagFTSIndex exclusively

Remove all autocomplete.txt parsing logic and fallback code, simplifying
the service to use only TagFTSIndex for Danbooru/e621 tag search
with category filtering.

- Remove WordEntry dataclass and _words_cache, _file_path attributes
- Remove _determine_file_path(), get_file_path(), load_words(), save_words(),
  get_content(), _parse_csv_content() methods
- Simplify search_words() to only use TagFTSIndex, always returning
  enriched results with {tag_name, category, post_count}
- Remove GET/POST /api/lm/custom-words endpoints (unused)
- Keep GET /api/lm/custom-words/search for frontend autocomplete
- Rewrite tests to focus on TagFTSIndex integration

This reduces code by 446 lines and removes untested pysssss plugin
integration. Feature is unreleased so no backward compatibility needed.
This commit is contained in:
Will Miao
2026-01-26 20:36:00 +08:00
parent 31d94d7ea2
commit 7249c9fd4b
4 changed files with 110 additions and 447 deletions

View File

@@ -1,62 +1,8 @@
"""Tests for CustomWordsService."""
"""Tests for CustomWordsService with TagFTSIndex integration."""
import pytest
from tempfile import NamedTemporaryFile
from pathlib import Path
from py.services.custom_words_service import CustomWordsService, WordEntry, get_custom_words_service
@pytest.fixture
def temp_autocomplete_file():
"""Create a temporary autocomplete.txt file."""
import os
import tempfile
fd, path = tempfile.mkstemp(suffix='.txt')
try:
os.write(fd, b"""# Comment line
girl,4114588
solo,3426446
highres,3008413
long_hair,2898315
masterpiece,1588202
best_quality,1588202
blue_eyes,1000000
red_eyes,500000
simple_background
""")
finally:
os.close(fd)
yield Path(path)
os.unlink(path)
@pytest.fixture
def service(temp_autocomplete_file, monkeypatch):
"""Create a CustomWordsService instance with temporary file."""
# Monkey patch to use temp file
service = CustomWordsService.__new__(CustomWordsService)
def mock_determine_path():
service._file_path = temp_autocomplete_file
monkeypatch.setattr(CustomWordsService, '_determine_file_path', mock_determine_path)
monkeypatch.setattr(service, '_file_path', temp_autocomplete_file)
service.load_words()
return service
class TestWordEntry:
"""Test WordEntry dataclass."""
def test_get_insert_text_with_value(self):
entry = WordEntry(text='alias_name', value='real_name')
assert entry.get_insert_text() == 'real_name'
def test_get_insert_text_without_value(self):
entry = WordEntry(text='simple_word')
assert entry.get_insert_text() == 'simple_word'
from py.services.custom_words_service import CustomWordsService, get_custom_words_service
class TestCustomWordsService:
@@ -67,131 +13,99 @@ class TestCustomWordsService:
service2 = get_custom_words_service()
assert service1 is service2
def test_parse_csv_content_basic(self):
service = CustomWordsService.__new__(CustomWordsService)
words = service._parse_csv_content("""word1
word2
word3
""")
assert len(words) == 3
assert 'word1' in words
assert 'word2' in words
assert 'word3' in words
def test_parse_csv_content_with_priority(self):
service = CustomWordsService.__new__(CustomWordsService)
words = service._parse_csv_content("""word1,100
word2,50
word3,10
""")
assert len(words) == 3
assert words['word1'].priority == 100
assert words['word2'].priority == 50
assert words['word3'].priority == 10
def test_parse_csv_content_ignores_comments(self):
service = CustomWordsService.__new__(CustomWordsService)
words = service._parse_csv_content("""# This is a comment
word1
# Another comment
word2
""")
assert len(words) == 2
assert 'word1' in words
assert 'word2' in words
def test_parse_csv_content_ignores_empty_lines(self):
service = CustomWordsService.__new__(CustomWordsService)
words = service._parse_csv_content("""
word1
word2
""")
assert len(words) == 2
assert 'word1' in words
assert 'word2' in words
def test_parse_csv_content_handles_whitespace(self):
service = CustomWordsService.__new__(CustomWordsService)
words = service._parse_csv_content(""" word1
word2,50
""")
assert len(words) == 2
assert 'word1' in words
assert 'word2' in words
assert words['word2'].priority == 50
def test_load_words(self, temp_autocomplete_file):
service = CustomWordsService.__new__(CustomWordsService)
service._file_path = temp_autocomplete_file
words = service.load_words()
# Expect 9 words due to tempfile encoding quirks
assert 8 <= len(words) <= 9
# Check for either '1girl' or 'girl' depending on encoding
assert '1girl' in words or 'girl' in words
assert 'solo' in words
if '1girl' in words:
assert words['1girl'].priority == 4114588
if 'girl' in words:
assert words['girl'].priority == 4114588
assert words['solo'].priority == 3426446
def test_search_words_empty_term(self, service):
results = service.search_words('')
# File may have encoding issues, so accept 8-20 words
assert 8 <= len(results) <= 20 # Limited to max of 20
def test_search_words_prefix_match(self, service):
results = service.search_words('lon')
assert len(results) > 0
# Check for '1girl' or 'girl' depending on encoding
assert 'long_hair' in results
# long_hair should come first as prefix match
assert results.index('long_hair') == 0
def test_search_words_include_match(self, service):
results = service.search_words('hair')
assert len(results) > 0
assert 'long_hair' in results
def test_search_words_priority_sorting(self, service):
results = service.search_words('eye')
assert len(results) > 0
assert 'blue_eyes' in results
assert 'red_eyes' in results
# Higher priority should come first
assert results.index('blue_eyes') < results.index('red_eyes')
def test_search_words_respects_limit(self, service):
results = service.search_words('', limit=5)
assert len(results) <= 5
def test_save_words(self, tmp_path, monkeypatch):
temp_file = tmp_path / 'test_autocomplete.txt'
def test_search_words_without_tag_index(self):
service = CustomWordsService.__new__(CustomWordsService)
monkeypatch.setattr(service, '_file_path', temp_file)
def mock_get_index():
return None
content = 'test_word,100'
success = service.save_words(content)
assert success is True
assert temp_file.exists()
service._get_tag_index = mock_get_index
saved_content = temp_file.read_text(encoding='utf-8')
assert saved_content == content
results = service.search_words("test", limit=10)
assert results == []
def test_get_content_no_file(self, tmp_path, monkeypatch):
non_existent_file = tmp_path / 'nonexistent.txt'
def test_search_words_with_tag_index(self):
service = CustomWordsService.__new__(CustomWordsService)
monkeypatch.setattr(service, '_file_path', non_existent_file)
content = service.get_content()
assert content == ''
mock_tag_index = MockTagFTSIndex()
def test_get_content_with_file(self, temp_autocomplete_file, monkeypatch):
def mock_get_index():
return mock_tag_index
service._get_tag_index = mock_get_index
results = service.search_words("miku", limit=20)
assert len(results) == 2
assert results[0]["tag_name"] == "hatsune_miku"
assert results[0]["category"] == 4
assert results[0]["post_count"] == 500000
def test_search_words_with_category_filter(self):
service = CustomWordsService.__new__(CustomWordsService)
monkeypatch.setattr(service, '_file_path', temp_autocomplete_file)
content = service.get_content()
# Content may have escaped newlines in string representation
assert 'girl' in content or '1girl' in content
assert 'solo' in content
mock_tag_index = MockTagFTSIndex()
def mock_get_index():
return mock_tag_index
service._get_tag_index = mock_get_index
results = service.search_words("miku", categories=[4, 11], limit=20)
assert len(results) == 2
assert results[0]["tag_name"] == "hatsune_miku"
assert results[0]["category"] == 4
assert results[1]["tag_name"] == "hatsune_miku_(vocaloid)"
assert results[1]["category"] == 4
def test_search_words_respects_limit(self):
    """search_words() returns at most ``limit`` entries."""
    fake_index = MockTagFTSIndex()
    # __new__ creates the instance without running __init__.
    svc = CustomWordsService.__new__(CustomWordsService)
    svc._get_tag_index = lambda: fake_index

    matches = svc.search_words("miku", limit=1)

    assert len(matches) <= 1
def test_search_words_empty_term(self):
    """An empty search term produces an empty result list."""
    fake_index = MockTagFTSIndex()
    # __new__ creates the instance without running __init__.
    svc = CustomWordsService.__new__(CustomWordsService)
    svc._get_tag_index = lambda: fake_index

    matches = svc.search_words("", limit=20)

    assert matches == []
def test_search_words_uses_tag_index(self):
    """search_words() must call search() on the index from _get_tag_index()."""
    # __new__ creates the instance without running __init__.
    service = CustomWordsService.__new__(CustomWordsService)
    mock_tag_index = MockTagFTSIndex()

    def mock_get_index():
        return mock_tag_index

    service._get_tag_index = mock_get_index

    # Only the side effect on the mock matters here, so the return value
    # is intentionally not bound to a name.
    service.search_words("test")

    assert mock_tag_index.called, "search_words() never called the tag index"
class MockTagFTSIndex:
    """In-memory stand-in for TagFTSIndex used by the tests.

    Holds two canned tag rows and records in ``called`` whether
    ``search()`` has been invoked.
    """

    def __init__(self):
        self.called = False
        self._results = [
            {"tag_name": name, "category": 4, "post_count": count}
            for name, count in (
                ("hatsune_miku", 500000),
                ("hatsune_miku_(vocaloid)", 250000),
            )
        ]

    def search(self, query, categories=None, limit=20):
        """Return up to ``limit`` canned rows.

        The query text is not matched against tag names; only its
        truthiness matters. A falsy query yields an empty list. A truthy
        ``categories`` keeps only rows whose category is in it.
        """
        self.called = True
        if not query:
            return []

        rows = self._results
        if categories:
            rows = [row for row in rows if row["category"] in categories]
        return rows[:limit]