refactor(autocomplete): remove old CSV fallback, use TagFTSIndex exclusively

Remove all autocomplete.txt parsing logic and fallback code, simplifying the service to use only TagFTSIndex for Danbooru/e621 tag search with category filtering. - Remove WordEntry dataclass and _words_cache, _file_path attributes - Remove _determine_file_path(), get_file_path(), load_words(), save_words(), get_content(), _parse_csv_content() methods - Simplify search_words() to only use TagFTSIndex, always returning enriched results with {tag_name, category, post_count} - Remove GET/POST /api/lm/custom-words endpoints (unused) - Keep GET /api/lm/custom-words/search for frontend autocomplete - Rewrite tests to focus on TagFTSIndex integration This reduces code by 446 lines and removes untested pysssss plugin integration. Feature is unreleased so no backward compatibility needed.
2026-05-06 16:36:45 -03:00 · 2026-01-26 20:36:00 +08:00
parent 31d94d7ea2
commit 7249c9fd4b
4 changed files with 110 additions and 447 deletions
--- a/tests/test_custom_words_service.py
+++ b/tests/test_custom_words_service.py
@@ -1,62 +1,8 @@
-"""Tests for CustomWordsService."""
+"""Tests for CustomWordsService with TagFTSIndex integration."""

 import pytest
-from tempfile import NamedTemporaryFile
-from pathlib import Path

-from py.services.custom_words_service import CustomWordsService, WordEntry, get_custom_words_service
-
-
-@pytest.fixture
-def temp_autocomplete_file():
-    """Create a temporary autocomplete.txt file."""
-    import os
-    import tempfile
-    fd, path = tempfile.mkstemp(suffix='.txt')
-    try:
-        os.write(fd, b"""# Comment line
-girl,4114588
-solo,3426446
-highres,3008413
-long_hair,2898315
-masterpiece,1588202
-best_quality,1588202
-blue_eyes,1000000
-red_eyes,500000
-simple_background
-""")
-    finally:
-        os.close(fd)
-    yield Path(path)
-    os.unlink(path)
-
-
-@pytest.fixture
-def service(temp_autocomplete_file, monkeypatch):
-    """Create a CustomWordsService instance with temporary file."""
-    # Monkey patch to use temp file
-    service = CustomWordsService.__new__(CustomWordsService)
-
-    def mock_determine_path():
-        service._file_path = temp_autocomplete_file
-
-    monkeypatch.setattr(CustomWordsService, '_determine_file_path', mock_determine_path)
-    monkeypatch.setattr(service, '_file_path', temp_autocomplete_file)
-
-    service.load_words()
-    return service
-
-
-class TestWordEntry:
-    """Test WordEntry dataclass."""
-
-    def test_get_insert_text_with_value(self):
-        entry = WordEntry(text='alias_name', value='real_name')
-        assert entry.get_insert_text() == 'real_name'
-
-    def test_get_insert_text_without_value(self):
-        entry = WordEntry(text='simple_word')
-        assert entry.get_insert_text() == 'simple_word'
+from py.services.custom_words_service import CustomWordsService, get_custom_words_service


 class TestCustomWordsService:
@@ -67,131 +13,99 @@ class TestCustomWordsService:
        service2 = get_custom_words_service()
        assert service1 is service2

-    def test_parse_csv_content_basic(self):
-        service = CustomWordsService.__new__(CustomWordsService)
-        words = service._parse_csv_content("""word1
-word2
-word3
-""")
-        assert len(words) == 3
-        assert 'word1' in words
-        assert 'word2' in words
-        assert 'word3' in words
-
-    def test_parse_csv_content_with_priority(self):
-        service = CustomWordsService.__new__(CustomWordsService)
-        words = service._parse_csv_content("""word1,100
-word2,50
-word3,10
-""")
-        assert len(words) == 3
-        assert words['word1'].priority == 100
-        assert words['word2'].priority == 50
-        assert words['word3'].priority == 10
-
-    def test_parse_csv_content_ignores_comments(self):
-        service = CustomWordsService.__new__(CustomWordsService)
-        words = service._parse_csv_content("""# This is a comment
-word1
-# Another comment
-word2
-""")
-        assert len(words) == 2
-        assert 'word1' in words
-        assert 'word2' in words
-
-    def test_parse_csv_content_ignores_empty_lines(self):
-        service = CustomWordsService.__new__(CustomWordsService)
-        words = service._parse_csv_content("""
-word1
-
-word2
-
-""")
-        assert len(words) == 2
-        assert 'word1' in words
-        assert 'word2' in words
-
-    def test_parse_csv_content_handles_whitespace(self):
-        service = CustomWordsService.__new__(CustomWordsService)
-        words = service._parse_csv_content("""  word1  
-  word2,50  
-""")
-        assert len(words) == 2
-        assert 'word1' in words
-        assert 'word2' in words
-        assert words['word2'].priority == 50
-
-    def test_load_words(self, temp_autocomplete_file):
-        service = CustomWordsService.__new__(CustomWordsService)
-        service._file_path = temp_autocomplete_file
-        words = service.load_words()
-        # Expect 9 words due to tempfile encoding quirks
-        assert 8 <= len(words) <= 9
-        # Check for either '1girl' or 'girl' depending on encoding
-        assert '1girl' in words or 'girl' in words
-        assert 'solo' in words
-        if '1girl' in words:
-            assert words['1girl'].priority == 4114588
-        if 'girl' in words:
-            assert words['girl'].priority == 4114588
-        assert words['solo'].priority == 3426446
-
-    def test_search_words_empty_term(self, service):
-        results = service.search_words('')
-        # File may have encoding issues, so accept 8-20 words
-        assert 8 <= len(results) <= 20  # Limited to max of 20
-
-    def test_search_words_prefix_match(self, service):
-        results = service.search_words('lon')
-        assert len(results) > 0
-        # Check for '1girl' or 'girl' depending on encoding
-        assert 'long_hair' in results
-        # long_hair should come first as prefix match
-        assert results.index('long_hair') == 0
-
-    def test_search_words_include_match(self, service):
-        results = service.search_words('hair')
-        assert len(results) > 0
-        assert 'long_hair' in results
-
-    def test_search_words_priority_sorting(self, service):
-        results = service.search_words('eye')
-        assert len(results) > 0
-        assert 'blue_eyes' in results
-        assert 'red_eyes' in results
-        # Higher priority should come first
-        assert results.index('blue_eyes') < results.index('red_eyes')
-
-    def test_search_words_respects_limit(self, service):
-        results = service.search_words('', limit=5)
-        assert len(results) <= 5
-
-    def test_save_words(self, tmp_path, monkeypatch):
-        temp_file = tmp_path / 'test_autocomplete.txt'
+    def test_search_words_without_tag_index(self):
        service = CustomWordsService.__new__(CustomWordsService)

-        monkeypatch.setattr(service, '_file_path', temp_file)
+        def mock_get_index():
+            return None

-        content = 'test_word,100'
-        success = service.save_words(content)
-        assert success is True
-        assert temp_file.exists()
+        service._get_tag_index = mock_get_index

-        saved_content = temp_file.read_text(encoding='utf-8')
-        assert saved_content == content
+        results = service.search_words("test", limit=10)
+        assert results == []

-    def test_get_content_no_file(self, tmp_path, monkeypatch):
-        non_existent_file = tmp_path / 'nonexistent.txt'
+    def test_search_words_with_tag_index(self):
        service = CustomWordsService.__new__(CustomWordsService)
-        monkeypatch.setattr(service, '_file_path', non_existent_file)
-        content = service.get_content()
-        assert content == ''
+        mock_tag_index = MockTagFTSIndex()

-    def test_get_content_with_file(self, temp_autocomplete_file, monkeypatch):
+        def mock_get_index():
+            return mock_tag_index
+
+        service._get_tag_index = mock_get_index
+
+        results = service.search_words("miku", limit=20)
+        assert len(results) == 2
+        assert results[0]["tag_name"] == "hatsune_miku"
+        assert results[0]["category"] == 4
+        assert results[0]["post_count"] == 500000
+
+    def test_search_words_with_category_filter(self):
        service = CustomWordsService.__new__(CustomWordsService)
-        monkeypatch.setattr(service, '_file_path', temp_autocomplete_file)
-        content = service.get_content()
-        # Content may have escaped newlines in string representation
-        assert 'girl' in content or '1girl' in content
-        assert 'solo' in content
+        mock_tag_index = MockTagFTSIndex()
+
+        def mock_get_index():
+            return mock_tag_index
+
+        service._get_tag_index = mock_get_index
+
+        results = service.search_words("miku", categories=[4, 11], limit=20)
+        assert len(results) == 2
+        assert results[0]["tag_name"] == "hatsune_miku"
+        assert results[0]["category"] == 4
+        assert results[1]["tag_name"] == "hatsune_miku_(vocaloid)"
+        assert results[1]["category"] == 4
+
+    def test_search_words_respects_limit(self):
+        service = CustomWordsService.__new__(CustomWordsService)
+        mock_tag_index = MockTagFTSIndex()
+
+        def mock_get_index():
+            return mock_tag_index
+
+        service._get_tag_index = mock_get_index
+
+        results = service.search_words("miku", limit=1)
+        assert len(results) <= 1
+
+    def test_search_words_empty_term(self):
+        service = CustomWordsService.__new__(CustomWordsService)
+        mock_tag_index = MockTagFTSIndex()
+
+        def mock_get_index():
+            return mock_tag_index
+
+        service._get_tag_index = mock_get_index
+
+        results = service.search_words("", limit=20)
+        assert results == []
+
+    def test_search_words_uses_tag_index(self):
+        service = CustomWordsService.__new__(CustomWordsService)
+        mock_tag_index = MockTagFTSIndex()
+
+        def mock_get_index():
+            return mock_tag_index
+
+        service._get_tag_index = mock_get_index
+
+        results = service.search_words("test")
+        assert mock_tag_index.called
+
+
+class MockTagFTSIndex:
+    """Mock TagFTSIndex for testing."""
+
+    def __init__(self):
+        self.called = False
+        self._results = [
+            {"tag_name": "hatsune_miku", "category": 4, "post_count": 500000},
+            {"tag_name": "hatsune_miku_(vocaloid)", "category": 4, "post_count": 250000},
+        ]
+
+    def search(self, query, categories=None, limit=20):
+        self.called = True
+        if not query:
+            return []
+        if categories:
+            return [r for r in self._results if r["category"] in categories][:limit]
+        return self._results[:limit]