fix(autocomplete): improve tag search ranking with popularity-based sorting

- Add LOG10(post_count) weighting to BM25 score for better relevance ranking - Prioritize tag_name prefix matches above alias matches using CASE statement - Remove frontend re-scoring logic to trust backend排序 results - Fix pagination consistency: page N+1 scores <= page N minimum score Key improvements: - '1girl' (6M posts) now ranks #1 instead of #149 for search '1' - tag_name prefix matches always appear before alias matches - Popular tags rank higher than obscure ones with same prefix - Consistent ordering across pagination boundaries Test coverage: - Add test_search_tag_name_prefix_match_priority - Add test_search_ranks_popular_tags_higher - Add test_search_pagination_ordering_consistency - Add test_search_rank_score_includes_popularity_weight - Update test data with 15 tags starting with '1' Fixes issues with autocomplete dropdown showing inconsistent results when scrolling through paginated search results.
2026-05-06 16:36:45 -03:00 · 2026-03-16 19:09:07 +08:00
parent ef38bda04f
commit b5a0725d2c
4 changed files with 526 additions and 151 deletions
--- a/tests/frontend/components/autocomplete.behavior.test.js
+++ b/tests/frontend/components/autocomplete.behavior.test.js
@@ -156,4 +156,115 @@ describe('AutoComplete widget interactions', () => {
    expect(highlighted).toContain('detail');
    expect(highlighted).not.toMatch(/beta<\/span>/i);
  });
+
+  it('handles arrow key navigation with virtual scrolling', async () => {
+    vi.useFakeTimers();
+
+    const mockItems = Array.from({ length: 50 }, (_, i) => `model_${i.toString().padStart(2, '0')}.safetensors`);
+
+    fetchApiMock.mockResolvedValue({
+      json: () => Promise.resolve({ success: true, relative_paths: mockItems }),
+    });
+
+    caretHelperInstance.getBeforeCursor.mockReturnValue('model');
+    caretHelperInstance.getCursorOffset.mockReturnValue({ left: 15, top: 25 });
+
+    const input = document.createElement('textarea');
+    document.body.append(input);
+
+    const { AutoComplete } = await import(AUTOCOMPLETE_MODULE);
+    const autoComplete = new AutoComplete(input, 'loras', {
+      debounceDelay: 0,
+      showPreview: false,
+      enableVirtualScroll: true,
+      itemHeight: 40,
+      visibleItems: 15,
+      pageSize: 20,
+    });
+
+    input.value = 'model';
+    input.dispatchEvent(new Event('input', { bubbles: true }));
+
+    await vi.runAllTimersAsync();
+    await Promise.resolve();
+
+    expect(autoComplete.items.length).toBeGreaterThan(0);
+    expect(autoComplete.selectedIndex).toBe(0);
+
+    const initialSelectedEl = autoComplete.contentContainer?.querySelector('.comfy-autocomplete-item-selected');
+    expect(initialSelectedEl).toBeDefined();
+
+    const arrowDownEvent = new KeyboardEvent('keydown', { key: 'ArrowDown', bubbles: true });
+    input.dispatchEvent(arrowDownEvent);
+
+    expect(autoComplete.selectedIndex).toBe(1);
+
+    const secondSelectedEl = autoComplete.contentContainer?.querySelector('.comfy-autocomplete-item-selected');
+    expect(secondSelectedEl).toBeDefined();
+    expect(secondSelectedEl?.dataset.index).toBe('1');
+
+    const arrowUpEvent = new KeyboardEvent('keydown', { key: 'ArrowUp', bubbles: true });
+    input.dispatchEvent(arrowUpEvent);
+
+    expect(autoComplete.selectedIndex).toBe(0);
+
+    const firstSelectedElAgain = autoComplete.contentContainer?.querySelector('.comfy-autocomplete-item-selected');
+    expect(firstSelectedElAgain).toBeDefined();
+    expect(firstSelectedElAgain?.dataset.index).toBe('0');
+  });
+
+  it('maintains selection when scrolling to invisible items', async () => {
+    vi.useFakeTimers();
+
+    const mockItems = Array.from({ length: 100 }, (_, i) => `item_${i.toString().padStart(3, '0')}.safetensors`);
+
+    fetchApiMock.mockResolvedValue({
+      json: () => Promise.resolve({ success: true, relative_paths: mockItems }),
+    });
+
+    caretHelperInstance.getBeforeCursor.mockReturnValue('item');
+    caretHelperInstance.getCursorOffset.mockReturnValue({ left: 15, top: 25 });
+
+    const input = document.createElement('textarea');
+    input.style.width = '400px';
+    input.style.height = '200px';
+    document.body.append(input);
+
+    const { AutoComplete } = await import(AUTOCOMPLETE_MODULE);
+    const autoComplete = new AutoComplete(input, 'loras', {
+      debounceDelay: 0,
+      showPreview: false,
+      enableVirtualScroll: true,
+      itemHeight: 40,
+      visibleItems: 15,
+      pageSize: 20,
+    });
+
+    input.value = 'item';
+    input.dispatchEvent(new Event('input', { bubbles: true }));
+
+    await vi.runAllTimersAsync();
+    await Promise.resolve();
+
+    expect(autoComplete.items.length).toBeGreaterThan(0);
+
+    autoComplete.selectedIndex = 14;
+
+    const scrollTopBefore = autoComplete.scrollContainer?.scrollTop || 0;
+
+    const arrowDownEvent = new KeyboardEvent('keydown', { key: 'ArrowDown', bubbles: true });
+    input.dispatchEvent(arrowDownEvent);
+
+    await vi.runAllTimersAsync();
+    await Promise.resolve();
+
+    expect(autoComplete.selectedIndex).toBe(15);
+
+    const selectedEl = autoComplete.contentContainer?.querySelector('.comfy-autocomplete-item-selected');
+    expect(selectedEl).toBeDefined();
+    expect(selectedEl?.dataset.index).toBe('15');
+
+    const scrollTopAfter = autoComplete.scrollContainer?.scrollTop || 0;
+    expect(scrollTopAfter).toBeGreaterThanOrEqual(scrollTopBefore);
+  });
 });
--- a/tests/test_tag_fts_index.py
+++ b/tests/test_tag_fts_index.py
@@ -31,10 +31,27 @@ def temp_db_path():
@pytest.fixture
 def temp_csv_path():
    """Create a temporary CSV file with test data."""
-    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, encoding="utf-8") as f:
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".csv", delete=False, encoding="utf-8"
+    ) as f:
        # Write test data in the same format as danbooru_e621_merged.csv
        # Format: tag_name,category,post_count,aliases
+        # Include multiple tags starting with "1" to test popularity-based ranking
        f.write('1girl,0,6008644,"1girls,sole_female"\n')
+        f.write('1boy,0,1405457,"1boys,sole_male"\n')
+        f.write('1:1,14,377032,""\n')
+        f.write('16:9,14,152866,""\n')
+        f.write('1other,0,70962,""\n')
+        f.write('16:10,14,14739,""\n')
+        f.write('1990s_(style),0,9369,""\n')
+        f.write('1_eye,0,7179,""\n')
+        f.write('1:2,14,5865,""\n')
+        f.write('1980s_(style),0,5665,""\n')
+        f.write('1koma,0,4384,""\n')
+        f.write('1_horn,0,2122,""\n')
+        f.write('101_dalmatian_street,3,1933,""\n')
+        f.write('1upgobbo,3,1731,""\n')
+        f.write('14:9,14,1038,""\n')
        f.write('highres,5,5256195,"high_res,high_resolution,hires"\n')
        f.write('solo,0,5000954,"alone,female_solo,single"\n')
        f.write('hatsune_miku,4,500000,"miku"\n')
@@ -86,7 +103,7 @@ class TestTagFTSIndexBuild:
        fts.build_index()

        assert fts.is_ready() is True
-        assert fts.get_indexed_count() == 10
+        assert fts.get_indexed_count() == 24

    def test_build_index_nonexistent_csv(self, temp_db_path):
        """Test that build_index handles missing CSV gracefully."""
@@ -187,6 +204,76 @@ class TestTagFTSIndexSearch:
        results = populated_fts.search("girl", limit=1)
        assert len(results) <= 1

+    def test_search_tag_name_prefix_match_priority(self, populated_fts):
+        """Test that tag_name prefix matches rank higher than alias matches."""
+        results = populated_fts.search("1", limit=20)
+
+        assert len(results) > 0, "Should return results for '1'"
+
+        # Find first alias match (if any)
+        first_alias_idx = None
+        for i, result in enumerate(results):
+            if result.get("matched_alias"):
+                first_alias_idx = i
+                break
+
+        # All tag_name prefix matches should come before alias matches
+        if first_alias_idx is not None:
+            for i in range(first_alias_idx):
+                assert results[i]["tag_name"].lower().startswith("1"), (
+                    f"Tag at index {i} should start with '1' before alias matches"
+                )
+
+    def test_search_ranks_popular_tags_higher(self, populated_fts):
+        """Test that tags with higher post_count rank higher among prefix matches."""
+        results = populated_fts.search("1", limit=20)
+
+        # Filter to only tag_name prefix matches
+        prefix_matches = [r for r in results if r["tag_name"].lower().startswith("1")]
+
+        assert len(prefix_matches) > 1, "Should have multiple prefix matches"
+
+        # Verify descending post_count order among prefix matches
+        for i in range(len(prefix_matches) - 1):
+            assert (
+                prefix_matches[i]["post_count"] >= prefix_matches[i + 1]["post_count"]
+            ), (
+                f"Tags should be sorted by post_count: {prefix_matches[i]['tag_name']} ({prefix_matches[i]['post_count']}) >= {prefix_matches[i + 1]['tag_name']} ({prefix_matches[i + 1]['post_count']})"
+            )
+
+    def test_search_pagination_ordering_consistency(self, populated_fts):
+        """Test that pagination maintains consistent ordering."""
+        page1 = populated_fts.search("1", limit=10, offset=0)
+        page2 = populated_fts.search("1", limit=10, offset=10)
+
+        assert len(page1) > 0, "Page 1 should have results"
+        assert len(page2) > 0, "Page 2 should have results"
+
+        # Page 2 scores should all be <= Page 1 min score
+        page1_min_score = min(r["rank_score"] for r in page1)
+        page2_max_score = max(r["rank_score"] for r in page2)
+
+        assert page2_max_score <= page1_min_score, (
+            f"Page 2 max score ({page2_max_score}) should be <= Page 1 min score ({page1_min_score})"
+        )
+
+    def test_search_rank_score_includes_popularity_weight(self, populated_fts):
+        """Test that rank_score includes post_count popularity weighting."""
+        results = populated_fts.search("1", limit=5)
+
+        assert len(results) >= 2, "Need at least 2 results to compare"
+
+        # 1girl has 6M posts, should have higher rank_score than tags with fewer posts
+        girl_result = next((r for r in results if r["tag_name"] == "1girl"), None)
+        assert girl_result is not None, "1girl should be in results"
+
+        # Find a tag with significantly fewer posts
+        low_post_result = next((r for r in results if r["post_count"] < 10000), None)
+        if low_post_result:
+            assert girl_result["rank_score"] > low_post_result["rank_score"], (
+                f"1girl (6M posts) should have higher score than {low_post_result['tag_name']} ({low_post_result['post_count']} posts)"
+            )
+

 class TestAliasSearch:
    """Tests for alias search functionality."""
@@ -204,7 +291,9 @@ class TestAliasSearch:
        results = populated_fts.search("miku")

        assert len(results) >= 1
-        hatsune_result = next((r for r in results if r["tag_name"] == "hatsune_miku"), None)
+        hatsune_result = next(
+            (r for r in results if r["tag_name"] == "hatsune_miku"), None
+        )
        assert hatsune_result is not None
        assert hatsune_result["matched_alias"] == "miku"

@@ -214,7 +303,9 @@ class TestAliasSearch:
        results = populated_fts.search("hatsune")

        assert len(results) >= 1
-        hatsune_result = next((r for r in results if r["tag_name"] == "hatsune_miku"), None)
+        hatsune_result = next(
+            (r for r in results if r["tag_name"] == "hatsune_miku"), None
+        )
        assert hatsune_result is not None
        assert "matched_alias" not in hatsune_result

@@ -301,7 +392,9 @@ class TestSlashPrefixAliases:
    @pytest.fixture
    def fts_with_slash_aliases(self, temp_db_path):
        """Create an FTS index with slash-prefixed aliases."""
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, encoding="utf-8") as f:
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".csv", delete=False, encoding="utf-8"
+        ) as f:
            # Format: tag_name,category,post_count,aliases
            f.write('long_hair,0,4350743,"/lh,longhair,very_long_hair"\n')
            f.write('breasts,0,3439214,"/b,boobs,oppai"\n')
@@ -380,7 +473,15 @@ class TestCategoryMappings:

    def test_category_name_to_ids_complete(self):
        """Test that CATEGORY_NAME_TO_IDS includes all expected names."""
-        expected_names = ["general", "artist", "copyright", "character", "meta", "species", "lore"]
+        expected_names = [
+            "general",
+            "artist",
+            "copyright",
+            "character",
+            "meta",
+            "species",
+            "lore",
+        ]
        for name in expected_names:
            assert name in CATEGORY_NAME_TO_IDS
            assert isinstance(CATEGORY_NAME_TO_IDS[name], list)