feat(i18n): add structural consistency tests for locale files and enhance existing tests

This commit is contained in:
Will Miao
2025-09-01 19:29:50 +08:00
parent 40ce2baa7b
commit 33334da0bb
3 changed files with 556 additions and 40 deletions

View File

@@ -770,9 +770,9 @@
},
"update": {
"title": "检查更新",
"currentVersion": "当前版本",
"updateAvailable": "更新可用",
"noChangelogAvailable": "没有详细的更新日志可用。请查看 GitHub 以获取更多信息。",
"currentVersion": "当前版本",
"newVersion": "新版本",
"commit": "提交",
"viewOnGitHub": "在 GitHub 查看",

View File

@@ -182,20 +182,20 @@
"help": "設定從 Civitai 下載時不同模型類型的資料夾結構。",
"availablePlaceholders": "可用佔位符:",
"templateOptions": {
"flatStructure": "扁平結構",
"byBaseModel": "依基礎模型",
"byAuthor": "依作者",
"byFirstTag": "依第一標籤",
"baseModelFirstTag": "基礎模型 + 第一標籤",
"baseModelAuthor": "基礎模型 + 作者",
"authorFirstTag": "作者 + 第一標籤",
"customTemplate": "自訂範本"
"flatStructure": "扁平結構",
"byBaseModel": "依基礎模型",
"byAuthor": "依作者",
"byFirstTag": "依第一標籤",
"baseModelFirstTag": "基礎模型 + 第一標籤",
"baseModelAuthor": "基礎模型 + 作者",
"authorFirstTag": "作者 + 第一標籤",
"customTemplate": "自訂範本"
},
"customTemplatePlaceholder": "輸入自訂範本(例如:{base_model}/{author}/{first_tag})",
"modelTypes": {
"lora": "LoRA",
"checkpoint": "Checkpoint",
"embedding": "Embedding"
"lora": "LoRA",
"checkpoint": "Checkpoint",
"embedding": "Embedding"
},
"baseModelPathMappings": "基礎模型路徑對應",
"baseModelPathMappingsHelp": "自訂特定基礎模型的資料夾名稱(例如:「Flux.1 D」→「flux」)",
@@ -204,12 +204,12 @@
"customPathPlaceholder": "自訂路徑(例如:flux)",
"removeMapping": "移除對應",
"validation": {
"validFlat": "有效(扁平結構)",
"invalidChars": "偵測到無效字元",
"doubleSlashes": "不允許連續斜線",
"leadingTrailingSlash": "不能以斜線開始或結束",
"invalidPlaceholder": "無效佔位符:{placeholder}",
"validTemplate": "範本有效"
"validFlat": "有效(扁平結構)",
"invalidChars": "偵測到無效字元",
"doubleSlashes": "不允許連續斜線",
"leadingTrailingSlash": "不能以斜線開始或結束",
"invalidPlaceholder": "無效佔位符:{placeholder}",
"validTemplate": "範本有效"
}
},
"exampleImages": {
@@ -469,11 +469,11 @@
"title": "設定內容分級",
"current": "目前",
"levels": {
"pg": "PG",
"pg13": "PG13",
"r": "R",
"x": "X",
"xxx": "XXX"
"pg": "PG",
"pg13": "PG13",
"r": "R",
"x": "X",
"xxx": "XXX"
}
},
"deleteModel": {

View File

@@ -8,9 +8,8 @@ import os
import sys
import json
import re
import asyncio
import glob
from typing import Set, List, Dict
from typing import Set, Dict, List, Tuple, Any
# Add the parent directory to the path so we can import the modules
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
@@ -20,6 +19,511 @@ def test_json_files_exist():
print("Testing JSON locale files...")
return test_json_structure_validation()
def test_locale_files_structural_consistency():
    """Check every locale JSON file in ``locales/`` against ``en.json``.

    For each non-reference file the following checks run in order, and the
    first failing check ends the validation of that file:

    1. the file has the same number of lines as the reference,
    2. ``compare_json_structures`` reports no structural differences,
    3. ``compare_line_formats`` reports no per-line formatting differences,
    4. the set of translation keys equals the reference key set.

    Returns:
        bool: True when every locale file passes all checks. False when the
        locales directory, the locale files or the reference file are missing
        or unreadable, or when any locale file fails a check.
    """
    print("\nTesting locale files structural consistency...")

    locales_dir = os.path.join(os.path.dirname(__file__), 'locales')
    if not os.path.exists(locales_dir):
        print("❌ Locales directory does not exist!")
        return False

    # os.listdir() returns entries in arbitrary order; sort so that the report
    # is stable from run to run.
    locale_files = sorted(
        name for name in os.listdir(locales_dir) if name.endswith('.json')
    )
    if not locale_files:
        print("❌ No locale files found!")
        return False

    # Use en.json as the reference
    reference_file = 'en.json'
    if reference_file not in locale_files:
        print(f"❌ Reference file {reference_file} not found!")
        return False
    other_files = [name for name in locale_files if name != reference_file]

    success = True

    # Load and parse the reference file once; everything derived from it
    # (structure, key set) is reused for every locale below.
    reference_path = os.path.join(locales_dir, reference_file)
    try:
        with open(reference_path, 'r', encoding='utf-8') as f:
            reference_lines = f.readlines()
        reference_data = json.loads(''.join(reference_lines))
        reference_structure = get_json_structure(reference_data)
        reference_keys = get_all_translation_keys(reference_data)
        print(f"📋 Reference file {reference_file}:")
        print(f" Lines: {len(reference_lines)}")
        print(f" Keys: {len(reference_keys)}")
    except Exception as e:
        print(f"❌ Error reading reference file {reference_file}: {e}")
        return False

    # Compare each locale file with the reference
    for locale_file in other_files:
        locale_path = os.path.join(locales_dir, locale_file)
        # splitext removes only the trailing extension, unlike str.replace
        # which would also drop '.json' occurring elsewhere in the name.
        locale_name = os.path.splitext(locale_file)[0]
        try:
            with open(locale_path, 'r', encoding='utf-8') as f:
                locale_lines = f.readlines()
            locale_data = json.loads(''.join(locale_lines))
            locale_structure = get_json_structure(locale_data)

            # Test 1: Line count consistency
            if len(locale_lines) != len(reference_lines):
                print(f"{locale_name}: Line count mismatch!")
                print(f" Reference: {len(reference_lines)} lines")
                print(f" {locale_name}: {len(locale_lines)} lines")
                success = False
                continue

            # Test 2: Structural consistency
            structure_issues = compare_json_structures(reference_structure, locale_structure)
            if structure_issues:
                print(f"{locale_name}: Structure mismatch!")
                for issue in structure_issues[:5]:  # Show first 5 issues
                    print(f" - {issue}")
                if len(structure_issues) > 5:
                    print(f" ... and {len(structure_issues) - 5} more issues")
                success = False
                continue

            # Test 3: Line-by-line format consistency (excluding translation values)
            format_issues = compare_line_formats(reference_lines, locale_lines, locale_name)
            if format_issues:
                print(f"{locale_name}: Format mismatch!")
                for issue in format_issues[:5]:  # Show first 5 issues
                    print(f" - {issue}")
                if len(format_issues) > 5:
                    print(f" ... and {len(format_issues) - 5} more issues")
                success = False
                continue

            # Test 4: Key completeness
            locale_keys = get_all_translation_keys(locale_data)
            missing_keys = reference_keys - locale_keys
            extra_keys = locale_keys - reference_keys
            if missing_keys or extra_keys:
                print(f"{locale_name}: Key mismatch!")
                if missing_keys:
                    print(f" Missing {len(missing_keys)} keys")
                if extra_keys:
                    print(f" Extra {len(extra_keys)} keys")
                success = False
                continue

            print(f"{locale_name}: Structure and format consistent")
        except json.JSONDecodeError as e:
            print(f"{locale_name}: Invalid JSON syntax: {e}")
            success = False
        except Exception as e:
            # Reporting boundary of the test runner: record the failure and
            # keep validating the remaining locale files.
            print(f"{locale_name}: Error during validation: {e}")
            success = False

    if success:
        print(f"\n✅ All {len(locale_files)} locale files have consistent structure and formatting")
    return success
def get_json_structure(data: Any, path: str = '') -> Dict[str, Any]:
    """
    Describe the shape of decoded JSON data without its actual values.

    * An object becomes a dictionary mapping each key to the description of
      its value.
    * An array becomes ``{'_type': 'array', '_length': n}`` and, when it is
      not empty, additionally carries ``'_element_type'`` describing its
      first element.
    * Any other value becomes ``{'_type': <Python type name>}``.

    Arrays are described identically wherever they occur (as an object
    member, at the top level, or nested inside another array), so two
    documents can be compared without the result depending on where an
    array happens to sit.

    Args:
        data: Decoded JSON value (dict, list or scalar).
        path: Dotted path of ``data`` within the document. It is only
            threaded through the recursion and does not affect the result.

    Returns:
        A nested dictionary mirroring the structure of ``data``.
    """
    if isinstance(data, dict):
        return {
            key: get_json_structure(value, f"{path}.{key}" if path else key)
            for key, value in data.items()
        }

    if isinstance(data, list):
        structure: Dict[str, Any] = {'_type': 'array', '_length': len(data)}
        if data:
            # Only the first element is sampled to describe the element shape.
            structure['_element_type'] = get_json_structure(data[0], f"{path}[0]")
        return structure

    return {'_type': type(data).__name__}
def compare_json_structures(ref_structure: Dict[str, Any], locale_structure: Dict[str, Any], path: str = '') -> List[str]:
    """
    Compare two structure descriptions and return a list of differences.

    A node is treated as a nested object when it is a dictionary without a
    ``'_type'`` entry; any other node is treated as a leaf description and
    compared by equality.

    Args:
        ref_structure: Structure description of the reference document.
        locale_structure: Structure description of the document under test.
        path: Dotted path of the nodes being compared, used in messages.

    Returns:
        Human-readable issue strings: ``Missing key``, ``Structure mismatch``
        and ``Type mismatch`` entries in reference key order, followed by
        ``Extra key`` entries in locale key order. Empty when the two
        descriptions match.
    """
    issues = []

    for key, ref_node in ref_structure.items():
        current_path = f"{path}.{key}" if path else key

        if key not in locale_structure:
            issues.append(f"Missing key: {current_path}")
            continue

        locale_node = locale_structure[key]
        ref_is_object = isinstance(ref_node, dict) and '_type' not in ref_node

        if ref_is_object:
            if isinstance(locale_node, dict) and '_type' not in locale_node:
                issues.extend(compare_json_structures(ref_node, locale_node, current_path))
            else:
                # Report the described type of the locale node. Formatting
                # type(locale_node) would always print "<class 'dict'>"
                # because every description node is itself a dictionary.
                if isinstance(locale_node, dict):
                    actual = locale_node['_type']
                else:
                    actual = type(locale_node).__name__
                issues.append(f"Structure mismatch at {current_path}: expected object, got {actual}")
        elif ref_node != locale_node:
            issues.append(f"Type mismatch at {current_path}: expected {ref_node}, got {locale_node}")

    # Keys present only in the locale description
    for key in locale_structure:
        if key not in ref_structure:
            current_path = f"{path}.{key}" if path else key
            issues.append(f"Extra key: {current_path}")

    return issues
def extract_line_structure(line: str) -> Dict[str, str]:
    """
    Extract the structural elements of one line of a JSON file.

    The translation value of a ``"key": value`` line is replaced by a
    placeholder so that lines from different locale files can be compared
    on structure alone.

    Args:
        line: A raw line of the file, with or without its line terminator.

    Returns:
        A dictionary with three string entries:

        * ``'indentation'``: number of leading whitespace characters, as a
          decimal string. The line terminator is not counted, so a blank
          line has indentation ``'0'``.
        * ``'key'``: the key of a ``"key": ...`` line, or ``''``.
        * ``'structural_chars'``: the stripped line with a string value
          replaced by ``"VALUE"`` and a number/boolean/null value replaced
          by ``VALUE``; object and array openers are reduced to
          ``"key": {`` / ``"key": [``; lines without a key are returned
          stripped but otherwise unchanged.
    """
    # Measure indentation on the text without its line terminator so that a
    # whitespace-only line does not count the newline as indentation.
    content = line.rstrip('\r\n')
    indentation = len(content) - len(content.lstrip())

    stripped_line = line.strip()

    # Extract key if this is a key-value line
    key_match = re.match(r'^"([^"]+)"\s*:\s*', stripped_line)
    key = key_match.group(1) if key_match else ''

    if not key_match:
        # Non key-value lines (brackets, array elements, ...) are kept as-is
        structural_chars = stripped_line
    else:
        # Complete string value, with escaped quotes handled by the pattern
        value_pattern = r'^"[^"]+"\s*:\s*("(?:[^"\\]|\\.)*")(.*?)$'
        value_match = re.match(value_pattern, stripped_line)
        if value_match:
            structural_chars = f'"{key}": "VALUE"{value_match.group(2)}'
        else:
            # Take the value from the end of the matched key prefix. Searching
            # for the first ':' in the line would split inside a key that
            # itself contains a colon.
            after_colon = stripped_line[key_match.end():].strip()
            if after_colon.startswith('"'):
                # String value the pattern above could not match
                end_quote = find_closing_quote(after_colon, 1)
                if end_quote != -1:
                    structural_chars = f'"{key}": "VALUE"{after_colon[end_quote + 1:]}'
                else:
                    structural_chars = f'"{key}": "VALUE"'
            elif after_colon.startswith('{'):
                structural_chars = f'"{key}": {{'
            elif after_colon.startswith('['):
                structural_chars = f'"{key}": ['
            else:
                # Other values (numbers, booleans, null): keep only what
                # follows the value, e.g. a trailing comma.
                value_end = find_value_end(after_colon)
                structural_chars = f'"{key}": VALUE{after_colon[value_end:]}'

    return {
        'indentation': str(indentation),
        'key': key,
        'structural_chars': structural_chars
    }
def find_value_end(text: str) -> int:
    """
    Locate where a non-string JSON value (number, boolean, null) stops.

    Returns the index of the first ``,``, ``}`` or ``]`` in ``text``, or
    ``len(text)`` when none of these characters occurs.
    """
    terminators = ',}]'
    return next(
        (position for position, symbol in enumerate(text) if symbol in terminators),
        len(text),
    )
def find_closing_quote(text: str, start: int) -> int:
    """
    Return the index of the first unescaped double quote at or after ``start``.

    A quote counts as escaped when it is directly preceded by an odd number
    of backslashes. Returns ``-1`` when no unescaped quote is found.
    """
    for position in range(start, len(text)):
        if text[position] != '"':
            continue

        # Walk left over the run of backslashes directly before the quote
        cursor = position - 1
        while cursor >= 0 and text[cursor] == '\\':
            cursor -= 1
        escapes = position - 1 - cursor

        # An even run (including none) leaves the quote unescaped
        if escapes % 2 == 0:
            return position

    return -1
def compare_line_formats(ref_lines: List[str], locale_lines: List[str], locale_name: str) -> List[str]:
    """
    Compare line-by-line formatting between reference and locale files.

    Only structural elements (indentation, key, brackets, commas) are
    compared; translation values are ignored. Lines are paired positionally
    with ``zip``, so when the two lists differ in length the surplus lines of
    the longer one are not examined.

    Args:
        ref_lines: Lines of the reference file.
        locale_lines: Lines of the locale file under test.
        locale_name: Name of the locale. Accepted for the callers' benefit;
            it is not used in the produced messages.

    Returns:
        One ``"Line N: ..."`` string per mismatching line (1-based ``N``),
        listing every mismatch category found on that line.
    """
    issues = []

    for line_num, (ref_line, locale_line) in enumerate(zip(ref_lines, locale_lines), start=1):
        # Skip lines that are blank in both files
        if not ref_line.strip() and not locale_line.strip():
            continue

        ref_structure = extract_line_structure(ref_line)
        locale_structure = extract_line_structure(locale_line)

        structure_issues = []

        # Indentation must be exact
        if ref_structure['indentation'] != locale_structure['indentation']:
            structure_issues.append(f"indentation ({ref_structure['indentation']} vs {locale_structure['indentation']})")

        # Keys must be exact for structural consistency
        if ref_structure['key'] != locale_structure['key']:
            structure_issues.append(f"key ('{ref_structure['key']}' vs '{locale_structure['key']}')")

        ref_normalized = normalize_structural_chars(ref_structure['structural_chars'])
        locale_normalized = normalize_structural_chars(locale_structure['structural_chars'])

        if ref_normalized != locale_normalized:
            # A difference is tolerated when both lines carry the same key,
            # both hold a string value, and the text around the value
            # placeholder is identical.
            same_key = bool(ref_structure['key']) and ref_structure['key'] == locale_structure['key']
            only_value_differs = (
                same_key
                and '"VALUE"' in ref_normalized
                and '"VALUE"' in locale_normalized
                and ref_normalized.replace('"VALUE"', '"X"') == locale_normalized.replace('"VALUE"', '"X"')
            )
            # Only the structure finding is suppressed here. Moving on to the
            # next line at this point would also throw away an indentation
            # mismatch already recorded above for this line.
            if not only_value_differs:
                structure_issues.append(f"structure ('{ref_normalized}' vs '{locale_normalized}')")

        if structure_issues:
            issues.append(f"Line {line_num}: {', '.join(structure_issues)}")

    return issues
def normalize_structural_chars(structural_chars: str) -> str:
    """
    Normalize the structural text of a JSON line for comparison.

    String values are replaced by the placeholder ``"VALUE"`` and the
    whitespace around ``:``, ``,``, ``{`` and ``}`` is made uniform.

    A quoted string is treated as a value when it starts after the first
    colon of the line and is followed by ``,``, ``}``, ``]`` or the end of
    the line. Strings followed by a colon are keys and are left untouched,
    as are all strings on a line that contains no colon. The decision is
    made by position rather than by text, so a key is never overwritten
    when a value happens to have the same text as the key.

    Args:
        structural_chars: Structural text of a single line.

    Returns:
        The normalized text with surrounding whitespace removed.
    """
    string_re = re.compile(r'"(?:[^"\\]|\\.)*"')
    value_tail_re = re.compile(r'\s*(?:[,}\]]|$)')
    separator = structural_chars.find(':')

    def mask_value(match):
        # Scanning every quoted string keeps the scan aligned with string
        # boundaries; only those in value position are replaced.
        in_value_position = separator != -1 and match.start() > separator
        if in_value_position and value_tail_re.match(structural_chars, match.end()):
            return '"VALUE"'
        return match.group(0)

    normalized = string_re.sub(mask_value, structural_chars)

    # Normalize whitespace around structural characters
    normalized = re.sub(r'\s*:\s*', ': ', normalized)
    normalized = re.sub(r'\s*,\s*', ', ', normalized)
    normalized = re.sub(r'\s*{\s*', '{ ', normalized)
    normalized = re.sub(r'\s*}\s*', ' }', normalized)
    return normalized.strip()
def test_locale_files_formatting_consistency():
    """Verify that each expected locale file is formatted like ``en.json``.

    Every locale in the fixed list must exist, have the same number of lines
    as the reference, and produce no findings from ``compare_line_formats``.
    At most three findings are printed per file, followed by the raw
    reference and locale lines of the first finding.

    Returns:
        bool: True when all expected locale files match the reference,
        False otherwise or when the reference cannot be read.
    """
    print("\nTesting locale files formatting consistency...")

    base_dir = os.path.join(os.path.dirname(__file__), 'locales')
    expected_locales = ['en', 'zh-CN', 'zh-TW', 'ja', 'ru', 'de', 'fr', 'es', 'ko']

    # The English file is the formatting reference
    try:
        with open(os.path.join(base_dir, 'en.json'), 'r', encoding='utf-8') as handle:
            reference_lines = handle.readlines()
    except Exception as e:
        print(f"❌ Error reading reference file: {e}")
        return False

    all_ok = True

    for locale in expected_locales[1:]:  # 'en' is the reference itself
        target = os.path.join(base_dir, f'{locale}.json')
        if not os.path.exists(target):
            print(f"{locale}.json does not exist!")
            all_ok = False
            continue

        try:
            with open(target, 'r', encoding='utf-8') as handle:
                locale_lines = handle.readlines()

            if len(locale_lines) != len(reference_lines):
                print(f"{locale}.json: Line count differs from reference")
                print(f" Reference: {len(reference_lines)} lines")
                print(f" {locale}: {len(locale_lines)} lines")
                all_ok = False
                continue

            problems = compare_line_formats(reference_lines, locale_lines, locale)
            if not problems:
                print(f"{locale}.json: Formatting consistent with reference")
                continue

            print(f"{locale}.json: Formatting issues found")
            # Cap the listing at three findings to avoid flooding the output
            for problem in problems[:3]:
                print(f" - {problem}")
            if len(problems) > 3:
                print(f" ... and {len(problems) - 3} more issues")

            # Show the raw lines behind the first finding to help spot
            # false positives
            located = re.match(r'Line (\d+):', problems[0])
            if located:
                index = int(located.group(1)) - 1  # 0-based
                if 0 <= index < len(reference_lines):
                    print(f" Debug - Reference line {index + 1}: {repr(reference_lines[index].rstrip())}")
                    print(f" Debug - {locale} line {index + 1}: {repr(locale_lines[index].rstrip())}")
            all_ok = False
        except Exception as e:
            print(f"❌ Error validating {locale}.json: {e}")
            all_ok = False

    if all_ok:
        print("✅ All locale files have consistent formatting")
    else:
        print("💡 Note: Some formatting differences may be false positives due to translation content.")
        print(" If translations are correct but structure appears different, the test may need refinement.")
    return all_ok
def test_locale_key_ordering():
    """Test that all locale files keep the same key ordering as ``en.json``.

    Each file is decoded with ``object_pairs_hook`` returning the raw list of
    ``(key, value)`` pairs, and the flattened key sequence produced by
    ``get_key_order`` is compared with that of the reference.

    Locale files that do not exist are skipped here without affecting the
    result.

    Returns:
        bool: True when every existing expected locale file has exactly the
        reference key sequence, False otherwise or when the reference cannot
        be read.
    """
    print("\nTesting locale files key ordering...")

    locales_dir = os.path.join(os.path.dirname(__file__), 'locales')
    expected_locales = ['en', 'zh-CN', 'zh-TW', 'ja', 'ru', 'de', 'fr', 'es', 'ko']

    # Load reference file
    reference_path = os.path.join(locales_dir, 'en.json')
    try:
        with open(reference_path, 'r', encoding='utf-8') as f:
            reference_data = json.load(f, object_pairs_hook=lambda x: x)  # Preserve order
        reference_key_order = get_key_order(reference_data)
    except Exception as e:
        print(f"❌ Error reading reference file: {e}")
        return False

    success = True

    for locale in expected_locales[1:]:  # Skip 'en' as it's the reference
        locale_path = os.path.join(locales_dir, f'{locale}.json')
        if not os.path.exists(locale_path):
            continue

        try:
            with open(locale_path, 'r', encoding='utf-8') as f:
                locale_data = json.load(f, object_pairs_hook=lambda x: x)  # Preserve order
            locale_key_order = get_key_order(locale_data)

            if reference_key_order == locale_key_order:
                print(f"{locale}.json: Key ordering matches reference")
                continue

            print(f"{locale}.json: Key ordering differs from reference")
            first_difference = next(
                (
                    (position, ref_key, locale_key)
                    for position, (ref_key, locale_key)
                    in enumerate(zip(reference_key_order, locale_key_order))
                    if ref_key != locale_key
                ),
                None,
            )
            if first_difference is not None:
                position, ref_key, locale_key = first_difference
                print(f" First difference at position {position}: '{ref_key}' vs '{locale_key}'")
            else:
                # The common prefix is identical, so one file simply has more
                # keys than the other; say so instead of printing nothing.
                print(f" Key count differs: {len(reference_key_order)} vs {len(locale_key_order)}")
            success = False
        except Exception as e:
            print(f"❌ Error validating {locale}.json key ordering: {e}")
            success = False

    return success
def get_key_order(data: Any, path: str = '') -> List[str]:
    """
    Extract the order of keys from nested JSON data.

    Two representations of a JSON object are accepted: a ``dict``, or a list
    of ``(key, value)`` tuples as produced by decoding with an
    ``object_pairs_hook`` that returns its argument unchanged.

    A list may also be a genuine JSON array. Its scalar elements contribute
    no keys, and its container elements are descended into under the path
    ``<path>[<index>]``. Pairs are recognised by being 2-tuples, so array
    elements are never unpacked as key/value pairs.

    Args:
        data: Decoded JSON value.
        path: Dotted path of ``data`` within the document, used as prefix.

    Returns:
        Dotted key paths in order of appearance, parents before children.
        Empty for scalars and empty containers.
    """
    keys: List[str] = []

    if isinstance(data, dict):
        members = list(data.items())
    elif isinstance(data, list):
        members = data
    else:
        return keys

    for index, member in enumerate(members):
        if isinstance(member, tuple) and len(member) == 2:
            # Object member: record its key, then descend into the value
            key, value = member
            current_path = f"{path}.{key}" if path else key
            keys.append(current_path)
            if isinstance(value, (dict, list)):
                keys.extend(get_key_order(value, current_path))
        elif isinstance(member, (dict, list)):
            # Container element of a JSON array
            keys.extend(get_key_order(member, f"{path}[{index}]"))
        # Scalar array elements carry no keys

    return keys
def test_server_i18n():
"""Test the Python server-side i18n system."""
print("\nTesting Python server-side i18n...")
@@ -237,7 +741,7 @@ def get_all_translation_keys(data: dict, prefix: str = '', include_containers: b
def test_static_code_analysis():
"""Test static code analysis to detect missing translation keys."""
print("\nTesting static code analysis for translation keys...")
# print("\nTesting static code analysis for translation keys...")
# Load English translations as reference
locales_dir = os.path.join(os.path.dirname(__file__), 'locales')
@@ -245,7 +749,7 @@ def test_static_code_analysis():
en_data = json.load(f)
available_keys = get_all_translation_keys(en_data)
print(f"Available translation keys in en.json: {len(available_keys)}")
# print(f"Available translation keys in en.json: {len(available_keys)}")
# Known false positives to exclude from analysis
# These are typically HTML attributes, CSS classes, or other non-translation strings
@@ -276,9 +780,9 @@ def test_static_code_analysis():
if file_keys:
rel_path = os.path.relpath(js_file, os.path.dirname(__file__))
js_files_with_keys.append((rel_path, len(file_keys)))
print(f" Found {len(file_keys)} keys in {rel_path}")
# print(f" Found {len(file_keys)} keys in {rel_path}")
print(f"Total unique keys found in JavaScript files: {len(js_keys)}")
# print(f"Total unique keys found in JavaScript files: {len(js_keys)}")
# Extract keys from HTML template files
templates_dir = os.path.join(os.path.dirname(__file__), 'templates')
@@ -298,13 +802,13 @@ def test_static_code_analysis():
if file_keys:
rel_path = os.path.relpath(html_file, os.path.dirname(__file__))
html_files_with_keys.append((rel_path, len(file_keys)))
print(f" Found {len(file_keys)} keys in {rel_path}")
# print(f" Found {len(file_keys)} keys in {rel_path}")
print(f"Total unique keys found in HTML templates: {len(html_keys)}")
# print(f"Total unique keys found in HTML templates: {len(html_keys)}")
# Combine all used keys
all_used_keys = js_keys.union(html_keys)
print(f"Total unique keys used in code: {len(all_used_keys)}")
# print(f"Total unique keys used in code: {len(all_used_keys)}")
# Check for missing keys
missing_keys = all_used_keys - available_keys
@@ -356,14 +860,14 @@ def test_static_code_analysis():
print(f" {category}: {len(keys)} keys")
# Summary statistics
print(f"\n📊 Static Code Analysis Summary:")
print(f" JavaScript files analyzed: {len(js_files)}")
print(f" JavaScript files with translations: {len(js_files_with_keys)}")
print(f" HTML template files analyzed: {len(html_files)}")
print(f" HTML template files with translations: {len(html_files_with_keys)}")
print(f" Translation keys in en.json: {len(available_keys)}")
print(f" Translation keys used in code: {len(all_used_keys)}")
print(f" Usage coverage: {len(all_used_keys)/len(available_keys)*100:.1f}%")
# print(f"\n📊 Static Code Analysis Summary:")
# print(f" JavaScript files analyzed: {len(js_files)}")
# print(f" JavaScript files with translations: {len(js_files_with_keys)}")
# print(f" HTML template files analyzed: {len(html_files)}")
# print(f" HTML template files with translations: {len(html_files_with_keys)}")
# print(f" Translation keys in en.json: {len(available_keys)}")
# print(f" Translation keys used in code: {len(all_used_keys)}")
# print(f" Usage coverage: {len(all_used_keys)/len(available_keys)*100:.1f}%")
return success
@@ -451,6 +955,18 @@ def main():
if not test_json_files_exist():
success = False
# Test comprehensive structural consistency
if not test_locale_files_structural_consistency():
success = False
# Test formatting consistency
if not test_locale_files_formatting_consistency():
success = False
# Test key ordering
if not test_locale_key_ordering():
success = False
# Test server i18n
if not test_server_i18n():
success = False