diff --git a/scripts/sync_translation_keys.py b/scripts/sync_translation_keys.py new file mode 100644 index 00000000..4244bde0 --- /dev/null +++ b/scripts/sync_translation_keys.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +""" +Translation Key Synchronization Script + +This script synchronizes new translation keys from en.json to all other locale files +while maintaining exact formatting consistency to pass test_i18n.py validation. + +Features: +- Preserves exact line-by-line formatting +- Maintains proper indentation and structure +- Adds missing keys with placeholder translations +- Handles nested objects correctly +- Ensures all locale files have identical structure + +Usage: + python scripts/sync_translation_keys.py [--dry-run] [--verbose] +""" + +import os +import sys +import json +import re +import argparse +from typing import Dict, List, Set, Tuple, Any, Optional +from collections import OrderedDict + +# Add the parent directory to the path so we can import modules if needed +sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) + +class TranslationKeySynchronizer: + """Synchronizes translation keys across locale files while maintaining formatting.""" + + def __init__(self, locales_dir: str, verbose: bool = False): + self.locales_dir = locales_dir + self.verbose = verbose + self.reference_locale = 'en' + self.target_locales = ['zh-CN', 'zh-TW', 'ja', 'ru', 'de', 'fr', 'es', 'ko'] + + def log(self, message: str, level: str = 'INFO'): + """Log a message if verbose mode is enabled.""" + if self.verbose or level == 'ERROR': + print(f"[{level}] {message}") + + def load_json_preserve_order(self, file_path: str) -> Tuple[Dict[str, Any], List[str]]: + """ + Load a JSON file preserving the exact order and formatting. + Returns both the parsed data and the original lines. + """ + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + content = ''.join(lines) + + # Parse JSON while preserving order + data = json.loads(content, object_pairs_hook=OrderedDict) + return data, lines + + def get_all_leaf_keys(self, data: Any, prefix: str = '') -> Dict[str, Any]: + """ + Extract all leaf keys (non-object values) with their full paths. + Returns a dictionary mapping full key paths to their values. + """ + keys = {} + + if isinstance(data, (dict, OrderedDict)): + for key, value in data.items(): + full_key = f"{prefix}.{key}" if prefix else key + + if isinstance(value, (dict, OrderedDict)): + # Recursively get nested keys + keys.update(self.get_all_leaf_keys(value, full_key)) + else: + # Leaf node - actual translatable value + keys[full_key] = value + + return keys + + def merge_json_structures(self, reference_data: Dict[str, Any], target_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Merge the reference JSON structure with existing target translations. + This creates a new structure that matches the reference exactly but preserves + existing translations where available. + """ + def merge_recursive(ref_obj, target_obj): + if isinstance(ref_obj, (dict, OrderedDict)): + result = OrderedDict() + for key, ref_value in ref_obj.items(): + if key in target_obj and isinstance(target_obj[key], type(ref_value)): + # Key exists in target with same type + if isinstance(ref_value, (dict, OrderedDict)): + # Recursively merge nested objects + result[key] = merge_recursive(ref_value, target_obj[key]) + else: + # Use existing translation + result[key] = target_obj[key] + else: + # Key missing in target or type mismatch + if isinstance(ref_value, (dict, OrderedDict)): + # Recursively handle nested objects + result[key] = merge_recursive(ref_value, {}) + else: + # Create placeholder translation + result[key] = f"[TODO: Translate] {ref_value}" + return result + else: + # For non-dict values, use reference (this shouldn't happen at root level) + return ref_obj + + return merge_recursive(reference_data, target_data) + + def format_json_like_reference(self, data: Dict[str, Any], reference_lines: List[str]) -> List[str]: + """ + Format the merged JSON data to match the reference file's formatting exactly. + """ + # Use json.dumps with proper formatting to match the reference style + formatted_json = json.dumps(data, indent=4, ensure_ascii=False, separators=(',', ': ')) + + # Split into lines and ensure consistent line endings + formatted_lines = [line + '\n' for line in formatted_json.split('\n')] + + # Make sure the last line doesn't have extra newlines + if formatted_lines and formatted_lines[-1].strip() == '': + formatted_lines = formatted_lines[:-1] + + # Ensure the last line ends with just a newline + if formatted_lines and not formatted_lines[-1].endswith('\n'): + formatted_lines[-1] += '\n' + + return formatted_lines + + def synchronize_locale_simple(self, locale: str, reference_data: Dict[str, Any], + reference_lines: List[str], dry_run: bool = False) -> bool: + """ + Synchronize a locale file using JSON structure merging. + """ + locale_file = os.path.join(self.locales_dir, f'{locale}.json') + + if not os.path.exists(locale_file): + self.log(f"Locale file {locale_file} does not exist!", 'ERROR') + return False + + try: + target_data, _ = self.load_json_preserve_order(locale_file) + except Exception as e: + self.log(f"Error loading {locale_file}: {e}", 'ERROR') + return False + + # Get keys to check for missing ones + ref_keys = self.get_all_leaf_keys(reference_data) + target_keys = self.get_all_leaf_keys(target_data) + missing_keys = set(ref_keys.keys()) - set(target_keys.keys()) + + if not missing_keys: + self.log(f"Locale {locale} is already up to date") + return False + + self.log(f"Found {len(missing_keys)} missing keys in {locale}:") + for key in sorted(missing_keys): + self.log(f" - {key}") + + if dry_run: + self.log(f"DRY RUN: Would update {locale} with {len(missing_keys)} new keys") + return True + + # Merge the structures + try: + merged_data = self.merge_json_structures(reference_data, target_data) + + # Format to match reference style + new_lines = self.format_json_like_reference(merged_data, reference_lines) + + # Validate that the result is valid JSON + reconstructed_content = ''.join(new_lines) + json.loads(reconstructed_content) # This will raise an exception if invalid + + # Write the updated file + with open(locale_file, 'w', encoding='utf-8') as f: + f.writelines(new_lines) + + self.log(f"Successfully updated {locale} with {len(missing_keys)} new keys") + return True + + except json.JSONDecodeError as e: + self.log(f"Generated invalid JSON for {locale}: {e}", 'ERROR') + return False + except Exception as e: + self.log(f"Error updating {locale_file}: {e}", 'ERROR') + return False + + def synchronize_all(self, dry_run: bool = False) -> bool: + """ + Synchronize all locale files with the reference. + Returns True if all operations were successful. + """ + # Load reference file + reference_file = os.path.join(self.locales_dir, f'{self.reference_locale}.json') + + if not os.path.exists(reference_file): + self.log(f"Reference file {reference_file} does not exist!", 'ERROR') + return False + + try: + reference_data, reference_lines = self.load_json_preserve_order(reference_file) + reference_keys = self.get_all_leaf_keys(reference_data) + except Exception as e: + self.log(f"Error loading reference file: {e}", 'ERROR') + return False + + self.log(f"Loaded reference file with {len(reference_keys)} keys") + + success = True + changes_made = False + + # Synchronize each target locale + for locale in self.target_locales: + try: + if self.synchronize_locale_simple(locale, reference_data, reference_lines, dry_run): + changes_made = True + except Exception as e: + self.log(f"Error synchronizing {locale}: {e}", 'ERROR') + success = False + + if changes_made: + self.log("Synchronization completed with changes") + else: + self.log("All locale files are already up to date") + + return success + +def main(): + """Main entry point for the script.""" + parser = argparse.ArgumentParser( + description='Synchronize translation keys from en.json to all other locale files' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be changed without making actual changes' + ) + parser.add_argument( + '--verbose', '-v', + action='store_true', + help='Enable verbose output' + ) + parser.add_argument( + '--locales-dir', + default=None, + help='Path to locales directory (default: auto-detect from script location)' + ) + + args = parser.parse_args() + + # Determine locales directory + if args.locales_dir: + locales_dir = args.locales_dir + else: + # Auto-detect based on script location + script_dir = os.path.dirname(os.path.abspath(__file__)) + locales_dir = os.path.join(os.path.dirname(script_dir), 'locales') + + if not os.path.exists(locales_dir): + print(f"ERROR: Locales directory not found: {locales_dir}") + sys.exit(1) + + print(f"Translation Key Synchronization") + print(f"Locales directory: {locales_dir}") + print(f"Mode: {'DRY RUN' if args.dry_run else 'LIVE UPDATE'}") + print("-" * 50) + + # Create synchronizer and run + synchronizer = TranslationKeySynchronizer(locales_dir, args.verbose) + + try: + success = synchronizer.synchronize_all(args.dry_run) + + if success: + print("\n✅ Synchronization completed successfully!") + if not args.dry_run: + print("💡 Run 'python test_i18n.py' to verify formatting consistency") + else: + print("\n❌ Synchronization completed with errors!") + sys.exit(1) + + except KeyboardInterrupt: + print("\n⚠️ Operation cancelled by user") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + +if __name__ == '__main__': + main()