  1. #!/usr/bin/env python3
  2. '''
  3. languageExport.py [--single]
  4. Export LCD language strings to CSV files for easier translation.
  5. Use languageImport.py to import CSV into the language files.
  6. Use --single to export all languages to a single CSV file.
  7. '''
  8. import re
  9. from pathlib import Path
  10. from sys import argv
  11. from languageUtil import namebyid
  12. LANGHOME = "Marlin/src/lcd/language"
  13. # Write multiple sheets if true, otherwise write one giant sheet
  14. MULTISHEET = '--single' not in argv[1:]
  15. OUTDIR = 'out-csv'
  16. # Check for the path to the language files
  17. if not Path(LANGHOME).is_dir():
  18. print("Error: Couldn't find the '%s' directory." % LANGHOME)
  19. print("Edit LANGHOME or cd to the root of the repo before running.")
  20. exit(1)
  21. # A limit just for testing
  22. LIMIT = 0
  23. # A dictionary to contain strings for each language.
  24. # Init with 'en' so English will always be first.
  25. language_strings = { 'en': {} }
  26. # A dictionary to contain all distinct LCD string names
  27. names = {}
  28. # Get all "language_*.h" files
  29. langfiles = sorted(list(Path(LANGHOME).glob('language_*.h')))
  30. # Read each language file
  31. for langfile in langfiles:
  32. # Get the language code from the filename
  33. langcode = langfile.name.replace('language_', '').replace('.h', '')
  34. # Skip 'test' and any others that we don't want
  35. if langcode in ['test']: continue
  36. # Open the file
  37. f = open(langfile, 'r', encoding='utf-8')
  38. if not f: continue
  39. # Flags to indicate a wide or tall section
  40. wideflag, tallflag = False, False
  41. # A counter for the number of strings in the file
  42. stringcount = 0
  43. # A dictionary to hold all the strings
  44. strings = { 'narrow': {}, 'wide': {}, 'tall': {} }
  45. # Read each line in the file
  46. for line in f:
  47. # Clean up the line for easier parsing
  48. line = line.split("//")[0].strip()
  49. if line.endswith(';'): line = line[:-1].strip()
  50. # Check for wide or tall sections, assume no complicated nesting
  51. if line.startswith("#endif") or line.startswith("#else"):
  52. wideflag, tallflag = False, False
  53. elif re.match(r'#if.*WIDTH\s*>=?\s*2[01].*', line): wideflag = True
  54. elif re.match(r'#if.*LCD_HEIGHT\s*>=?\s*4.*', line): tallflag = True
  55. # For string-defining lines capture the string data
  56. match = re.match(r'LSTR\s+([A-Z0-9_]+)\s*=\s*(.+)\s*', line)
  57. if match:
  58. # Name and quote-sanitized value
  59. name, value = match.group(1), match.group(2).replace('\\"', '$$$')
  60. # Remove all _UxGT wrappers from the value in a non-greedy way
  61. value = re.sub(r'_UxGT\((".*?")\)', r'\1', value)
  62. # Multi-line strings get one or more bars | for identification
  63. multiline = 0
  64. multimatch = re.match(r'.*MSG_(\d)_LINE\s*\(\s*(.+?)\s*\).*', value)
  65. if multimatch:
  66. multiline = int(multimatch.group(1))
  67. value = '|' + re.sub(r'"\s*,\s*"', '|', multimatch.group(2))
  68. # Wrap inline defines in parentheses
  69. value = re.sub(r' *([A-Z0-9]+_[A-Z0-9_]+) *', r'(\1)', value)
  70. # Remove quotes around strings
  71. value = re.sub(r'"(.*?)"', r'\1', value).replace('$$$', '""')
  72. # Store all unique names as dictionary keys
  73. names[name] = 1
  74. # Store the string as narrow or wide
  75. strings['tall' if tallflag else 'wide' if wideflag else 'narrow'][name] = value
  76. # Increment the string counter
  77. stringcount += 1
  78. # Break for testing
  79. if LIMIT and stringcount >= LIMIT: break
  80. # Close the file
  81. f.close()
  82. # Store the array in the dict
  83. language_strings[langcode] = strings
  84. # Get the language codes from the dictionary
  85. langcodes = list(language_strings.keys())
  86. # Print the array
  87. #print(language_strings)
  88. # Report the total number of unique strings
  89. print("Found %s distinct LCD strings." % len(names))
  90. # Write a single language entry to the CSV file with narrow, wide, and tall strings
  91. def write_csv_lang(f, strings, name):
  92. f.write(',')
  93. if name in strings['narrow']: f.write('"%s"' % strings['narrow'][name])
  94. f.write(',')
  95. if name in strings['wide']: f.write('"%s"' % strings['wide'][name])
  96. f.write(',')
  97. if name in strings['tall']: f.write('"%s"' % strings['tall'][name])
  98. if MULTISHEET:
  99. #
  100. # Export a separate sheet for each language
  101. #
  102. Path.mkdir(Path(OUTDIR), exist_ok=True)
  103. for lang in langcodes:
  104. with open("%s/language_%s.csv" % (OUTDIR, lang), 'w', encoding='utf-8') as f:
  105. lname = lang + ' ' + namebyid(lang)
  106. header = ['name', lname, lname + ' (wide)', lname + ' (tall)']
  107. f.write('"' + '","'.join(header) + '"\n')
  108. for name in names.keys():
  109. f.write('"' + name + '"')
  110. write_csv_lang(f, language_strings[lang], name)
  111. f.write('\n')
  112. else:
  113. #
  114. # Export one large sheet containing all languages
  115. #
  116. with open("languages.csv", 'w', encoding='utf-8') as f:
  117. header = ['name']
  118. for lang in langcodes:
  119. lname = lang + ' ' + namebyid(lang)
  120. header += [lname, lname + ' (wide)', lname + ' (tall)']
  121. f.write('"' + '","'.join(header) + '"\n')
  122. for name in names.keys():
  123. f.write('"' + name + '"')
  124. for lang in langcodes: write_csv_lang(f, language_strings[lang], name)
  125. f.write('\n')