# pom_merger.py
import re
from datetime import date, datetime

try:
    from Levenshtein import distance as levenshtein_distance
except ImportError:
    print("you need to do 'python -m pip install python-Levenshtein'")
    exit(0)
  8. datastore = dict();
  9. datastore_trim = dict();# key:string -> TranslationLine
  10. regex_only_letters = re.compile(r"[^a-zA-Z]")
  11. allow_msgctxt = True;
  12. ignore_case = False;
  13. remove_comment = False;
  14. percent_error_similar = 0
  15. language_code = "??"
  16. language = ""
  17. max_similar = 3;
  18. database_out = "";
  19. def trim(str):
  20. redo = True;
  21. while redo:
  22. while len(str) > 0 and (
  23. str[0] == ":"
  24. or str[0] == "."
  25. or str[0] == ","
  26. or str[0] == "!"
  27. ):
  28. str = str[1:];
  29. while len(str) > 0 and (
  30. str[-1] == ":"
  31. or str[-1] == "."
  32. or str[-1] == ","
  33. or str[-1] == "!"
  34. ):
  35. str = str[:-1];
  36. str_stripped = str.strip();
  37. if str == str_stripped:
  38. redo = False
  39. else:
  40. str = str_stripped;
  41. return str;
  42. class TranslationFiles:
  43. file_in = ""
  44. file_out = ""
  45. file_todo = ""
  46. database = ""
  47. class TranslationLine:
  48. header_comment = ""
  49. raw_msgid = ""
  50. msgid = ""
  51. raw_msgstr = ""
  52. msgstr = ""
  53. multivalue = False
  54. def main():
  55. global datastore, datastore_trim, regex_only_letters, allow_msgctxt, ignore_case, remove_comment;
  56. global percent_error_similar, language, language_code, max_similar, database_out;
  57. data_files = list(); # list of file paths
  58. ui_dir = "";
  59. operations = list(); # list of TranslationFiles
  60. settings_stream = open("./settings.ini", mode="r", encoding="utf-8")
  61. lines = settings_stream.read().splitlines()
  62. for line in lines:
  63. if line.startswith("data"):
  64. if line.startswith("database_out"):
  65. database_out = line[line.index('=')+1:].strip();
  66. else:
  67. data_files.append(line[line.index('=')+1:].strip());
  68. if line.startswith("input"):
  69. operations.append(TranslationFiles());
  70. operations[-1].file_in = line[line.index('=')+1:].strip();
  71. if line.startswith("output") and operations:
  72. operations[-1].file_out = line[line.index('=')+1:].strip();
  73. if line.startswith("todo") and operations:
  74. operations[-1].file_todo = line[line.index('=')+1:].strip();
  75. if line.startswith("ui_dir"):
  76. ui_dir = line[line.index('=')+1:].strip();
  77. if line.startswith("allow_msgctxt"):
  78. allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true");
  79. print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt");
  80. if line.startswith("allow_msgctxt"):
  81. allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true");
  82. print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt");
  83. if line.startswith("remove_comment"):
  84. remove_comment = (line[line.index('=')+1:].strip().lower() == "true");
  85. if remove_comment:
  86. print("Will not output the comments");
  87. if line.startswith("percent_error_similar"):
  88. percent_error_similar = float(line[line.index('=')+1:].strip());
  89. print("percent_error_similar set to " + str(percent_error_similar));
  90. if line.startswith("max_similar"):
  91. max_similar = int(line[line.index('=')+1:].strip());
  92. print("max_similar set to " + str(max_similar));
  93. if line.startswith("language"):
  94. if line.startswith("language_code"):
  95. language_code = line[line.index('=')+1:].strip();
  96. print("language_code set to " + language_code);
  97. else:
  98. language = line[line.index('=')+1:].strip();
  99. print("language set to " + language);
  100. # all_lines = list();
  101. for data_file in data_files:
  102. new_data = createKnowledge(data_file);
  103. for dataline in new_data:
  104. if len(dataline.msgstr) > 0:
  105. if not dataline.msgid in datastore:
  106. datastore[dataline.msgid] = dataline;
  107. datastore_trim[trim(dataline.msgid)] = dataline;
  108. if dataline.msgid == " Layers,":
  109. print(trim(dataline.msgid)+" is inside? "+("oui" if "Layers" in datastore_trim else "non"));
  110. else:
  111. str_old_val = datastore[dataline.msgid].msgstr;
  112. str_test_val = dataline.msgstr;
  113. length_old = len(regex_only_letters.sub("", str_old_val));
  114. length_new = len(regex_only_letters.sub("", str_test_val));
  115. # if already exist, only change it if the previous was lower than 3 char
  116. if length_new > length_old and length_old < 3:
  117. print(str_old_val.replace('\n', ' ')+" replaced by "+str_test_val.replace('\n', ' '));
  118. datastore[dataline.msgid].msgstr = str_test_val;
  119. datastore_trim[trim(dataline.msgid)].msgstr = str_test_val;
  120. print("finish reading" + data_file + " of size "+ str(len(new_data)) + ", now we had "+ str(len(datastore)) + " items");
  121. if ignore_case:
  122. temp = list();
  123. for msgid in datastore:
  124. if not msgid.lower() in datastore:
  125. temp.append(msgid);
  126. for msgid in temp:
  127. datastore[msgid.lower()] = datastore[msgid];
  128. temp = list();
  129. for msgid in datastore_trim:
  130. if not msgid.lower() in datastore_trim:
  131. temp.append(msgid);
  132. for msgid in temp:
  133. datastore_trim[msgid.lower()] = datastore_trim[msgid];
  134. for operation in operations:
  135. print("Translating " + operation.file_in);
  136. dict_ope = dict();
  137. ope_file_in = list();
  138. lst_temp = createKnowledge(operation.file_in);
  139. print("String from source files: " + str(len(lst_temp)));
  140. nbTrans = 0;
  141. #remove duplicate
  142. for line in lst_temp:
  143. if not line.msgid in dict_ope:
  144. dict_ope[line.msgid] = line;
  145. ope_file_in.append(line);
  146. if line.msgstr:
  147. nbTrans+=1;
  148. print(line.header_comment);
  149. print(line.raw_msgid);
  150. print(line.msgid);
  151. print(line.raw_msgstr);
  152. print(line.msgstr);
  153. #add def from conf files
  154. if ui_dir:
  155. new_data = parse_ui_file(ui_dir+"/extruder.ui");
  156. new_data.extend(parse_ui_file(ui_dir+"/extruder.ui"));
  157. new_data.extend(parse_ui_file(ui_dir+"/filament.ui"));
  158. new_data.extend(parse_ui_file(ui_dir+"/milling.ui"));
  159. new_data.extend(parse_ui_file(ui_dir+"/print.ui"));
  160. new_data.extend(parse_ui_file(ui_dir+"/printer_fff.ui"));
  161. new_data.extend(parse_ui_file(ui_dir+"/printer_sla.ui"));
  162. new_data.extend(parse_ui_file(ui_dir+"/sla_material.ui"));
  163. new_data.extend(parse_ui_file(ui_dir+"/sla_print.ui"));
  164. print("String from ui files: " + str(len(new_data)));
  165. for dataline in new_data:
  166. if not dataline.msgid in dict_ope:
  167. dict_ope[dataline.msgid] = dataline;
  168. ope_file_in.append(dataline);
  169. print("String to translate: " + str(len(ope_file_in) - nbTrans)+" and already translated: "+str(nbTrans));
  170. #create database
  171. if database_out:
  172. outputDatabase(database_out);
  173. #create TODO file
  174. if operation.file_todo:
  175. outputUntranslated(ope_file_in, operation.file_todo);
  176. #create .po file
  177. if operation.file_out:
  178. translate(ope_file_in, operation.file_out);
  179. print("End of merge");
  180. def createKnowledge(file_path_in):
  181. read_data_lines = list();
  182. try:
  183. file_in_stream = open(file_path_in, mode="r", encoding="utf-8")
  184. lines = file_in_stream.read().splitlines();
  185. lines.append("");
  186. line_idx = 0;
  187. current_line = TranslationLine();
  188. nb = 0;
  189. while line_idx < len(lines):
  190. if not lines[line_idx].startswith("msgid") or len(lines[line_idx]) <= 7:
  191. if (lines[line_idx].startswith("#")
  192. or lines[line_idx].startswith("msgctxt")
  193. or len(lines[line_idx].strip()) == 0
  194. ):
  195. if not allow_msgctxt and lines[line_idx].startswith("msgctxt"):
  196. current_line.header_comment += "\n#, " + lines[line_idx];
  197. else:
  198. current_line.header_comment += "\n" + lines[line_idx];
  199. line_idx+=1;
  200. continue;
  201. # get the msgid line
  202. current_line.raw_msgid = lines[line_idx];
  203. current_line.msgid = lines[line_idx][7:];
  204. #get the next line (can be whatever)
  205. line_idx+=1;
  206. if line_idx >= len(lines):
  207. return read_data_lines;
  208. #populate the full current_line.msgid string
  209. while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgid"):
  210. current_line.raw_msgid += "\n" + lines[line_idx];
  211. if lines[line_idx].startswith("msgid"):
  212. current_line.multivalue = True;
  213. if lines[line_idx].startswith("\""):
  214. current_line.msgid = current_line.msgid[0:-1];
  215. current_line.msgid += lines[line_idx][1:];
  216. else:
  217. current_line.msgid += "\n" + lines[line_idx];
  218. #todo: do something for msgid_plural. Not needed right now...
  219. #get the next line (can be whatever)
  220. line_idx+=1;
  221. if line_idx >= len(lines):
  222. return read_data_lines;
  223. #check validity of the id
  224. if len(current_line.msgid) < 3:
  225. current_line = TranslationLine();
  226. continue;
  227. current_line.msgid = current_line.msgid[0:-1];
  228. #there should be a msgstr just after
  229. if not lines[line_idx].startswith("msgstr") or len(lines[line_idx]) <= 8:
  230. current_line = TranslationLine();
  231. continue;
  232. current_line.raw_msgstr = lines[line_idx];
  233. if lines[line_idx][7] == "\"":
  234. current_line.msgstr = lines[line_idx][8:];
  235. elif lines[line_idx][6] == "[":
  236. current_line.msgstr = lines[line_idx][11:];
  237. else:
  238. #can't parse
  239. print("error, can't parse msgstr: '"+lines[line_idx]+"'");
  240. current_line.msgstr = "";
  241. line_idx+=1;
  242. if line_idx >= len(lines):
  243. return read_data_lines;
  244. while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgstr"):
  245. current_line.raw_msgstr += "\n" + lines[line_idx];
  246. if lines[line_idx].startswith("\""):
  247. current_line.msgstr = current_line.msgstr[0:-1];
  248. current_line.msgstr += lines[line_idx][1:];
  249. elif lines[line_idx].startswith("msgstr["):
  250. current_line.msgstr = lines[line_idx][11:];
  251. current_line.multivalue = True;
  252. else:
  253. current_line.msgstr += "\n" + lines[line_idx];
  254. #get the next line (can be whatever)
  255. line_idx+=1;
  256. if line_idx >= len(lines):
  257. return read_data_lines;
  258. if current_line.msgstr:
  259. current_line.msgstr = current_line.msgstr[0:-1];
  260. read_data_lines.append(current_line);
  261. current_line = TranslationLine();
  262. except Exception as error:
  263. print("Warning, cannot read file " + file_path_in);
  264. print(error);
  265. return read_data_lines;
  266. def getTranslation(item):
  267. if len(item.msgid) == 0:
  268. return "";
  269. if item.msgid in datastore:
  270. return datastore[item.msgid].raw_msgstr;
  271. elif item.msgid in datastore_trim:
  272. good = datastore_trim[item.msgid];
  273. if not good.multivalue:
  274. return "msgstr \""+trim(good.msgstr)+"\"";
  275. else:
  276. item_msg_trim = trim(item.msgid);
  277. if item_msg_trim in datastore:
  278. good = datastore[trim(item.msgid)];
  279. if not good.multivalue:
  280. if good.msgid in item.msgid:
  281. start_at = item.msgid.index(good.msgid);
  282. return "msgstr \"" + item.msgid[0:start_at] + good.msgstr + item.msgid[start_at+len(good.msgid):] + "\"";
  283. elif item_msg_trim in datastore_trim:
  284. good = datastore_trim[item_msg_trim];
  285. if not good.multivalue:
  286. good_msg_trim = trim(good.msgid);
  287. if good_msg_trim in item.msgid:
  288. start_at = item.msgid.index(good_msg_trim);
  289. return "msgstr \"" + item.msgid[0:start_at] + trim(good.msgstr) + item.msgid[start_at+len(good_msg_trim):] + "\"";
  290. if ignore_case:
  291. lowercase = TranslationLine();
  292. lowercase.msgid = item.msgid.lower();
  293. if lowercase.msgid != item.msgid:
  294. lowercase.header_comment = item.header_comment;
  295. lowercase.raw_msgid = item.raw_msgid;
  296. lowercase.raw_msgstr = item.raw_msgstr;
  297. lowercase.msgstr = item.msgstr;
  298. lowercase.multivalue = item.multivalue;
  299. return getTranslation(lowercase)
  300. return "";
  301. def getTranslationNear(msgid_to_search, percent):
  302. max_word_diff = 1 + int(percent * len(msgid_to_search));
  303. possible_solutions = list();
  304. for msgid in datastore:
  305. dist = levenshtein_distance(msgid, msgid_to_search);
  306. if dist < max_word_diff:
  307. possible_solutions.append( (dist, datastore[msgid]) );
  308. possible_solutions.sort(key=lambda x:x[0]);
  309. return possible_solutions;
  310. def outputUntranslated(data_to_translate, file_path_out):
  311. try:
  312. file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
  313. nb_lines = 0;
  314. #sort to have an easier time translating.
  315. # idealy, they shoud be grouped by proximity, but it's abit more complicated to code
  316. sorted_lines = list()
  317. for dataline in data_to_translate:
  318. if not dataline.msgstr.strip() and dataline.msgid and len(getTranslation(dataline).strip()) == 0:
  319. sorted_lines.append(dataline);
  320. sorted_lines.sort(key=lambda x:x.msgid.lower())
  321. nb_iter = 0;
  322. # output bits that are empty
  323. for dataline in sorted_lines:
  324. if nb_iter%100 == 99:
  325. print('.');
  326. else:
  327. print('.', end = '');
  328. nb_iter += 1;
  329. file_out_stream.write(dataline.header_comment);
  330. file_out_stream.write("\n");
  331. # get translation that are near enough to be copy-pasted by humans.
  332. good_enough = getTranslationNear(dataline.msgid, 0.4);
  333. if len(good_enough) >0:
  334. file_out_stream.write("#Similar to me: "+dataline.msgid+"\n");
  335. for index in range(min(len(good_enough), max_similar)):
  336. file_out_stream.write("# "+str(good_enough[index][0])+("" if len(str(good_enough[index][0]))>2 else " " if len(str(good_enough[index][0]))==2 else " ")
  337. +" changes: " + good_enough[index][1].msgid+"\n");
  338. file_out_stream.write("# translation: " + good_enough[index][1].msgstr+"\n");
  339. file_out_stream.write(dataline.raw_msgid);
  340. file_out_stream.write("\n");
  341. file_out_stream.write(dataline.raw_msgstr);
  342. file_out_stream.write("\n");
  343. nb_lines+=1;
  344. print("There is " + str(nb_lines) +" string untranslated");
  345. except Exception as error:
  346. print("error, cannot write file " + file_path_out);
  347. print(error);
  348. def translate(data_to_translate, file_path_out):
  349. # try:
  350. file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
  351. file_out_stream.write("# Translation file for "+(language if len(language)>0 else language_code)+"\n");
  352. file_out_stream.write("# Copyright (C) 2021\n");
  353. file_out_stream.write("# This file is distributed under the same license as Slic3r.\n");
  354. file_out_stream.write("#\n");
  355. file_out_stream.write("msgid \"\"\n");
  356. file_out_stream.write("msgstr \"\"\n");
  357. file_out_stream.write("\"Project-Id-Version: Slic3r\\n\"\n");
  358. file_out_stream.write("\"POT-Creation-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n");
  359. file_out_stream.write("\"PO-Revision-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n");
  360. file_out_stream.write("\"Last-Translator:\\n\"\n");
  361. file_out_stream.write("\"Language-Team:\\n\"\n");
  362. file_out_stream.write("\"MIME-Version: 1.0\\n\"\n");
  363. file_out_stream.write("\"Content-Type: text/plain; charset=UTF-8\\n\"\n");
  364. file_out_stream.write("\"Content-Transfer-Encoding: 8bit\\n\"\n");
  365. file_out_stream.write("\"Language:"+language_code+"\\n\"\n");
  366. nb_lines = 0;
  367. data_to_translate.sort(key=lambda x:x.msgid.lower().strip())
  368. # translate bits that are empty
  369. for dataline in data_to_translate:
  370. if not dataline.msgstr.strip():
  371. transl = getTranslation(dataline)
  372. if len(transl) > 9 or ( len(transl) > 3 and not transl.startswith('msgstr "')):
  373. file_out_stream.write("\n")
  374. if not remove_comment:
  375. file_out_stream.write(dataline.header_comment.strip())
  376. file_out_stream.write("\n")
  377. file_out_stream.write(dataline.raw_msgid)
  378. file_out_stream.write("\n")
  379. file_out_stream.write(transl)
  380. file_out_stream.write("\n")
  381. nb_lines+=1;
  382. if dataline.raw_msgid.count('%') != transl.count('%'):
  383. print("WARNING: not same number of '%' ( "+ str(dataline.raw_msgid.count('%')) + " => " + str(transl.count('%')) + ")"
  384. +"\n for string:'" + dataline.msgid + " '\n=>'"+transl[8:]);
  385. else:
  386. file_out_stream.write("\n")
  387. if not remove_comment:
  388. file_out_stream.write(dataline.header_comment.strip())
  389. file_out_stream.write("\n")
  390. file_out_stream.write(dataline.raw_msgid)
  391. file_out_stream.write("\n")
  392. file_out_stream.write(dataline.raw_msgstr)
  393. file_out_stream.write("\n")
  394. if dataline.raw_msgid.count('%') != dataline.raw_msgstr.count('%'):
  395. print("WARNING: not same number of '%'( "+ str(dataline.raw_msgid.count('%')) + " => " + str(dataline.raw_msgstr.count('%')) + ")"
  396. +"\n for string:'" + dataline.msgid + " '\n=>'"+dataline.msgstr);
  397. nb_lines+=1;
  398. print("There is " + str(nb_lines) +" string translated in the .po");
  399. # except Exception as error:
  400. # print("error, cannot write file " + file_path_out);
  401. # print(error);
  402. def outputDatabase(file_path_out):
  403. try:
  404. file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
  405. nb_lines = 0;
  406. for msgid in datastore:
  407. dataline = datastore[msgid];
  408. #don't store commenta nymore. there is none is the slic3r.po anyway
  409. #file_out_stream.write(dataline.header_comment);
  410. #note that the header_comment contains the \n already so comment this line is you don't comment header_comment
  411. file_out_stream.write("\n");
  412. file_out_stream.write(dataline.raw_msgid);
  413. file_out_stream.write("\n");
  414. file_out_stream.write(dataline.raw_msgstr);
  415. file_out_stream.write("\n");
  416. nb_lines+=1;
  417. print("There is " + str(nb_lines) +" in your database file");
  418. except Exception as error:
  419. print("error, cannot write file " + file_path_out);
  420. print(error);
  421. def parse_ui_file(file_path):
  422. read_data_lines = list();
  423. # try:
  424. file_in_stream = open(file_path, mode="r", encoding="utf-8")
  425. lines = file_in_stream.read().splitlines();
  426. lines.append("");
  427. line_idx = 0;
  428. nb = 0;
  429. while line_idx < len(lines):
  430. items = lines[line_idx].strip().split(":");
  431. if len(items) > 1:
  432. if items[0]=="page":
  433. current_line = TranslationLine();
  434. current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx);
  435. current_line.raw_msgid = "msgid \""+items[1].strip()+"\"";
  436. current_line.msgid = items[1].strip();
  437. current_line.raw_msgstr = "msgstr \"\"";
  438. current_line.msgstr = "";
  439. read_data_lines.append(current_line);
  440. if items[0]=="group" or items[0]=="line":
  441. current_line = TranslationLine();
  442. current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx);
  443. current_line.raw_msgid = "msgid \""+items[-1].strip()+"\"";
  444. current_line.msgid = items[-1].strip();
  445. current_line.raw_msgstr = "msgstr \"\"";
  446. current_line.msgstr = "";
  447. read_data_lines.append(current_line);
  448. if items[0]=="setting":
  449. for item in items:
  450. if item.startswith("label$") or item.startswith("full_label$") or item.startswith("sidetext$") or item.startswith("tooltip$"):
  451. if item.split("$")[-1] != '_' and len(item.split("$")[-1]) > 0 :
  452. current_line = TranslationLine();
  453. current_line.header_comment = "\n#: "+file_path+" : l"+str(line_idx);
  454. current_line.msgid = item.split("$")[-1].strip();
  455. current_line.raw_msgid = "msgid \""+current_line.msgid+"\"";
  456. current_line.raw_msgstr = "msgstr \"\"";
  457. current_line.msgstr = "";
  458. read_data_lines.append(current_line);
  459. line_idx+=1;
  460. return read_data_lines;
  461. main();