import re from datetime import date try: from Levenshtein import distance as levenshtein_distance except ImportError: print("you need to do 'python -m pip install python-Levenshtein'"); exit(0); datastore = dict(); datastore_trim = dict();# key:string -> TranslationLine regex_only_letters = re.compile(r"[^a-zA-Z]") allow_msgctxt = True; ignore_case = False; remove_comment = False; percent_error_similar = 0 language_code = "??" language = "" max_similar = 3; database_out = ""; def trim(str): redo = True; while redo: while len(str) > 0 and ( str[0] == ":" or str[0] == "." or str[0] == "," or str[0] == "!" ): str = str[1:]; while len(str) > 0 and ( str[-1] == ":" or str[-1] == "." or str[-1] == "," or str[-1] == "!" ): str = str[:-1]; str_stripped = str.strip(); if str == str_stripped: redo = False else: str = str_stripped; return str; class TranslationFiles: file_in = "" file_out = "" file_todo = "" database = "" class TranslationLine: header_comment = "" raw_msgid = "" msgid = "" raw_msgstr = "" msgstr = "" multivalue = False def main(): global datastore, datastore_trim, regex_only_letters, allow_msgctxt, ignore_case, remove_comment; global percent_error_similar, language, language_code, max_similar, database_out; data_files = list(); # list of file paths ui_dir = ""; operations = list(); # list of TranslationFiles settings_stream = open("./settings.ini", mode="r", encoding="utf-8") lines = settings_stream.read().splitlines() for line in lines: if line.startswith("data"): if line.startswith("database_out"): database_out = line[line.index('=')+1:].strip(); else: data_files.append(line[line.index('=')+1:].strip()); if line.startswith("input"): operations.append(TranslationFiles()); operations[-1].file_in = line[line.index('=')+1:].strip(); if line.startswith("output") and operations: operations[-1].file_out = line[line.index('=')+1:].strip(); if line.startswith("todo") and operations: operations[-1].file_todo = line[line.index('=')+1:].strip(); if line.startswith("ui_dir"): ui_dir = line[line.index('=')+1:].strip(); if line.startswith("allow_msgctxt"): allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true"); print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt"); if line.startswith("allow_msgctxt"): allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true"); print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt"); if line.startswith("remove_comment"): remove_comment = (line[line.index('=')+1:].strip().lower() == "true"); if remove_comment: print("Will not output the comments"); if line.startswith("percent_error_similar"): percent_error_similar = float(line[line.index('=')+1:].strip()); print("percent_error_similar set to " + str(percent_error_similar)); if line.startswith("max_similar"): max_similar = int(line[line.index('=')+1:].strip()); print("max_similar set to " + str(max_similar)); if line.startswith("language"): if line.startswith("language_code"): language_code = line[line.index('=')+1:].strip(); print("language_code set to " + language_code); else: language = line[line.index('=')+1:].strip(); print("language set to " + language); # all_lines = list(); for data_file in data_files: new_data = createKnowledge(data_file); for dataline in new_data: if len(dataline.msgstr) > 0: if not dataline.msgid in datastore: datastore[dataline.msgid] = dataline; datastore_trim[trim(dataline.msgid)] = dataline; if dataline.msgid == " Layers,": print(trim(dataline.msgid)+" is inside? "+("oui" if "Layers" in datastore_trim else "non")); else: str_old_val = datastore[dataline.msgid].msgstr; str_test_val = dataline.msgstr; length_old = len(regex_only_letters.sub("", str_old_val)); length_new = len(regex_only_letters.sub("", str_test_val)); # if already exist, only change it if the previous was lower than 3 char if length_new > length_old and length_old < 3: print(str_old_val.replace('\n', ' ')+" replaced by "+str_test_val.replace('\n', ' ')); datastore[dataline.msgid].msgstr = str_test_val; datastore_trim[trim(dataline.msgid)].msgstr = str_test_val; print("finish reading" + data_file + " of size "+ str(len(new_data)) + ", now we had "+ str(len(datastore)) + " items"); if ignore_case: temp = list(); for msgid in datastore: if not msgid.lower() in datastore: temp.append(msgid); for msgid in temp: datastore[msgid.lower()] = datastore[msgid]; temp = list(); for msgid in datastore_trim: if not msgid.lower() in datastore_trim: temp.append(msgid); for msgid in temp: datastore_trim[msgid.lower()] = datastore_trim[msgid]; for operation in operations: print("Translating " + operation.file_in); dict_ope = dict(); ope_file_in = list(); lst_temp = createKnowledge(operation.file_in); print("String from source files: " + str(len(lst_temp))); nbTrans = 0; #remove duplicate for line in lst_temp: if not line.msgid in dict_ope: dict_ope[line.msgid] = line; ope_file_in.append(line); if line.msgstr: nbTrans+=1; print(line.header_comment); print(line.raw_msgid); print(line.msgid); print(line.raw_msgstr); print(line.msgstr); #add def from conf files if ui_dir: new_data = parse_ui_file(ui_dir+"/extruder.ui"); new_data.extend(parse_ui_file(ui_dir+"/extruder.ui")); new_data.extend(parse_ui_file(ui_dir+"/filament.ui")); new_data.extend(parse_ui_file(ui_dir+"/milling.ui")); new_data.extend(parse_ui_file(ui_dir+"/print.ui")); new_data.extend(parse_ui_file(ui_dir+"/printer_fff.ui")); new_data.extend(parse_ui_file(ui_dir+"/printer_sla.ui")); new_data.extend(parse_ui_file(ui_dir+"/sla_material.ui")); new_data.extend(parse_ui_file(ui_dir+"/sla_print.ui")); print("String from ui files: " + str(len(new_data))); for dataline in new_data: if not dataline.msgid in dict_ope: dict_ope[dataline.msgid] = dataline; ope_file_in.append(dataline); print("String to translate: " + str(len(ope_file_in) - nbTrans)+" and already translated: "+str(nbTrans)); #create database if database_out: outputDatabase(database_out); #create TODO file if operation.file_todo: outputUntranslated(ope_file_in, operation.file_todo); #create .po file if operation.file_out: translate(ope_file_in, operation.file_out); print("End of merge"); def createKnowledge(file_path_in): read_data_lines = list(); try: file_in_stream = open(file_path_in, mode="r", encoding="utf-8") lines = file_in_stream.read().splitlines(); lines.append(""); line_idx = 0; current_line = TranslationLine(); nb = 0; while line_idx < len(lines): if not lines[line_idx].startswith("msgid") or len(lines[line_idx]) <= 7: if (lines[line_idx].startswith("#") or lines[line_idx].startswith("msgctxt") or len(lines[line_idx].strip()) == 0 ): if not allow_msgctxt and lines[line_idx].startswith("msgctxt"): current_line.header_comment += "\n#, " + lines[line_idx]; else: current_line.header_comment += "\n" + lines[line_idx]; line_idx+=1; continue; # get the msgid line current_line.raw_msgid = lines[line_idx]; current_line.msgid = lines[line_idx][7:]; #get the next line (can be whatever) line_idx+=1; if line_idx >= len(lines): return read_data_lines; #populate the full current_line.msgid string while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgid"): current_line.raw_msgid += "\n" + lines[line_idx]; if lines[line_idx].startswith("msgid"): current_line.multivalue = True; if lines[line_idx].startswith("\""): current_line.msgid = current_line.msgid[0:-1]; current_line.msgid += lines[line_idx][1:]; else: current_line.msgid += "\n" + lines[line_idx]; #todo: do something for msgid_plural. Not needed right now... #get the next line (can be whatever) line_idx+=1; if line_idx >= len(lines): return read_data_lines; #check validity of the id if len(current_line.msgid) < 3: current_line = TranslationLine(); continue; current_line.msgid = current_line.msgid[0:-1]; #there should be a msgstr just after if not lines[line_idx].startswith("msgstr") or len(lines[line_idx]) <= 8: current_line = TranslationLine(); continue; current_line.raw_msgstr = lines[line_idx]; if lines[line_idx][7] == "\"": current_line.msgstr = lines[line_idx][8:]; elif lines[line_idx][6] == "[": current_line.msgstr = lines[line_idx][11:]; else: #can't parse print("error, can't parse msgstr: '"+lines[line_idx]+"'"); current_line.msgstr = ""; line_idx+=1; if line_idx >= len(lines): return read_data_lines; while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgstr"): current_line.raw_msgstr += "\n" + lines[line_idx]; if lines[line_idx].startswith("\""): current_line.msgstr = current_line.msgstr[0:-1]; current_line.msgstr += lines[line_idx][1:]; elif lines[line_idx].startswith("msgstr["): current_line.msgstr = lines[line_idx][11:]; current_line.multivalue = True; else: current_line.msgstr += "\n" + lines[line_idx]; #get the next line (can be whatever) line_idx+=1; if line_idx >= len(lines): return read_data_lines; if current_line.msgstr: current_line.msgstr = current_line.msgstr[0:-1]; read_data_lines.append(current_line); current_line = TranslationLine(); except Exception as error: print("Warning, cannot read file " + file_path_in); print(error); return read_data_lines; def getTranslation(item): if len(item.msgid) == 0: return ""; if item.msgid in datastore: return datastore[item.msgid].raw_msgstr; elif item.msgid in datastore_trim: good = datastore_trim[item.msgid]; if not good.multivalue: return "msgstr \""+trim(good.msgstr)+"\""; else: item_msg_trim = trim(item.msgid); if item_msg_trim in datastore: good = datastore[trim(item.msgid)]; if not good.multivalue: if good.msgid in item.msgid: start_at = item.msgid.index(good.msgid); return "msgstr \"" + item.msgid[0:start_at] + good.msgstr + item.msgid[start_at+len(good.msgid):] + "\""; elif item_msg_trim in datastore_trim: good = datastore_trim[item_msg_trim]; if not good.multivalue: good_msg_trim = trim(good.msgid); if good_msg_trim in item.msgid: start_at = item.msgid.index(good_msg_trim); return "msgstr \"" + item.msgid[0:start_at] + trim(good.msgstr) + item.msgid[start_at+len(good_msg_trim):] + "\""; if ignore_case: lowercase = TranslationLine(); lowercase.msgid = item.msgid.lower(); if lowercase.msgid != item.msgid: lowercase.header_comment = item.header_comment; lowercase.raw_msgid = item.raw_msgid; lowercase.raw_msgstr = item.raw_msgstr; lowercase.msgstr = item.msgstr; lowercase.multivalue = item.multivalue; return getTranslation(lowercase) return ""; def getTranslationNear(msgid_to_search, percent): max_word_diff = 1 + int(percent * len(msgid_to_search)); possible_solutions = list(); for msgid in datastore: dist = levenshtein_distance(msgid, msgid_to_search); if dist < max_word_diff: possible_solutions.append( (dist, datastore[msgid]) ); possible_solutions.sort(key=lambda x:x[0]); return possible_solutions; def outputUntranslated(data_to_translate, file_path_out): try: file_out_stream = open(file_path_out, mode="w", encoding="utf-8") nb_lines = 0; #sort to have an easier time translating. # idealy, they shoud be grouped by proximity, but it's abit more complicated to code sorted_lines = list() for dataline in data_to_translate: if not dataline.msgstr.strip() and dataline.msgid and len(getTranslation(dataline).strip()) == 0: sorted_lines.append(dataline); sorted_lines.sort(key=lambda x:x.msgid.lower()) nb_iter = 0; # output bits that are empty for dataline in sorted_lines: if nb_iter%100 == 99: print('.'); else: print('.', end = ''); nb_iter += 1; file_out_stream.write(dataline.header_comment); file_out_stream.write("\n"); # get translation that are near enough to be copy-pasted by humans. good_enough = getTranslationNear(dataline.msgid, 0.4); if len(good_enough) >0: file_out_stream.write("#Similar to me: "+dataline.msgid+"\n"); for index in range(min(len(good_enough), max_similar)): file_out_stream.write("# "+str(good_enough[index][0])+("" if len(str(good_enough[index][0]))>2 else " " if len(str(good_enough[index][0]))==2 else " ") +" changes: " + good_enough[index][1].msgid+"\n"); file_out_stream.write("# translation: " + good_enough[index][1].msgstr+"\n"); file_out_stream.write(dataline.raw_msgid); file_out_stream.write("\n"); file_out_stream.write(dataline.raw_msgstr); file_out_stream.write("\n"); nb_lines+=1; print("There is " + str(nb_lines) +" string untranslated"); except Exception as error: print("error, cannot write file " + file_path_out); print(error); def translate(data_to_translate, file_path_out): # try: file_out_stream = open(file_path_out, mode="w", encoding="utf-8") file_out_stream.write("# Translation file for "+(language if len(language)>0 else language_code)+"\n"); file_out_stream.write("# Copyright (C) 2021\n"); file_out_stream.write("# This file is distributed under the same license as Slic3r.\n"); file_out_stream.write("#\n"); file_out_stream.write("msgid \"\"\n"); file_out_stream.write("msgstr \"\"\n"); file_out_stream.write("\"Project-Id-Version: Slic3r\\n\"\n"); file_out_stream.write("\"POT-Creation-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n"); file_out_stream.write("\"PO-Revision-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n"); file_out_stream.write("\"Last-Translator:\\n\"\n"); file_out_stream.write("\"Language-Team:\\n\"\n"); file_out_stream.write("\"MIME-Version: 1.0\\n\"\n"); file_out_stream.write("\"Content-Type: text/plain; charset=UTF-8\\n\"\n"); file_out_stream.write("\"Content-Transfer-Encoding: 8bit\\n\"\n"); file_out_stream.write("\"Language:"+language_code+"\\n\"\n"); nb_lines = 0; data_to_translate.sort(key=lambda x:x.msgid.lower().strip()) # translate bits that are empty for dataline in data_to_translate: if not dataline.msgstr.strip(): transl = getTranslation(dataline) if len(transl) > 9 or ( len(transl) > 3 and not transl.startswith('msgstr "')): file_out_stream.write("\n") if not remove_comment: file_out_stream.write(dataline.header_comment.strip()) file_out_stream.write("\n") file_out_stream.write(dataline.raw_msgid) file_out_stream.write("\n") file_out_stream.write(transl) file_out_stream.write("\n") nb_lines+=1; if dataline.raw_msgid.count('%') != transl.count('%'): print("WARNING: not same number of '%' ( "+ str(dataline.raw_msgid.count('%')) + " => " + str(transl.count('%')) + ")" +"\n for string:'" + dataline.msgid + " '\n=>'"+transl[8:]); else: file_out_stream.write("\n") if not remove_comment: file_out_stream.write(dataline.header_comment.strip()) file_out_stream.write("\n") file_out_stream.write(dataline.raw_msgid) file_out_stream.write("\n") file_out_stream.write(dataline.raw_msgstr) file_out_stream.write("\n") if dataline.raw_msgid.count('%') != dataline.raw_msgstr.count('%'): print("WARNING: not same number of '%'( "+ str(dataline.raw_msgid.count('%')) + " => " + str(dataline.raw_msgstr.count('%')) + ")" +"\n for string:'" + dataline.msgid + " '\n=>'"+dataline.msgstr); nb_lines+=1; print("There is " + str(nb_lines) +" string translated in the .po"); # except Exception as error: # print("error, cannot write file " + file_path_out); # print(error); def outputDatabase(file_path_out): try: file_out_stream = open(file_path_out, mode="w", encoding="utf-8") nb_lines = 0; for msgid in datastore: dataline = datastore[msgid]; #don't store commenta nymore. there is none is the slic3r.po anyway #file_out_stream.write(dataline.header_comment); #note that the header_comment contains the \n already so comment this line is you don't comment header_comment file_out_stream.write("\n"); file_out_stream.write(dataline.raw_msgid); file_out_stream.write("\n"); file_out_stream.write(dataline.raw_msgstr); file_out_stream.write("\n"); nb_lines+=1; print("There is " + str(nb_lines) +" in your database file"); except Exception as error: print("error, cannot write file " + file_path_out); print(error); def parse_ui_file(file_path): read_data_lines = list(); # try: file_in_stream = open(file_path, mode="r", encoding="utf-8") lines = file_in_stream.read().splitlines(); lines.append(""); line_idx = 0; nb = 0; while line_idx < len(lines): items = lines[line_idx].strip().split(":"); if len(items) > 1: if items[0]=="page": current_line = TranslationLine(); current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx); current_line.raw_msgid = "msgid \""+items[1].strip()+"\""; current_line.msgid = items[1].strip(); current_line.raw_msgstr = "msgstr \"\""; current_line.msgstr = ""; read_data_lines.append(current_line); if items[0]=="group" or items[0]=="line": current_line = TranslationLine(); current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx); current_line.raw_msgid = "msgid \""+items[-1].strip()+"\""; current_line.msgid = items[-1].strip(); current_line.raw_msgstr = "msgstr \"\""; current_line.msgstr = ""; read_data_lines.append(current_line); if items[0]=="setting": for item in items: if item.startswith("label$") or item.startswith("full_label$") or item.startswith("sidetext$") or item.startswith("tooltip$"): if item.split("$")[-1] != '_' and len(item.split("$")[-1]) > 0 : current_line = TranslationLine(); current_line.header_comment = "\n#: "+file_path+" : l"+str(line_idx); current_line.msgid = item.split("$")[-1].strip(); current_line.raw_msgid = "msgid \""+current_line.msgid+"\""; current_line.raw_msgstr = "msgstr \"\""; current_line.msgstr = ""; read_data_lines.append(current_line); line_idx+=1; return read_data_lines; main();