123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524 |
- import re
- from datetime import date
- try:
- from Levenshtein import distance as levenshtein_distance
- except ImportError:
- print("you need to do 'python -m pip install python-Levenshtein'");
- exit(0);
- datastore = dict();
- datastore_trim = dict();# key:string -> TranslationLine
- regex_only_letters = re.compile(r"[^a-zA-Z]")
- allow_msgctxt = True;
- ignore_case = False;
- remove_comment = False;
- percent_error_similar = 0
- language_code = "??"
- language = ""
- max_similar = 3;
- database_out = "";
- def trim(str):
- redo = True;
- while redo:
- while len(str) > 0 and (
- str[0] == ":"
- or str[0] == "."
- or str[0] == ","
- or str[0] == "!"
- ):
- str = str[1:];
- while len(str) > 0 and (
- str[-1] == ":"
- or str[-1] == "."
- or str[-1] == ","
- or str[-1] == "!"
- ):
- str = str[:-1];
- str_stripped = str.strip();
- if str == str_stripped:
- redo = False
- else:
- str = str_stripped;
- return str;
- class TranslationFiles:
- file_in = ""
- file_out = ""
- file_todo = ""
- database = ""
- class TranslationLine:
- header_comment = ""
- raw_msgid = ""
- msgid = ""
- raw_msgstr = ""
- msgstr = ""
- multivalue = False
- def main():
- global datastore, datastore_trim, regex_only_letters, allow_msgctxt, ignore_case, remove_comment;
- global percent_error_similar, language, language_code, max_similar, database_out;
- data_files = list(); # list of file paths
- ui_dir = "";
- operations = list(); # list of TranslationFiles
- settings_stream = open("./settings.ini", mode="r", encoding="utf-8")
- lines = settings_stream.read().splitlines()
- for line in lines:
- if line.startswith("data"):
- if line.startswith("database_out"):
- database_out = line[line.index('=')+1:].strip();
- else:
- data_files.append(line[line.index('=')+1:].strip());
-
- if line.startswith("input"):
- operations.append(TranslationFiles());
- operations[-1].file_in = line[line.index('=')+1:].strip();
-
- if line.startswith("output") and operations:
- operations[-1].file_out = line[line.index('=')+1:].strip();
-
- if line.startswith("todo") and operations:
- operations[-1].file_todo = line[line.index('=')+1:].strip();
-
-
- if line.startswith("ui_dir"):
- ui_dir = line[line.index('=')+1:].strip();
-
- if line.startswith("allow_msgctxt"):
- allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true");
- print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt");
-
- if line.startswith("allow_msgctxt"):
- allow_msgctxt = (line[line.index('=')+1:].strip().lower() == "true");
- print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt");
- if line.startswith("remove_comment"):
- remove_comment = (line[line.index('=')+1:].strip().lower() == "true");
- if remove_comment:
- print("Will not output the comments");
-
- if line.startswith("percent_error_similar"):
- percent_error_similar = float(line[line.index('=')+1:].strip());
- print("percent_error_similar set to " + str(percent_error_similar));
-
- if line.startswith("max_similar"):
- max_similar = int(line[line.index('=')+1:].strip());
- print("max_similar set to " + str(max_similar));
-
- if line.startswith("language"):
- if line.startswith("language_code"):
- language_code = line[line.index('=')+1:].strip();
- print("language_code set to " + language_code);
- else:
- language = line[line.index('=')+1:].strip();
- print("language set to " + language);
-
- # all_lines = list();
- for data_file in data_files:
- new_data = createKnowledge(data_file);
- for dataline in new_data:
- if len(dataline.msgstr) > 0:
- if not dataline.msgid in datastore:
- datastore[dataline.msgid] = dataline;
- datastore_trim[trim(dataline.msgid)] = dataline;
- if dataline.msgid == " Layers,":
- print(trim(dataline.msgid)+" is inside? "+("oui" if "Layers" in datastore_trim else "non"));
- else:
- str_old_val = datastore[dataline.msgid].msgstr;
- str_test_val = dataline.msgstr;
- length_old = len(regex_only_letters.sub("", str_old_val));
- length_new = len(regex_only_letters.sub("", str_test_val));
- # if already exist, only change it if the previous was lower than 3 char
- if length_new > length_old and length_old < 3:
- print(str_old_val.replace('\n', ' ')+" replaced by "+str_test_val.replace('\n', ' '));
- datastore[dataline.msgid].msgstr = str_test_val;
- datastore_trim[trim(dataline.msgid)].msgstr = str_test_val;
- print("finish reading" + data_file + " of size "+ str(len(new_data)) + ", now we had "+ str(len(datastore)) + " items");
- if ignore_case:
- temp = list();
- for msgid in datastore:
- if not msgid.lower() in datastore:
- temp.append(msgid);
- for msgid in temp:
- datastore[msgid.lower()] = datastore[msgid];
- temp = list();
- for msgid in datastore_trim:
- if not msgid.lower() in datastore_trim:
- temp.append(msgid);
- for msgid in temp:
- datastore_trim[msgid.lower()] = datastore_trim[msgid];
- for operation in operations:
- print("Translating " + operation.file_in);
- dict_ope = dict();
- ope_file_in = list();
- lst_temp = createKnowledge(operation.file_in);
- print("String from source files: " + str(len(lst_temp)));
- nbTrans = 0;
- #remove duplicate
- for line in lst_temp:
- if not line.msgid in dict_ope:
- dict_ope[line.msgid] = line;
- ope_file_in.append(line);
- if line.msgstr:
- nbTrans+=1;
- print(line.header_comment);
- print(line.raw_msgid);
- print(line.msgid);
- print(line.raw_msgstr);
- print(line.msgstr);
- #add def from conf files
- if ui_dir:
- new_data = parse_ui_file(ui_dir+"/extruder.ui");
- new_data.extend(parse_ui_file(ui_dir+"/extruder.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/filament.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/milling.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/print.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/printer_fff.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/printer_sla.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/sla_material.ui"));
- new_data.extend(parse_ui_file(ui_dir+"/sla_print.ui"));
- print("String from ui files: " + str(len(new_data)));
- for dataline in new_data:
- if not dataline.msgid in dict_ope:
- dict_ope[dataline.msgid] = dataline;
- ope_file_in.append(dataline);
- print("String to translate: " + str(len(ope_file_in) - nbTrans)+" and already translated: "+str(nbTrans));
-
- #create database
- if database_out:
- outputDatabase(database_out);
-
- #create TODO file
- if operation.file_todo:
- outputUntranslated(ope_file_in, operation.file_todo);
-
- #create .po file
- if operation.file_out:
- translate(ope_file_in, operation.file_out);
-
- print("End of merge");
- def createKnowledge(file_path_in):
- read_data_lines = list();
- try:
- file_in_stream = open(file_path_in, mode="r", encoding="utf-8")
- lines = file_in_stream.read().splitlines();
- lines.append("");
- line_idx = 0;
- current_line = TranslationLine();
- nb = 0;
- while line_idx < len(lines):
- if not lines[line_idx].startswith("msgid") or len(lines[line_idx]) <= 7:
- if (lines[line_idx].startswith("#")
- or lines[line_idx].startswith("msgctxt")
- or len(lines[line_idx].strip()) == 0
- ):
- if not allow_msgctxt and lines[line_idx].startswith("msgctxt"):
- current_line.header_comment += "\n#, " + lines[line_idx];
- else:
- current_line.header_comment += "\n" + lines[line_idx];
- line_idx+=1;
- continue;
- # get the msgid line
- current_line.raw_msgid = lines[line_idx];
- current_line.msgid = lines[line_idx][7:];
- #get the next line (can be whatever)
- line_idx+=1;
- if line_idx >= len(lines):
- return read_data_lines;
- #populate the full current_line.msgid string
- while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgid"):
- current_line.raw_msgid += "\n" + lines[line_idx];
- if lines[line_idx].startswith("msgid"):
- current_line.multivalue = True;
- if lines[line_idx].startswith("\""):
- current_line.msgid = current_line.msgid[0:-1];
- current_line.msgid += lines[line_idx][1:];
- else:
- current_line.msgid += "\n" + lines[line_idx];
- #todo: do something for msgid_plural. Not needed right now...
-
- #get the next line (can be whatever)
- line_idx+=1;
- if line_idx >= len(lines):
- return read_data_lines;
- #check validity of the id
- if len(current_line.msgid) < 3:
- current_line = TranslationLine();
- continue;
- current_line.msgid = current_line.msgid[0:-1];
- #there should be a msgstr just after
- if not lines[line_idx].startswith("msgstr") or len(lines[line_idx]) <= 8:
- current_line = TranslationLine();
- continue;
- current_line.raw_msgstr = lines[line_idx];
- if lines[line_idx][7] == "\"":
- current_line.msgstr = lines[line_idx][8:];
- elif lines[line_idx][6] == "[":
- current_line.msgstr = lines[line_idx][11:];
- else:
- #can't parse
- print("error, can't parse msgstr: '"+lines[line_idx]+"'");
- current_line.msgstr = "";
- line_idx+=1;
- if line_idx >= len(lines):
- return read_data_lines;
- while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgstr"):
- current_line.raw_msgstr += "\n" + lines[line_idx];
- if lines[line_idx].startswith("\""):
- current_line.msgstr = current_line.msgstr[0:-1];
- current_line.msgstr += lines[line_idx][1:];
- elif lines[line_idx].startswith("msgstr["):
- current_line.msgstr = lines[line_idx][11:];
- current_line.multivalue = True;
- else:
- current_line.msgstr += "\n" + lines[line_idx];
- #get the next line (can be whatever)
- line_idx+=1;
- if line_idx >= len(lines):
- return read_data_lines;
-
- if current_line.msgstr:
- current_line.msgstr = current_line.msgstr[0:-1];
- read_data_lines.append(current_line);
- current_line = TranslationLine();
- except Exception as error:
- print("Warning, cannot read file " + file_path_in);
- print(error);
- return read_data_lines;
- def getTranslation(item):
- if len(item.msgid) == 0:
- return "";
- if item.msgid in datastore:
- return datastore[item.msgid].raw_msgstr;
- elif item.msgid in datastore_trim:
- good = datastore_trim[item.msgid];
- if not good.multivalue:
- return "msgstr \""+trim(good.msgstr)+"\"";
- else:
- item_msg_trim = trim(item.msgid);
- if item_msg_trim in datastore:
- good = datastore[trim(item.msgid)];
- if not good.multivalue:
- if good.msgid in item.msgid:
- start_at = item.msgid.index(good.msgid);
- return "msgstr \"" + item.msgid[0:start_at] + good.msgstr + item.msgid[start_at+len(good.msgid):] + "\"";
- elif item_msg_trim in datastore_trim:
- good = datastore_trim[item_msg_trim];
- if not good.multivalue:
- good_msg_trim = trim(good.msgid);
- if good_msg_trim in item.msgid:
- start_at = item.msgid.index(good_msg_trim);
- return "msgstr \"" + item.msgid[0:start_at] + trim(good.msgstr) + item.msgid[start_at+len(good_msg_trim):] + "\"";
- if ignore_case:
- lowercase = TranslationLine();
- lowercase.msgid = item.msgid.lower();
- if lowercase.msgid != item.msgid:
- lowercase.header_comment = item.header_comment;
- lowercase.raw_msgid = item.raw_msgid;
- lowercase.raw_msgstr = item.raw_msgstr;
- lowercase.msgstr = item.msgstr;
- lowercase.multivalue = item.multivalue;
- return getTranslation(lowercase)
- return "";
- def getTranslationNear(msgid_to_search, percent):
- max_word_diff = 1 + int(percent * len(msgid_to_search));
- possible_solutions = list();
- for msgid in datastore:
- dist = levenshtein_distance(msgid, msgid_to_search);
- if dist < max_word_diff:
- possible_solutions.append( (dist, datastore[msgid]) );
- possible_solutions.sort(key=lambda x:x[0]);
- return possible_solutions;
- def outputUntranslated(data_to_translate, file_path_out):
- try:
- file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
- nb_lines = 0;
- #sort to have an easier time translating.
- # idealy, they shoud be grouped by proximity, but it's abit more complicated to code
- sorted_lines = list()
- for dataline in data_to_translate:
- if not dataline.msgstr.strip() and dataline.msgid and len(getTranslation(dataline).strip()) == 0:
- sorted_lines.append(dataline);
- sorted_lines.sort(key=lambda x:x.msgid.lower())
-
- nb_iter = 0;
- # output bits that are empty
- for dataline in sorted_lines:
- if nb_iter%100 == 99:
- print('.');
- else:
- print('.', end = '');
- nb_iter += 1;
- file_out_stream.write(dataline.header_comment);
- file_out_stream.write("\n");
- # get translation that are near enough to be copy-pasted by humans.
- good_enough = getTranslationNear(dataline.msgid, 0.4);
- if len(good_enough) >0:
- file_out_stream.write("#Similar to me: "+dataline.msgid+"\n");
- for index in range(min(len(good_enough), max_similar)):
- file_out_stream.write("# "+str(good_enough[index][0])+("" if len(str(good_enough[index][0]))>2 else " " if len(str(good_enough[index][0]))==2 else " ")
- +" changes: " + good_enough[index][1].msgid+"\n");
- file_out_stream.write("# translation: " + good_enough[index][1].msgstr+"\n");
- file_out_stream.write(dataline.raw_msgid);
- file_out_stream.write("\n");
- file_out_stream.write(dataline.raw_msgstr);
- file_out_stream.write("\n");
- nb_lines+=1;
- print("There is " + str(nb_lines) +" string untranslated");
- except Exception as error:
- print("error, cannot write file " + file_path_out);
- print(error);
- def translate(data_to_translate, file_path_out):
- # try:
- file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
- file_out_stream.write("# Translation file for "+(language if len(language)>0 else language_code)+"\n");
- file_out_stream.write("# Copyright (C) 2021\n");
- file_out_stream.write("# This file is distributed under the same license as Slic3r.\n");
- file_out_stream.write("#\n");
- file_out_stream.write("msgid \"\"\n");
- file_out_stream.write("msgstr \"\"\n");
- file_out_stream.write("\"Project-Id-Version: Slic3r\\n\"\n");
- file_out_stream.write("\"POT-Creation-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n");
- file_out_stream.write("\"PO-Revision-Date: "+date.today().strftime('%Y-%m-%d %H:%M%z')+"\\n\"\n");
- file_out_stream.write("\"Last-Translator:\\n\"\n");
- file_out_stream.write("\"Language-Team:\\n\"\n");
- file_out_stream.write("\"MIME-Version: 1.0\\n\"\n");
- file_out_stream.write("\"Content-Type: text/plain; charset=UTF-8\\n\"\n");
- file_out_stream.write("\"Content-Transfer-Encoding: 8bit\\n\"\n");
- file_out_stream.write("\"Language:"+language_code+"\\n\"\n");
- nb_lines = 0;
- data_to_translate.sort(key=lambda x:x.msgid.lower().strip())
- # translate bits that are empty
- for dataline in data_to_translate:
- if not dataline.msgstr.strip():
- transl = getTranslation(dataline)
- if len(transl) > 9 or ( len(transl) > 3 and not transl.startswith('msgstr "')):
- file_out_stream.write("\n")
- if not remove_comment:
- file_out_stream.write(dataline.header_comment.strip())
- file_out_stream.write("\n")
- file_out_stream.write(dataline.raw_msgid)
- file_out_stream.write("\n")
- file_out_stream.write(transl)
- file_out_stream.write("\n")
- nb_lines+=1;
- if dataline.raw_msgid.count('%') != transl.count('%'):
- print("WARNING: not same number of '%' ( "+ str(dataline.raw_msgid.count('%')) + " => " + str(transl.count('%')) + ")"
- +"\n for string:'" + dataline.msgid + " '\n=>'"+transl[8:]);
- else:
- file_out_stream.write("\n")
- if not remove_comment:
- file_out_stream.write(dataline.header_comment.strip())
- file_out_stream.write("\n")
- file_out_stream.write(dataline.raw_msgid)
- file_out_stream.write("\n")
- file_out_stream.write(dataline.raw_msgstr)
- file_out_stream.write("\n")
- if dataline.raw_msgid.count('%') != dataline.raw_msgstr.count('%'):
- print("WARNING: not same number of '%'( "+ str(dataline.raw_msgid.count('%')) + " => " + str(dataline.raw_msgstr.count('%')) + ")"
- +"\n for string:'" + dataline.msgid + " '\n=>'"+dataline.msgstr);
- nb_lines+=1;
- print("There is " + str(nb_lines) +" string translated in the .po");
- # except Exception as error:
- # print("error, cannot write file " + file_path_out);
- # print(error);
- def outputDatabase(file_path_out):
- try:
- file_out_stream = open(file_path_out, mode="w", encoding="utf-8")
- nb_lines = 0;
-
- for msgid in datastore:
- dataline = datastore[msgid];
- #don't store commenta nymore. there is none is the slic3r.po anyway
- #file_out_stream.write(dataline.header_comment);
- #note that the header_comment contains the \n already so comment this line is you don't comment header_comment
- file_out_stream.write("\n");
- file_out_stream.write(dataline.raw_msgid);
- file_out_stream.write("\n");
- file_out_stream.write(dataline.raw_msgstr);
- file_out_stream.write("\n");
- nb_lines+=1;
- print("There is " + str(nb_lines) +" in your database file");
- except Exception as error:
- print("error, cannot write file " + file_path_out);
- print(error);
- def parse_ui_file(file_path):
- read_data_lines = list();
- # try:
- file_in_stream = open(file_path, mode="r", encoding="utf-8")
- lines = file_in_stream.read().splitlines();
- lines.append("");
- line_idx = 0;
- nb = 0;
- while line_idx < len(lines):
- items = lines[line_idx].strip().split(":");
- if len(items) > 1:
- if items[0]=="page":
- current_line = TranslationLine();
- current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx);
- current_line.raw_msgid = "msgid \""+items[1].strip()+"\"";
- current_line.msgid = items[1].strip();
- current_line.raw_msgstr = "msgstr \"\"";
- current_line.msgstr = "";
- read_data_lines.append(current_line);
- if items[0]=="group" or items[0]=="line":
- current_line = TranslationLine();
- current_line.header_comment = "\n#: "+file_path;#+":"+str(line_idx);
- current_line.raw_msgid = "msgid \""+items[-1].strip()+"\"";
- current_line.msgid = items[-1].strip();
- current_line.raw_msgstr = "msgstr \"\"";
- current_line.msgstr = "";
- read_data_lines.append(current_line);
- if items[0]=="setting":
- for item in items:
- if item.startswith("label$") or item.startswith("full_label$") or item.startswith("sidetext$") or item.startswith("tooltip$"):
- if item.split("$")[-1] != '_' and len(item.split("$")[-1]) > 0 :
- current_line = TranslationLine();
- current_line.header_comment = "\n#: "+file_path+" : l"+str(line_idx);
- current_line.msgid = item.split("$")[-1].strip();
- current_line.raw_msgid = "msgid \""+current_line.msgid+"\"";
- current_line.raw_msgstr = "msgstr \"\"";
- current_line.msgstr = "";
- read_data_lines.append(current_line);
- line_idx+=1;
-
- return read_data_lines;
-
- main();
|