#!/usr/local/bin/python3.8
# -*- coding: utf-8 -*-
# NOTE(review): the shebang names python3.8, but this script still contains
# Python 2 constructs (print statements, unicode, raw_input) -- confirm which
# interpreter is intended.

import os, re, sys, codecs, difflib
from optparse import OptionParser
from subprocess import Popen, PIPE, call
from textwrap import TextWrapper, _whitespace
from collections import defaultdict, OrderedDict, Counter
from platform import system
from unicodedata import east_asian_width
from tempfile import NamedTemporaryFile

# Command line interface. The first positional argument, if any, is the
# command to run; without it the script starts in interactive mode.
usage = "usage: %prog [options] commands\n" \
        "Without any command, it starts in interactive mode.\n" \
        "Read docs/translations.txt for details."
parser = OptionParser(usage=usage)
parser.add_option("--commit_author", help="Commit author",
                  default="Translators <crawl-ref-discuss@lists.sourceforge.net>")
parser.add_option("-d", "--diff", help="Diff format (unified, context, n)",
                  default='n')
parser.add_option("-f", "--force", action="store_true",
                  help="Overwrite files even if no change detected")
parser.add_option("-l", "--language", help="Specify which languages to work on")
parser.add_option("-r", "--resource", help="Specify which resources to work on")
parser.add_option("-s", "--source", help="Work on source files (same as -l en)",
                  action="store_true")
parser.add_option("-t", "--translations", help="Work on translations",
                  action="store_true")
parser.add_option("-a", "--auto_fix", action="store_true",
                  help="Apply some automatic fixes to punctuation")

(options, args) = parser.parse_args()
cmd = args[0] if args else ''

# Absolute path to the source directory
tx_abs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Absolute path to the git root
git_root = os.path.abspath(os.path.join(tx_abs_path, "..", ".."))

# Relative path from the git root to the transifex directory
tx_rel_path = os.path.join('crawl-ref', 'source')

# Absolute path to the transifex config file
tx_config = os.path.join(tx_abs_path, '.tx', 'config')

# Relative path from the source directory to the descript directory
descript_tx_path = os.path.join('dat', 'descript')

# Relative path from the git root to the descript directory
descript_git_path = os.path.join(tx_rel_path, descript_tx_path)

# Absolute path to the descript directory
descript_abs_path = os.path.join(tx_abs_path, descript_tx_path)

# Every relative path used below is resolved from the descript directory.
# "except ... as" is accepted by Python 2.6+ and Python 3 alike, unlike the
# older "except OSError, e" spelling.
try:
    os.chdir(descript_abs_path)
except OSError as e:
    sys.exit(e)

sep_re = re.compile('[, ]+') # basic separator for user input
txt_sep_re = re.compile('%{4,}') # txt file entry separator
# Raw string: "\w" is a regex escape, not a string escape
cmd_re = re.compile(r'<(\w)>') # used to find the key in menu command strings
# Those languages have special wrapping with fullwidth character support
east_asian_languages = {'ja_JP', 'ko_KR', 'zh_CN'}
no_space_languages = {'ja_JP', 'zh_CN'}
east_asian_punctuation = u'、。,!:;)'

# This object serves as an intermediate step between txt and ini files.
# Entries are in a raw format: no wrapping, every new line is significant.
# they are indexed by [(lang, res)][key] and are of type Entry
raw_entries = defaultdict(OrderedDict)
# Main commands
def wrap_txt():
    """Load the txt files, convert them to raw entries and write the result
    back into the txt collection, which re-wraps every entry. The menu is
    then pointed at the txt collection."""
    txt_files.load_files()
    txt_files.merge_files()
    txt_files.update()
    menu.res_files = txt_files

def create_ini():
    """Load the txt files into raw entries, then update the ini collection
    from them. The menu is then pointed at the ini collection."""
    txt_files.load_files()
    txt_files.merge_files()
    ini_files.load_files()
    ini_files.update()
    menu.res_files = ini_files

def merge_ini():
    """Load the ini files into raw entries, then update the txt collection
    from them. The menu is then pointed at the txt collection."""
    txt_files.load_files()
    res_index.en_src = False # For en, load the fake translations
    try:
        ini_files.load_files()
        ini_files.merge_files()
    finally:
        # Always restore the flag, otherwise a failure here would leave 'en'
        # mapped to the fake translations for every later command.
        res_index.en_src = True
    txt_files.update()
    menu.res_files = txt_files

def setup_transifex():
    """Initialize the transifex config file"""

    os.chdir(tx_abs_path)
    try:
        call_tx(['init'])
        tx_set = ['set', '--auto-local', '-s', 'en_AU', '-t', 'INI', '--execute']
        for res in res_index.default_resources:
            res_file = res + '.ini'
            source_file = os.path.join(descript_tx_path, res_file)
            expr = os.path.join(descript_tx_path, '<lang>', res_file)
            call_tx(tx_set + ['-r', 'dcss.' + res, expr, '-f', source_file])
    finally:
        # The rest of the script relies on the descript directory being the
        # working directory, so go back there even if a tx call failed.
        os.chdir(descript_abs_path)
def call_tx(args, silent = False):
    """Wrapper to call the transifex client.

    args is the list of arguments passed to tx. When silent is true, the
    client's stderr is sent to the null device. Returns the exit status
    reported by subprocess.call."""

    stderr = open(os.devnull, 'wb') if silent else None
    try:
        if sys.platform == 'win32':
            # On windows, run the tx script found next to the python
            # executable through the interpreter
            python_path = os.path.split(sys.executable)[0]
            tx_path = os.path.join(python_path, 'Scripts', 'tx')
            command = ['python', tx_path]
        else:
            command = ['tx']
        return call(command + args, stderr=stderr)
    finally:
        # Don't leak one file handle per silent call
        if stderr is not None:
            stderr.close()

# Utility functions
def title(text):
    """Add a dash square around a string. Used when showing a diff."""
    text = "### " + text + " ###"
    dash_line = "#" * len(text)
    return dash_line + "\n" + text + "\n" + dash_line + "\n"

def unwrap(text, no_space):
    """Mostly replicates libutil.cc:unwrap_desc

    Joins wrapped lines with a space while keeping empty lines, indented
    lines and lua separators ({{ and }}) on their own line. When no_space is
    true, spaces sitting between two wide characters are removed."""
    if not text:
        return ""

    # Protect all consecutive empty lines
    text = re.sub(r"\n{2,}", lambda m: r'\n' * len(m.group(0)), text)
    text = text.replace("\n ", "\\n ")

    # Don't unwrap lua separator at the beginning of a line
    text = text.replace("\n}}", "\\n}}")
    text = text.replace("\n{{", "\\n{{")

    text = text.replace(u"—\n—", u"——")
    text = text.replace(">\n<", "><")
    text = text.replace("\n", " ")
    text = text.replace("\\n", "\n")

    # Remove superfluous spaces surrounded by wide characters
    if no_space:
        i = text.find(" ")
        while i != -1:
            # The bounds check keeps a leading or trailing space from
            # indexing outside of the string
            if 0 < i < len(text) - 1 \
               and wide_char(text[i-1]) and wide_char(text[i+1]):
                text = text[:i] + text[i+1:]
                # A new character now sits at i, look at it next
                i = text.find(" ", i)
            else:
                i = text.find(" ", i + 1)

    return text
def wrap(text, eac, no_space):
    """Wrap long lines using a TextWrapper object

    eac selects the fullwidth-aware wrapper, no_space the wrapper for
    languages written without spaces. Consecutive empty lines are collapsed
    into one. Returns the wrapped text as a single string."""
    lines = []
    for line in text.splitlines():
        if line:
            # This allows breaking lines between tags
            line = line.replace("><", ">\f<")
            if no_space:
                # Need to rstrip the lines because when the wrapper tries to
                # add a single character to the end of the line, it might fail
                # and add an empty string, preventing the removal of whitespace
                lines += [l.rstrip() for l in FW_NS_wrapper.wrap(line)]
            elif eac:
                lines += [l.rstrip() for l in FWwrapper.wrap(line)]
            else:
                lines += wrapper.wrap(line)
        elif not lines or lines[-1] != '': # remove consecutive empty lines
            lines.append('')

    lines = [line.replace(">\f<", "><") for line in lines]

    # Languages which have no spaces are split on punctuation which make them
    # sometimes wrapped to the beginning of the next line. Since it's quite
    # ugly, we manually move them back to the end of the previous line.
    if eac or no_space:
        fixed_lines = []
        for line in lines:
            # An empty previous line is a paragraph break: nothing is moved
            # onto it (and it has no last character to compare against).
            while line and line[0] in east_asian_punctuation \
                  and fixed_lines and fixed_lines[-1] \
                  and fixed_lines[-1][-1] != line[0]:
                fixed_lines[-1] += line[0]
                line = line[1:]
                if line:
                    line = line.lstrip()
                else:
                    # The whole line has been moved to the previous one
                    line = None
                    break

            if line is not None:
                fixed_lines.append(line)
        lines = fixed_lines

    return "\n".join(lines)

def diff(val, new_val):
    """Returns a diff showing the differences between 2 strings

    val and new_val are lists of lines without line endings. The format is
    taken from options.diff; an unknown format terminates the program."""
    fmt = options.diff
    if fmt == 'n':
        diff_lines = difflib.ndiff(val, new_val)
    elif fmt == 'unified':
        # lineterm='' because the input lines carry no newline: the default
        # would leave a stray empty line after each header line.
        diff_lines = difflib.unified_diff(val, new_val, lineterm='')
    elif fmt == 'context':
        diff_lines = difflib.context_diff(val, new_val, lineterm='')
    else:
        sys.exit("Invalid diff option: %s" % fmt)
    return "\n".join(diff_lines)

def progress(name, i, n):
    """Generic function for showing the progression of a treatment in percent

    Rewrites the current terminal line with name and the percentage i / n,
    and ends the line once i reaches n."""
    sys.stdout.write("\r%s %d%%" % (name, i * 100 // n))
    if i == n:
        sys.stdout.write("\n")
    # No newline is written before the end, so flush to make it visible
    sys.stdout.flush()
def emphasize(s):
    """Add terminal control characters to a string to make it bright and
    underlined. Under windows, control characters are not supported so we just
    surround the string in chevrons"""
    if system() != 'Windows':
        return u'\033[1m\033[4m%s\033[0m' % s
    else:
        return '<' + s + '>'

def change_counter(c):
    """Format a counter of change types as a fixed-width string.

    For every change type in res_index.changes (sorted), emits "type:count"
    when the count in c is non-zero, or blanks of the same width otherwise."""
    return " ".join(["%s:%-3d" % (k, c[k]) if c[k] else " " * (len(k) + 4) \
                     for k in sorted(res_index.changes)])

def wide_char(c):
    """Return True if c has an east asian width of W, F or A. The em dash is
    explicitly excluded."""
    return c != u'—' and east_asian_width(c) in 'WFA'

def auto_fix(s, lang):
    """Use with care, it can break things

    Applies a series of regex substitutions to the punctuation of s: dashes
    between whitespace, spacing before punctuation, then quote characters
    converted to the convention of lang. Returns the modified string. The
    substitutions build on each other, so their order matters."""
    # NOTE(review): the "ns" in the pattern names presumably stands for
    # non-breaking space; the spaces in the replacement strings below cannot
    # be told apart from plain spaces when reading -- confirm before editing.
    s = auto_fix.re_hyphen.sub(u"\\1—\\2", s)
    s = auto_fix.re_ns.sub(u" \\1", s)
    if lang == 'fr': # Those ones can break languages which use »« for quotes
        s = auto_fix.re_ns_opening_quote.sub(u"« ", s)
        s = auto_fix.re_ns_closing_quote.sub(u" »", s)
        s = auto_fix.re_missing_space.sub(u" \\1", s)
        s = auto_fix.re_missing_space2.sub(u"« ", s)

    if s.find('{{') == -1: # Don't mess with lua strings
        # ASCII quotes are first turned into english typographic quotes...
        s = auto_fix.re_ascii_single_quotes.sub(u"‘\\1’", s)
        s = auto_fix.re_ascii_double_quotes.sub(u"“\\1”", s)

    # replace english quotes by localized ones
    if lang == 'fr':
        s = auto_fix.re_english_double_quotes.sub(u"« \\1 »", s)
        s = auto_fix.re_english_single_quotes.sub(u"“\\1”", s)
    elif lang == 'de' or lang == 'cs':
        s = auto_fix.re_english_single_quotes.sub(u"‚\\1‘", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1“", s)
    elif lang == 'da':
        s = auto_fix.re_english_single_quotes.sub(u"„\\1“", s)
        s = auto_fix.re_english_double_quotes.sub(u"»\\1«", s)
    elif lang == 'el' or lang == 'es' or lang == 'it' or lang == 'pt':
        s = auto_fix.re_english_double_quotes.sub(u"«\\1»", s)
        s = auto_fix.re_english_single_quotes.sub(u"“\\1”", s)
    elif lang == 'fi':
        s = auto_fix.re_english_single_quotes.sub(u"’\\1’", s)
        s = auto_fix.re_english_double_quotes.sub(u"”\\1”", s)
    elif lang == 'ja':
        s = auto_fix.re_english_single_quotes.sub(u"『\\1』", s)
        s = auto_fix.re_english_double_quotes.sub(u"「\\1」", s)
    elif lang == 'lt':
        s = auto_fix.re_english_single_quotes.sub(u"„\\1”", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1”", s)
    elif lang == 'lv' or lang == 'ru':
        s = auto_fix.re_english_double_quotes.sub(u"«\\1»", s)
        s = auto_fix.re_english_single_quotes.sub(u"„\\1”", s)
    elif lang == 'pl' or lang == 'hu':
        s = auto_fix.re_english_single_quotes.sub(u"»\\1«", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1”", s)
    return s

# The patterns are compiled once and stored as attributes of the function.
# NOTE(review): re_hyphen is a plain (non-unicode) literal containing a
# non-ASCII dash, unlike the u"" patterns below -- verify it matches as
# intended on unicode input.
auto_fix.re_hyphen = re.compile("(\s)[-–](\s)") # Replace hyphens by em dashes
auto_fix.re_ns = re.compile("\s([!?:;])")
auto_fix.re_ns_opening_quote = re.compile(u"«\s")
auto_fix.re_ns_closing_quote = re.compile(u"\s»")
auto_fix.re_missing_space = re.compile(u"(?<=\w)([!?:;»](?!\d))", re.U)
auto_fix.re_missing_space2 = re.compile(u"«(?=\w)", re.U)

auto_fix.re_ascii_single_quotes = re.compile(u"(?<=\W)'(.*?)'(?=\W)", re.S)
auto_fix.re_ascii_double_quotes = re.compile(u'"(.*?)"', re.S)
auto_fix.re_english_single_quotes = re.compile(u'‘([^‚‘’]*?)’', re.S)
auto_fix.re_english_double_quotes = re.compile(u'“(.*?)”', re.S)
"""Subclasses to properly handle wrapping fullwidth unicode character which take
2 columns to be displayed on a terminal
See http://code.activestate.com/lists/python-list/631628/"""
class FullWidthUnicode(unicode):
    """unicode subclass whose length and slice indices are expressed in
    display columns: a wide character (see wide_char) counts for 2."""

    def __len__(self):
        """Return the number of columns: 2 per wide character, 1 otherwise."""
        return sum(2 if wide_char(c) else 1 for c in self)

    def __getslice__(self, i, j):
        """Slice using column positions i and j.

        Both bounds are decremented once for every wide character found
        before them, then the plain unicode slice is taken."""
        # Adjust the start bound for the wide characters preceding it
        k = 0
        while k < i:
            if wide_char(self[k]):
                i -= 1
            k += 1
        # Adjust the end bound for the wide characters inside the slice
        k = i
        while k < j and k < unicode.__len__(self):
            if wide_char(self[k]):
                j -= 1
            k += 1
        return FullWidthUnicode(unicode.__getslice__(self, i, j))

class FullWidthTextWrapper(TextWrapper):
    """TextWrapper which splits the text into FullWidthUnicode chunks."""

    def __init__(self, **kwargs):
        """Accepts the TextWrapper keyword arguments plus no_space. The mere
        presence of the no_space key (its value is not looked at) replaces
        the simple word separator regex."""
        if 'no_space' in kwargs:
            kwargs.pop('no_space')
            # Those languages don't use spaces. Break lines on punctuation.
            self.wordsep_simple_re = re.compile(u'([\s%s]+)|(—)(?=—)' % east_asian_punctuation)
        TextWrapper.__init__(self, **kwargs)

    def _split(self, text):
        """Split like TextWrapper, converting each chunk to FullWidthUnicode."""
        return map(FullWidthUnicode, TextWrapper._split(self, text))
class ResourceIndex():
    """Class which holds current language / resource settings and serves as an
    iterator for ResourceCollection.
    self.changes holds a list of the types of change currently selected
    (changed, new or removed). This is used to select which value to
    display when iterating through entries for showing a diff or writing a
    resource file.
    Note that not selecting "removed" only affects diffs and temporary files
    created for editing. When writing the resource file, removed keys are never
    written no matter what is in the changes array."""

    def __init__(self):
        """Scan the current directory for resources and languages, then apply
        the command line options to select the active ones."""
        self.default_languages = [ 'en' ]
        self.default_resources = []
        self.languages = []
        self.resources = []
        self.en_src = True # When True, the english language maps to the source
                           # files. When False, it maps to the fake translations
        self.changes = []
        lang_re = re.compile("[a-z]{2}_[A-Z]{2}")

        # Initialize languages with directories in the descript dir
        # and resource with txt files
        for f in sorted(os.listdir('.')):
            (basename, ext) = os.path.splitext(f)
            if ext.lower() == '.txt':
                self.default_resources.append(basename)
            elif os.path.isdir(f) and lang_re.match(f):
                self.default_languages.append(f)
                # Make sure the directory named after the 2 first letters
                # of the language exists
                if not os.path.exists(f[:2]):
                    os.makedirs(f[:2])

        if options.source:
            self.languages = ['en']
        elif options.language:
            self.set_languages(options.language)
        elif options.translations:
            # Everything but the leading 'en'
            self.languages = self.default_languages[1:]
        else:
            self.languages = self.default_languages[:]

        if options.resource:
            self.set_resources(options.resource)
        else:
            self.resources = self.default_resources[:]

    def __iter__(self):
        """Iterate over the (language, resource) pairs currently selected.
        While en_src is set, the 'en' language is yielded as ''."""
        return iter([('',r) if self.en_src and l == 'en' else (l, r) \
                     for l in self.languages for r in self.resources])

    def __len__(self):
        """Number of (language, resource) pairs currently selected."""
        return len(self.languages) * len(self.resources)

    def __str__(self):
        """Two-line summary of the selected languages and resources."""
        s = ''
        for index_t in "languages", "resources":
            index = getattr(self, index_t)
            s += index_t.title() + ": "
            if self.is_default(index_t):
                s += "All (%d)\n" % len(index)
            else:
                s += ", ".join(index) + "\n"
        return s

    def is_default(self, index_t):
        """Return True when the index named index_t ('languages' or
        'resources') has as many elements as its default."""
        # NOTE(review): only the lengths are compared, not the content --
        # confirm that is sufficient for every way the index can be filled.
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)
        return len(index) == len(default_index)

    def print_index(self, index_t, only_selected = False):
        """Print the index named index_t. By default all the available values
        are shown with the selected ones emphasized; with only_selected, only
        the selected values are shown."""
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)
        if only_selected:
            idx_l = index
        else:
            idx_l = [emphasize(i) if i in index else i for i in default_index]
        print "%s: %s" % (index_t.title(), ", ".join(idx_l))

    def set_index(self, index_t, opt):
        """When opt is True, the method is being called during program startup
        with the option value as argument. This reduce the verbosity compared to
        calling it in interactive mode."""

        if not opt:
            self.print_index(index_t)
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)

        if opt:
            a = opt
        else:
            a = raw_input("Select %s (Empty reset to defaults): " % index_t)

        # The list is emptied in place, then refilled from the user input
        del index[:]
        for i in sep_re.split(a):
            if i in default_index:
                index.append(i)
            elif i:
                # Not an exact name: accept it as an unambiguous prefix
                matches = [m for m in default_index if m.startswith(i)]
                if len(matches) == 1:
                    index.append(matches[0])
                elif not matches:
                    print >> sys.stderr, "Invalid %s: %s" % (index_t[:-1], i)
                else:
                    print >> sys.stderr, "Multiple matches for %s: %s" \
                                         % (i, ", ".join(matches))

        if not index:
            setattr(self, index_t, default_index[:])
            print "Reset %s to default" % index_t
        elif not opt:
            print
            self.print_index(index_t, True)

    def set_languages(self, opt = ''):
        """Select the languages, see set_index."""
        self.set_index('languages', opt)

    def set_resources(self, opt = ''):
        """Select the resources, see set_index."""
        self.set_index('resources', opt)

    def set_changes(self, change_t_list):
        """Replace the list of selected change types."""
        self.changes = change_t_list

    def get_index(self, index_t):
        """Return the first selected element of the index named index_t."""
        return getattr(self, index_t)[0]

    def next_index(self, index_t):
        """Select only the element following the first selected one in the
        default index, wrapping around to the first element after the last."""
        element = self.get_index(index_t)
        default_index = getattr(self, "default_" + index_t)
        if default_index[-1] == element:
            setattr(self, index_t, [default_index[0]])
        else:
            setattr(self, index_t, [default_index[default_index.index(element) + 1]])
class Entry():
    """A raw entry: an unwrapped value and its ordered tags. This is the type
    of the elements stored in raw_entries."""
    def __init__(self):
        self.value = ''
        self.tags = OrderedDict()

    def __getitem__(self, key):
        # Reading a tag which isn't set gives an empty string
        return self.tags.get(key, '')

    def __setitem__(self, key, value):
        self.tags[key] = value

class TxtEntry():
    """Accumulator used only while a txt file is being read. Instances are
    never kept: save() copies what was gathered into the ResourceFile."""
    def __init__(self):
        self.key = ""
        self.value = ""
        self.key_comment = ""
        self.value_comment = ""

    def save(self, res_file):
        """Store the current entry in res_file, then reset this object so it
        can collect the next entry."""
        key = self.key
        res_file.entries[key] = self.value
        # Comments are only recorded when there is something to record
        for comment_t in ('key_comment', 'value_comment'):
            comment = getattr(self, comment_t)
            if comment:
                getattr(res_file, comment_t)[key] = comment
        self.__init__()
class ResourceFile():
    """Holds all the logic which is common between txt and ini files.
    self.entries hold the dictionary of key/value read from the file. It is
    initialized in the subclasses because source files use an OrderedDict.
    self.diff have a dictionary per change type with the new values.

    Subclasses must set self.ext, self.lang_dir, self.entries and
    self.source_res before calling __init__, and provide format_entry,
    header, separator and raw_entry."""
    def __init__(self, lang, res):
        self.diff = defaultdict(dict)
        self.language = lang
        self.resource = res
        self.path = res + "." + self.ext
        self.path = os.path.join(self.lang_dir, self.path)
        self.git_path = os.path.join(descript_git_path, self.path).replace("\\", "/")
        self.mtime = 0
        self.modified = False
        self.staged = False
        self.new = False

    def __setitem__(self, key, value):
        """Called by the subclass which has already done the conversion.
        Determine the change type and store the new value in the appropriate
        dict of self.diff"""
        if key not in self.entries:
            change_t = 'new'
        elif value != self.entries[key]:
            change_t = 'changed'
        else:
            return
        self.diff[change_t][key] = value

        # If the key was previously removed and edited back in
        # delete it from the 'removed' dict. get() is used so that the
        # lookup doesn't create an empty 'removed' dict as a side effect.
        removed = self.diff.get('removed')
        if removed and key in removed:
            del removed[key]

    def items(self, diff_only):
        """Returns an iterator to a list of (key, value) tuples, depending on
        what is selected in res_index.changes and what is found in self.diff.
        When diff_only is true, only return changed or new values (for diff and
        edit). When it is false, return the original value for unchanged ones
        (for writing file)."""

        items = []
        # Read without creating the dict in self.diff
        removed = self.diff.get('removed', ())
        for key in self.source_keys():
            found_diff = False
            for change_t in res_index.changes:
                if change_t == 'removed' or change_t not in self.diff: continue
                if key in self.diff[change_t]:
                    items.append((key, self.diff[change_t][key]))
                    found_diff = True
            if not found_diff and not diff_only and key in self.entries \
               and key not in removed:
                items.append((key, self.entries[key]))

        return iter(items)

    def diff_count(self):
        """Returns a Counter object representing what's in self.diff"""
        c = Counter()
        for change_t in self.diff:
            count = len(self.diff[change_t])
            if count:
                c[change_t] = count
        return c

    def lang(self):
        """Source files have self.language empty, but they are in english"""
        return self.language if self.language else 'en'

    def clear(self, keep_entries = False):
        """Forget the pending changes and, unless keep_entries is true, the
        entries read from the file."""
        if not keep_entries:
            self.entries.clear()
        self.diff.clear()

    def changed(self):
        """Returns true if there are pending change for the file depending on
        what is selected in res_index.changes"""
        for change_t in res_index.changes:
            # An empty dict left behind in self.diff is not a change
            if self.diff.get(change_t):
                return True
        return False

    def source_keys(self):
        """Returns an ordered list of the keys of the source corresponding to
        this resource file. This list is used as a reference when iterating
        through keys. It helps keep the order consistent and translations can't
        exist if there isn't a source associated to them anyway."""
        # Always work on a private list: it may be extended and sorted below
        keys = list(self.source_res.entries.keys())

        # To allow submitting new quotes from another resource, they are sorted
        if self.resource == 'quotes' and 'new' in self.diff:
            for k in self.diff['new']:
                if k not in keys:
                    keys.append(k)
            keys.sort()
        return keys

    def diff_txt(self, diff_format):
        """When diff_format is True, returns a string with a diff for each new
        or changed entry. When it is False, returns the new value instead (for
        editing purpose)."""
        diff_txt = ''
        for (key, value) in self.items(True):
            if key in self.entries:
                orig = self.format_entry(key, self.entries[key])
            else:
                orig = ""
            value = self.format_entry(key, value)
            if diff_format:
                diff_txt += diff(orig.splitlines(), value.splitlines()) + "\n"
            else:
                diff_txt += value
                diff_txt += self.separator()

        if 'removed' in res_index.changes and 'removed' in self.diff:
            for k, v in self.diff['removed'].items():
                value = self.format_entry(k, v)
                if diff_format:
                    diff_txt += diff(value.splitlines(), []) + "\n"
                else:
                    diff_txt += value
                    diff_txt += self.separator()

        return diff_txt

    def load(self):
        """Read the file unless something is already loaded or pending."""
        if not self.entries and not self.diff:
            self.read_file()

    def read_file(self):
        """Called by the subclasses to handle the basic checks. Returns the
        content of the file (list of lines) to the subclass which does the
        actual parsing."""
        if not os.path.exists(self.path):
            return []

        # If the corresponding source file isn't loaded we load it first
        if self.language and not len(self.source_keys()):
            self.source_res.read_file()

        # Don't reload the file if it hasn't changed since we loaded it before.
        file_mtime = os.stat(self.path).st_mtime
        if self.mtime == file_mtime:
            self.clear(True)
            return []
        else:
            self.clear()
            self.mtime = file_mtime

        with codecs.open(self.path, encoding='utf-8') as f:
            return f.readlines()

    def merge_file(self):
        """Iterate through the entries loaded from the file, convert them in a
        raw format and store them in raw_entries"""
        entries = raw_entries[(self.lang(), self.resource)]
        entries.clear()
        for (key, value) in self.entries.items():
            entries[key] = self.raw_entry(value)

    def update(self, update_removed_keys = True):
        """Update the resource file with the content of raw_entries. New values
        will be converted in the resource format and stored in the appropriate
        diff dictionary by the __setitem__ methods"""
        entries = raw_entries[(self.lang(), self.resource)]
        for key in self.source_keys():
            if key not in entries: continue
            self[key] = entries[key]
        if update_removed_keys and (self.lang() != 'en' or self.ext == 'ini'):
            self.update_removed_keys()

    def write_file(self):
        """Write the content of the resource to a file"""
        # The with block guarantees the content is flushed and the handle
        # released before anything reads the file back
        with codecs.open(self.path, "w", encoding='utf-8') as f:
            f.write(self.header())
            for key, e in self.items(False):
                f.write(self.format_entry(key, e))
                f.write(self.separator())
        self.modified = True
        # Force the next read_file to reload the file
        self.mtime = 0

    def update_removed_keys(self):
        """If the resource has keys which are not present in the source, they
        will be removed. Store them in self.diff['removed'] to show them in diff
        and allow editing (useful to fix renamed keys)."""
        entries = raw_entries[(self.lang(), self.resource)]
        # Computed once instead of once per key
        source_keys = set(self.source_keys())
        for k in list(self.entries.keys()):
            if k not in source_keys or k not in entries:
                self.diff['removed'][k] = self.entries[k]

    def edit_file(self):
        """Create a temporary file with the values of the changed keys, start
        a text editor, then load the file.

        Returns False when the editor can't be started, True otherwise."""
        tmp = NamedTemporaryFile(prefix=self.language + "-" + self.resource,
                                 suffix="." + self.ext, delete=False)
        try:
            tmp.file.write(self.diff_txt(False).encode('utf-8'))
            tmp.file.close()
            EDITOR = os.environ.get('EDITOR','vim')
            try:
                call([EDITOR, tmp.name])
            except OSError:
                sys.stderr.write("Cannot start text editor. "
                                 "Set the EDITOR environment variable.\n")
                return False
            # Parse the edited file with a throwaway resource of the same
            # type; merging it replaces the raw entries with the edited ones
            tmp_res = self.__class__(self.language, self.resource)
            tmp_res.path = tmp.name
            tmp_res.read_file()
            tmp_res.merge_file()
        finally:
            # The file was created with delete=False: remove it on every path
            os.remove(tmp.name)
        self.update(False)
        return True
class TxtFile(ResourceFile):
    """Subclass of ResourceFile to handle files in crawl's native format of
    description files."""
    def __init__(self, lang, res):
        if lang:
            self.entries = dict()
            self.source_res = txt_files[('', res)]
            # Translations live in a directory named after the 2 first
            # letters of the language
            self.lang_dir = lang[:2]
        else:
            # Source file: it is its own reference and keeps the key order
            self.entries = OrderedDict()
            self.source_res = self
            self.lang_dir = ''
        self.key_comment = dict()
        self.value_comment = dict()
        self.ext = 'txt'
        self.eac = lang in east_asian_languages
        self.no_space = lang in no_space_languages
        ResourceFile.__init__(self, lang, res)

    def __setitem__(self, key, entry):
        """Converts a generic entry in txt format then calls the base class
        __setitem__ method to store it in the appropriate self.diff dict"""
        value = ""
        for tag, tag_value in entry.tags.items():
            # If it has a quote tag, we store the new quote in its own entry
            if tag == 'quote':
                e = Entry()
                e.value = tag_value
                quote_res = txt_files[(self.language, 'quotes')]
                quote_res.load()
                quote_res[key] = e

                # add the quote resource to the index
                if 'quotes' not in res_index.resources:
                    res_index.resources.append('quotes')

                # If we're adding a foreign quote and the source doesn't have
                # one, we also create it in the corresponding source
                if self.language and key not in quote_res.source_res.entries:
                    en_quote_res = txt_files[('', 'quotes')]
                    en_quote_res.load()
                    en_quote_res[key] = e

                    # Add english to the index
                    if 'en' not in res_index.languages:
                        res_index.languages.insert(0, 'en')

            elif tag_value is True:
                value += ":%s\n" % tag
            else:
                value += ":%s %s\n" % (tag, tag_value)

        if options.auto_fix:
            raw_value = auto_fix(entry.value, self.lang())
        else:
            raw_value = entry.value

        if entry['nowrap']:
            value += raw_value
        else:
            value += wrap(raw_value, self.eac, self.no_space)

        value += "\n"
        ResourceFile.__setitem__(self, key, value)

    def format_entry(self, key, value):
        """Convert the key/value pair in crawl's native desc format"""
        ret = self.key_comment.get(key, "")
        ret += key + "\n\n"
        ret += self.value_comment.get(key, "")
        ret += value
        return ret

    def header(self):
        """Added to the beginning of the file"""
        return self.separator()

    def separator(self):
        """Separate entries in the file"""
        return "%%%%\n"

    def raw_entry(self, value):
        """Convert a value in txt format to a raw entry."""
        e = Entry()
        for line in value.splitlines():
            # Lines like ":tag" or ":tag value" are tags, not text
            if len(line) > 1 and line[0] == ':' and line[1] != ' ':
                l = line[1:].rstrip().split(' ', 1)
                e[l[0]] = l[1] if len(l) == 2 else True
            else:
                e.value += line + "\n"
        e.value = e.value.rstrip()
        if not e['nowrap']:
            e.value = unwrap(e.value, self.no_space)
        return e

    def read_file(self):
        """Parse the content of a txt file and stores it in self.entries

        Returns the number of entries held after parsing."""
        te = TxtEntry()
        for line in ResourceFile.read_file(self):
            if line[0] == '#':
                # A comment belongs to the value once the key has been read
                if te.key:
                    te.value_comment += line
                else:
                    te.key_comment += line
            elif txt_sep_re.match(line):
                if te.key:
                    te.save(self)
            elif line[0] == '\n' and not te.value:
                # Skip the empty lines before the value
                continue
            elif not te.key:
                te.key = line.strip()
            else:
                te.value += line

        # The last entry isn't necessarily followed by a separator
        if te.key:
            te.save(self)

        return len(self.entries)

    def search_removed_keys(self):
        """Record the removed keys of a translation (see
        ResourceFile.update_removed_keys). Does nothing for a source file."""
        # No removed key in the source, it's the reference
        if self.language:
            # The base class method is named update_removed_keys; the name
            # previously called here does not exist on ResourceFile.
            ResourceFile.update_removed_keys(self)
class IniFile(ResourceFile):
    """Subclass of ResourceFile to handle files in ini format to be pushed to
    or pulled from transifex."""
    def __init__(self, lang, res):
        self.entries = dict()
        # The reference is always the txt source of the same resource
        self.source_res = txt_files[('', res)]
        self.ext = 'ini'
        self.lang_dir = lang
        ResourceFile.__init__(self, lang, res)

    def __setitem__(self, key, e):
        """Converts a generic entry in ini format then calls the base class
        __setitem__ method to store it in the appropriate self.diff dict"""

        # Delete entries with only a link. There's no point in translating them.
        # A link is a single line enclosed in <> with no other < inside.
        if len(e.value) > 1 and e.value[0] == '<' and e.value[-1] == '>'\
           and e.value.find("\n") == -1 and e.value[1:].find("<") == -1:
            if key in self.entries:
                self.diff['removed'][key] = self.entries[key]
                del self.entries[key]
            return

        # Tags come first, then the value; everything ends up on one line
        # with new lines written as a literal backslash followed by n.
        value = ""
        for tag, tag_value in e.tags.items():
            if tag_value is True:
                value += r":%s\n" % tag
            else:
                value += r":%s %s\n" % (tag, tag_value)
        value += e.value.replace("\n", r'\n') + "\n"
        ResourceFile.__setitem__(self, key, value)

    def header(self):
        """Nothing is written at the beginning of an ini file."""
        return ""

    def separator(self):
        """Nothing is written between the entries of an ini file."""
        return ""

    def format_entry(self, key, value):
        """Convert the key/value pair in ini format"""
        return "%s=%s" % (key, value)

    def read_file(self):
        """Parse the content of an ini file and stores it in self.entries

        Lines which are empty, start with # or have no = are ignored.
        Returns the number of entries held after parsing."""
        for line in ResourceFile.read_file(self):
            if not line or line[0] == '#' or line.find('=') == -1: continue
            (key, value) = line.split('=', 1)
            # NOTE(review): as written, the first replace() swaps a double
            # quote for itself and has no effect. Presumably an escaped form
            # of the quote was meant as the first argument -- confirm.
            self.entries[key] = value.replace('"', '"').replace('\\\\', '\\')

        return len(self.entries)

    def raw_entry(self, value):
        """Convert a value in ini format to a raw entry."""
        e = Entry()
        tag_name = ''
        for line in value.rstrip().split(r'\n'):
            if len(line) > 1 and line[0] == ':' and line[1] != ' ':
                if not e.value:
                    # Before the text: a ":tag" or ":tag value" line
                    l = line[1:].split(' ', 1)
                    e[l[0]] = l[1] if len(l) == 2 else True
                else:
                    # After the text: a tag whose content is made of the
                    # following lines
                    tag_name = line[1:]
            elif tag_name:
                if e[tag_name]:
                    e[tag_name] += "\n"
                e[tag_name] += line
            else:
                e.value += line + "\n"
        e.value = e.value.rstrip()
        return e
class ResourceCollection(OrderedDict):
    """A container class holding a collection of resource files. It uses
    res_index to iterate through its resources

    Subclasses set self.ext and create missing resource files on demand
    through __missing__."""
    def __init__(self):
        OrderedDict.__init__(self)
        self.diff_count = Counter()
        self.git_count = Counter()
        self.modified = False

    def __iter__(self):
        """Iterate over the resource files selected by res_index, not over
        the keys of the dictionary."""
        return iter([self[res_i] for res_i in res_index])

    def __len__(self):
        """Number of resource files selected by res_index."""
        return len(res_index)

    def clear(self):
        """Reset the change counter and the modified flag. The resource files
        held by the collection are kept."""
        self.diff_count.clear()
        self.modified = False

    def paths(self):
        """Paths of the selected resource files."""
        return [res.path for res in self]

    def merge_files(self):
        """Convert every selected file into raw entries."""
        for i, res in enumerate(self, start=1):
            progress("Merging %s files" % self.ext, i, len(self))
            res.merge_file()

    def load_files(self):
        """Read every selected file and print how many entries were loaded."""
        self.clear()
        n_files = n_entries = 0
        for i, res in enumerate(self, start=1):
            progress("Loading %s files" % self.ext, i, len(self))
            n = res.read_file()
            if n:
                n_files += 1
                n_entries += n
        if n_files:
            # Indexing by a boolean picks the singular or the plural form
            print "Loaded %d entr%s from %d %s file%s" \
                  % (n_entries, ["y", "ies"][n_entries!=1],
                     n_files, self.ext, "s"[n_files==1:])

    def update(self):
        """Update every selected file from the raw entries, then refresh the
        change counter."""
        for i, res in enumerate(self, start=1):
            progress("Updating %s files" % self.ext, i, len(self))
            res.update()
        self.update_diff_count()

    def update_diff_count(self):
        """Recount the pending changes of the selected files and select in
        res_index every change type which has at least one change."""
        self.diff_count.clear()
        for res in self:
            self.diff_count += res.diff_count()
        res_index.changes = self.diff_count.keys()

    def diff(self, diff_format):
        """Concatenate, for every changed file, a title made of its path and
        the text returned by its diff_txt(diff_format)."""
        diff_text = ''
        for res in self:
            if res.changed():
                diff_text += title(res.path) + "\n"
                diff_text += res.diff_txt(diff_format) + "\n"
        return diff_text

    def show_diff(self):
        """Show the diff through less, or print it if less can't be started."""
        diff_text = self.diff(True)
        try:
            Popen("less", stdin=PIPE).communicate(diff_text.encode('utf-8'))
        except OSError:
            print diff_text

    def edit_files(self):
        """Edit the changed files one by one, stopping at the first one whose
        edit_file returns a false value."""
        for res in self:
            if res.changed():
                if not res.edit_file():
                    break
        self.update_diff_count()

    def write_files(self):
        """Write the changed files (with the force option, also the unchanged
        ones which have content), then drop the changes which were selected."""
        for res in self:
            if res.changed() or options.force and list(res.items(False)):
                res.write_file()
                for change_t in res_index.changes:
                    if change_t in res.diff:
                        del res.diff[change_t]
        self.update_diff_count()

    def undo_changes(self):
        """Discard the pending changes of the selected files, keeping the
        entries read from the files."""
        for res in self:
            res.clear(True)
        self.diff_count.clear()
class TxtCollection(ResourceCollection):
    """Collection of txt files. It holds a few git methods"""
    def __init__(self):
        # ext is set first: it is used in the progress and summary messages.
        self.ext = 'txt'
        ResourceCollection.__init__(self)

    def __missing__(self, key):
        """Create the TxtFile for key on first access and remember it."""
        self[key] = TxtFile(*key)
        return self[key]

    def refresh_state(self):
        """Run git status and check the result for each file in the collection

        The modified / staged / new flags of every resource are reset before
        being recomputed, so a flag set by a previous refresh can't survive
        once git no longer reports that state (e.g. a new file which has been
        staged since is no longer flagged as new)."""
        if not git: return

        self.git_count.clear()
        status_cmd = ["git", "status", "--porcelain"] + self.paths()
        output = Popen(status_cmd, stdout=PIPE).communicate()[0]

        # Porcelain lines are "XY path": two status letters, a space, the path.
        git_states = dict()
        for line in output.splitlines():
            git_states[line[3:]] = line[0:2]

        for res in self:
            res.modified = res.staged = res.new = False
            st = git_states.get(res.git_path)
            if st is None:
                continue

            if st[0] == 'M' or st[0] == 'A':
                res.staged = True
                self.git_count['staged'] += 1
            if st[1] == 'M':
                res.modified = True
                self.git_count['modified'] += 1
            elif st == '??':
                res.new = True
                self.git_count['new'] += 1

    def git_status(self):
        """Show git status restricted to the files of the collection."""
        call(["git", "status"] + self.paths())

    def git_add_hunks(self):
        """Interactively select the hunks to stage (git add -p)."""
        self.git_add(True)

    def git_add(self, hunks = False):
        """Stage the modified and new files, hunk by hunk if hunks is true."""
        files = [res.path for res in self if res.modified or res.new]
        # Without any path, "git add -p" would walk through the whole work
        # tree instead of our files, so there's nothing to run in this case.
        if not files:
            return

        cmd_list = ['git', 'add']
        if hunks:
            cmd_list.append('-p')
        call(cmd_list + files)

    def git_reset(self):
        """Discard the local changes: modified files are checked out again
        and new (untracked) files are deleted."""
        files = []
        for res in self:
            if res.modified:
                files.append(res.path)
            elif res.new:
                os.remove(res.path)

        # "--" makes sure the arguments are read as paths, never as a branch.
        if files:
            call(['git', 'checkout', '--'] + files)
class IniCollection(ResourceCollection):
    """Collection of ini files with methods to interface with the transifex
    client push and pull commands"""
    def __init__(self):
        self.ext = 'ini'
        ResourceCollection.__init__(self)

    def __missing__(self, key):
        """Create the IniFile for key on first access and remember it."""
        ini_file = IniFile(*key)
        self[key] = ini_file
        return ini_file

    def refresh_state(self):
        """The collection is modified when any of its resources is."""
        self.modified = any(res.modified for res in self)

    def tx_pull(self):
        """Pull from transifex with the narrowest set of calls matching the
        current selection: everything at once, per language, per resource, or
        per file when both languages and resources are restricted."""
        tx_cmd = ['pull', '-f'] if options.force else ['pull']
        all_lang = res_index.is_default('languages')
        all_res = res_index.is_default('resources')

        if all_lang and all_res:
            call_tx(tx_cmd + ['-a'])
            return
        if all_res:
            for lang in res_index.languages:
                call_tx(tx_cmd + ['-l', lang])
            return
        if all_lang:
            for res in res_index.resources:
                call_tx(tx_cmd + ['-r', 'dcss.' + res])
            return
        for res in self:
            call_tx(tx_cmd + ['-l', res.lang(), '-r', 'dcss.' + res.resource])

    def tx_push(self):
        """Push every modified resource. The modified flag of a resource is
        cleared when its push command returns 0."""
        push_cmd = ['push']
        if options.force:
            push_cmd.append('-f')

        for res in self:
            if not res.modified: continue
            resource = ['-r', 'dcss.' + res.resource]
            language = ['-l', res.lang()]
            if res.language:
                ret = call_tx(push_cmd + ['-t'] + language + resource)
            else:
                # We push the source then reset the fake translation resource
                ret = call_tx(push_cmd + ['-s'] + resource)
                if self[('en', res.resource)].entries:
                    answer = raw_input("Reset the %s fake translation (y/n)? " % res.resource).lower()
                    if answer[:1] == 'y':
                        call_tx(['delete', '-f'] + language + resource)
            if ret == 0:
                res.modified = False
class Menu(OrderedDict):
    """Create a simple text based interactive menu.
    The inherited OrderedDict is used to store groups of commands
    cmds keys are the command hotkey letter, values are either a function or a
    list whose first member is the function and the next ones are arguments.
    cmd is the command string, it can be used to queue several commands.
    res_files points to the current resource file collection which is being
    worked on."""

    def __init__(self, cmd = ''):
        OrderedDict.__init__(self)
        self.cmds = dict()
        self.cmd = cmd
        self.menu_desc = ''
        self.res_files = txt_files
        # Per-resource details are shown by default only for a single language.
        self.show_res = len(res_index.languages) == 1

    def __missing__(self, key):
        """A group which doesn't exist yet starts as an empty label list."""
        self[key] = []
        return self[key]

    def change_summary(self):
        """Print the pending changes, per resource or per language depending
        on show_res, followed by the total. Nothing is returned."""
        if not self.res_files.diff_count:
            print("No changes\n")
            return

        print("Change summary:")
        lang_total = defaultdict(Counter)
        padding_size = 5

        # First pass: column width (resource view) or totals (language view).
        for res in self.res_files:
            if not res.diff: continue
            if self.show_res:
                padding_size = max(padding_size, len(res.path))
            else:
                lang_total[res.lang()] += res.diff_count()

        cur_lang = ''
        for res in self.res_files:
            if not res.diff: continue
            lang = res.lang()
            if lang != cur_lang and lang_total[lang]:
                print("%-*s %s" % (padding_size, lang,
                                   change_counter(lang_total[lang])))
                cur_lang = lang
            if self.show_res:
                print("%-*s %s" % (padding_size, res.path,
                                   change_counter(res.diff_count())))

        print("%-*s %s" % (padding_size, 'Total',
                           change_counter(self.res_files.diff_count)))
        print("")

    def git_summary(self):
        """Print the git counters of the current collection, if any."""
        if not self.res_files.git_count:
            return

        padding_size = 5
        print("Git status:")
        for key, count in self.res_files.git_count.most_common():
            print("%-*s: %d" % (padding_size, key, count))
        print("")

    def git_commit(self):
        """Commit what is staged; git opens an editor on the message."""
        call(['git', 'commit', '-e', '-s', '-m', '[Transifex]',
              '--author=' + options.commit_author])

    def toggle_details(self):
        """Switch the change summary between resource and language view."""
        self.show_res = not self.show_res

    def set_languages(self):
        res_index.set_languages()
        self.res_files.update_diff_count()
        self.show_res = len(res_index.languages) == 1

    def set_resources(self):
        res_index.set_resources()
        self.res_files.update_diff_count()

    def set_changes(self):
        """Creates a submenu to select which kind of change to work on."""
        submenu = Menu()
        lbl = "Select entries"
        change_ts = list(self.res_files.diff_count.keys())
        for change_t in change_ts + ['all']:
            # The first letter of the change type is the hotkey.
            cmd_lbl = '<' + change_t[0] + '>' + change_t[1:]
            if change_t == 'all':
                submenu.add_cmd(lbl, cmd_lbl, [res_index.set_changes, change_ts])
            else:
                submenu.add_cmd(lbl, cmd_lbl, [res_index.set_changes, [change_t]])
        submenu.build_menu_desc()
        submenu.show_menu()

    def next_index(self, index_t):
        """When only one index is selected (language or resource), this commands
        allows to jump to the next one. It will search for one with pending
        changes. If none is found after having looped through all of them, we
        simply select the next one"""
        current = res_index.get_index(index_t)
        while 1:
            res_index.next_index(index_t)
            self.res_files.update_diff_count()
            if self.res_files.diff_count or current == res_index.get_index(index_t):
                break

        # If we haven't found something with a change, we looped. In this case,
        # we advance one more time.
        if not self.res_files.diff_count:
            res_index.next_index(index_t)

        res_index.print_index(index_t, True)

    def add_cmd(self, group, label, cmd):
        """Adds a command to the menu. The label must contain a letter between
        chevrons which will be the command hotkey.
        Exits when the label has no hotkey or when the hotkey is already used
        by another command of this menu."""
        m = cmd_re.search(label)
        if not m: sys.exit("Invalid command: %s" % label)
        key = m.group(1)
        # Hotkeys are registered in self.cmds. The mapping itself is indexed by
        # group name, so looking the key up there would never find a duplicate.
        if key in self.cmds:
            sys.exit("Duplicate command for key %s: %s and %s"
                     % (key, label, self.cmds[key]))
        if system() != 'Windows':
            label = label.replace("<" + key + ">", emphasize(key))
        self[group].append(label)
        self.cmds[key] = cmd

    def build_main_menu(self):
        """Rebuild the main menu according to the current state (pending
        changes, selection, git and transifex availability), then print the
        change and git summaries."""
        self.cmds.clear()
        self.clear()
        self.menu_desc = ''

        lbl_cmds = "Commands"
        self.add_cmd(lbl_cmds, 'wrap <t>xt files', wrap_txt)
        self.add_cmd(lbl_cmds, '<m>erge ini files', merge_ini)
        self.add_cmd(lbl_cmds, 'update <i>ni files', create_ini)
        self.add_cmd(lbl_cmds, '<q>uit', sys.exit)

        lbl_review = "Review changes"
        if self.res_files.diff_count or options.force:
            self.add_cmd(lbl_review, "<w>rite files", self.res_files.write_files)
        if self.res_files.diff_count:
            if self.show_res:
                self.add_cmd(lbl_review, "<v>iew languages", self.toggle_details)
            else:
                self.add_cmd(lbl_review, "<v>iew resources", self.toggle_details)
            self.add_cmd(lbl_review, "show <d>iff", self.res_files.show_diff)
            self.add_cmd(lbl_review, "<e>dit", self.res_files.edit_files)
            self.add_cmd(lbl_review, "e<x>punge changes", self.res_files.undo_changes)

        lbl_select = "Select"
        self.add_cmd(lbl_select, "<l>anguages", self.set_languages)
        self.add_cmd(lbl_select, "<r>esources", self.set_resources)
        if len(self.res_files.diff_count) > 1:
            self.add_cmd(lbl_select, "chan<g>es", self.set_changes)

        if len(res_index.resources) == 1:
            self.add_cmd(lbl_select, "<n>ext resource", [self.next_index, 'resources'])
        elif len(res_index.languages) == 1:
            self.add_cmd(lbl_select, "<n>ext language", [self.next_index, 'languages'])

        if git:
            lbl_git = "Git"
            git_count = self.res_files.git_count
            if git_count:
                self.add_cmd(lbl_git, "<s>tatus", self.res_files.git_status)
            if git_count['modified'] or git_count['new']:
                self.add_cmd(lbl_git, "<a>dd", self.res_files.git_add)
                self.add_cmd(lbl_git, "select <h>unks", self.res_files.git_add_hunks)
                self.add_cmd(lbl_git, "chec<k>out", self.res_files.git_reset)
            if git_count['staged']:
                self.add_cmd(lbl_git, "<c>ommit", self.git_commit)

        if transifex:
            lbl_tx = "Transifex"
            self.add_cmd(lbl_tx, "<p>ull", ini_files.tx_pull)
            if self.res_files.modified:
                self.add_cmd(lbl_tx, "p<u>sh", self.res_files.tx_push)

        self.build_menu_desc()
        print("")
        # change_summary() prints by itself and returns nothing: printing its
        # result would add a stray "None" line after the summary.
        self.change_summary()
        self.git_summary()

    def build_menu_desc(self):
        """Append one "group: label, label" line per group to menu_desc."""
        for group, labels in self.items():
            self.menu_desc += "%s: %s" % (group, ", ".join(labels)) + "\n"

    def show_menu(self):
        """It reads the command line argument and treat each letter as a command
        When there is no more command, it switches to interactive mode."""
        if not self.cmd:
            self.cmd = raw_input(self.menu_desc).lower()
        choice = self.cmd[:1]
        self.cmd = self.cmd[1:]
        if choice in self.cmds:
            func = self.cmds[choice]
            if isinstance(func, list):
                # If it's a list, then the first item is the function,
                # the other ones are arguments
                func[0](*func[1:])
            else:
                func()
        else:
            print("Huh?")
            # Drop whatever was queued after the unknown command.
            self.cmd = ""

    def main_menu(self):
        """Print the current selection then loop forever on the main menu
        (the quit command is sys.exit)."""
        # Python 2 print statement: the trailing comma suppresses the newline.
        print(res_index),
        while 1:
            self.res_files.refresh_state()
            self.build_main_menu()
            self.show_menu()

wrapper_args = {
    'width' : 79,
    'break_on_hyphens' : False,
    'break_long_words' : False,
    'replace_whitespace' : False}

wrapper = TextWrapper(**wrapper_args)
# Use hardcoded whitespaces instead of \s because the latter matches
# non-breaking spaces (see textwrap.py:30).
wrapper.wordsep_simple_re_uni = re.compile(r'([%s]+)' % _whitespace)
FWwrapper = FullWidthTextWrapper(**wrapper_args)
# Same arguments plus no_space for the second full width wrapper.
wrapper_args['no_space'] = True
FW_NS_wrapper = FullWidthTextWrapper(**wrapper_args)

# We initialize the resource index early because we might need it if we have to
# initialize the transifex configuration.
res_index = ResourceIndex()

# Can we use the transifex client?
try:
    call_tx([], True)
    transifex = True
except OSError:
    transifex = False

# Is transifex configured?
if transifex and not os.path.exists(tx_config):
    setup_transifex()

# Can we use git? Its output is discarded; the handle on the null device is
# closed as soon as the probe is done.
try:
    with open(os.devnull, 'wb') as devnull:
        call(['git'], stdout=devnull)
    git = True
except OSError:
    git = False

# Create the global variables for managing resources.
txt_files = TxtCollection()
ini_files = IniCollection()
menu = Menu(cmd)
menu.main_menu()