1#!/usr/local/bin/python3.8
2# -*- coding: utf-8 -*-
3
4import os, re, sys, codecs, difflib
5from optparse import OptionParser
6from subprocess import Popen, PIPE, call
7from textwrap import TextWrapper, _whitespace
8from collections import defaultdict, OrderedDict, Counter
9from platform import system
10from unicodedata import east_asian_width
11from tempfile import NamedTemporaryFile
12
# Command line interface. The parsed values are kept in the module-level
# `options` object; `cmd` is the first positional argument, or an empty
# string when none was given.
usage = "usage: %prog [options] commands\n" \
        "Without any command, it starts in interactive mode.\n" \
        "Read docs/translations.txt for details."
parser = OptionParser(usage=usage)
parser.add_option("--commit_author", help="Commit author",
            default="Translators <crawl-ref-discuss@lists.sourceforge.net>")
# The accepted values of --diff are validated lazily by diff().
parser.add_option("-d", "--diff", help="Diff format (unified, context, n)",
                  default='n')
parser.add_option("-f", "--force", action="store_true",
                  help="Overwrite files even if no change detected")
parser.add_option("-l", "--language", help="Specify which languages to work on")
parser.add_option("-r", "--resource", help="Specify which resources to work on")
parser.add_option("-s", "--source", help="Work on source files (same as -l en)",
                  action="store_true")
parser.add_option("-t", "--translations", help="Work on translations",
                  action="store_true")
parser.add_option("-a", "--auto_fix", action="store_true",
                  help="Apply some automatic fixes to punctuation")

# Parsing happens at import time, so sys.argv is read as soon as the module
# is loaded.
(options, args) = parser.parse_args()
cmd = args[0] if args else ''
34
# Path layout. Everything is derived from the location of this script: the
# source directory is the parent of the directory holding the script, and the
# git root is two levels above the source directory.

# Absolute path to the source directory
tx_abs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Absolute path to the git root
git_root = os.path.abspath(os.path.join(tx_abs_path, "..", ".."))

# Relative path from the git root to the transifex directory
tx_rel_path = os.path.join('crawl-ref', 'source')

# Absolute path to the transifex config file
tx_config = os.path.join(tx_abs_path, '.tx', 'config')

# Relative path from the source directory to the descript directory
descript_tx_path = os.path.join('dat', 'descript')

# Relative path from the git root to the descript directory
descript_git_path = os.path.join(tx_rel_path, descript_tx_path)

# Absolute path to the descript directory
descript_abs_path = os.path.join(tx_abs_path, descript_tx_path)
55
# Resource files are addressed with paths relative to the descript directory,
# so make it the working directory. If it cannot be entered, abort and let
# sys.exit() print the OS error message.
try:
    os.chdir(descript_abs_path)
except OSError as e:
    sys.exit(e)
60
sep_re = re.compile('[, ]+') # basic separator for user input
txt_sep_re = re.compile('%{4,}') # txt file entry separator
# Raw string: "\w" is not a valid string escape and must reach re untouched.
cmd_re = re.compile(r'<(\w)>') # used to find the key in menu command strings
# Those languages have special wrapping with fullwidth character support
east_asian_languages = {'ja_JP', 'ko_KR', 'zh_CN'}
# Subset of the above whose text is written without spaces between words
no_space_languages = {'ja_JP', 'zh_CN'}
east_asian_punctuation = u'、。,!:;)'

# This object serves as an intermediate step between txt and ini files.
# Entries are in a raw format: no wrapping, every new line is significant.
# they are indexed by [(lang, res)][key] and are of type Entry
raw_entries = defaultdict(OrderedDict)
73
74# Main commands
def wrap_txt():
    """Main command: load the txt files, merge them, then update them.

    Calls load_files(), merge_files() and update() on txt_files, in that
    order, and finally assigns txt_files to menu.res_files."""
    txt_files.load_files()
    txt_files.merge_files()
    txt_files.update()
    menu.res_files = txt_files
80
def create_ini():
    """Main command: update the ini files from the txt files.

    The txt files are loaded and merged first, then the ini files are loaded
    and updated. Finally ini_files is assigned to menu.res_files."""
    txt_files.load_files()
    txt_files.merge_files()
    ini_files.load_files()
    ini_files.update()
    menu.res_files = ini_files
87
def merge_ini():
    """Main command: update the txt files from the ini files.

    The txt files are loaded, then the ini files are loaded and merged while
    res_index.en_src is switched off. Finally the txt files are updated and
    txt_files is assigned to menu.res_files."""
    txt_files.load_files()
    res_index.en_src = False # For en, load the fake translations
    try:
        ini_files.load_files()
        ini_files.merge_files()
    finally:
        # Restore the flag even if loading or merging fails, otherwise every
        # later command would keep mapping 'en' to the fake translations.
        res_index.en_src = True
    txt_files.update()
    menu.res_files = txt_files
96
def setup_transifex():
    """Initialize the transifex config file

    Runs `tx init` from the source directory, then registers one `dcss.<res>`
    resource per default resource with `tx set`. The working directory is
    always switched back to the descript directory afterwards."""

    os.chdir(tx_abs_path)
    try:
        call_tx(['init'])
        tx_set = ['set', '--auto-local', '-s', 'en_AU', '-t', 'INI',
                  '--execute']
        for res in res_index.default_resources:
            res_file = res + '.ini'
            source_file = os.path.join(descript_tx_path, res_file)
            expr = os.path.join(descript_tx_path, '<lang>', res_file)
            call_tx(tx_set + ['-r', 'dcss.' + res, expr, '-f', source_file])
    finally:
        # The rest of the script uses paths relative to the descript
        # directory, so go back there even when a tx call raises.
        os.chdir(descript_abs_path)
109
def call_tx(args, silent = False):
    """wrapper to call the transifex client

    args is the list of arguments given to the client. When silent is true,
    the standard error of the client is discarded. Returns the exit status of
    the client."""

    # On windows, the tx script is run through python from the Scripts
    # directory located next to the interpreter.
    if sys.platform == 'win32':
        python_path = os.path.split(sys.executable)[0]
        tx_path = os.path.join(python_path, 'Scripts', 'tx')
        command = ['python', tx_path] + args
    else:
        command = ['tx'] + args

    if not silent:
        return call(command)

    # Close the null device once the client has exited instead of leaking
    # one file handle per silent call.
    with open(os.devnull, 'wb') as devnull:
        return call(command, stderr=devnull)
125
126# Utility functions
def title(text):
    """Frame a string with '#' characters. Used as a heading when showing a
    diff. The result is three lines long and ends with a newline."""
    banner = "".join(("### ", text, " ###"))
    border = "#" * len(banner)
    return "\n".join((border, banner, border, ""))
133
def unwrap(text, no_space):
    """Mostly replicates libutil.cc:unwrap_desc

    Joins wrapped lines back together. Line breaks are kept for runs of empty
    lines, before lines starting with a space and before lua separators.
    When no_space is true, a space located between two wide characters is
    removed. Returns an empty string for empty input."""
    if not text:
        return ""

    # Protect all consecutive empty lines
    text = re.sub("\n{2,}", lambda m: r'\n' * len(m.group(0)), text)
    text = text.replace("\n ", "\\n ")

    # Don't unwrap lua separator at the beginning of a line
    text = text.replace("\n}}", "\\n}}")
    text = text.replace("\n{{", "\\n{{")

    text = text.replace(u"—\n—", u"——")
    text = text.replace(">\n<", "><")
    text = text.replace("\n", " ")
    # Restore the protected line breaks
    text = text.replace("\\n", "\n")

    # Remove superfluous spaces surrounded by wide characters
    if no_space:
        pos = text.find(" ")
        while pos != -1:
            # A space at either end of the text has only one neighbour and is
            # kept; checking the bounds also avoids indexing past the end.
            if 0 < pos < len(text) - 1 and wide_char(text[pos - 1]) \
               and wide_char(text[pos + 1]):
                # The next character moves to pos, so don't advance
                text = text[:pos] + text[pos + 1:]
            else:
                pos += 1
            pos = text.find(" ", pos)

    return text
166
def wrap(text, eac, no_space):
    """Wrap long lines using a TextWrapper object

    FW_NS_wrapper is used when no_space is true, FWwrapper when only eac is
    true and wrapper otherwise. Consecutive empty lines are collapsed into
    one. Returns the wrapped lines joined with newlines."""
    lines = []
    for line in text.splitlines():
        if line:
            # This allows breaking lines between tags
            line = line.replace("><", ">\f<")
            if no_space:
                # Need to rstrip the lines because when the wrapper tries to
                # add a single character to the end of the line, it might fail
                # and add an empty string, preventing the removal of whitespace
                lines += [chunk.rstrip() for chunk in FW_NS_wrapper.wrap(line)]
            elif eac:
                lines += [chunk.rstrip() for chunk in FWwrapper.wrap(line)]
            else:
                lines += wrapper.wrap(line)
        elif not lines or lines[-1] != '': # remove consecutive empty lines
            lines += ['']

    lines[:] = [line.replace(">\f<", "><") for line in lines]

    # Languages which have no spaces are split on punctuation which make them
    # sometimes wrapped to the beginning of the next line. Since it's quite
    # ugly, we manually move them back to the end of the previous line.
    if eac or no_space:
        fixed_lines = []
        for line in lines:
            # An empty previous line is a paragraph break: punctuation is
            # never moved onto it (and it has no last character to compare).
            while line and line[0] in east_asian_punctuation and fixed_lines \
                  and fixed_lines[-1] and fixed_lines[-1][-1] != line[0]:
                fixed_lines[-1] += line[0]
                line = line[1:]
                if line:
                    line = line.lstrip()
                else:
                    # The whole line has been moved, drop it
                    line = None
                    break

            if line is not None:
                fixed_lines.append(line)
        lines = fixed_lines

    return "\n".join(lines)
209
def diff(val, new_val):
    """Returns a diff showing the differences between 2 lists of lines

    The format is selected by options.diff ('unified', 'context' or 'n' for
    ndiff); any other value terminates the program. The lines given by the
    callers have no line ending, so the control lines of the unified and
    context formats are generated without one too (lineterm=''), otherwise
    joining the result would show an empty line after each of them."""
    diff_format = options.diff
    if diff_format == 'n':
        delta = difflib.ndiff(val, new_val)
    elif diff_format == 'unified':
        delta = difflib.unified_diff(val, new_val, lineterm='')
    elif diff_format == 'context':
        delta = difflib.context_diff(val, new_val, lineterm='')
    else:
        sys.exit("Invalid diff option: %s" % diff_format)
    return "\n".join(delta)
219
def progress(name, i, n):
    """Generic function for showing the progression of a treatment in percent

    Rewrites the current terminal line with "<name> <percent>%" where the
    percentage is i out of n, rounded down. A newline is written once i
    reaches n. An empty treatment (n == 0) is shown as complete."""
    percent = i * 100 // n if n else 100
    sys.stdout.write("\r%s %d%%" % (name, percent))
    # There is no newline yet, flush so the update is displayed immediately
    sys.stdout.flush()
    if i == n:
        sys.stdout.write("\n")
225
def emphasize(s):
    """Make a string stand out when printed on a terminal.

    The string is wrapped in the control sequences for bright and underlined
    text, followed by a reset. Control characters are not supported under
    windows, so there the string is surrounded by chevrons instead."""
    if system() == 'Windows':
        return '<' + s + '>'
    return u'\033[1m\033[4m%s\033[0m' % s
234
def change_counter(c):
    """Format a counter of changes as a row of space separated columns.

    There is one column per change type of res_index.changes, in sorted
    order. A column shows "type:count" with the count left aligned on 3
    characters, or is filled with spaces of the same width when the count of
    that type is zero."""
    columns = []
    for change_t in sorted(res_index.changes):
        count = c[change_t]
        if count:
            columns.append("%s:%-3d" % (change_t, count))
        else:
            columns.append(" " * (len(change_t) + 4))
    return " ".join(columns)
238
def wide_char(c):
    """Tell whether a character is considered wide: its east asian width is
    W, F or A. The em dash is explicitly never considered wide."""
    if c == u'—':
        return False
    return east_asian_width(c) in 'WFA'
241
def auto_fix(s, lang):
    """Use with care, it can break things

    Applies typographic fixes to the string s and returns the result:
    - hyphens and en dashes surrounded by whitespace become em dashes;
    - the whitespace before ! ? : ; becomes a no-break space;
    - for french, no-break spaces are enforced inside guillemets and before
      ! ? : ; » when missing;
    - ASCII quotes become english typographic quotes, except in strings
      containing lua code;
    - english typographic quotes are replaced by the ones of lang.
    The no-break spaces are written as \\u00a0 escapes so that they can be
    told apart from regular spaces when reading the code."""
    s = auto_fix.re_hyphen.sub(u"\\1—\\2", s)
    s = auto_fix.re_ns.sub(u"\u00a0\\1", s)
    if lang == 'fr': # Those ones can break languages which use »« for quotes
        s = auto_fix.re_ns_opening_quote.sub(u"«\u00a0", s)
        s = auto_fix.re_ns_closing_quote.sub(u"\u00a0»", s)
        s = auto_fix.re_missing_space.sub(u"\u00a0\\1", s)
        s = auto_fix.re_missing_space2.sub(u"«\u00a0", s)

    if s.find('{{') == -1: # Don't mess with lua strings
        s = auto_fix.re_ascii_single_quotes.sub(u"‘\\1’", s)
        s = auto_fix.re_ascii_double_quotes.sub(u"“\\1”", s)

    # replace english quotes by localized ones
    # The order of the 2 substitutions matters: some localized quotes reuse
    # characters of the english ones.
    if lang == 'fr':
        s = auto_fix.re_english_double_quotes.sub(u"«\u00a0\\1\u00a0»", s)
        s = auto_fix.re_english_single_quotes.sub(u"“\\1”", s)
    elif lang == 'de' or lang == 'cs':
        s = auto_fix.re_english_single_quotes.sub(u"‚\\1‘", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1“", s)
    elif lang == 'da':
        s = auto_fix.re_english_single_quotes.sub(u"„\\1“", s)
        s = auto_fix.re_english_double_quotes.sub(u"»\\1«", s)
    elif lang == 'el' or lang == 'es' or lang == 'it' or lang == 'pt':
        s = auto_fix.re_english_double_quotes.sub(u"«\\1»", s)
        s = auto_fix.re_english_single_quotes.sub(u"“\\1”", s)
    elif lang == 'fi':
        s = auto_fix.re_english_single_quotes.sub(u"’\\1’", s)
        s = auto_fix.re_english_double_quotes.sub(u"”\\1”", s)
    elif lang == 'ja':
        s = auto_fix.re_english_single_quotes.sub(u"『\\1』", s)
        s = auto_fix.re_english_double_quotes.sub(u"「\\1」", s)
    elif lang == 'lt':
        s = auto_fix.re_english_single_quotes.sub(u"„\\1”", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1”", s)
    elif lang == 'lv' or lang == 'ru':
        s = auto_fix.re_english_double_quotes.sub(u"«\\1»", s)
        s = auto_fix.re_english_single_quotes.sub(u"„\\1”", s)
    elif lang == 'pl' or lang == 'hu':
        s = auto_fix.re_english_single_quotes.sub(u"»\\1«", s)
        s = auto_fix.re_english_double_quotes.sub(u"„\\1”", s)
    return s

# The patterns are unicode strings so that their non-ASCII characters are
# matched as characters, and backslashes are doubled because "\s" or "\w" are
# not valid string escapes.
auto_fix.re_hyphen = re.compile(u"(\\s)[-–](\\s)") # Replace hyphens by em dashes
auto_fix.re_ns = re.compile(u"\\s([!?:;])")
auto_fix.re_ns_opening_quote = re.compile(u"«\\s")
auto_fix.re_ns_closing_quote = re.compile(u"\\s»")
auto_fix.re_missing_space = re.compile(u"(?<=\\w)([!?:;»](?!\\d))", re.U)
auto_fix.re_missing_space2 = re.compile(u"«(?=\\w)", re.U)

auto_fix.re_ascii_single_quotes = re.compile(u"(?<=\\W)'(.*?)'(?=\\W)", re.S)
auto_fix.re_ascii_double_quotes = re.compile(u'"(.*?)"', re.S)
auto_fix.re_english_single_quotes = re.compile(u'‘([^‚‘’]*?)’', re.S)
auto_fix.re_english_double_quotes = re.compile(u'“(.*?)”', re.S)
297
"""Subclasses to properly handle the wrapping of fullwidth unicode characters,
which take 2 columns when displayed on a terminal.
See http://code.activestate.com/lists/python-list/631628/"""
class FullWidthUnicode(unicode):
    """unicode subclass whose length counts 2 for every wide character (as
    decided by wide_char) and whose slice bounds are adjusted accordingly."""
    def __len__(self):
        # A wide character counts for 2, any other character for 1.
        return sum(2 if wide_char(c) else 1 for c in self)

    def __getslice__(self, i, j):
        # NOTE(review): __getslice__ is only called by Python 2 for simple
        # slices; Python 3 has no such hook -- confirm the target interpreter.
        # Lower the start bound by one for each wide character located
        # before it.
        k = 0
        while k < i:
            if wide_char(self[k]):
                i -= 1
            k += 1
        # Then, starting from the adjusted start bound, lower the end bound by
        # one for each wide character located before it. The scan stops at the
        # real number of characters of the string.
        k = i
        while k < j and k < unicode.__len__(self):
            if wide_char(self[k]):
                j -= 1
            k += 1
        # Slice the underlying unicode string with the adjusted bounds and
        # keep the result in the same class.
        return FullWidthUnicode(unicode.__getslice__(self, i, j))
317
class FullWidthTextWrapper(TextWrapper):
    """TextWrapper which splits the text into FullWidthUnicode chunks.

    It accepts an extra 'no_space' keyword argument. When this keyword is
    present, whatever its value, the wrapper is set up to break lines on
    whitespace and east asian punctuation."""
    def __init__(self, **kwargs):
        if 'no_space' in kwargs:
            # TextWrapper doesn't know this keyword, remove it before
            # forwarding the other ones.
            kwargs.pop('no_space')
            # Those languages don't use spaces. Break lines on punctuation.
            # The pattern also matches an em dash which is followed by
            # another em dash.
            self.wordsep_simple_re = re.compile(u'([\s%s]+)|(—)(?=—)' % east_asian_punctuation)
        TextWrapper.__init__(self, **kwargs)

    def _split(self, text):
        # Convert each chunk returned by TextWrapper._split to
        # FullWidthUnicode.
        return map(FullWidthUnicode, TextWrapper._split(self, text))
328
class ResourceIndex():
    """Class which holds current language / resource settings and serves as an
    iterator for ResourceCollection.
    self.changes holds a list of the types of change currently selected
    (changed, new or removed). This is used to select which value to
    display when iterating through entries for showing a diff or writing a
    resource file.
    Note that not selecting "removed" only affects diffs and temporary files
    created for editing. When writing the resource file, removed keys are never
    written no matter what is in the changes array."""

    def __init__(self):
        """Build the default indexes from the content of the current
        directory, then apply the selection given on the command line."""
        self.default_languages = [ 'en' ]
        self.default_resources = []
        self.languages = []
        self.resources = []
        self.en_src = True # When True, the english language maps to the source
                           # files. When False, it maps to the fake translations
        self.changes = []
        lang_re = re.compile("[a-z]{2}_[A-Z]{2}")

        # Initialize languages with directories in the descript dir
        # and resource with txt files
        for f in sorted(os.listdir('.')):
            (basename, ext) = os.path.splitext(f)
            if ext.lower() == '.txt':
                self.default_resources.append(basename)
            elif os.path.isdir(f) and lang_re.match(f):
                self.default_languages.append(f)
                # Make sure the directory named after the first 2 letters of
                # the language exists (TxtFile uses it as its lang_dir).
                if not os.path.exists(f[:2]):
                    os.makedirs(f[:2])

        # Language selection: -s has precedence over -l, which has precedence
        # over -t. Without any of them, all languages are selected.
        if options.source:
            self.languages = ['en']
        elif options.language:
            self.set_languages(options.language)
        elif options.translations:
            # Everything but 'en', which is the first element
            self.languages = self.default_languages[1:]
        else:
            self.languages = self.default_languages[:]

        if options.resource:
            self.set_resources(options.resource)
        else:
            self.resources = self.default_resources[:]

    def __iter__(self):
        """Iterate over the (language, resource) tuples of the selection.
        When en_src is true, the language of the tuples for 'en' is an empty
        string."""
        return iter([('',r) if self.en_src and l == 'en' else (l, r) \
                    for l in self.languages for r in self.resources])

    def __len__(self):
        """Number of (language, resource) combinations in the selection"""
        return len(self.languages) * len(self.resources)

    def __str__(self):
        """One line for the languages and one for the resources. Each line
        either lists the selected elements, or shows "All" and their number
        when is_default() is true."""
        s = ''
        for index_t in "languages", "resources":
            index = getattr(self, index_t)
            s += index_t.title() + ": "
            if self.is_default(index_t):
                s += "All (%d)\n" % len(index)
            else:
                s += ", ".join(index) + "\n"
        return s

    def is_default(self, index_t):
        """Returns true when the index named index_t ('languages' or
        'resources') has as many elements as its default counterpart. Only
        the lengths are compared."""
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)
        return len(index) == len(default_index)

    def print_index(self, index_t, only_selected = False):
        """Print the index named index_t. When only_selected is true, only
        the selected elements are printed. Otherwise all the default elements
        are printed and the selected ones are emphasized."""
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)
        if only_selected:
            idx_l = index
        else:
            idx_l = [emphasize(i) if i in index else i for i in default_index]
        print "%s: %s" % (index_t.title(), ", ".join(idx_l))

    def set_index(self, index_t, opt):
        """When opt is True, the method is being called during program startup
        with the option value as argument. This reduce the verbosity compared to
        calling it in interactive mode."""

        if not opt:
            self.print_index(index_t)
        index = getattr(self, index_t)
        default_index = getattr(self, "default_" + index_t)

        if opt:
            a = opt
        else:
            a = raw_input("Select %s (Empty reset to defaults): " % index_t)

        # Empty the current selection in place, then fill it again from the
        # elements of the input, which are separated by commas and/or spaces.
        del index[:]
        for i in sep_re.split(a):
            if i in default_index:
                index.append(i)
            elif i:
                # Not an exact name: accept it as a prefix when it matches
                # exactly one of the default elements.
                matches = [m for m in default_index if m.startswith(i)]
                if len(matches) == 1:
                    index.append(matches[0])
                elif not matches:
                    # index_t[:-1] drops the final 's' of the index name
                    print >> sys.stderr, "Invalid %s: %s" % (index_t[:-1], i)
                else:
                    print >> sys.stderr, "Multiple matches for %s: %s" \
                                         % (i, ", ".join(matches))

        # Nothing valid was selected: fall back to a copy of the defaults
        if not index:
            setattr(self, index_t, default_index[:])
            print "Reset %s to default" % index_t
        elif not opt:
            print
            self.print_index(index_t, True)

    def set_languages(self, opt = ''):
        """Select the languages, see set_index"""
        self.set_index('languages', opt)

    def set_resources(self, opt = ''):
        """Select the resources, see set_index"""
        self.set_index('resources', opt)

    def set_changes(self, change_t_list):
        """Replace the list of selected change types"""
        self.changes = change_t_list

    def get_index(self, index_t):
        """Returns the first selected element of the index named index_t"""
        return getattr(self, index_t)[0]

    def next_index(self, index_t):
        """Replace the selection of the index named index_t by the single
        default element which follows its first selected element, wrapping
        around to the first default element after the last one."""
        element = self.get_index(index_t)
        default_index = getattr(self, "default_" + index_t)
        if default_index[-1] == element:
            setattr(self, index_t, [default_index[0]])
        else:
            setattr(self, index_t, [default_index[default_index.index(element) + 1]])
462
class Entry():
    """Raw entry: a text value along with an ordered mapping of tags. The
    elements of raw_entries are instances of this class."""
    def __init__(self):
        self.tags = OrderedDict()
        self.value = ''

    def __getitem__(self, key):
        # Reading a tag which isn't set gives an empty string
        return self.tags.get(key, '')

    def __setitem__(self, key, value):
        self.tags[key] = value
477
class TxtEntry():
    """Accumulator for the entry being parsed while reading a txt file.
    Instances are never stored: save() copies their content in the resource
    file object and resets them."""
    def __init__(self):
        self.key = self.value = ""
        self.key_comment = self.value_comment = ""

    def save(self, res_file):
        """Store the value, and the comments which aren't empty, in res_file
        under the current key. The entry is then reset to be used again."""
        key = self.key
        res_file.entries[key] = self.value
        for attr in ("key_comment", "value_comment"):
            comment = getattr(self, attr)
            if comment:
                getattr(res_file, attr)[key] = comment
        self.__init__()
494
class ResourceFile():
    """Holds all the logic which is common between txt and ini files.
    self.entries hold the dictionary of key/value read from the file. It is
    initialized in the subclasses because source files use an OrderedDict.
    self.diff have a dictionary per change type with the new values.
    Subclasses must set self.entries, self.source_res, self.ext and
    self.lang_dir before calling __init__, and provide format_entry(),
    header(), separator() and raw_entry()."""
    def __init__(self, lang, res):
        self.diff = defaultdict(dict)
        self.language = lang
        self.resource = res
        self.path = res + "." + self.ext
        self.path = os.path.join(self.lang_dir, self.path)
        # Path from the git root, always with forward slashes
        self.git_path = os.path.join(descript_git_path, self.path).replace("\\", "/")
        self.mtime = 0
        self.modified = False
        self.staged = False
        self.new = False

    def __setitem__(self, key, value):
        """Called by the subclass which has already done the conversion.
        Determine the change type and store the new value in the appropriate
        dict of self.diff. Nothing is stored when the value is unchanged."""
        if key not in self.entries:
            change_t = 'new'
        elif value != self.entries[key]:
            change_t = 'changed'
        else:
            return
        self.diff[change_t][key] = value

        # If the key was previously removed and edited back in
        # delete it from the 'removed' dict. get() is used because self.diff
        # is a defaultdict: indexing it would create an empty 'removed' dict.
        self.diff.get('removed', {}).pop(key, None)

    def items(self, diff_only):
        """Returns an iterator to a list of (key, value) tuples, depending on
        what is selected in res_index.changes and what is found in self.diff.
        When diff_only is true, only return changed or new values (for diff and
        edit). When it is false, return the original value for unchanged ones
        (for writing file)."""

        items = []
        removed = self.diff.get('removed', {})
        for key in self.source_keys():
            found_diff = False
            for change_t in res_index.changes:
                if change_t == 'removed' or change_t not in self.diff: continue
                if key in self.diff[change_t]:
                    items.append((key, self.diff[change_t][key]))
                    found_diff = True
            if not found_diff and not diff_only and key in self.entries \
               and key not in removed:
                items.append((key, self.entries[key]))

        return iter(items)

    def diff_count(self):
        """Returns a Counter object representing what's in self.diff. Change
        types without any key are left out."""
        return Counter({change_t: len(keys)
                        for change_t, keys in self.diff.items() if keys})

    def lang(self):
        """Source files have self.language empty, but they are in english"""
        return self.language if self.language else 'en'

    def clear(self, keep_entries = False):
        """Forget the pending changes and, unless keep_entries is true, the
        entries read from the file."""
        if not keep_entries:
            self.entries.clear()
        self.diff.clear()

    def changed(self):
        """Returns true if there are pending change for the file depending on
        what is selected in res_index.changes"""
        # A change type counts only when it holds at least one key: an empty
        # dict can be left in self.diff after its last key has been deleted.
        return any(self.diff.get(change_t) for change_t in res_index.changes)

    def source_keys(self):
        """Returns an ordered list of the keys of the source corresponding to
        this resource file. This list is used as a reference when iterating
        through keys. It helps keep the order consistent and translations can't
        exist if there isn't a source associated to them anyway."""
        # Always a new list, so callers can't alter the entries of the source
        keys = list(self.source_res.entries.keys())

        # To allow submitting new quotes from another resource, they are sorted
        if self.resource == 'quotes' and 'new' in self.diff:
            keys = sorted(set(keys).union(self.diff['new']))
        return keys

    def diff_txt(self, diff_format):
        """When diff_format is True, returns a string with a diff for each new
        or changed entry. When it is False, returns the new value instead (for
        editing purpose). Removed entries are appended when 'removed' is
        selected in res_index.changes."""
        diff_txt = ''
        for (key, value) in self.items(True):
            if key in self.entries:
                orig = self.format_entry(key, self.entries[key])
            else:
                orig = ""
            value = self.format_entry(key, value)
            if diff_format:
                diff_txt += diff(orig.splitlines(), value.splitlines()) + "\n"
            else:
                diff_txt += value
            diff_txt += self.separator()

        if 'removed' in res_index.changes and 'removed' in self.diff:
            for k, v in self.diff['removed'].items():
                value = self.format_entry(k, v)
                if diff_format:
                    diff_txt += diff(value.splitlines(), []) + "\n"
                else:
                    diff_txt += value
                diff_txt += self.separator()

        return diff_txt

    def load(self):
        """Read the file, unless something has already been loaded"""
        if not self.entries and not self.diff:
            self.read_file()

    def read_file(self):
        """Called by the subclasses to handle the basic checks. Returns the
        content of the file (list of lines) to the subclass which does the
        actual parsing. An empty list is returned when the file doesn't exist
        or hasn't changed since it was last read."""
        if not os.path.exists(self.path):
            return []

        # If the corresponding source file isn't loaded we load it first
        if self.language and not self.source_keys():
            self.source_res.read_file()

        # Don't reload the file if it hasn't changed since we loaded it before.
        file_mtime = os.stat(self.path).st_mtime
        if self.mtime == file_mtime:
            self.clear(True)
            return []

        self.clear()
        self.mtime = file_mtime

        # Close the file as soon as its content has been read
        with codecs.open(self.path, encoding='utf-8') as f:
            return f.readlines()

    def merge_file(self):
        """Iterate through the entries loaded from the file, convert them in a
        raw format and store them in raw_entries"""
        entries = raw_entries[(self.lang(), self.resource)]
        entries.clear()
        for (key, value) in self.entries.items():
            entries[key] = self.raw_entry(value)

    def update(self, update_removed_keys = True):
        """Update the resource file with the content of raw_entries. New values
        will be converted in the resource format and stored in the appropriate
        diff dictionary by the __setitem__ methods"""
        entries = raw_entries[(self.lang(), self.resource)]
        for key in self.source_keys():
            if key not in entries: continue
            self[key] = entries[key]
        # The english txt files are the reference, nothing is removed from them
        if update_removed_keys and (self.lang() != 'en' or self.ext == 'ini'):
            self.update_removed_keys()

    def write_file(self):
        """Write the content of the resource to a file"""
        # The with statement makes sure everything is flushed to disk and the
        # file is closed before the resource is flagged as modified.
        with codecs.open(self.path, "w", encoding='utf-8') as f:
            f.write(self.header())
            for key, e in self.items(False):
                f.write(self.format_entry(key, e))
                f.write(self.separator())
        self.modified = True
        # Force the next read_file() to reload the file
        self.mtime = 0

    def update_removed_keys(self):
        """If the resource has keys which are not present in the source, they
        will be removed. Store them in self.diff['removed'] to show them in diff
        and allow editing (useful to fix renamed keys)."""
        entries = raw_entries[(self.lang(), self.resource)]
        # The source keys don't change during the loop, compute them once
        source_keys = set(self.source_keys())
        for k, v in self.entries.items():
            if k not in source_keys or k not in entries:
                self.diff['removed'][k] = v

    def edit_file(self):
        """Create a temporary file with the values of the changed keys, start
        a text editor, then load the file. Returns False when the editor
        can't be started, True otherwise. The temporary file is always
        deleted."""
        tmp = NamedTemporaryFile(prefix=self.language + "-" + self.resource,
                                 suffix="." + self.ext, delete=False)
        try:
            try:
                tmp.file.write(self.diff_txt(False).encode('utf-8'))
            finally:
                # Must be closed before the editor opens it
                tmp.file.close()
            EDITOR = os.environ.get('EDITOR','vim')
            try:
                call([EDITOR, tmp.name])
            except OSError:
                sys.stderr.write("Cannot start text editor. "
                                 "Set the EDITOR environment variable.\n")
                return False
            # Parse the edited file with a throwaway resource of the same
            # type, which stores the result in raw_entries.
            tmp_res = self.__class__(self.language, self.resource)
            tmp_res.path = tmp.name
            tmp_res.read_file()
            tmp_res.merge_file()
        finally:
            os.remove(tmp.name)
        self.update(False)
        return True
704
class TxtFile(ResourceFile):
    """Subclass of ResourceFile to handle files in crawl's native format of
    description files."""
    def __init__(self, lang, res):
        if lang:
            # Translation: the source is the txt file of the same resource
            # without language, and the file is in the directory named after
            # the first 2 letters of the language.
            self.entries = dict()
            self.source_res = txt_files[('', res)]
            self.lang_dir = lang[:2]
        else:
            # Source file: it is its own source and the order of its entries
            # is kept.
            self.entries = OrderedDict()
            self.source_res = self
            self.lang_dir = ''
        # Comment lines found before the key / after the key of the entries
        self.key_comment = dict()
        self.value_comment = dict()
        self.ext = 'txt'
        self.eac = lang in east_asian_languages
        self.no_space = lang in no_space_languages
        ResourceFile.__init__(self, lang, res)

    def __setitem__(self, key, entry):
        """Converts a generic entry in txt format then calls the base class
        __setitem__ method to store it in the appropriate self.diff dict"""
        value = ""
        for tag, tag_value in entry.tags.items():
            # If it has a quote tag, we store the new quote in its own entry
            if tag == 'quote':
                e = Entry()
                e.value = tag_value
                quote_res = txt_files[(self.language, 'quotes')]
                quote_res.load()
                quote_res[key] = e

                # add the quote resource to the index
                if 'quotes' not in res_index.resources:
                    res_index.resources.append('quotes')

                # If we're adding a foreign quote and the source doesn't have
                # one, we also create it in the corresponding source
                if self.language and key not in quote_res.source_res.entries:
                    en_quote_res = txt_files[('', 'quotes')]
                    en_quote_res.load()
                    en_quote_res[key] = e

                    # Add english to the index
                    if 'en' not in res_index.languages:
                        res_index.languages.insert(0, 'en')

            elif tag_value is True:
                # Tag without value
                value += ":%s\n" % tag
            else:
                value += ":%s %s\n" % (tag, tag_value)

        if options.auto_fix:
            raw_value = auto_fix(entry.value, self.lang())
        else:
            raw_value = entry.value

        if entry['nowrap']:
            value += raw_value
        else:
            value += wrap(raw_value, self.eac, self.no_space)

        value += "\n"
        ResourceFile.__setitem__(self, key, value)

    def format_entry(self, key, value):
        """Convert the key/value pair in crawl's native desc format"""
        ret = self.key_comment.get(key, "")
        ret += key + "\n\n"
        ret += self.value_comment.get(key, "")
        ret += value
        return ret

    def header(self):
        """Added to the beginning of the file"""
        return self.separator()

    def separator(self):
        """Separate entries in the file"""
        return "%%%%\n"

    def raw_entry(self, value):
        """Convert a value in txt format to a raw entry."""
        e = Entry()
        for line in value.splitlines():
            # A line starting with a colon not followed by a space is a tag,
            # with an optional value after the first space.
            if len(line) > 1 and line[0] == ':' and line[1] != ' ':
                l = line[1:].rstrip().split(' ', 1)
                e[l[0]] = l[1] if len(l) == 2 else True
            else:
                e.value += line + "\n"
        e.value = e.value.rstrip()
        if not e['nowrap']:
            e.value = unwrap(e.value, self.no_space)
        return e

    def read_file(self):
        """Parse the content of a txt file and stores it in self.entries.
        Returns the number of entries."""
        te = TxtEntry()
        for line in ResourceFile.read_file(self):
            if line[0] == '#':
                # Comments are attached to the key or to the value depending
                # on whether the key has already been read.
                if te.key:
                    te.value_comment += line
                else:
                    te.key_comment += line
            elif txt_sep_re.match(line):
                if te.key:
                    te.save(self)
            elif line[0] == '\n' and not te.value:
                # Skip the empty lines located before the value
                continue
            elif not te.key:
                te.key = line.strip()
            else:
                te.value += line

        # The last entry might not be followed by a separator
        if te.key:
            te.save(self)

        return len(self.entries)

    def search_removed_keys(self):
        """Look for removed keys, except in source files."""
        # No removed key in the source, it's the reference
        if self.language:
            # The method of the base class is named update_removed_keys
            ResourceFile.update_removed_keys(self)
828
class IniFile(ResourceFile):
    """Resource file stored in ini format, the format which is pushed to or
    pulled from transifex.

    Each entry is written as a single "key=value" line. Inside a value, tag
    lines and line breaks are joined with a literal backslash-n sequence
    instead of a real newline."""

    def __init__(self, lang, res):
        self.entries = {}
        # The txt file of the same resource stored under the empty language.
        self.source_res = txt_files[('', res)]
        self.ext = 'ini'
        self.lang_dir = lang
        ResourceFile.__init__(self, lang, res)

    def __setitem__(self, key, e):
        """Convert a generic entry to ini format and hand it to the base
        class __setitem__.

        An entry whose whole value is a single "<...>" link is not stored:
        if the key already exists it is moved to self.diff['removed'] and
        deleted from self.entries."""
        text = e.value

        # A value made of one link only. There's no point in translating it.
        link_only = (len(text) > 1 and text[0] == '<' and text[-1] == '>'
                     and "\n" not in text and "<" not in text[1:])
        if link_only:
            if key in self.entries:
                dropped = self.entries[key]
                self.diff['removed'][key] = dropped
                del self.entries[key]
            return

        # Tags come first, one ":tag" or ":tag value" chunk each, terminated
        # by a literal backslash-n; the entry text follows.
        parts = []
        for tag, tag_value in e.tags.items():
            if tag_value is True:
                parts.append(r":%s\n" % tag)
            else:
                parts.append(r":%s %s\n" % (tag, tag_value))
        parts.append(e.value.replace("\n", r'\n') + "\n")
        ResourceFile.__setitem__(self, key, "".join(parts))

    def header(self):
        """An ini file has no header."""
        return ""

    def separator(self):
        """Nothing is written between the entries of an ini file."""
        return ""

    def format_entry(self, key, value):
        """Render the key/value pair as an ini line"""
        return "%s=%s" % (key, value)

    def read_file(self):
        """Parse the lines of an ini file into self.entries and return the
        number of entries held.

        Empty lines, lines starting with '#' and lines without '=' are
        ignored. In the values, '&quot;' is turned back into a double quote
        and doubled backslashes into single ones."""
        for line in ResourceFile.read_file(self):
            if not line or line[0] == '#' or '=' not in line:
                continue
            key, _, value = line.partition('=')
            self.entries[key] = value.replace('&quot;', '"').replace('\\\\', '\\')

        return len(self.entries)

    def raw_entry(self, value):
        """Build a raw Entry from a value written in ini format.

        The value is split on the literal backslash-n sequence. A ":tag"
        chunk met before any entry text is parsed as "tag" (set to True) or
        "tag value". Once entry text has been read, a ":tag" chunk instead
        opens a tag which collects all the following chunks, joined by
        newlines."""
        entry = Entry()
        open_tag = ''
        for chunk in value.rstrip().split(r'\n'):
            tag_line = len(chunk) > 1 and chunk[0] == ':' and chunk[1] != ' '
            if tag_line and not entry.value:
                fields = chunk[1:].split(' ', 1)
                if len(fields) == 2:
                    entry[fields[0]] = fields[1]
                else:
                    entry[fields[0]] = True
            elif tag_line:
                open_tag = chunk[1:]
            elif open_tag:
                if entry[open_tag]:
                    entry[open_tag] += "\n"
                entry[open_tag] += chunk
            else:
                entry.value += chunk + "\n"
        entry.value = entry.value.rstrip()
        return entry
898
899class ResourceCollection(OrderedDict):
900    """A container class holding a collection of resource files. It uses
901    res_index to iterate through its resources"""
902    def __init__(self):
903        OrderedDict.__init__(self)
904        self.diff_count = Counter()
905        self.git_count = Counter()
906        self.modified = False
907
908    def __iter__(self):
909        return iter([self[res_i] for res_i in res_index])
910
911    def __len__(self):
912        return len(res_index)
913
914    def clear(self):
915        self.diff_count.clear()
916        self.modified = False
917
918    def paths(self):
919        return [res.path for res in self]
920
921    def merge_files(self):
922        for i, res in enumerate(self, start=1):
923            progress("Merging %s files" % self.ext, i, len(self))
924            res.merge_file()
925
926    def load_files(self):
927        self.clear()
928        n_files = n_entries = 0
929        for i, res in enumerate(self, start=1):
930            progress("Loading %s files" % self.ext, i, len(self))
931            n = res.read_file()
932            if n:
933                n_files += 1
934                n_entries += n
935        if n_files:
936            print "Loaded %d entr%s from %d %s file%s" \
937                  % (n_entries, ["y", "ies"][n_entries!=1],
938                     n_files, self.ext, "s"[n_files==1:])
939
940    def update(self):
941        for i, res in enumerate(self, start=1):
942            progress("Updating %s files" % self.ext, i, len(self))
943            res.update()
944        self.update_diff_count()
945
946    def update_diff_count(self):
947        self.diff_count.clear()
948        for res in self:
949            self.diff_count += res.diff_count()
950        res_index.changes = self.diff_count.keys()
951
952    def diff(self, diff_format):
953        diff_text = ''
954        for res in self:
955            if res.changed():
956                diff_text += title(res.path) + "\n"
957                diff_text += res.diff_txt(diff_format) + "\n"
958        return diff_text
959
960    def show_diff(self):
961        diff_text = self.diff(True)
962        try:
963            Popen("less", stdin=PIPE).communicate(diff_text.encode('utf-8'))
964        except OSError:
965            print diff_text
966
967    def edit_files(self):
968        for res in self:
969            if res.changed():
970                if not res.edit_file():
971                    break
972        self.update_diff_count()
973
974    def write_files(self):
975        for res in self:
976            if res.changed() or options.force and list(res.items(False)):
977                res.write_file()
978                for change_t in res_index.changes:
979                    if change_t in res.diff:
980                        del res.diff[change_t]
981        self.update_diff_count()
982
983    def undo_changes(self):
984        for res in self:
985            res.clear(True)
986        self.diff_count.clear()
987
class TxtCollection(ResourceCollection):
    """Collection of txt files. It holds a few git methods.

    Missing keys are created on demand: a key is the tuple of arguments
    passed to the TxtFile constructor."""
    def __init__(self):
        self.ext = 'txt'
        ResourceCollection.__init__(self)

    def __missing__(self, key):
        """Create, store and return the TxtFile for a key not yet loaded."""
        self[key] = TxtFile(*key)
        return self[key]

    def refresh_state(self):
        """Run git status and check the result for each file in the collection

        The modified, staged and new flags of every file are recomputed from
        scratch and git_count is rebuilt accordingly. Does nothing when git
        is not available."""
        if not git: return

        self.git_count.clear()
        status = Popen(["git", "status", "--porcelain"] + self.paths(),
                       stdout=PIPE).communicate()[0]

        # Porcelain lines are "XY path": a two-letter state, a space, then
        # the path.
        git_states = dict()
        for line in status.splitlines():
            git_states[line[3:]] = line[0:2]

        for res in self:
            # Always start from a clean slate, otherwise a flag set by a
            # previous refresh would survive when the state of the file
            # changes (e.g. modified then staged).
            res.modified = res.staged = res.new = False

            st = git_states.get(res.git_path)
            if st is None:
                continue

            if st[0] == 'M' or st[0] == 'A':
                res.staged = True
                self.git_count['staged'] += 1
            if st[1] == 'M':
                res.modified = True
                self.git_count['modified'] += 1
            elif st == '??':
                res.new = True
                self.git_count['new'] += 1

    def git_status(self):
        """Run git status on the files of the collection."""
        call(["git", "status"] + self.paths())

    def git_add_hunks(self):
        """Interactively select the hunks to stage (git add -p)."""
        self.git_add(True)

    def git_add(self, hunks = False):
        """Stage the files flagged as modified or new. With hunks set, git
        is run in patch mode (-p). Nothing is run if no file qualifies."""
        files = [res.path for res in self if res.modified or res.new]

        # Without any path, git add -p would go through the whole repository.
        if not files:
            return

        cmd_list = ['git', 'add']
        if hunks:
            cmd_list.append('-p')
        # '--' makes sure the paths are never parsed as options.
        call(cmd_list + ['--'] + files)

    def git_reset(self):
        """Discard the local changes: files flagged as modified are checked
        out again and files flagged as new are deleted from the disk."""
        files = []
        for res in self:
            if res.modified:
                files.append(res.path)
            elif res.new:
                os.remove(res.path)

        # Nothing to check out when only new files were discarded.
        if not files:
            return

        # '--' makes sure the paths are never parsed as a branch or option.
        call(['git', 'checkout', '--'] + files)
1052
class IniCollection(ResourceCollection):
    """Collection of ini files, with the methods wrapping the push and pull
    commands of the transifex client.

    Missing keys are created on demand: a key is the tuple of arguments
    passed to the IniFile constructor."""

    def __init__(self):
        self.ext = 'ini'
        ResourceCollection.__init__(self)

    def __missing__(self, key):
        """Create, store and return the IniFile for a key not yet loaded."""
        self[key] = IniFile(*key)
        return self[key]

    def refresh_state(self):
        """Flag the collection as modified if any selected file is."""
        self.modified = False
        if any(res.modified for res in self):
            self.modified = True

    def tx_pull(self):
        """Pull from transifex, restricting the pull to the languages and/or
        resources selected in res_index when they are not the default ones.
        The -f flag is added with the force option."""
        pull = ['pull', '-f'] if options.force else ['pull']
        every_lang = res_index.is_default('languages')
        every_res = res_index.is_default('resources')

        if every_lang and every_res:
            # Nothing is filtered: a single call pulls everything.
            call_tx(pull + ['-a'])
            return

        if every_res:
            for lang in res_index.languages:
                call_tx(pull + ['-l', lang])
            return

        if every_lang:
            for res in res_index.resources:
                call_tx(pull + ['-r', 'dcss.' + res])
            return

        # Both are filtered: one call per selected file.
        for res in self:
            call_tx(pull + ['-l', res.lang(), '-r', 'dcss.' + res.resource])

    def tx_push(self):
        """Push every modified file to transifex. The modified flag of a
        file is cleared when its push returned 0. The -f flag is added with
        the force option."""
        push = ['push']
        if options.force:
            push.append('-f')

        for res in self:
            if not res.modified:
                continue

            resource = ['-r', 'dcss.' + res.resource]
            language = ['-l', res.lang()]
            if res.language:
                ret = call_tx(push + ['-t'] + language + resource)
            else:
                # We push the source then reset the fake translation resource
                ret = call_tx(push + ['-s'] + resource)
                if self[('en', res.resource)].entries:
                    answer = raw_input("Reset the %s fake translation (y/n)? " % res.resource).lower()
                    if answer.startswith('y'):
                        call_tx(['delete', '-f'] + language + resource)

            if ret == 0:
                res.modified = False
1107
1108class Menu(OrderedDict):
1109    """Create a simple text based interactive menu.
1110    The inherited OrderedDict is used to store groups of commands
1111    cmds keys are the command hotkey letter, values are either a function or a
1112    list whose first member is the function and the next ones are arguments.
1113    cmd is the command string, it can be used to queue several commands.
1114    res_files points to the current resource file collection which is being
1115    worked on."""
1116
1117    def __init__(self, cmd = ''):
1118        OrderedDict.__init__(self)
1119        self.cmds = dict()
1120        self.cmd = cmd
1121        self.menu_desc = ''
1122        self.res_files = txt_files
1123        self.show_res = len(res_index.languages) == 1
1124
1125    def __missing__(self, key):
1126        self[key] = []
1127        return self[key]
1128
1129    def change_summary(self):
1130        if not self.res_files.diff_count:
1131            print  "No changes\n"
1132            return
1133
1134        print "Change summary:"
1135        lang_total = defaultdict(Counter)
1136        padding_size = 5
1137
1138        for res in self.res_files:
1139            if not res.diff: continue
1140            if self.show_res:
1141                padding_size = max(padding_size, len(res.path))
1142            else:
1143                lang_total[res.lang()] += res.diff_count()
1144
1145        cur_lang = ''
1146        for res in self.res_files:
1147            if not res.diff: continue
1148            lang = res.lang()
1149            if lang != cur_lang and lang_total[lang]:
1150                print "%-*s %s" % (padding_size, lang,
1151                                   change_counter(lang_total[lang]))
1152            cur_lang = lang
1153            if self.show_res:
1154                print "%-*s %s" % (padding_size, res.path,
1155                                   change_counter(res.diff_count()))
1156
1157        print "%-*s %s" % (padding_size, 'Total',
1158                           change_counter(self.res_files.diff_count))
1159        print
1160
1161    def git_summary(self):
1162        if not self.res_files.git_count:
1163            return
1164
1165        padding_size = 5
1166        print "Git status:"
1167        for key, count in self.res_files.git_count.most_common():
1168            print "%-*s: %d" % (padding_size, key, count)
1169        print
1170
1171    def git_commit(self):
1172        call(['git', 'commit', '-e', '-s', '-m', '[Transifex]',
1173              '--author=' + options.commit_author])
1174
1175    def toggle_details(self):
1176        self.show_res = not self.show_res
1177
1178    def set_languages(self):
1179        res_index.set_languages()
1180        self.res_files.update_diff_count()
1181        self.show_res = len(res_index.languages) == 1
1182
1183    def set_resources(self):
1184        res_index.set_resources()
1185        self.res_files.update_diff_count()
1186
1187    def set_changes(self):
1188        """Creates a submenu to select which kind of change to work on."""
1189        submenu = Menu()
1190        lbl = "Select entries"
1191        change_ts = self.res_files.diff_count.keys()
1192        for type in change_ts + ['all']:
1193            cmd_lbl = '<' + type[0] + '>' + type[1:]
1194            if type == 'all':
1195                submenu.add_cmd(lbl, cmd_lbl, [res_index.set_changes, change_ts])
1196            else:
1197                submenu.add_cmd(lbl, cmd_lbl, [res_index.set_changes, [type]])
1198        submenu.build_menu_desc()
1199        submenu.show_menu()
1200
1201    def next_index(self, index_t):
1202        """When only one index is selected (language or resource), this commands
1203        allows to jump to the next one. It will search for one with pending
1204        changes. If none is found after having looped through all of them, we
1205        simply select the next one"""
1206        current = res_index.get_index(index_t)
1207        while 1:
1208            res_index.next_index(index_t)
1209            self.res_files.update_diff_count()
1210            if self.res_files.diff_count or current == res_index.get_index(index_t):
1211                break
1212
1213        # If we haven't found something with a change, we looped. In this case,
1214        # we advance one more time.
1215        if not self.res_files.diff_count:
1216            res_index.next_index(index_t)
1217
1218        res_index.print_index(index_t, True)
1219
1220    def add_cmd(self, group, label, cmd):
1221        """Adds a command to the menu. The label must contain a letter between
1222        chevrons which will be the command hotkey"""
1223        m = cmd_re.search(label)
1224        if not m: sys.exit("Invalid command: %s" % label)
1225        key = m.group(1)
1226        if key in self: sys.exit("Duplicate command for key %s: %s and %s" \
1227                                 % (key, label, self[key]))
1228        if system() != 'Windows':
1229            label = label.replace("<" + key  + ">", emphasize(key))
1230        self[group].append(label)
1231        self.cmds[key] = cmd
1232
1233    def build_main_menu(self):
1234        self.cmds.clear()
1235        self.clear()
1236        self.menu_desc = ''
1237
1238        lbl_cmds = "Commands"
1239        self.add_cmd(lbl_cmds, 'wrap <t>xt files', wrap_txt)
1240        self.add_cmd(lbl_cmds, '<m>erge ini files', merge_ini)
1241        self.add_cmd(lbl_cmds, 'update <i>ni files', create_ini)
1242        self.add_cmd(lbl_cmds, '<q>uit', sys.exit)
1243
1244        lbl_review = "Review changes"
1245        if self.res_files.diff_count or options.force:
1246            self.add_cmd(lbl_review, "<w>rite files", self.res_files.write_files)
1247        if self.res_files.diff_count:
1248            if self.show_res:
1249                self.add_cmd(lbl_review, "<v>iew languages", self.toggle_details)
1250            else:
1251                self.add_cmd(lbl_review, "<v>iew resources", self.toggle_details)
1252            self.add_cmd(lbl_review, "show <d>iff", self.res_files.show_diff)
1253            self.add_cmd(lbl_review, "<e>dit", self.res_files.edit_files)
1254            self.add_cmd(lbl_review, "e<x>punge changes", self.res_files.undo_changes)
1255
1256        lbl_select = "Select"
1257        self.add_cmd(lbl_select, "<l>anguages", self.set_languages)
1258        self.add_cmd(lbl_select, "<r>esources", self.set_resources)
1259        if len(self.res_files.diff_count) > 1:
1260            self.add_cmd(lbl_select, "chan<g>es", self.set_changes)
1261
1262        if len(res_index.resources) == 1:
1263            self.add_cmd(lbl_select, "<n>ext resource", [self.next_index, 'resources'])
1264        elif len(res_index.languages) == 1:
1265            self.add_cmd(lbl_select, "<n>ext language", [self.next_index, 'languages'])
1266
1267        if git:
1268            lbl_git = "Git"
1269            if self.res_files.git_count:
1270                self.add_cmd(lbl_git, "<s>tatus", self.res_files.git_status)
1271            if self.res_files.git_count['modified'] or self.res_files.git_count['new']:
1272                self.add_cmd(lbl_git, "<a>dd", self.res_files.git_add)
1273                self.add_cmd(lbl_git, "select <h>unks", self.res_files.git_add_hunks)
1274                self.add_cmd(lbl_git, "chec<k>out", self.res_files.git_reset)
1275            if self.res_files.git_count['staged']:
1276                self.add_cmd(lbl_git, "<c>ommit", self.git_commit)
1277
1278        if transifex:
1279            lbl_tx = "Transifex"
1280            self.add_cmd(lbl_tx, "<p>ull", ini_files.tx_pull)
1281            if self.res_files.modified:
1282                self.add_cmd(lbl_tx, "p<u>sh", self.res_files.tx_push)
1283
1284        self.build_menu_desc()
1285        print
1286        print self.change_summary()
1287        self.git_summary()
1288
1289    def build_menu_desc(self):
1290        for group, labels in self.items():
1291            self.menu_desc += "%s: %s" % (group, ", ".join(labels)) + "\n"
1292
1293    def show_menu(self):
1294        """It reads the command line argument and treat each letter as a command
1295        When there is no more command, it switches to interactive mode."""
1296        if not self.cmd:
1297            self.cmd = raw_input(self.menu_desc).lower()
1298        choice = self.cmd[:1]
1299        self.cmd = self.cmd[1:]
1300        if choice in self.cmds:
1301            func = self.cmds[choice]
1302            if isinstance(func, list):
1303                # If it's a list, then the first item is the function,
1304                # the other ones are arguments
1305                func[0](*func[1:])
1306            else:
1307                func()
1308        else:
1309            print "Huh?"
1310            self.cmd = ""
1311
1312    def main_menu(self):
1313        print res_index,
1314        while 1:
1315            self.res_files.refresh_state()
1316            self.build_main_menu()
1317            self.show_menu()
1318
# Settings shared by all the text wrappers.
wrapper_args = dict(
    width=79,
    break_on_hyphens=False,
    break_long_words=False,
    replace_whitespace=False)

# Wrapper built on the standard TextWrapper.
wrapper = TextWrapper(**wrapper_args)
# Use hardcoded whitespaces instead of \s because the latter matches
# non-breaking spaces (see textwrap.py:30).
wrapper.wordsep_simple_re_uni = re.compile(r'([%s]+)' % _whitespace)

# Full width wrappers: the second one is built with the same settings plus
# the no_space option, which is added to wrapper_args itself.
FWwrapper = FullWidthTextWrapper(**wrapper_args)
wrapper_args['no_space'] = True
FW_NS_wrapper = FullWidthTextWrapper(**wrapper_args)
1332
# We initialize the resource index early because we might need it if we have to
# initialize the transifex configuration.
res_index = ResourceIndex()

# Can we use the transifex client? Running it raises OSError when it cannot
# be executed.
try:
    call_tx([], True)
    transifex = True
except OSError:
    transifex = False

# Is transifex configured?
if transifex:
    if not os.path.exists(tx_config):
        setup_transifex()

# Can we use git? The output of the probe is discarded, and the handle on the
# null device is closed as soon as the probe is done.
try:
    with open(os.devnull, 'wb') as devnull:
        call(['git'], stdout=devnull)
    git = True
except OSError:
    git = False

# Create the global variables for managing resources.
txt_files = TxtCollection()
ini_files = IniCollection()

# Start the menu with the commands queued on the command line, if any.
menu = Menu(cmd)
menu.main_menu()
1361