1#
2# Copyright 2004-2006,2008-2010 Zuza Software Foundation
3#
4# This file is part of the Translate Toolkit.
5#
6# This program is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Insert debug messages into XLIFF and Gettext PO localization files.
20
21See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/podebug.html
22for examples and usage instructions.
23"""
24
25import os
26import re
27from hashlib import md5
28
29from translate.convert import dtd2po
30from translate.storage import factory
31from translate.storage.placeables import StringElem, general, parse as rich_parse
32
33
def add_prefix(prefix, stringelems):
    """Prepend ``prefix`` to the first non-empty leaf of every element.

    Each item of ``stringelems`` is flattened; the first flattened node whose
    ``sub`` list is not empty gets ``prefix`` prepended to its first
    sub-item. Elements without such a node are left untouched.

    :param prefix: the text to prepend.
    :param stringelems: an iterable of objects offering ``flatten()``.
    :return: the same ``stringelems`` object, modified in place.
    """
    for elem in stringelems:
        # Only the first node that actually holds content receives the prefix.
        target = next((node for node in elem.flatten() if len(node.sub) > 0), None)
        if target is not None:
            target.sub[0] = prefix + target.sub[0]
    return stringelems
41
42
# Parsers used when a unit's strings still have to be turned into rich strings.
# NOTE: this name is bound to ``general.parsers`` itself (no copy is made), so
# the removals below modify that list in place.
podebug_parsers = general.parsers
for _unwanted_parser in (
    general.CapsPlaceable.parse,
    general.CamelCasePlaceable.parse,
):
    podebug_parsers.remove(_unwanted_parser)
del _unwanted_parser
46
47
class podebug:
    """Insert debug markers into, and optionally rewrite, translation units.

    A *format* string is expanded per store into a prefix that is prepended
    to every translatable unit. A *rewrite style* (any ``rewrite_<name>``
    method) may additionally transform the text, and an *ignore option* (any
    ``ignore_<name>`` method) may exclude units from processing.

    Public methods named ``rewrite_*`` and ``ignore_*`` are discovered by name
    (see :meth:`rewritelist` and :meth:`ignorelist`), so private helpers must
    not use those prefixes.
    """

    # Number of hex digits of the MD5 digest used for ``%h`` when the format
    # string does not give an explicit length.
    DEFAULT_HASH_LEN = 4
    # Class-level default so that convertunit() can be used with a hash
    # placeholder even if convertstore() never processed a ``%h`` option.
    hash_len = DEFAULT_HASH_LEN

    def __init__(
        self,
        format=None,
        rewritestyle=None,
        ignoreoption=None,
        preserveplaceholders=False,
    ):
        """Configure the debugger.

        :param format: the prefix format string (``None`` means no prefix).
        :param rewritestyle: name of a ``rewrite_<name>`` method, or ``None``.
        :param ignoreoption: name of an ``ignore_<name>`` method, or ``None``.
        :param preserveplaceholders: when true, character-level rewrites leave
            text matched by ``PRESERVE_PLACEABLE_PARSERS`` untouched.
        """
        self.format = "" if format is None else format
        # Unknown (or None) names resolve to None, which disables the feature.
        self.rewritefunc = getattr(self, "rewrite_%s" % rewritestyle, None)
        self.ignorefunc = getattr(self, "ignore_%s" % ignoreoption, None)
        self.preserveplaceholders = preserveplaceholders

    def apply_to_translatables(self, string, func):
        """Applies func to all translatable strings in string."""
        string.map(
            lambda e: e.apply_to_strings(func),
            lambda e: e.isleaf() and e.istranslatable,
        )

    @classmethod
    def rewritelist(cls):
        """Return the names of the available rewrite styles."""
        marker = "rewrite_"
        # Strip only the leading marker; str.replace would also remove any
        # later occurrence of it inside a method name.
        return [name[len(marker) :] for name in dir(cls) if name.startswith(marker)]

    def _rewrite_prepend_append(self, string, prepend, append=None):
        """Wrap ``string`` in ``prepend`` and ``append`` (default: ``prepend``).

        A trailing newline is kept at the very end, i.e. ``append`` is
        inserted before it.

        :return: the (possibly newly created) ``StringElem``.
        """
        if append is None:
            append = prepend
        if not isinstance(string, StringElem):
            string = StringElem(string)
        string.sub.insert(0, prepend)
        if str(string).endswith("\n"):
            # Try and remove the last character from the tree
            try:
                lastnode = string.flatten()[-1]
                if isinstance(lastnode.sub[-1], str):
                    lastnode.sub[-1] = lastnode.sub[-1].rstrip("\n")
            except IndexError:
                # Best effort: nothing to strip the newline from.
                pass
            string.sub.append(append + "\n")
        else:
            string.sub.append(append)
        return string

    def rewrite_xxx(self, string):
        """Surround the string with ``xxx`` markers."""
        return self._rewrite_prepend_append(string, "xxx")

    def rewrite_bracket(self, string):
        """Surround the string with square brackets."""
        return self._rewrite_prepend_append(string, "[", "]")

    def rewrite_en(self, string):
        """Keep the text unchanged, only ensuring it is a ``StringElem``."""
        if not isinstance(string, StringElem):
            string = StringElem(string)
        return string

    def rewrite_blank(self, string):
        """Discard the text and return an empty ``StringElem``."""
        return StringElem("")

    def rewrite_chef(self, string):
        """Rewrite using Mock Swedish as made famous by the Muppets' Swedish Chef"""
        if not isinstance(string, StringElem):
            string = StringElem(string)
        # From Dive into Python which itself got it elsewhere
        # http://www.renderx.com/demos/examples/diveintopython.pdf
        subs = (
            (r"a([nu])", r"u\1"),
            (r"A([nu])", r"U\1"),
            (r"a\B", r"e"),
            (r"A\B", r"E"),
            (r"en\b", r"ee"),
            (r"\Bew", r"oo"),
            (r"\Be\b", r"e-a"),
            (r"\be", r"i"),
            (r"\bE", r"I"),
            (r"\Bf", r"ff"),
            (r"\Bir", r"ur"),
            (r"(\w*?)i(\w*?)$", r"\1ee\2"),
            (r"\bow", r"oo"),
            (r"\bo", r"oo"),
            (r"\bO", r"Oo"),
            (r"the", r"zee"),
            (r"The", r"Zee"),
            (r"th\b", r"t"),
            (r"\Btion", r"shun"),
            (r"\Bu", r"oo"),
            (r"\BU", r"Oo"),
            (r"v", r"f"),
            (r"V", r"F"),
            # NOTE(review): the next two substitutions are no-ops as written;
            # they are kept so the output stays unchanged - confirm intent.
            (r"w", r"w"),
            (r"W", r"W"),
            (r"([a-z])[.]", r"\1. Bork Bork Bork!"),
        )
        # The substitutions are applied one after the other, so their order
        # matters. The pair is bound as lambda defaults so the callable never
        # depends on the loop variables' later values.
        for pattern, replacement in subs:
            self.apply_to_translatables(
                string,
                lambda s, pattern=pattern, replacement=replacement: re.sub(
                    pattern, replacement, s
                ),
            )
        return string

    # These parsers extract placeholders that should NOT be transformed during character-level rewrites
    # when the preserveplaceholders flag is True. It is not the full set of placeable parsers available
    # as some of them are not appropriate for this usage.
    PRESERVE_PLACEABLE_PARSERS = [
        general.UrlPlaceable.parse,
        general.EmailPlaceable.parse,
        general.XMLTagPlaceable.parse,
        general.DoubleAtPlaceable.parse,
        general.BracePlaceable.parse,
        general.PythonFormattingPlaceable.parse,
    ]

    def transform_characters_preserving_placeholders(self, s, transform):
        """Apply ``transform`` to each character of ``s`` outside placeholders.

        ``s`` is parsed with ``PRESERVE_PLACEABLE_PARSERS``; translatable leaf
        elements are transformed character by character, while for every other
        leaf its first sub-item is copied through unchanged.

        :param s: the plain string to transform.
        :param transform: a callable mapping one character to its replacement.
        :return: the transformed string.
        """
        rich_string = rich_parse(s, self.PRESERVE_PLACEABLE_PARSERS)
        leaves = rich_string.depth_first(filter=lambda e: e.isleaf())

        pieces = []
        for element in leaves:
            if element.istranslatable:
                pieces.extend(transform(character) for character in str(element))
            else:
                pieces.append(element.sub[0])
        return "".join(pieces)

    # Replacements for the code points from "A" up to and including "z".
    REWRITE_UNICODE_MAP = (
        "ȦƁƇḒḖƑƓĦĪĴĶĿḾȠǾƤɊŘŞŦŬṼẆẊẎẐ" + "[\\]^_`" + "ȧƀƈḓḗƒɠħīĵķŀḿƞǿƥɋřşŧŭṽẇẋẏẑ"
    )

    def rewrite_unicode(self, string):
        """Convert to Unicode characters that look like the source string"""
        if not isinstance(string, StringElem):
            string = StringElem(string)

        charmap = self.REWRITE_UNICODE_MAP
        first = ord("A")

        def transpose(char):
            loc = ord(char) - first
            # Bound by the table length so that its last entry ("z") is used
            # too; the previous hard-coded limit stopped one entry short.
            if 0 <= loc < len(charmap):
                return charmap[loc]
            return char

        def transformer(s):
            if self.preserveplaceholders:
                return self.transform_characters_preserving_placeholders(s, transpose)
            return "".join(transpose(c) for c in s)

        self.apply_to_translatables(string, transformer)
        return string

    # Replacements for the code points from "!" up to and including "z".
    REWRITE_FLIPPED_MAP = (
        "¡„#$%⅋,()⁎+´-˙/012Ɛᔭ59Ƚ86:;<=>¿@"
        + "∀ԐↃᗡƎℲ⅁HIſӼ⅂WNOԀÒᴚS⊥∩ɅMX⅄Z"
        + "[\\]ᵥ_,"
        + "ɐqɔpǝɟƃɥıɾʞʅɯuodbɹsʇnʌʍxʎz"
    )
    # Brackets should be swapped if the string will be reversed in memory.
    # If a right-to-left override is used, the brackets should be
    # unchanged.
    # Some alternatives:
    #  D: ᗡ◖
    #  K: Ж⋊Ӽ
    #  @: Ҩ - Seems only related in Dejavu Sans
    #  Q: Ὄ Ό Ὀ Ὃ Ὄ Ṑ Ò Ỏ
    #  _: ‾ - left out for now for the sake of GTK accelerators

    def rewrite_flipped(self, string):
        """Convert the string to look flipped upside down."""
        if not isinstance(string, StringElem):
            string = StringElem(string)

        charmap = self.REWRITE_FLIPPED_MAP
        first = ord("!")

        def transpose(char):
            loc = ord(char) - first
            if 0 <= loc < len(charmap):
                return charmap[loc]
            return char

        def transformer(s):
            # U+202E (right-to-left override) is prepended instead of
            # reversing the characters in memory.
            if self.preserveplaceholders:
                return "\u202e" + self.transform_characters_preserving_placeholders(
                    s, transpose
                )
            return "\u202e" + "".join(transpose(c) for c in s)

        self.apply_to_translatables(string, transformer)
        return string

    @classmethod
    def ignorelist(cls):
        """Return the names of the available ignore rule sets."""
        marker = "ignore_"
        # Strip only the leading marker (see rewritelist).
        return [name[len(marker) :] for name in dir(cls) if name.startswith(marker)]

    def ignore_openoffice(self, unit):
        """Return True if any location of ``unit`` starts with an ignored prefix."""
        ignored_prefixes = (
            "Common.xcu#..Common.View.Localisation",
            "profile.lng#STR_DIR_MENU_NEW_",
            "profile.lng#STR_DIR_MENU_WIZARD_",
        )
        return any(
            location.startswith(ignored_prefixes) for location in unit.getlocations()
        )

    def ignore_libreoffice(self, unit):
        """Apply the same rules as :meth:`ignore_openoffice`."""
        return self.ignore_openoffice(unit)

    def ignore_mozilla(self, unit):
        """Return True for units matching the Mozilla ignore rules.

        Ignored are: a unit whose only location ends in ``.accesskey``
        (case-insensitive), and any unit with a location that is a CSS entity
        (per ``dtd2po.is_css_entity``), is one of the brand/vendor names
        below, ends in ``.commandkey`` (case-insensitive) or ends in ``.key``.
        """
        locations = unit.getlocations()
        if len(locations) == 1 and locations[0].lower().endswith(".accesskey"):
            return True
        for location in locations:
            if dtd2po.is_css_entity(location):
                return True
            if location in ("brandShortName", "brandFullName", "vendorShortName"):
                return True
            if location.lower().endswith(".commandkey") or location.endswith(".key"):
                return True
        return False

    def ignore_gtk(self, unit):
        """Return True if the unit's source is ``default:LTR``."""
        if unit.source == "default:LTR":
            return True
        return False

    def ignore_kde(self, unit):
        """Return True if the unit's source is ``LTR``."""
        if unit.source == "LTR":
            return True
        return False

    def convertunit(self, unit, prefix):
        """Rewrite and prefix a single unit.

        :param unit: the unit to modify (its ``rich_target`` is replaced).
        :param prefix: the expanded prefix; an ``@hash_placeholder@`` marker in
            it is replaced by the first ``hash_len`` hex digits of the MD5
            digest of the unit's first location, or of its source if the unit
            has no locations.
        :return: the same unit; unchanged if the ignore function matched it.
        """
        if self.ignorefunc and self.ignorefunc(unit):
            return unit
        if "@hash_placeholder@" in prefix:
            locations = unit.getlocations()
            hashable = locations[0] if locations else unit.source
            # MD5 only provides a short, stable identifier for the unit here.
            digest = md5(hashable.encode("utf-8")).hexdigest()
            prefix = prefix.replace("@hash_placeholder@", digest[: self.hash_len])
        # Work on the existing translation if there is one, else on the source.
        if unit.istranslated():
            rich_string = unit.rich_target
        else:
            rich_string = unit.rich_source
        if not isinstance(rich_string, StringElem):
            rich_string = [
                rich_parse(string, podebug_parsers) for string in rich_string
            ]
        if self.rewritefunc:
            rewritten = [self.rewritefunc(string) for string in rich_string]
            if rewritten:
                rich_string = rewritten
        unit.rich_target = add_prefix(prefix, rich_string)
        return unit

    def convertstore(self, store):
        """Expand the format string for ``store`` and convert all its units.

        Recognised options have the form ``%[digits and/or c]<letter>``:

        * ``s`` shrunk filename (see :meth:`shrinkfilename`)
        * ``f`` filename without extension, ``F`` filename as is
        * ``b`` base name without extension, ``B`` base name
        * ``d`` directory name
        * ``h`` per-unit hash; digits give the hash length (default 4)

        For all but ``h``, ``c`` removes vowels after the first character and
        digits truncate the result to that length.

        :param store: the store to modify in place.
        :return: the same store.
        """
        prefix = self.format
        # A store without a (known) filename yields empty filename parts
        # instead of an exception.
        filename = getattr(store, "filename", None) or ""
        for formatstr in re.findall("%[0-9c]*[sfFbBdh]", self.format):
            kind = formatstr[-1]
            formatoptions = formatstr[1:-1]
            length = "".join(c for c in formatoptions if c.isdigit())
            if kind == "h":
                # The hash itself differs per unit, so only a marker is
                # inserted here; convertunit() substitutes the digest.
                self.hash_len = int(length) if length else self.DEFAULT_HASH_LEN
                formatted = "@hash_placeholder@"
            else:
                if kind == "s":
                    formatted = self.shrinkfilename(filename)
                elif kind == "f":
                    formatted = os.path.splitext(filename)[0]
                elif kind == "F":
                    formatted = filename
                elif kind == "b":
                    formatted = os.path.splitext(os.path.basename(filename))[0]
                elif kind == "B":
                    formatted = os.path.basename(filename)
                else:  # kind == "d", the only letter left in the pattern
                    formatted = os.path.dirname(filename)
                if "c" in formatoptions and formatted:
                    formatted = formatted[0] + "".join(
                        c for c in formatted[1:] if c.lower() not in "aeiou"
                    )
                if length:
                    formatted = formatted[: int(length)]
            prefix = prefix.replace(formatstr, formatted)
        for unit in store.units:
            if unit.istranslatable():
                self.convertunit(unit, prefix)
        return store

    def shrinkfilename(self, filename):
        """Return a short identifier derived from ``filename``.

        The result is made of up to four characters of the first directory,
        the first character of each further directory, and up to four
        characters of the base name (cut at the first dot), joined by ``-``.
        A filename without a directory yields just the base-name part.
        """
        current_dir = "." + os.sep
        if filename.startswith(current_dir):
            filename = filename[len(current_dir) :]
        dirname = os.path.dirname(filename)
        if not dirname:
            # str.split never returns an empty list, so the missing directory
            # has to be detected on the directory name itself.
            dirshrunk = ""
        else:
            dirparts = dirname.split(os.sep)
            dirshrunk = dirparts[0][:4] + "-"
            if len(dirparts) > 1:
                # [:1] rather than [0]: empty components (e.g. from a doubled
                # separator) contribute nothing instead of raising IndexError.
                dirshrunk += "".join(dirpart[:1] for dirpart in dirparts[1:]) + "-"
        baseshrunk = os.path.basename(filename)[:4]
        if "." in baseshrunk:
            baseshrunk = baseshrunk[: baseshrunk.find(".")]
        return dirshrunk + baseshrunk
371
372
def convertpo(
    inputfile,
    outputfile,
    templatefile,
    format=None,
    rewritestyle=None,
    ignoreoption=None,
    preserveplaceholders=None,
):
    """Read ``inputfile``, add debug strings to it and write ``outputfile``.

    ``templatefile`` is accepted because the converter framework requires it,
    but it is not used.

    :return: 0 if the input store is empty (nothing is written), 1 otherwise.
    """
    store = factory.getobject(inputfile)
    if store.isempty():
        return 0
    options = {
        "format": format,
        "rewritestyle": rewritestyle,
        "ignoreoption": ignoreoption,
        "preserveplaceholders": preserveplaceholders,
    }
    debugger = podebug(**options)
    debugger.convertstore(store).serialize(outputfile)
    return 1
396
397
def main():
    """Build the command line option parser for podebug and run it."""
    from translate.convert import convert

    # Input extension -> (output extension, conversion function).
    formats = {
        extension: (output, convertpo)
        for extension, output in (
            ("po", "po"),
            ("pot", "po"),
            ("xlf", "xlf"),
            ("xliff", "xliff"),
            ("tmx", "tmx"),
        )
    }
    rewrite_styles = podebug.rewritelist()
    ignore_rules = podebug.ignorelist()

    parser = convert.ConvertOptionParser(formats, description=__doc__)
    # TODO: add documentation on format strings...
    parser.add_option(
        "-f", "--format", dest="format", default="", help="specify format string"
    )
    parser.add_option(
        "",
        "--rewrite",
        dest="rewritestyle",
        type="choice",
        choices=rewrite_styles,
        metavar="STYLE",
        help="the translation rewrite style: %s" % ", ".join(rewrite_styles),
    )
    parser.add_option(
        "",
        "--ignore",
        dest="ignoreoption",
        type="choice",
        choices=ignore_rules,
        metavar="APPLICATION",
        help="apply tagging ignore rules for the given application: %s"
        % ", ".join(ignore_rules),
    )
    parser.add_option(
        "",
        "--preserveplaceholders",
        dest="preserveplaceholders",
        default=False,
        action="store_true",
        help="attempt to exclude characters that are part of placeholders when performing character-level"
        " rewrites so that consuming applications can still use the placeholders to generate final "
        "output",
    )
    # These option values are handed on to convertpo() as keyword arguments.
    for option in ("format", "rewritestyle", "ignoreoption", "preserveplaceholders"):
        parser.passthrough.append(option)
    parser.run()


if __name__ == "__main__":
    main()
451