#
# Copyright 2004-2006,2008-2010 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Insert debug messages into XLIFF and Gettext PO localization files.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/podebug.html
for examples and usage instructions.
"""

import os
import re
from hashlib import md5

from translate.convert import dtd2po
from translate.storage import factory
from translate.storage.placeables import StringElem, general, parse as rich_parse


def add_prefix(prefix, stringelems):
    """Prepend ``prefix`` to each element of ``stringelems`` (in place).

    For every element, the flattened nodes are scanned in order and the
    prefix is added in front of the first sub-item of the first node that
    has a non-empty ``sub``.  Elements whose flattened nodes are all empty
    are left untouched.

    :param prefix: the text to prepend.
    :param stringelems: iterable of objects exposing ``flatten()``.
    :return: the same ``stringelems`` object that was passed in.
    """
    for stringelem in stringelems:
        for string in stringelem.flatten():
            if len(string.sub) > 0:
                string.sub[0] = prefix + string.sub[0]
                break
    return stringelems


# NOTE(review): this binds the *same* list object as ``general.parsers``, so
# the two ``remove`` calls below also change ``general.parsers`` for every
# other user of that module.  Kept as-is because it is a module-level side
# effect that other code may rely on -- confirm before switching to a copy.
podebug_parsers = general.parsers
podebug_parsers.remove(general.CapsPlaceable.parse)
podebug_parsers.remove(general.CamelCasePlaceable.parse)


class podebug:
    """Rewrites and/or prefixes the units of a store for debugging purposes."""

    # Marker inserted into the prefix by convertstore() for ``%h`` and
    # replaced per unit by convertunit().
    _HASH_PLACEHOLDER = "@hash_placeholder@"

    def __init__(
        self,
        format=None,
        rewritestyle=None,
        ignoreoption=None,
        preserveplaceholders=False,
    ):
        """Set up the converter.

        :param format: prefix format string (``None`` is treated as ``""``).
        :param rewritestyle: name of a ``rewrite_<style>`` method, or
            ``None``/unknown for no rewriting.
        :param ignoreoption: name of an ``ignore_<application>`` method, or
            ``None``/unknown for no ignore rules.
        :param preserveplaceholders: when true, the character-level rewrites
            (``unicode``, ``flipped``) leave non-translatable placeables alone.
        """
        self.format = "" if format is None else format
        self.rewritefunc = getattr(self, "rewrite_%s" % rewritestyle, None)
        self.ignorefunc = getattr(self, "ignore_%s" % ignoreoption, None)
        self.preserveplaceholders = preserveplaceholders
        # Default hash length; convertstore() overrides it when the format
        # contains a ``%h`` specifier with an explicit length.  Setting it
        # here keeps convertunit() usable without a prior convertstore().
        self.hash_len = 4

    def apply_to_translatables(self, string, func):
        """Applies func to all translatable strings in string."""
        string.map(
            lambda e: e.apply_to_strings(func),
            lambda e: e.isleaf() and e.istranslatable,
        )

    @classmethod
    def rewritelist(cls):
        """Return the available rewrite style names (``rewrite_*`` methods)."""
        marker = "rewrite_"
        # Strip only the leading marker rather than every occurrence.
        return [name[len(marker):] for name in dir(cls) if name.startswith(marker)]

    def _rewrite_prepend_append(self, string, prepend, append=None):
        """Wrap ``string`` in ``prepend`` ... ``append``.

        ``append`` defaults to ``prepend``.  When the string ends with a
        newline, the trailing newline(s) are stripped from the last string
        node (if it is a plain ``str``) and a single newline is re-added
        after ``append`` so the marker stays on the same line as the text.
        """
        if append is None:
            append = prepend
        if not isinstance(string, StringElem):
            string = StringElem(string)
        string.sub.insert(0, prepend)
        if str(string).endswith("\n"):
            # Try and remove the last character from the tree
            try:
                lastnode = string.flatten()[-1]
                if isinstance(lastnode.sub[-1], str):
                    lastnode.sub[-1] = lastnode.sub[-1].rstrip("\n")
            except IndexError:
                # Nothing to strip from: no flattened nodes or an empty sub.
                pass
            string.sub.append(append + "\n")
        else:
            string.sub.append(append)
        return string

    def rewrite_xxx(self, string):
        """Surround the string with ``xxx`` markers."""
        return self._rewrite_prepend_append(string, "xxx")

    def rewrite_bracket(self, string):
        """Surround the string with ``[`` and ``]``."""
        return self._rewrite_prepend_append(string, "[", "]")

    def rewrite_en(self, string):
        """Return the string unchanged (wrapped in a StringElem if needed)."""
        if not isinstance(string, StringElem):
            string = StringElem(string)
        return string

    def rewrite_blank(self, string):
        """Return an empty StringElem regardless of the input."""
        return StringElem("")

    def rewrite_chef(self, string):
        """Rewrite using Mock Swedish ("Bork Bork Bork!")."""
        if not isinstance(string, StringElem):
            string = StringElem(string)
        # From Dive into Python which itself got it elsewhere
        # http://www.renderx.com/demos/examples/diveintopython.pdf
        subs = (
            (r"a([nu])", r"u\1"),
            (r"A([nu])", r"U\1"),
            (r"a\B", r"e"),
            (r"A\B", r"E"),
            (r"en\b", r"ee"),
            (r"\Bew", r"oo"),
            (r"\Be\b", r"e-a"),
            (r"\be", r"i"),
            (r"\bE", r"I"),
            (r"\Bf", r"ff"),
            (r"\Bir", r"ur"),
            (r"(\w*?)i(\w*?)$", r"\1ee\2"),
            (r"\bow", r"oo"),
            (r"\bo", r"oo"),
            (r"\bO", r"Oo"),
            (r"the", r"zee"),
            (r"The", r"Zee"),
            (r"th\b", r"t"),
            (r"\Btion", r"shun"),
            (r"\Bu", r"oo"),
            (r"\BU", r"Oo"),
            (r"v", r"f"),
            (r"V", r"F"),
            (r"w", r"w"),
            (r"W", r"W"),
            (r"([a-z])[.]", r"\1. Bork Bork Bork!"),
        )
        # The substitutions are applied one after another, in table order.
        for a, b in subs:
            self.apply_to_translatables(string, lambda s: re.sub(a, b, s))
        return string

    # These parsers extract placeholders that should NOT be transformed during character-level rewrites
    # when the preserveplaceholders flag is True. It is not the full set of placeable parsers available
    # as some of them are not appropriate for this usage.
    PRESERVE_PLACEABLE_PARSERS = [
        general.UrlPlaceable.parse,
        general.EmailPlaceable.parse,
        general.XMLTagPlaceable.parse,
        general.DoubleAtPlaceable.parse,
        general.BracePlaceable.parse,
        general.PythonFormattingPlaceable.parse,
    ]

    def transform_characters_preserving_placeholders(self, s, transform):
        """Apply ``transform`` to each character of ``s`` outside placeholders.

        ``s`` is parsed with ``PRESERVE_PLACEABLE_PARSERS``; leaf elements
        flagged translatable are transformed character by character, while
        other leaves contribute their first sub-item unchanged.

        :return: the re-joined ``str``.
        """
        rich_string = rich_parse(s, self.PRESERVE_PLACEABLE_PARSERS)
        string_elements = rich_string.depth_first(filter=lambda e: e.isleaf())

        transformed = []
        for element in string_elements:
            if element.istranslatable:
                transformed.extend(transform(character) for character in str(element))
            else:
                transformed.append(element.sub[0])

        return "".join(transformed)

    # Look-alike replacements for the code points "A" (65) through "z" (122):
    # 26 upper-case letters, the 6 characters between "Z" and "a" (unchanged),
    # then 26 lower-case letters.
    REWRITE_UNICODE_MAP = (
        "ȦƁƇḒḖƑƓĦĪĴĶĿḾȠǾƤɊŘŞŦŬṼẆẊẎẐ" + "[\\]^_`" + "ȧƀƈḓḗƒɠħīĵķŀḿƞǿƥɋřşŧŭṽẇẋẏẑ"
    )

    def rewrite_unicode(self, string):
        """Convert to Unicode characters that look like the source string"""
        if not isinstance(string, StringElem):
            string = StringElem(string)

        table = self.REWRITE_UNICODE_MAP

        def transpose(char):
            loc = ord(char) - 65
            # Bound on the table length so the last entry ("z") is included;
            # the previous ``loc > 56`` check skipped it.
            if loc < 0 or loc >= len(table):
                return char
            return table[loc]

        def transformer(s):
            if self.preserveplaceholders:
                return self.transform_characters_preserving_placeholders(s, transpose)
            return "".join(transpose(c) for c in s)

        self.apply_to_translatables(string, transformer)
        return string

    # Upside-down look-alikes for the code points "!" (33) through "z" (122).
    REWRITE_FLIPPED_MAP = (
        "¡„#$%⅋,()⁎+´-˙/012Ɛᔭ59Ƚ86:;<=>¿@"
        + "∀ԐↃᗡƎℲ⅁HIſӼ⅂WNOԀÒᴚS⊥∩ɅMX⅄Z"
        + "[\\]ᵥ_,"
        + "ɐqɔpǝɟƃɥıɾʞʅɯuodbɹsʇnʌʍxʎz"
    )
    # Brackets should be swapped if the string will be reversed in memory.
    # If a right-to-left override is used, the brackets should be
    # unchanged.
    # Some alternatives:
    # D: ᗡ◖
    # K: Ж⋊Ӽ
    # @: Ҩ - Seems only related in Dejavu Sans
    # Q: Ὄ Ό Ὀ Ὃ Ὄ Ṑ Ò Ỏ
    # _: ‾ - left out for now for the sake of GTK accelerators

    def rewrite_flipped(self, string):
        """Convert the string to look flipped upside down."""
        if not isinstance(string, StringElem):
            string = StringElem(string)

        table = self.REWRITE_FLIPPED_MAP

        def transpose(char):
            loc = ord(char) - 33
            if loc < 0 or loc >= len(table):
                return char
            return table[loc]

        def transformer(s):
            # U+202E (right-to-left override) is prefixed instead of
            # reversing the characters in memory.
            if self.preserveplaceholders:
                return "\u202e" + self.transform_characters_preserving_placeholders(
                    s, transpose
                )
            return "\u202e" + "".join(transpose(c) for c in s)
            # To reverse instead of using the RTL override:
            # return ''.join(reversed([transpose(c) for c in s]))

        self.apply_to_translatables(string, transformer)
        return string

    @classmethod
    def ignorelist(cls):
        """Return the available ignore rule names (``ignore_*`` methods)."""
        marker = "ignore_"
        # Strip only the leading marker rather than every occurrence.
        return [name[len(marker):] for name in dir(cls) if name.startswith(marker)]

    def ignore_openoffice(self, unit):
        """Return True if any location of ``unit`` starts with a skipped prefix."""
        for location in unit.getlocations():
            if location.startswith("Common.xcu#..Common.View.Localisation"):
                return True
            elif location.startswith("profile.lng#STR_DIR_MENU_NEW_"):
                return True
            elif location.startswith("profile.lng#STR_DIR_MENU_WIZARD_"):
                return True
        return False

    def ignore_libreoffice(self, unit):
        """Apply the same rules as :meth:`ignore_openoffice`."""
        return self.ignore_openoffice(unit)

    def ignore_mozilla(self, unit):
        """Return True for Mozilla units that must not be altered.

        Skipped: a unit whose single location ends in ``.accesskey``, and any
        unit with a location that is a CSS entity (per
        ``dtd2po.is_css_entity``), is one of the brand/vendor names, or ends
        in ``.commandkey`` (case-insensitive) or ``.key``.
        """
        locations = unit.getlocations()
        if len(locations) == 1 and locations[0].lower().endswith(".accesskey"):
            return True
        for location in locations:
            if dtd2po.is_css_entity(location):
                return True
            if location in ["brandShortName", "brandFullName", "vendorShortName"]:
                return True
            if location.lower().endswith(".commandkey") or location.endswith(".key"):
                return True
        return False

    def ignore_gtk(self, unit):
        """Return True for the unit whose source is ``default:LTR``."""
        if unit.source == "default:LTR":
            return True
        return False

    def ignore_kde(self, unit):
        """Return True for the unit whose source is ``LTR``."""
        if unit.source == "LTR":
            return True
        return False

    def convertunit(self, unit, prefix):
        """Rewrite and prefix a single unit in place.

        Units matched by the configured ignore rule are returned untouched.
        A hash placeholder in ``prefix`` is replaced by the first
        ``self.hash_len`` hex digits of the MD5 of the unit's first location
        (or of its source when it has no locations).  The text that is
        rewritten and prefixed is the target for translated units and the
        source otherwise; the result is stored in ``unit.rich_target``.

        :return: the same ``unit``.
        """
        if self.ignorefunc and self.ignorefunc(unit):
            return unit
        if self._HASH_PLACEHOLDER in prefix:
            locations = unit.getlocations()
            hashable = locations[0] if locations else unit.source
            digest = md5(hashable.encode("utf-8")).hexdigest()
            prefix = prefix.replace(self._HASH_PLACEHOLDER, digest[: self.hash_len])
        if unit.istranslated():
            rich_string = unit.rich_target
        else:
            rich_string = unit.rich_source
        if not isinstance(rich_string, StringElem):
            rich_string = [
                rich_parse(string, podebug_parsers) for string in rich_string
            ]
        if self.rewritefunc:
            rewritten = [self.rewritefunc(string) for string in rich_string]
            if rewritten:
                rich_string = rewritten
        unit.rich_target = add_prefix(prefix, rich_string)
        return unit

    def convertstore(self, store):
        """Expand the format string for ``store`` and convert all its units.

        Supported specifiers (``%`` + optional digits/``c`` + letter):
        ``s`` shrunk file name, ``f`` file name without extension, ``F`` file
        name, ``b`` base name without extension, ``B`` base name, ``d``
        directory name, ``h`` per-unit hash (digits give the hash length,
        default 4).  For the non-hash specifiers, ``c`` removes vowels after
        the first character and digits truncate the result to that length.

        A store without a file name is treated as having an empty one.

        :return: the same ``store``, with its translatable units converted.
        """
        prefix = self.format
        filename = getattr(store, "filename", None) or ""
        for formatstr in re.findall("%[0-9c]*[sfFbBdh]", self.format):
            kind = formatstr[-1]
            formatoptions = formatstr[1:-1]
            if kind == "s":
                formatted = self.shrinkfilename(filename)
            elif kind == "f":
                formatted = os.path.splitext(filename)[0]
            elif kind == "F":
                formatted = filename
            elif kind == "b":
                formatted = os.path.splitext(os.path.basename(filename))[0]
            elif kind == "B":
                formatted = os.path.basename(filename)
            elif kind == "d":
                formatted = os.path.dirname(filename)
            elif kind == "h":
                try:
                    self.hash_len = int(
                        "".join(c for c in formatoptions if c.isdigit())
                    )
                except ValueError:
                    # No digits given: fall back to the default length.
                    self.hash_len = 4
                # The real hash depends on the unit; convertunit() fills it in.
                formatted = self._HASH_PLACEHOLDER
            else:
                continue
            if formatoptions and kind != "h":
                if "c" in formatoptions and formatted:
                    formatted = formatted[0] + "".join(
                        c for c in formatted[1:] if c.lower() not in "aeiou"
                    )
                length = "".join(c for c in formatoptions if c.isdigit())
                if length:
                    formatted = formatted[: int(length)]
            prefix = prefix.replace(formatstr, formatted)
        for unit in store.units:
            if not unit.istranslatable():
                continue
            self.convertunit(unit, prefix)
        return store

    def shrinkfilename(self, filename):
        """Return a short identifier derived from ``filename``.

        The result is built from the first four characters of the first
        directory component, the initial of each further directory component,
        and the first four characters of the base name cut at the first dot,
        joined with ``-``.  A leading ``./`` (using ``os.sep``) is dropped.
        """
        if filename.startswith("." + os.sep):
            filename = filename.replace("." + os.sep, "", 1)
        dirname = os.path.dirname(filename)
        dirparts = dirname.split(os.sep)
        # NOTE(review): ``str.split`` never returns an empty list, so this
        # first branch is unreachable and a bare file name yields a leading
        # "-".  Left unchanged because fixing it would alter emitted prefixes.
        if not dirparts:
            dirshrunk = ""
        else:
            dirshrunk = dirparts[0][:4] + "-"
            if len(dirparts) > 1:
                dirshrunk += "".join(dirpart[0] for dirpart in dirparts[1:]) + "-"
        baseshrunk = os.path.basename(filename)[:4]
        if "." in baseshrunk:
            baseshrunk = baseshrunk[: baseshrunk.find(".")]
        return dirshrunk + baseshrunk


def convertpo(
    inputfile,
    outputfile,
    templatefile,
    format=None,
    rewritestyle=None,
    ignoreoption=None,
    preserveplaceholders=None,
):
    """Reads in inputfile, changes it to have debug strings, writes to outputfile.

    :return: 0 when the input store is empty (nothing is written), else 1.
    """
    # note that templatefile is not used, but it is required by the converter...
    inputstore = factory.getobject(inputfile)
    if inputstore.isempty():
        return 0
    convertor = podebug(
        format=format,
        rewritestyle=rewritestyle,
        ignoreoption=ignoreoption,
        preserveplaceholders=preserveplaceholders,
    )
    outputstore = convertor.convertstore(inputstore)
    outputstore.serialize(outputfile)
    return 1


def main():
    """Command-line entry point: build the option parser and run it."""
    from translate.convert import convert

    formats = {
        "po": ("po", convertpo),
        "pot": ("po", convertpo),
        "xlf": ("xlf", convertpo),
        "xliff": ("xliff", convertpo),
        "tmx": ("tmx", convertpo),
    }
    parser = convert.ConvertOptionParser(formats, description=__doc__)
    # TODO: add documentation on format strings...
    parser.add_option(
        "-f", "--format", dest="format", default="", help="specify format string"
    )
    parser.add_option(
        "",
        "--rewrite",
        dest="rewritestyle",
        type="choice",
        choices=podebug.rewritelist(),
        metavar="STYLE",
        help="the translation rewrite style: %s" % ", ".join(podebug.rewritelist()),
    )
    parser.add_option(
        "",
        "--ignore",
        dest="ignoreoption",
        type="choice",
        choices=podebug.ignorelist(),
        metavar="APPLICATION",
        help="apply tagging ignore rules for the given application: %s"
        % ", ".join(podebug.ignorelist()),
    )
    parser.add_option(
        "",
        "--preserveplaceholders",
        dest="preserveplaceholders",
        default=False,
        action="store_true",
        help="attempt to exclude characters that are part of placeholders when performing character-level"
        " rewrites so that consuming applications can still use the placeholders to generate final "
        "output",
    )
    # Forward these options to convertpo() as keyword arguments.
    parser.passthrough.append("format")
    parser.passthrough.append("rewritestyle")
    parser.passthrough.append("ignoreoption")
    parser.passthrough.append("preserveplaceholders")
    parser.run()


if __name__ == "__main__":
    main()