# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2021 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.

"""Extra commands for setup.py.

We provide a few extra command classes in `l10n_cmdclass` for
localization tasks.  We also modify the standard commands
`distutils.command.build` and `setuptools.command.install_lib` classes
in order to call the l10n commands for compiling catalogs at the right
time during install.

"""

from html.parser import HTMLParser
import io
import os
import re
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from jinja2.ext import babel_extract as jinja2_extractor

from distutils import log as distlog
from distutils.cmd import Command
from distutils.command.build import build as _build
from distutils.errors import DistutilsOptionError
from setuptools.command.install_lib import install_lib as _install_lib


def simplify_message(message):
    """Transforms an extracted message (string or tuple) into one in
    which the repeated white-space has been simplified to a single
    space.

    When `message` is a tuple, only its first element is simplified
    and kept; the remaining elements are replaced by `None` so that
    the length of the tuple is preserved.

    :param message: a string, or a non-empty tuple whose first element
                    is a string
    :return: the simplified string, or a tuple of the same length
    """
    tuple_len = len(message) if isinstance(message, tuple) else 0
    if tuple_len:
        message = message[0]
    # `split()` without arguments splits on any run of white-space
    message = ' '.join(message.split())
    if tuple_len:
        message = (message,) + (None,) * (tuple_len - 1)
    return message


class ScriptExtractor(HTMLParser):
    """HTML parser copying the content of ``script`` elements to `out`.

    Text, character references and entity references met between a
    ``script`` start tag and the next end tag are written to the `out`
    file-like object; everything else is discarded.
    """

    def __init__(self, out):
        HTMLParser.__init__(self)
        self.out = out
        self.in_javascript = False

    def handle_starttag(self, tag, attrs):
        if tag == 'script':
            self.in_javascript = True

    def handle_startendtag(self, tag, attrs):
        # A self-closing tag has no content to collect
        self.in_javascript = False

    def handle_charref(self, name):
        if self.in_javascript:
            self.out.write('&#%s;' % name)

    def handle_entityref(self, name):
        if self.in_javascript:
            self.out.write('&%s;' % name)

    def handle_data(self, data):
        if self.in_javascript:
            self.out.write(data)

    def handle_endtag(self, tag):
        self.in_javascript = False

    def no_op(*args, **kwargs):
        pass

    # Comments, declarations and processing instructions are ignored
    handle_comment = handle_decl = handle_pi = no_op


try:
    from babel.messages.catalog import TranslationError
    from babel.messages.extract import extract_javascript
    from babel.messages.frontend import extract_messages, init_catalog, \
                                        compile_catalog, update_catalog
    from babel.messages.pofile import read_po
    from babel.support import Translations
    from babel.util import parse_encoding

    # For each keyword (callable name), maps the name of a keyword
    # argument to its index in the array of extracted messages (see
    # the `kwargs_maps` option of `extract_python`).
    _DEFAULT_KWARGS_MAPS = {
        'Option': {'doc': 4},
        'BoolOption': {'doc': 4},
        'IntOption': {'doc': 4},
        'FloatOption': {'doc': 4},
        'ListOption': {'doc': 6},
        'ChoiceOption': {'doc': 4},
        'PathOption': {'doc': 4},
        'ExtensionOption': {'doc': 5},
        'OrderedExtensionsOption': {'doc': 6},
    }

    # Keywords for which the extracted messages are passed through
    # `cleandoc` (see the `cleandoc_keywords` option of `extract_python`).
    _DEFAULT_CLEANDOC_KEYWORDS = (
        'ConfigSection', 'Option', 'BoolOption', 'IntOption', 'FloatOption',
        'ListOption', 'ChoiceOption', 'PathOption', 'ExtensionOption',
        'OrderedExtensionsOption', 'cleandoc_',
    )

    def extract_python(fileobj, keywords, comment_tags, options):
        """Extract messages from Python source code, This is patched
        extract_python from Babel to support keyword argument mapping.

        `kwargs_maps` option: names of keyword arguments will be mapping to
        index of messages array.

        `cleandoc_keywords` option: a list of keywords to clean up the
        extracted messages with `cleandoc`.

        :param fileobj: the file-like object the messages are extracted
                        from; its lines are decoded when they are bytes
        :param keywords: the names of the callables whose arguments are
                         extracted
        :param comment_tags: the prefixes marking a comment as a
                             translator comment
        :param options: a dictionary of options (`encoding`,
                        `kwargs_maps` and `cleandoc_keywords` are used)
        :return: an iterator over ``(lineno, funcname, messages,
                 comments)`` tuples
        """
        from trac.util.text import cleandoc

        funcname = lineno = message_lineno = None
        kwargs_maps = func_kwargs_map = kwarg_name = None
        call_stack = -1
        buf = []
        messages = []
        messages_kwargs = {}
        translator_comments = []
        in_def = in_translator_comments = False
        comment_tag = None

        encoding = str(parse_encoding(fileobj) or
                       options.get('encoding', 'iso-8859-1'))
        kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
        if 'kwargs_maps' in options:
            kwargs_maps.update(options['kwargs_maps'])
        cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
        if 'cleandoc_keywords' in options:
            cleandoc_keywords.update(options['cleandoc_keywords'])

        def readline():
            # `generate_tokens` requires text lines, whereas the file
            # object is presumably opened in binary mode (it is also
            # handed to `parse_encoding` above), hence the decoding.
            line = fileobj.readline()
            if isinstance(line, bytes):
                line = line.decode(encoding)
            return line

        tokens = generate_tokens(readline)
        tok = value = None
        for token in tokens:
            prev_tok, prev_value = tok, value
            tok, value, (lineno, _), _, _ = token
            if call_stack == -1 and tok == NAME and value in ('def', 'class'):
                in_def = True
            elif tok == OP and value == '(':
                if in_def:
                    # Avoid false positives for declarations such as:
                    # def gettext(arg='message'):
                    in_def = False
                    continue
                if funcname:
                    message_lineno = lineno
                    call_stack += 1
                kwarg_name = None
            elif in_def and tok == OP and value == ':':
                # End of a class definition without parens
                in_def = False
                continue
            elif call_stack == -1 and tok == COMMENT:
                # Strip the comment token from the line (the token value
                # is already text, no decoding needed)
                value = value[1:].strip()
                if in_translator_comments and \
                        translator_comments[-1][0] == lineno - 1:
                    # We're already inside a translator comment, continue
                    # appending
                    translator_comments.append((lineno, value))
                    continue
                # If execution reaches this point, let's see if comment line
                # starts with one of the comment tags
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.append((lineno, value))
                        break
            elif funcname and call_stack == 0:
                if tok == OP and value == ')':
                    # End of the call: flush the pending argument
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)

                    # Place the messages given as keyword arguments at
                    # the position configured for the keyword
                    for name, message in messages_kwargs.items():
                        if name not in func_kwargs_map:
                            continue
                        index = func_kwargs_map[name]
                        while index >= len(messages):
                            messages.append(None)
                        messages[index - 1] = message

                    if funcname in cleandoc_keywords:
                        messages = [m and cleandoc(m) for m in messages]
                    if len(messages) > 1:
                        messages = tuple(messages)
                    else:
                        messages = messages[0]
                    # Comments don't apply unless they immediately precede
                    # the message
                    if translator_comments and \
                            translator_comments[-1][0] < message_lineno - 1:
                        translator_comments = []

                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                    funcname = lineno = message_lineno = None
                    kwarg_name = func_kwargs_map = None
                    call_stack = -1
                    messages = []
                    messages_kwargs = {}
                    translator_comments = []
                    in_translator_comments = False
                elif tok == STRING:
                    # Unwrap quotes in a safe manner, maintaining the string's
                    # encoding
                    # https://sourceforge.net/tracker/?func=detail&atid=355470&
                    # aid=617979&group_id=5470
                    # NOTE: the string literal token is evaluated, with
                    #       the builtins removed; only run the extraction
                    #       on trusted source code.
                    value = eval('# coding=%s\n%s' % (encoding, value),
                                 {'__builtins__':{}}, {})
                    if isinstance(value, bytes):
                        value = value.decode(encoding)
                    buf.append(value)
                elif tok == OP and value == '=' and prev_tok == NAME:
                    kwarg_name = prev_value
                elif tok == OP and value == ',':
                    # End of an argument: flush it
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)
                    kwarg_name = None
                    if translator_comments:
                        # We have translator comments, and since we're on a
                        # comma(,) user is allowed to break into a new line
                        # Let's increase the last comment's lineno in order
                        # for the comment to still be a valid one
                        old_lineno, old_comment = translator_comments.pop()
                        translator_comments.append((old_lineno+1, old_comment))
            elif call_stack > 0 and tok == OP and value == ')':
                call_stack -= 1
            elif funcname and call_stack == -1:
                # The keyword was not followed by an opening paren
                funcname = func_kwargs_map = kwarg_name = None
            elif tok == NAME and value in keywords:
                funcname = value
                func_kwargs_map = kwargs_maps.get(funcname, {})
                kwarg_name = None


    def extract_javascript_script(fileobj, keywords, comment_tags, options):
        """Extract messages from Javascript embedded in <script> tags.

        Select <script type="javascript/text"> tags and delegate to
        `extract_javascript`.

        :return: the result of `extract_javascript`, or an empty list
                 when `fileobj` has no name
        """
        if not fileobj.name:
            return []
        out = io.StringIO()
        extractor = ScriptExtractor(out)
        extractor.feed(str(fileobj.read(), 'utf-8'))
        extractor.close()
        out.seek(0)
        # NOTE(review): `out` is a text buffer; confirm that the
        #               `extract_javascript` of the supported Babel
        #               versions accepts text rather than bytes.
        return extract_javascript(out, keywords, comment_tags, options)


    def extract_html(fileobj, keywords, comment_tags, options):
        """Extracts translatable texts from templates.

        We simplify white-space found in translatable texts collected
        via the ``gettext`` function (which is what the ``trans``
        directives use), otherwise we would have near duplicates
        (e.g. admin.html, prefs.html).

        We assume the template function ``gettext`` will do the same
        before trying to fetch the translation from the catalog.

        """
        if fileobj:
            extractor = jinja2_extractor
            fileobj.seek(0)
            for m in extractor(fileobj, keywords, comment_tags, options):
                # lineno, func, message, comments = m
                if m[1] in ('gettext', None):
                    # Jinja2 trans
                    yield m[0], m[1], simplify_message(m[2]), m[3]
                else:
                    yield m


    extract_text = extract_html


    class generate_messages_js(Command):
        """Generating message javascripts command for use ``setup.py`` scripts.
        """

        description = 'generate message javascript files from binary MO files'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('output-dir=', 'O',
             "name of the output directory"),
            ('output-file=', 'o',
             "name of the output file (default "
             "'<output_dir>/<locale>.js')"),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = None
            self.input_file = None
            self.output_dir = None
            self.output_file = None
            self.locale = None

        def finalize_options(self):
            if not self.input_file and not self.input_dir:
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')
            if not self.output_file and not self.output_dir:
                raise DistutilsOptionError('you must specify either the '
                                           'output file or directory')

        def run(self):
            """Write one javascript file for each selected MO file.

            :raises DistutilsOptionError: when no compiled catalog is
                found, or when the name of an output file can't be
                derived from the given options
            """
            mo_files = []
            js_files = []

            def js_path(dirname, locale):
                # Both parts are needed to build '<output_dir>/<locale>.js'
                if dirname is None or locale is None:
                    raise DistutilsOptionError(
                        'you must specify the output directory and the '
                        'locale, or the output file')
                return os.path.join(dirname, locale + '.js')

            if not self.input_file:
                if self.locale:
                    mo_files.append((self.locale,
                                     os.path.join(self.input_dir, self.locale,
                                                  'LC_MESSAGES',
                                                  self.domain + '.mo')))
                    js_files.append(js_path(self.output_dir, self.locale))
                else:
                    for locale in os.listdir(self.input_dir):
                        mo_file = os.path.join(self.input_dir, locale,
                                               'LC_MESSAGES',
                                               self.domain + '.mo')
                        if os.path.exists(mo_file):
                            mo_files.append((locale, mo_file))
                            js_files.append(js_path(self.output_dir, locale))
            else:
                mo_files.append((self.locale, self.input_file))
                if self.output_file:
                    js_files.append(self.output_file)
                else:
                    js_files.append(js_path(self.output_dir, self.locale))

            if not mo_files:
                raise DistutilsOptionError('no compiled catalogs found')

            # The output directory is unset when only the output file
            # has been specified
            if self.output_dir and not os.path.isdir(self.output_dir):
                os.mkdir(self.output_dir)

            for (locale, mo_file), js_file in zip(mo_files, js_files):
                distlog.info('generating messages javascript %r to %r',
                             mo_file, js_file)

                with open(mo_file, 'rb') as infile:
                    t = Translations(infile, self.domain)
                    catalog = t._catalog

                with open(js_file, 'w', encoding='utf-8') as outfile:
                    write_js(outfile, catalog, self.domain, locale)


    class check_catalog(Command):
        """Check message catalog command for use ``setup.py`` scripts."""

        description = 'check message catalog files, like `msgfmt --check`'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = None
            self.input_file = None
            self.locale = None

        def finalize_options(self):
            if not self.input_file and not self.input_dir:
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')

        def run(self):
            """Log a warning for each error found in the PO files."""
            for filename in self._get_po_files():
                distlog.info('checking catalog %s', filename)
                with open(filename, 'rb') as f:
                    catalog = read_po(f, domain=self.domain)
                for message in catalog:
                    for error in self._check_message(catalog, message):
                        distlog.warn('%s:%d: %s', filename, message.lineno,
                                     error)

        def _get_po_files(self):
            """Return the list of the PO files to check.

            The input file takes precedence, then the catalog of the
            given locale, otherwise the existing catalogs of the locales
            found in the input directory, sorted.
            """
            if self.input_file:
                return [self.input_file]

            if self.locale:
                return [os.path.join(self.input_dir, self.locale,
                                     'LC_MESSAGES', self.domain + '.po')]

            files = []
            for locale in os.listdir(self.input_dir):
                filename = os.path.join(self.input_dir, locale, 'LC_MESSAGES',
                                        self.domain + '.po')
                if os.path.exists(filename):
                    files.append(filename)
            return sorted(files)

        def _check_message(self, catalog, message):
            """Yield the errors reported by `message.check` followed by
            those reported by `check_markup`.
            """
            for e in message.check(catalog):
                yield e
            for e in check_markup(catalog, message):
                yield e


    def check_markup(catalog, message):
        """Verify markups in the translation.

        Each non-empty msgid is compared with each non-empty msgstr
        which differs from it.

        :return: an iterator over the `TranslationError`s found
        """
        def to_array(value):
            if not isinstance(value, (list, tuple)):
                value = (value,)
            return value
        msgids = to_array(message.id)
        msgstrs = to_array(message.string)
        for msgid_idx, msgid in enumerate(msgids):
            msgid_name = 'msgid' if msgid_idx == 0 else 'msgid_plural'
            for msgstr_idx, msgstr in enumerate(msgstrs):
                if msgid and msgstr and msgid != msgstr:
                    msgstr_name = 'msgstr' if len(msgids) == 1 else \
                                  'msgstr[%d]' % msgstr_idx
                    for e in _check_markup_0(msgid, msgid_name, msgstr,
                                             msgstr_name):
                        yield e


    def _check_markup_0(msgid, msgid_name, msgstr, msgstr_name):
        """Compare the number of occurrences of each tag in `msgid` and
        in `msgstr`.

        Nothing is reported when `msgid` can't be parsed as XML.  When
        only `msgstr` can't be parsed, the parse error is reported.

        :return: an iterator over the `TranslationError`s found
        """
        from xml.etree import ElementTree

        def count_tags(text):
            # Format first, encode next: `iterparse` reads bytes from
            # the `BytesIO` buffer
            text = ('<html>\n%s\n</html>' % text).encode('utf-8')
            counts = {}
            for event in ElementTree.iterparse(io.BytesIO(text)):
                tag = event[1].tag
                counts.setdefault(tag, 0)
                counts[tag] += 1
            # Don't count the enclosing element added above
            counts['html'] -= 1
            return counts

        try:
            msgid_counts = count_tags(msgid)
        except ElementTree.ParseError:
            return
        try:
            msgstr_counts = count_tags(msgstr)
        except ElementTree.ParseError as e:
            yield TranslationError(e)
            return

        for tag in (set(msgid_counts) | set(msgstr_counts)):
            msgid_count = msgid_counts.get(tag, 0)
            msgstr_count = msgstr_counts.get(tag, 0)
            if msgid_count != msgstr_count:
                yield TranslationError(
                    "mismatched '%s' tag between %s and %s (%d != %d)" %
                    (tag, msgid_name, msgstr_name, msgid_count, msgstr_count))


    def write_js(fileobj, catalog, domain, locale):
        """Write the `catalog` as a javascript statement loading the
        translations.

        :param fileobj: the text file-like object to write to
        :param catalog: a mapping of msgid to msgstr; a list or tuple
                        msgid has its second element used as key in a
                        nested mapping, and the entry with the empty
                        msgid is searched for the ``plural-forms`` header
        :param domain: stored as ``domain`` in the data
        :param locale: stored as ``locale`` in the data
        """
        from trac.util.presentation import to_json
        data = {'domain': domain, 'locale': locale}

        messages = {}
        for msgid, msgstr in catalog.items():
            if isinstance(msgid, (list, tuple)):
                messages.setdefault(msgid[0], {})
                messages[msgid[0]][msgid[1]] = msgstr
            elif msgid:
                messages[msgid] = msgstr
            else:
                # Entry with the empty msgid: look for the plural forms
                # among its "name: value" lines
                for line in msgstr.splitlines():
                    line = line.strip()
                    if not line:
                        continue
                    if ':' not in line:
                        continue
                    name, val = line.split(':', 1)
                    name = name.strip().lower()
                    if name == 'plural-forms':
                        data['plural_expr'] = pluralexpr(val)
                        break
        data['messages'] = messages
        data = to_json(data)
        if isinstance(data, bytes):
            data = str(data, 'utf-8')

        fileobj.write('// Generated messages javascript file '
                      'from compiled MO file\n')
        fileobj.write('babel.Translations.load(')
        fileobj.write(data)
        fileobj.write(').install();\n')


    def pluralexpr(forms):
        """Return the expression following ``plural=`` in `forms`, up to
        the next ``;``.

        :raises ValueError: when no ``plural=`` expression is found
        """
        match = re.search(r'\bplural\s*=\s*([^;]+)', forms)
        if not match:
            raise ValueError('Failed to parse plural_forms %r' % (forms,))
        return match.group(1)


    def get_command_overriders():
        """Return new `build` and `install_lib` command classes which
        run ``compile_catalog`` first.
        """
        # 'bdist_wininst' runs a 'build', so make the latter
        # run a 'compile_catalog' before 'build_py'
        class build(_build):
            sub_commands = [('compile_catalog', None)] + _build.sub_commands

        # 'bdist_egg' isn't that nice, all it does is an 'install_lib'
        class install_lib(_install_lib): # playing setuptools' own tricks ;-)
            def l10n_run(self):
                self.run_command('compile_catalog')
            def run(self):
                self.l10n_run()
                # When bdist_egg is called on distribute 0.6.29 and later, the
                # egg file includes no *.mo and *.js files which are generated
                # in l10n_run() method.
                # We remove build_py.data_files property to re-compute in order
                # to avoid the issue (#11640).
                build_py = self.get_finalized_command('build_py')
                if 'data_files' in build_py.__dict__ and \
                   not any(any(name.endswith('.mo') for name in filenames)
                           for pkg, src_dir, build_dir, filenames
                           in build_py.data_files):
                    del build_py.__dict__['data_files']
                _install_lib.run(self)
        return build, install_lib


    def get_l10n_cmdclass():
        """Return the command classes for the messages catalog."""
        build, install_lib = get_command_overriders()
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
        }


    def get_l10n_js_cmdclass():
        """Return the command classes for the messages and the
        javascript messages catalogs.
        """
        build, _install_lib = get_command_overriders()
        build.sub_commands.insert(0, ('generate_messages_js', None))
        build.sub_commands.insert(0, ('compile_catalog_js', None))
        class install_lib(_install_lib):
            def l10n_run(self):
                self.run_command('compile_catalog_js')
                self.run_command('generate_messages_js')
                self.run_command('compile_catalog')
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
            'extract_messages_js': extract_messages,
            'init_catalog_js': init_catalog,
            'compile_catalog_js': compile_catalog,
            'update_catalog_js': update_catalog,
            'generate_messages_js': generate_messages_js,
            'check_catalog_js': check_catalog,
        }


    def get_l10n_trac_cmdclass():
        """Return the command classes for the messages, the javascript
        messages and the tracini catalogs.
        """
        build, _install_lib = get_command_overriders()
        build.sub_commands.insert(0, ('generate_messages_js', None))
        build.sub_commands.insert(0, ('compile_catalog_js', None))
        build.sub_commands.insert(0, ('compile_catalog_tracini', None))
        class install_lib(_install_lib):
            def l10n_run(self):
                self.run_command('compile_catalog_tracini')
                self.run_command('compile_catalog_js')
                self.run_command('generate_messages_js')
                self.run_command('compile_catalog')
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
            'extract_messages_js': extract_messages,
            'init_catalog_js': init_catalog,
            'compile_catalog_js': compile_catalog,
            'update_catalog_js': update_catalog,
            'generate_messages_js': generate_messages_js,
            'check_catalog_js': check_catalog,
            'extract_messages_tracini': extract_messages,
            'init_catalog_tracini': init_catalog,
            'compile_catalog_tracini': compile_catalog,
            'update_catalog_tracini': update_catalog,
            'check_catalog_tracini': check_catalog,
        }

except ImportError:
    # The Babel imports failed: no l10n command classes are provided
    def get_l10n_cmdclass():
        return
    def get_l10n_js_cmdclass():
        return
    def get_l10n_trac_cmdclass():
        return