Trac-1.5.3/trac/dist.py

# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2021 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.

"""Extra commands for setup.py.

We provide a few extra command classes in `l10n_cmdclass` for
localization tasks.  We also modify the standard commands
`distutils.command.build` and `setuptools.command.install_lib` classes
in order to call the l10n commands for compiling catalogs at the right
time during install.

"""

from html.parser import HTMLParser
import io
import os
import re
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from jinja2.ext import babel_extract as jinja2_extractor

from distutils import log as distlog
from distutils.cmd import Command
from distutils.command.build import build as _build
from distutils.errors import DistutilsOptionError
from setuptools.command.install_lib import install_lib as _install_lib


def simplify_message(message):
    """Transforms an extracted messsage (string or tuple) into one in
    which the repeated white-space has been simplified to a single
    space.

    """
    tuple_len = len(message) if isinstance(message, tuple) else 0
    if tuple_len:
        message = message[0]
    message = ' '.join(message.split())
    if tuple_len:
        message = (message,) + (None,) * (tuple_len - 1)
    return message


class ScriptExtractor(HTMLParser):
    def __init__(self, out):
        HTMLParser.__init__(self)
        self.out = out
        self.in_javascript = False

    def handle_starttag(self, tag, attrs):
        if tag == 'script':
            self.in_javascript = True

    def handle_startendtag(self, tag, attrs):
        self.in_javascript = False

    def handle_charref(self, name):
        if self.in_javascript:
            self.out.write('&#%s;' % name)

    def handle_entityref(self, name):
        if self.in_javascript:
            self.out.write('&%s;' % name)

    def handle_data(self, data):
        if self.in_javascript:
            self.out.write(data)

    def handle_endtag(self, tag):
        self.in_javascript = False

    def no_op(*args, **kwargs):
        pass

    handle_comment = handle_decl = handle_pi = no_op


try:
    from babel.messages.catalog import TranslationError
    from babel.messages.extract import extract_javascript
    from babel.messages.frontend import extract_messages, init_catalog, \
                                        compile_catalog, update_catalog
    from babel.messages.pofile import read_po
    from babel.support import Translations
    from babel.util import parse_encoding

    _DEFAULT_KWARGS_MAPS = {
        'Option': {'doc': 4},
        'BoolOption': {'doc': 4},
        'IntOption': {'doc': 4},
        'FloatOption': {'doc': 4},
        'ListOption': {'doc': 6},
        'ChoiceOption': {'doc': 4},
        'PathOption': {'doc': 4},
        'ExtensionOption': {'doc': 5},
        'OrderedExtensionsOption': {'doc': 6},
    }

    _DEFAULT_CLEANDOC_KEYWORDS = (
        'ConfigSection', 'Option', 'BoolOption', 'IntOption', 'FloatOption',
        'ListOption', 'ChoiceOption', 'PathOption', 'ExtensionOption',
        'OrderedExtensionsOption', 'cleandoc_',
    )

    def extract_python(fileobj, keywords, comment_tags, options):
        """Extract messages from Python source code, This is patched
        extract_python from Babel to support keyword argument mapping.

        `kwargs_maps` option: names of keyword arguments will be mapping to
        index of messages array.

        `cleandoc_keywords` option: a list of keywords to clean up the
        extracted messages with `cleandoc`.
        """
        from trac.util.text import cleandoc

        funcname = lineno = message_lineno = None
        kwargs_maps = func_kwargs_map = None
        call_stack = -1
        buf = []
        messages = []
        messages_kwargs = {}
        translator_comments = []
        in_def = in_translator_comments = False
        comment_tag = None

        encoding = str(parse_encoding(fileobj) or
                       options.get('encoding', 'iso-8859-1'))
        kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
        if 'kwargs_maps' in options:
            kwargs_maps.update(options['kwargs_maps'])
        cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
        if 'cleandoc_keywords' in options:
            cleandoc_keywords.update(options['cleandoc_keywords'])

        tokens = generate_tokens(fileobj.readline)
        tok = value = None
        for _ in tokens:
            prev_tok, prev_value = tok, value
            tok, value, (lineno, _), _, _ = _
            if call_stack == -1 and tok == NAME and value in ('def', 'class'):
                in_def = True
            elif tok == OP and value == '(':
                if in_def:
                    # Avoid false positives for declarations such as:
                    # def gettext(arg='message'):
                    in_def = False
                    continue
                if funcname:
                    message_lineno = lineno
                    call_stack += 1
                kwarg_name = None
            elif in_def and tok == OP and value == ':':
                # End of a class definition without parens
                in_def = False
                continue
            elif call_stack == -1 and tok == COMMENT:
                # Strip the comment token from the line
                value = value.decode(encoding)[1:].strip()
                if in_translator_comments and \
                        translator_comments[-1][0] == lineno - 1:
                    # We're already inside a translator comment, continue
                    # appending
                    translator_comments.append((lineno, value))
                    continue
                # If execution reaches this point, let's see if comment line
                # starts with one of the comment tags
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.append((lineno, value))
                        break
            elif funcname and call_stack == 0:
                if tok == OP and value == ')':
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)

                    for name, message in messages_kwargs.items():
                        if name not in func_kwargs_map:
                            continue
                        index = func_kwargs_map[name]
                        while index >= len(messages):
                            messages.append(None)
                        messages[index - 1] = message

                    if funcname in cleandoc_keywords:
                        messages = [m and cleandoc(m) for m in messages]
                    if len(messages) > 1:
                        messages = tuple(messages)
                    else:
                        messages = messages[0]
                    # Comments don't apply unless they immediately preceed the
                    # message
                    if translator_comments and \
                            translator_comments[-1][0] < message_lineno - 1:
                        translator_comments = []

                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                    funcname = lineno = message_lineno = None
                    kwarg_name = func_kwargs_map = None
                    call_stack = -1
                    messages = []
                    messages_kwargs = {}
                    translator_comments = []
                    in_translator_comments = False
                elif tok == STRING:
                    # Unwrap quotes in a safe manner, maintaining the string's
                    # encoding
                    # https://sourceforge.net/tracker/?func=detail&atid=355470&
                    # aid=617979&group_id=5470
                    value = eval('# coding=%s\n%s' % (encoding, value),
                                 {'__builtins__':{}}, {})
                    if isinstance(value, bytes):
                        value = value.decode(encoding)
                    buf.append(value)
                elif tok == OP and value == '=' and prev_tok == NAME:
                    kwarg_name = prev_value
                elif tok == OP and value == ',':
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)
                    kwarg_name = None
                    if translator_comments:
                        # We have translator comments, and since we're on a
                        # comma(,) user is allowed to break into a new line
                        # Let's increase the last comment's lineno in order
                        # for the comment to still be a valid one
                        old_lineno, old_comment = translator_comments.pop()
                        translator_comments.append((old_lineno+1, old_comment))
            elif call_stack > 0 and tok == OP and value == ')':
                call_stack -= 1
            elif funcname and call_stack == -1:
                funcname = func_kwargs_map = kwarg_name = None
            elif tok == NAME and value in keywords:
                funcname = value
                func_kwargs_map = kwargs_maps.get(funcname, {})
                kwarg_name = None


    def extract_javascript_script(fileobj, keywords, comment_tags, options):
        """Extract messages from Javascript embedded in <script> tags.

        Select <script type="javascript/text"> tags and delegate to
        `extract_javascript`.
        """
        if not fileobj.name:
            return []
        out = io.StringIO()
        extractor = ScriptExtractor(out)
        extractor.feed(str(fileobj.read(), 'utf-8'))
        extractor.close()
        out.seek(0)
        return extract_javascript(out, keywords, comment_tags, options)


    def extract_html(fileobj, keywords, comment_tags, options):
        """Extracts translatable texts from templates.

        We simplify white-space found in translatable texts collected
        via the ``gettext`` function (which is what the ``trans``
        directives use), otherwise we would have near duplicates
        (e.g. admin.html, prefs.html).

        We assume the template function ``gettext`` will do the same
        before trying to fetch the translation from the catalog.

        """
        if fileobj:
            extractor = jinja2_extractor
            fileobj.seek(0)
            for m in extractor(fileobj, keywords, comment_tags, options):
                # lineno, func, message, comments = m
                if m[1] in ('gettext', None):
                    # Jinja2 trans
                    yield m[0], m[1], simplify_message(m[2]), m[3]
                else:
                    yield m


    extract_text = extract_html


    class generate_messages_js(Command):
        """Generating message javascripts command for use ``setup.py`` scripts.
        """

        description = 'generate message javascript files from binary MO files'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('output-dir=', 'O',
             "name of the output directory"),
            ('output-file=', 'o',
             "name of the output file (default "
             "'<output_dir>/<locale>.js')"),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = None
            self.input_file = None
            self.output_dir = None
            self.output_file = None
            self.locale = None

        def finalize_options(self):
            if not self.input_file and not self.input_dir:
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')
            if not self.output_file and not self.output_dir:
                raise DistutilsOptionError('you must specify either the '
                                           'output file or directory')

        def run(self):
            mo_files = []
            js_files = []

            def js_path(dir, locale):
                return os.path.join(dir, locale + '.js')

            if not self.input_file:
                if self.locale:
                    mo_files.append((self.locale,
                                     os.path.join(self.input_dir, self.locale,
                                                  'LC_MESSAGES',
                                                  self.domain + '.mo')))
                    js_files.append(js_path(self.output_dir, self.locale))
                else:
                    for locale in os.listdir(self.input_dir):
                        mo_file = os.path.join(self.input_dir, locale,
                                               'LC_MESSAGES',
                                               self.domain + '.mo')
                        if os.path.exists(mo_file):
                            mo_files.append((locale, mo_file))
                            js_files.append(js_path(self.output_dir, locale))
            else:
                mo_files.append((self.locale, self.input_file))
                if self.output_file:
                    js_files.append(self.output_file)
                else:
                    js_files.append(js_path(self.output_dir, self.locale))

            if not mo_files:
                raise DistutilsOptionError('no compiled catalogs found')

            if not os.path.isdir(self.output_dir):
                os.mkdir(self.output_dir)

            for idx, (locale, mo_file) in enumerate(mo_files):
                js_file = js_files[idx]
                distlog.info('generating messages javascript %r to %r',
                             mo_file, js_file)

                with open(mo_file, 'rb') as infile:
                    t = Translations(infile, self.domain)
                    catalog = t._catalog

                with open(js_file, 'w', encoding='utf-8') as outfile:
                    write_js(outfile, catalog, self.domain, locale)


    class check_catalog(Command):
        """Check message catalog command for use ``setup.py`` scripts."""

        description = 'check message catalog files, like `msgfmt --check`'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = None
            self.input_file = None
            self.locale = None

        def finalize_options(self):
            if not self.input_file and not self.input_dir:
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')

        def run(self):
            for filename in self._get_po_files():
                distlog.info('checking catalog %s', filename)
                with open(filename, 'rb') as f:
                    catalog = read_po(f, domain=self.domain)
                for message in catalog:
                    for error in self._check_message(catalog, message):
                        distlog.warn('%s:%d: %s', filename, message.lineno,
                                     error)

        def _get_po_files(self):
            if self.input_file:
                return [self.input_file]

            if self.locale:
                return [os.path.join(self.input_dir, self.locale,
                                     'LC_MESSAGES', self.domain + '.po')]

            files = []
            for locale in os.listdir(self.input_dir):
                filename = os.path.join(self.input_dir, locale, 'LC_MESSAGES',
                                        self.domain + '.po')
                if os.path.exists(filename):
                    files.append(filename)
            return sorted(files)

        def _check_message(self, catalog, message):
            for e in message.check(catalog):
                yield e
            for e in check_markup(catalog, message):
                yield e

    def check_markup(catalog, message):
        """Verify markups in the translation."""
        def to_array(value):
            if not isinstance(value, (list, tuple)):
                value = (value,)
            return value
        msgids = to_array(message.id)
        msgstrs = to_array(message.string)
        for msgid_idx, msgid in enumerate(msgids):
            msgid_name = 'msgid' if msgid_idx == 0 else 'msgid_plural'
            for msgstr_idx, msgstr in enumerate(msgstrs):
                if msgid and msgstr and msgid != msgstr:
                    msgstr_name = 'msgstr' if len(msgids) == 1 else \
                                  'msgstr[%d]' % msgstr_idx
                    for e in _check_markup_0(msgid, msgid_name, msgstr,
                                             msgstr_name):
                        yield e

    def _check_markup_0(msgid, msgid_name, msgstr, msgstr_name):
        from xml.etree import ElementTree

        def count_tags(text):
            text = '<html>\n%s\n</html>' % text.encode('utf-8')
            counts = {}
            for event in ElementTree.iterparse(io.BytesIO(text)):
                tag = event[1].tag
                counts.setdefault(tag, 0)
                counts[tag] += 1
            counts['html'] -= 1
            return counts

        try:
            msgid_counts = count_tags(msgid)
        except ElementTree.ParseError:
            return
        try:
            msgstr_counts = count_tags(msgstr)
        except ElementTree.ParseError as e:
            yield TranslationError(e)
            return

        for tag in (set(msgid_counts) | set(msgstr_counts)):
            msgid_count = msgid_counts.get(tag, 0)
            msgstr_count = msgstr_counts.get(tag, 0)
            if msgid_count != msgstr_count:
                yield TranslationError(
                    "mismatched '%s' tag between %s and %s (%d != %d)" %
                    (tag, msgid_name, msgstr_name, msgid_count, msgstr_count))

    def write_js(fileobj, catalog, domain, locale):
        from trac.util.presentation import to_json
        data = {'domain': domain, 'locale': locale}

        messages = {}
        for msgid, msgstr in catalog.items():
            if isinstance(msgid, (list, tuple)):
                messages.setdefault(msgid[0], {})
                messages[msgid[0]][msgid[1]] = msgstr
            elif msgid:
                messages[msgid] = msgstr
            else:
                for line in msgstr.splitlines():
                    line = line.strip()
                    if not line:
                        continue
                    if ':' not in line:
                        continue
                    name, val = line.split(':', 1)
                    name = name.strip().lower()
                    if name == 'plural-forms':
                        data['plural_expr'] = pluralexpr(val)
                        break
        data['messages'] = messages
        data = to_json(data)
        if isinstance(data, bytes):
            data = str(data, 'utf-8')

        fileobj.write('// Generated messages javascript file '
                      'from compiled MO file\n')
        fileobj.write('babel.Translations.load(')
        fileobj.write(data)
        fileobj.write(').install();\n')

    def pluralexpr(forms):
        match = re.search(r'\bplural\s*=\s*([^;]+)', forms)
        if not match:
            raise ValueError('Failed to parse plural_forms %r' % (forms,))
        return match.group(1)


    def get_command_overriders():
        # 'bdist_wininst' runs a 'build', so make the latter
        # run a 'compile_catalog' before 'build_py'
        class build(_build):
            sub_commands = [('compile_catalog', None)] + _build.sub_commands

        # 'bdist_egg' isn't that nice, all it does is an 'install_lib'
        class install_lib(_install_lib): # playing setuptools' own tricks ;-)
            def l10n_run(self):
                self.run_command('compile_catalog')
            def run(self):
                self.l10n_run()
                # When bdist_egg is called on distribute 0.6.29 and later, the
                # egg file includes no *.mo and *.js files which are generated
                # in l10n_run() method.
                # We remove build_py.data_files property to re-compute in order
                # to avoid the issue (#11640).
                build_py = self.get_finalized_command('build_py')
                if 'data_files' in build_py.__dict__ and \
                   not any(any(name.endswith('.mo') for name in filenames)
                           for pkg, src_dir, build_dir, filenames
                           in build_py.data_files):
                    del build_py.__dict__['data_files']
                _install_lib.run(self)
        return build, install_lib

    def get_l10n_cmdclass():
        build, install_lib = get_command_overriders()
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
        }

    def get_l10n_js_cmdclass():
        build, _install_lib = get_command_overriders()
        build.sub_commands.insert(0, ('generate_messages_js', None))
        build.sub_commands.insert(0, ('compile_catalog_js', None))
        class install_lib(_install_lib):
            def l10n_run(self):
                self.run_command('compile_catalog_js')
                self.run_command('generate_messages_js')
                self.run_command('compile_catalog')
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
            'extract_messages_js': extract_messages,
            'init_catalog_js': init_catalog,
            'compile_catalog_js': compile_catalog,
            'update_catalog_js': update_catalog,
            'generate_messages_js': generate_messages_js,
            'check_catalog_js': check_catalog,
        }

    def get_l10n_trac_cmdclass():
        build, _install_lib = get_command_overriders()
        build.sub_commands.insert(0, ('generate_messages_js', None))
        build.sub_commands.insert(0, ('compile_catalog_js', None))
        build.sub_commands.insert(0, ('compile_catalog_tracini', None))
        class install_lib(_install_lib):
            def l10n_run(self):
                self.run_command('compile_catalog_tracini')
                self.run_command('compile_catalog_js')
                self.run_command('generate_messages_js')
                self.run_command('compile_catalog')
        return {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
            'extract_messages_js': extract_messages,
            'init_catalog_js': init_catalog,
            'compile_catalog_js': compile_catalog,
            'update_catalog_js': update_catalog,
            'generate_messages_js': generate_messages_js,
            'check_catalog_js': check_catalog,
            'extract_messages_tracini': extract_messages,
            'init_catalog_tracini': init_catalog,
            'compile_catalog_tracini': compile_catalog,
            'update_catalog_tracini': update_catalog,
            'check_catalog_tracini': check_catalog,
        }

except ImportError:
    def get_l10n_cmdclass():
        return
    def get_l10n_js_cmdclass():
        return
    def get_l10n_trac_cmdclass():
        return