1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5'Mozilla l10n compare locales tool'
6
7from __future__ import absolute_import
8from __future__ import print_function
9import codecs
10import os
11import shutil
12import re
13
14from compare_locales import parser
15from compare_locales import mozpath
16from compare_locales.checks import getChecker, EntityPos
17from compare_locales.keyedtuple import KeyedTuple
18
19from .observer import ObserverList
20from .utils import AddRemove
21
22
class ContentComparer:
    '''Compare a localized file against its en-US reference.

    Drives parsing, entity-by-entity comparison, checks, and l10n-merge
    for a single file pair, reporting everything through an ObserverList.
    '''

    # Entity IDs matching this are access/command keys; they're tallied
    # under 'keys' instead of changed/unchanged.
    keyRE = re.compile('[kK]ey')
    nl = re.compile('\n', re.M)

    def __init__(self, quiet=0):
        '''Create a ContentComparer.
        observer is usually a instance of Observer. The return values
        of the notify method are used to control the handling of missing
        entities.
        '''
        self.observers = ObserverList(quiet=quiet)

    def create_merge_dir(self, merge_file):
        '''Ensure the directory that will hold ``merge_file`` exists.'''
        outdir = mozpath.dirname(merge_file)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

    def merge(self, ref_entities, ref_file, l10n_file, merge_file,
              missing, skips, ctx, capabilities, encoding):
        '''Create localized file in merge dir

        `ref_entities` is the parser result of the reference file
        `ref_file` and `l10n_file` are the File objects for the reference and
        the l10n file, resp.
        `merge_file` is the output path for the generated content. This is None
        if we're just comparing or validating.
        `missing` are the missing messages in l10n - potentially copied from
        reference
        `skips` are entries to be dropped from the localized file
        `ctx` is the parsing context
        `capabilities` are the capabilities for the merge algorithm
        `encoding` is the encoding to be used when serializing, usually utf-8
        '''

        if not merge_file:
            return

        if capabilities == parser.CAN_NONE:
            return

        self.create_merge_dir(merge_file)

        if capabilities & parser.CAN_COPY:
            # copy the l10n file if it's good, or the reference file if not
            if skips or missing:
                src = ref_file.fullpath
            else:
                src = l10n_file.fullpath
            shutil.copyfile(src, merge_file)
            # report which file we actually copied; it's the l10n file
            # when there were no skips or missing entities
            print("copied " + src + " to " + merge_file)
            return

        if not (capabilities & parser.CAN_SKIP):
            return

        # Start with None in case the merge file doesn't need to be created.
        f = None

        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])

            # we need to skip a few erroneous blocks in the input, copy by hand
            f = codecs.open(merge_file, 'wb', encoding)
            offset = 0
            for skip in skips:
                chunk = skip.span
                f.write(ctx.contents[offset:chunk[0]])
                offset = chunk[1]
            f.write(ctx.contents[offset:])

        if f is None:
            # l10n file is a good starting point
            shutil.copyfile(l10n_file.fullpath, merge_file)

        if not (capabilities & parser.CAN_MERGE):
            if f:
                f.close()
            return

        if skips or missing:
            if f is None:
                f = codecs.open(merge_file, 'ab', encoding)
            # Append the reference serialization of all missing entities,
            # and of skipped entities that still have a key (Junk doesn't).
            trailing = (['\n'] +
                        [ref_entities[key].all for key in missing] +
                        [ref_entities[skip.key].all for skip in skips
                         if not isinstance(skip, parser.Junk)])

            def ensureNewline(s):
                # serialized entities need to be newline-terminated before
                # being concatenated
                if not s.endswith('\n'):
                    return s + '\n'
                return s

            print("adding to " + merge_file)
            f.write(''.join(map(ensureNewline, trailing)))

        if f is not None:
            f.close()

    def remove(self, ref_file, l10n, merge_file):
        '''Obsolete l10n file.

        Copy to merge stage if we can.
        '''
        self.observers.notify('obsoleteFile', l10n, None)
        self.merge(
            KeyedTuple([]), ref_file, l10n, merge_file,
            [], [], None, parser.CAN_COPY, None
        )

    def compare(self, ref_file, l10n, merge_file, extra_tests=None):
        '''Compare the l10n file against the reference file.

        Parses both files, notifies the observers about missing, obsolete,
        changed and junk entities as well as check results, updates the
        aggregate stats, and - if `merge_file` is not None - writes the
        merged output.
        '''
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            # At least, merge
            self.merge(
                KeyedTuple([]), ref_file, l10n, merge_file, [], [], None,
                parser.CAN_COPY, None)
            return
        try:
            p.readFile(ref_file)
        except Exception as e:
            self.observers.notify('error', ref_file, str(e))
            return
        ref_entities = p.parse()
        try:
            p.readFile(l10n)
            l10n_entities = p.parse()
            l10n_ctx = p.ctx
        except Exception as e:
            self.observers.notify('error', l10n, str(e))
            return

        ar = AddRemove()
        ar.set_left(ref_entities.keys())
        ar.set_right(l10n_entities.keys())
        report = missing = obsolete = changed = unchanged = keys = 0
        missing_w = changed_w = unchanged_w = 0  # word stats
        missings = []
        skips = []
        checker = getChecker(l10n, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            checker.set_reference(ref_entities)
        # duplicate keys are a warning in the reference, an error in l10n
        for msg in p.findDuplicates(ref_entities):
            self.observers.notify('warning', l10n, msg)
        for msg in p.findDuplicates(l10n_entities):
            self.observers.notify('error', l10n, msg)
        for action, entity_id in ar:
            if action == 'delete':
                # missing entity
                if isinstance(ref_entities[entity_id], parser.Junk):
                    self.observers.notify(
                        'warning', l10n, 'Parser error in en-US'
                    )
                    continue
                _rv = self.observers.notify('missingEntity', l10n, entity_id)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(entity_id)
                    missing += 1
                    refent = ref_entities[entity_id]
                    missing_w += refent.count_words()
                else:
                    # just report
                    report += 1
            elif action == 'add':
                # obsolete entity or junk
                if isinstance(l10n_entities[entity_id],
                              parser.Junk):
                    junk = l10n_entities[entity_id]
                    self.observers.notify(
                        'error', l10n,
                        junk.error_message()
                    )
                    if merge_file is not None:
                        skips.append(junk)
                elif (
                    self.observers.notify('obsoleteEntity', l10n, entity_id)
                    != 'ignore'
                ):
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                refent = ref_entities[entity_id]
                l10nent = l10n_entities[entity_id]
                if self.keyRE.search(entity_id):
                    keys += 1
                else:
                    if refent.equals(l10nent):
                        self.doUnchanged(l10nent)
                        unchanged += 1
                        unchanged_w += refent.count_words()
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                        changed_w += refent.count_words()
                # run checks on entities present on both sides
                if checker:
                    for tp, pos, msg, cat in checker.check(refent, l10nent):
                        if isinstance(pos, EntityPos):
                            line, col = l10nent.position(pos)
                        else:
                            line, col = l10nent.value_position(pos)
                        # skip error entities when merging
                        if tp == 'error' and merge_file is not None:
                            skips.append(l10nent)
                        self.observers.notify(
                            tp, l10n,
                            u"%s at line %d, column %d for %s" %
                            (msg, line, col, refent.key)
                        )

        if merge_file is not None:
            self.merge(
                ref_entities, ref_file,
                l10n, merge_file, missings, skips, l10n_ctx,
                p.capabilities, p.encoding)

        stats = {
            'missing': missing,
            'missing_w': missing_w,
            'report': report,
            'obsolete': obsolete,
            'changed': changed,
            'changed_w': changed_w,
            'unchanged': unchanged,
            'unchanged_w': unchanged_w,
            'keys': keys,
        }
        self.observers.updateStats(l10n, stats)

    def add(self, orig, missing, merge_file):
        ''' Add missing localized file.'''
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            p = None

        # if we don't support this file, assume CAN_COPY to mimic
        # l10n dir as closely as possible
        caps = p.capabilities if p else parser.CAN_COPY
        if (caps & (parser.CAN_COPY | parser.CAN_MERGE)):
            # even if we can merge, pretend we can only copy
            self.merge(
                KeyedTuple([]), orig, missing, merge_file,
                ['trigger copy'], [], None, parser.CAN_COPY, None
            )

        if self.observers.notify('missingFile', missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return

        if p is None:
            # We don't have a parser, cannot count missing strings
            return

        try:
            p.readFile(f)
            entities = p.parse()
        except Exception as ex:
            self.observers.notify('error', f, str(ex))
            return
        # strip parse errors
        entities = [e for e in entities if not isinstance(e, parser.Junk)]
        self.observers.updateStats(missing, {'missing': len(entities)})
        missing_w = 0
        for e in entities:
            missing_w += e.count_words()
        self.observers.updateStats(missing, {'missing_w': missing_w})

    def doUnchanged(self, entity):
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        # overload this if needed
        pass
308