1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5'Mozilla l10n compare locales tool'
6
7from __future__ import absolute_import
8from __future__ import print_function
9import codecs
10import os
11import shutil
12import re
13
14from compare_locales import parser
15from compare_locales import mozpath
16from compare_locales.checks import getChecker, EntityPos
17from compare_locales.keyedtuple import KeyedTuple
18
19from .observer import ObserverList
20from .utils import AddRemove
21
22
class ContentComparer:
    '''Compare a localized file against its en-US reference.

    Drives parsing, entity-by-entity comparison, checks, and l10n-merge
    for a single file pair, reporting everything through an ObserverList.
    '''

    # Entity IDs matching this are access/command keys; they're tallied
    # under 'keys' instead of changed/unchanged.
    keyRE = re.compile('[kK]ey')
    nl = re.compile('\n', re.M)

    def __init__(self, quiet=0):
        '''Create a ContentComparer.
        observer is usually a instance of Observer. The return values
        of the notify method are used to control the handling of missing
        entities.
        '''
        self.observers = ObserverList(quiet=quiet)

    def create_merge_dir(self, merge_file):
        '''Ensure the directory that will hold ``merge_file`` exists.'''
        outdir = mozpath.dirname(merge_file)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

    def merge(self, ref_entities, ref_file, l10n_file, merge_file,
              missing, skips, ctx, capabilities, encoding):
        '''Create localized file in merge dir

        `ref_entities` is the parser result of the reference file
        `ref_file` and `l10n_file` are the File objects for the reference and
        the l10n file, resp.
        `merge_file` is the output path for the generated content. This is None
        if we're just comparing or validating.
        `missing` are the missing messages in l10n - potentially copied from
        reference
        `skips` are entries to be dropped from the localized file
        `ctx` is the parsing context
        `capabilities` are the capabilities for the merge algorithm
        `encoding` is the encoding to be used when serializing, usually utf-8
        '''

        if not merge_file:
            return

        if capabilities == parser.CAN_NONE:
            return

        self.create_merge_dir(merge_file)

        if capabilities & parser.CAN_COPY:
            # copy the l10n file if it's good, or the reference file if not
            if skips or missing:
                src = ref_file.fullpath
            else:
                src = l10n_file.fullpath
            shutil.copyfile(src, merge_file)
            # report which file we actually copied; it's the l10n file
            # when there were no skips or missing entities
            print("copied " + src + " to " + merge_file)
            return

        if not (capabilities & parser.CAN_SKIP):
            return

        # Start with None in case the merge file doesn't need to be created.
        f = None

        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])

            # we need to skip a few erroneous blocks in the input, copy by hand
            f = codecs.open(merge_file, 'wb', encoding)
            offset = 0
            for skip in skips:
                chunk = skip.span
                f.write(ctx.contents[offset:chunk[0]])
                offset = chunk[1]
            f.write(ctx.contents[offset:])

        if f is None:
            # l10n file is a good starting point
            shutil.copyfile(l10n_file.fullpath, merge_file)

        if not (capabilities & parser.CAN_MERGE):
            if f:
                f.close()
            return

        if skips or missing:
            if f is None:
                f = codecs.open(merge_file, 'ab', encoding)
            # Append the reference serialization of all missing entities,
            # and of skipped entities that still have a key (Junk doesn't).
            trailing = (['\n'] +
                        [ref_entities[key].all for key in missing] +
                        [ref_entities[skip.key].all for skip in skips
                         if not isinstance(skip, parser.Junk)])

            def ensureNewline(s):
                # serialized entities need to be newline-terminated before
                # being concatenated
                if not s.endswith('\n'):
                    return s + '\n'
                return s

            print("adding to " + merge_file)
            f.write(''.join(map(ensureNewline, trailing)))

        if f is not None:
            f.close()

    def remove(self, ref_file, l10n, merge_file):
        '''Obsolete l10n file.

        Copy to merge stage if we can.
        '''
        self.observers.notify('obsoleteFile', l10n, None)
        self.merge(
            KeyedTuple([]), ref_file, l10n, merge_file,
            [], [], None, parser.CAN_COPY, None
        )

    def compare(self, ref_file, l10n, merge_file, extra_tests=None):
        '''Compare the l10n file against the reference file.

        Parses both files, notifies the observers about missing, obsolete,
        changed and junk entities as well as check results, updates the
        aggregate stats, and - if `merge_file` is not None - writes the
        merged output.
        '''
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            # At least, merge
            self.merge(
                KeyedTuple([]), ref_file, l10n, merge_file, [], [], None,
                parser.CAN_COPY, None)
            return
        try:
            p.readFile(ref_file)
        except Exception as e:
            self.observers.notify('error', ref_file, str(e))
            return
        ref_entities = p.parse()
        try:
            p.readFile(l10n)
            l10n_entities = p.parse()
            l10n_ctx = p.ctx
        except Exception as e:
            self.observers.notify('error', l10n, str(e))
            return

        ar = AddRemove()
        ar.set_left(ref_entities.keys())
        ar.set_right(l10n_entities.keys())
        report = missing = obsolete = changed = unchanged = keys = 0
        missing_w = changed_w = unchanged_w = 0  # word stats
        missings = []
        skips = []
        checker = getChecker(l10n, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            checker.set_reference(ref_entities)
        # duplicate keys are a warning in the reference, an error in l10n
        for msg in p.findDuplicates(ref_entities):
            self.observers.notify('warning', l10n, msg)
        for msg in p.findDuplicates(l10n_entities):
            self.observers.notify('error', l10n, msg)
        for action, entity_id in ar:
            if action == 'delete':
                # missing entity
                if isinstance(ref_entities[entity_id], parser.Junk):
                    self.observers.notify(
                        'warning', l10n, 'Parser error in en-US'
                    )
                    continue
                _rv = self.observers.notify('missingEntity', l10n, entity_id)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(entity_id)
                    missing += 1
                    refent = ref_entities[entity_id]
                    missing_w += refent.count_words()
                else:
                    # just report
                    report += 1
            elif action == 'add':
                # obsolete entity or junk
                if isinstance(l10n_entities[entity_id],
                              parser.Junk):
                    junk = l10n_entities[entity_id]
                    self.observers.notify(
                        'error', l10n,
                        junk.error_message()
                    )
                    if merge_file is not None:
                        skips.append(junk)
                elif (
                    self.observers.notify('obsoleteEntity', l10n, entity_id)
                    != 'ignore'
                ):
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                refent = ref_entities[entity_id]
                l10nent = l10n_entities[entity_id]
                if self.keyRE.search(entity_id):
                    keys += 1
                else:
                    if refent.equals(l10nent):
                        self.doUnchanged(l10nent)
                        unchanged += 1
                        unchanged_w += refent.count_words()
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                        changed_w += refent.count_words()
                # run checks on entities present on both sides
                if checker:
                    for tp, pos, msg, cat in checker.check(refent, l10nent):
                        if isinstance(pos, EntityPos):
                            line, col = l10nent.position(pos)
                        else:
                            line, col = l10nent.value_position(pos)
                        # skip error entities when merging
                        if tp == 'error' and merge_file is not None:
                            skips.append(l10nent)
                        self.observers.notify(
                            tp, l10n,
                            u"%s at line %d, column %d for %s" %
                            (msg, line, col, refent.key)
                        )

        if merge_file is not None:
            self.merge(
                ref_entities, ref_file,
                l10n, merge_file, missings, skips, l10n_ctx,
                p.capabilities, p.encoding)

        stats = {
            'missing': missing,
            'missing_w': missing_w,
            'report': report,
            'obsolete': obsolete,
            'changed': changed,
            'changed_w': changed_w,
            'unchanged': unchanged,
            'unchanged_w': unchanged_w,
            'keys': keys,
        }
        self.observers.updateStats(l10n, stats)

    def add(self, orig, missing, merge_file):
        ''' Add missing localized file.'''
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            p = None

        # if we don't support this file, assume CAN_COPY to mimic
        # l10n dir as closely as possible
        caps = p.capabilities if p else parser.CAN_COPY
        if (caps & (parser.CAN_COPY | parser.CAN_MERGE)):
            # even if we can merge, pretend we can only copy
            self.merge(
                KeyedTuple([]), orig, missing, merge_file,
                ['trigger copy'], [], None, parser.CAN_COPY, None
            )

        if self.observers.notify('missingFile', missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return

        if p is None:
            # We don't have a parser, cannot count missing strings
            return

        try:
            p.readFile(f)
            entities = p.parse()
        except Exception as ex:
            self.observers.notify('error', f, str(ex))
            return
        # strip parse errors
        entities = [e for e in entities if not isinstance(e, parser.Junk)]
        self.observers.updateStats(missing, {'missing': len(entities)})
        missing_w = 0
        for e in entities:
            missing_w += e.count_words()
        self.observers.updateStats(missing, {'missing_w': missing_w})

    def doUnchanged(self, entity):
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        # overload this if needed
        pass
308