# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'Mozilla l10n compare locales tool'

from __future__ import absolute_import
from __future__ import print_function
import codecs
import os
import shutil
import re

from compare_locales import parser
from compare_locales import mozpath
from compare_locales.checks import getChecker, EntityPos
from compare_locales.keyedtuple import KeyedTuple

from .observer import ObserverList
from .utils import AddRemove


class ContentComparer:
    '''Compare localized files against their reference, and optionally
    produce merged output.

    Results (stats, warnings, errors) are reported through
    ``self.observers`` (an ObserverList); the return values of its
    ``notify`` calls drive how missing/obsolete entities are handled.
    '''

    # Entity ids matching this pattern are counted separately as "keys"
    # (access/command keys) rather than compared as regular strings;
    # see the keyRE.search branch in compare().
    keyRE = re.compile('[kK]ey')
    # Newline pattern. Not referenced anywhere in this file's visible code.
    # NOTE(review): presumably kept for subclasses or external callers —
    # confirm before removing.
    nl = re.compile('\n', re.M)

    def __init__(self, quiet=0):
        '''Create a ContentComparer.
        observer is usually an instance of Observer. The return values
        of the notify method are used to control the handling of missing
        entities.
        '''
        self.observers = ObserverList(quiet=quiet)

    def create_merge_dir(self, merge_file):
        '''Ensure the directory that will contain `merge_file` exists.'''
        outdir = mozpath.dirname(merge_file)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

    def merge(self, ref_entities, ref_file, l10n_file, merge_file,
              missing, skips, ctx, capabilities, encoding):
        '''Create localized file in merge dir

        `ref_entities` is the parser result (a KeyedTuple) of the
        reference file
        `ref_file` and `l10n_file` are the File objects for the reference and
        the l10n file, resp.
        `merge_file` is the output path for the generated content. This is None
        if we're just comparing or validating.
        `missing` are the missing messages in l10n - potentially copied from
        reference
        `skips` are entries to be dropped from the localized file
        `ctx` is the parsing context
        `capabilities` are the capabilities for the merge algorithm
        `encoding` is the encoding to be used when serializing, usually utf-8
        '''

        # Nothing to do when only comparing/validating (no output path).
        if not merge_file:
            return

        if capabilities == parser.CAN_NONE:
            return

        self.create_merge_dir(merge_file)

        if capabilities & parser.CAN_COPY:
            # copy the l10n file if it's good, or the reference file if not
            if skips or missing:
                src = ref_file.fullpath
            else:
                src = l10n_file.fullpath
            shutil.copyfile(src, merge_file)
            # Progress/diagnostic output goes straight to stdout.
            print("copied reference to " + merge_file)
            return

        if not (capabilities & parser.CAN_SKIP):
            return

        # Start with None in case the merge file doesn't need to be created.
        f = None

        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])

            # we need to skip a few erroneous blocks in the input, copy by hand
            f = codecs.open(merge_file, 'wb', encoding)
            offset = 0
            for skip in skips:
                # skip.span is a (start, end) character range into
                # ctx.contents; copy everything between skipped spans.
                chunk = skip.span
                f.write(ctx.contents[offset:chunk[0]])
                offset = chunk[1]
            f.write(ctx.contents[offset:])

        if f is None:
            # l10n file is a good starting point
            shutil.copyfile(l10n_file.fullpath, merge_file)

        if not (capabilities & parser.CAN_MERGE):
            # Can't append reference entities; close the handle if we
            # opened one above and stop here.
            if f:
                f.close()
            return

        if skips or missing:
            if f is None:
                # Reopen the copied l10n file for appending.
                f = codecs.open(merge_file, 'ab', encoding)
            # Append the full serialized form (.all) of each missing
            # reference entity, and of each skipped entity that isn't Junk.
            trailing = (['\n'] +
                        [ref_entities[key].all for key in missing] +
                        [ref_entities[skip.key].all for skip in skips
                         if not isinstance(skip, parser.Junk)])

            def ensureNewline(s):
                # Guarantee each appended chunk ends with a newline so
                # entities don't run together.
                if not s.endswith('\n'):
                    return s + '\n'
                return s

            print("adding to " + merge_file)
            f.write(''.join(map(ensureNewline, trailing)))

        if f is not None:
            f.close()

    def remove(self, ref_file, l10n, merge_file):
        '''Obsolete l10n file.

        Copy to merge stage if we can.
        '''
        self.observers.notify('obsoleteFile', l10n, None)
        # Empty entity/skip lists plus CAN_COPY make merge() copy the
        # l10n file verbatim into the merge dir.
        self.merge(
            KeyedTuple([]), ref_file, l10n, merge_file,
            [], [], None, parser.CAN_COPY, None
        )

    def compare(self, ref_file, l10n, merge_file, extra_tests=None):
        '''Compare a localized file against its reference.

        Parses both files, walks the diff of their entity keys, reports
        missing/obsolete/changed/unchanged counts (plus word counts) to
        the observers, runs checks on entities present in both, and —
        when `merge_file` is not None — writes merged output.
        '''
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            # At least, merge
            self.merge(
                KeyedTuple([]), ref_file, l10n, merge_file, [], [], None,
                parser.CAN_COPY, None)
            return
        try:
            p.readFile(ref_file)
        except Exception as e:
            self.observers.notify('error', ref_file, str(e))
            return
        ref_entities = p.parse()
        try:
            p.readFile(l10n)
            l10n_entities = p.parse()
            # Keep the parsing context; merge() needs ctx.contents to
            # copy around skipped spans.
            l10n_ctx = p.ctx
        except Exception as e:
            self.observers.notify('error', l10n, str(e))
            return

        # Diff the two key sets: 'delete' = in reference only (missing in
        # l10n), 'add' = in l10n only (obsolete or junk), else in both.
        ar = AddRemove()
        ar.set_left(ref_entities.keys())
        ar.set_right(l10n_entities.keys())
        report = missing = obsolete = changed = unchanged = keys = 0
        missing_w = changed_w = unchanged_w = 0  # word stats
        missings = []
        skips = []
        checker = getChecker(l10n, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            checker.set_reference(ref_entities)
        # Duplicates in the reference are only warnings; in l10n, errors.
        for msg in p.findDuplicates(ref_entities):
            self.observers.notify('warning', l10n, msg)
        for msg in p.findDuplicates(l10n_entities):
            self.observers.notify('error', l10n, msg)
        for action, entity_id in ar:
            if action == 'delete':
                # missing entity
                if isinstance(ref_entities[entity_id], parser.Junk):
                    self.observers.notify(
                        'warning', l10n, 'Parser error in en-US'
                    )
                    continue
                # The observer's verdict controls handling:
                # "ignore" drops the entity entirely, "error" counts it
                # as missing (and queues it for merge), anything else
                # just reports it.
                _rv = self.observers.notify('missingEntity', l10n, entity_id)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(entity_id)
                    missing += 1
                    refent = ref_entities[entity_id]
                    missing_w += refent.count_words()
                else:
                    # just report
                    report += 1
            elif action == 'add':
                # obsolete entity or junk
                if isinstance(l10n_entities[entity_id],
                              parser.Junk):
                    junk = l10n_entities[entity_id]
                    self.observers.notify(
                        'error', l10n,
                        junk.error_message()
                    )
                    if merge_file is not None:
                        # Junk spans get cut out of the merged file.
                        skips.append(junk)
                elif (
                    self.observers.notify('obsoleteEntity', l10n, entity_id)
                    != 'ignore'
                ):
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                refent = ref_entities[entity_id]
                l10nent = l10n_entities[entity_id]
                if self.keyRE.search(entity_id):
                    # Access/command keys are only counted, not checked.
                    keys += 1
                else:
                    if refent.equals(l10nent):
                        # doUnchanged/doChanged are subclass hooks.
                        self.doUnchanged(l10nent)
                        unchanged += 1
                        unchanged_w += refent.count_words()
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                        changed_w += refent.count_words()
                    # run checks:
                    if checker:
                        for tp, pos, msg, cat in checker.check(
                                refent, l10nent):
                            # EntityPos positions are relative to the
                            # entity, others to its value.
                            if isinstance(pos, EntityPos):
                                line, col = l10nent.position(pos)
                            else:
                                line, col = l10nent.value_position(pos)
                            # skip error entities when merging
                            if tp == 'error' and merge_file is not None:
                                skips.append(l10nent)
                            self.observers.notify(
                                tp, l10n,
                                u"%s at line %d, column %d for %s" %
                                (msg, line, col, refent.key)
                            )
            pass

        if merge_file is not None:
            self.merge(
                ref_entities, ref_file,
                l10n, merge_file, missings, skips, l10n_ctx,
                p.capabilities, p.encoding)

        stats = {
            'missing': missing,
            'missing_w': missing_w,
            'report': report,
            'obsolete': obsolete,
            'changed': changed,
            'changed_w': changed_w,
            'unchanged': unchanged,
            'unchanged_w': unchanged_w,
            'keys': keys,
        }
        self.observers.updateStats(l10n, stats)
        pass

    def add(self, orig, missing, merge_file):
        ''' Add missing localized file.

        `orig` is the reference File, `missing` the (absent) l10n File;
        copies the reference into the merge dir when possible and
        reports the number of missing strings/words to the observers.
        '''
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            p = None

        # if we don't support this file, assume CAN_COPY to mimick
        # l10n dir as closely as possible
        caps = p.capabilities if p else parser.CAN_COPY
        if (caps & (parser.CAN_COPY | parser.CAN_MERGE)):
            # even if we can merge, pretend we can only copy
            # (the non-empty `missing` list forces the reference copy).
            self.merge(
                KeyedTuple([]), orig, missing, merge_file,
                ['trigger copy'], [], None, parser.CAN_COPY, None
            )

        if self.observers.notify('missingFile', missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return

        if p is None:
            # We don't have a parser, cannot count missing strings
            return

        try:
            p.readFile(f)
            entities = p.parse()
        except Exception as ex:
            self.observers.notify('error', f, str(ex))
            return
        # strip parse errors
        entities = [e for e in entities if not isinstance(e, parser.Junk)]
        self.observers.updateStats(missing, {'missing': len(entities)})
        missing_w = 0
        for e in entities:
            missing_w += e.count_words()
        self.observers.updateStats(missing, {'missing_w': missing_w})

    def doUnchanged(self, entity):
        '''Hook called for each entity equal to its reference.'''
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        '''Hook called for each entity that differs from its reference.'''
        # overload this if needed
        pass