1# coding=utf8 2"""Migration Transforms. 3 4Transforms are AST nodes which describe how legacy translations should be 5migrated. They are created inert and only return the migrated AST nodes when 6they are evaluated by a MigrationContext. 7 8All Transforms evaluate to Fluent Patterns. This makes them suitable for 9defining migrations of values of message, attributes and variants. The special 10CONCAT Transform is capable of joining multiple Patterns returned by evaluating 11other Transforms into a single Pattern. It can also concatenate Pattern 12elements: TextElements and Placeables. 13 14The COPY, REPLACE and PLURALS Transforms inherit from Source which is a special 15AST Node defining the location (the file path and the id) of the legacy 16translation. During the migration, the current MigrationContext scans the 17migration spec for Source nodes and extracts the information about all legacy 18translations being migrated. For instance, 19 20 COPY('file.dtd', 'hello') 21 22is equivalent to: 23 24 FTL.Pattern([ 25 Source('file.dtd', 'hello') 26 ]) 27 28Sometimes it's useful to work with text rather than (path, key) source 29definitions. This is the case when the migrated translation requires some 30hardcoded text, e.g. <a> and </a> when multiple translations become a single 31one with a DOM overlay. In such cases it's best to use FTL.TextElements: 32 33 FTL.Message( 34 id=FTL.Identifier('update-failed'), 35 value=CONCAT( 36 COPY('aboutDialog.dtd', 'update.failed.start'), 37 FTL.TextElement('<a>'), 38 COPY('aboutDialog.dtd', 'update.failed.linkText'), 39 FTL.TextElement('</a>'), 40 COPY('aboutDialog.dtd', 'update.failed.end'), 41 ) 42 ) 43 44The REPLACE_IN_TEXT Transform also takes TextElements as input, making it 45possible to pass it as the foreach function of the PLURALS Transform. In the 46example below, each slice of the plural string is converted into a 47TextElement by PLURALS and then run through the REPLACE_IN_TEXT transform. 48 49 FTL.Message( 50 FTL.Identifier('delete-all'), 51 value=PLURALS( 52 'aboutDownloads.dtd', 53 'deleteAll', 54 VARIABLE_REFERENCE('num'), 55 lambda text: REPLACE_IN_TEXT( 56 text, 57 { 58 '#1': VARIABLE_REFERENCE('num') 59 } 60 ) 61 ) 62 ) 63""" 64 65from __future__ import unicode_literals 66from __future__ import absolute_import 67import re 68 69from fluent.syntax import ast as FTL 70from fluent.syntax.visitor import Transformer 71from .errors import NotSupportedError 72 73 74def chain_elements(elements): 75 '''Flatten a list of FTL nodes into an iterator over PatternElements.''' 76 for element in elements: 77 if isinstance(element, FTL.Pattern): 78 # PY3 yield from element.elements 79 for child in element.elements: 80 yield child 81 elif isinstance(element, FTL.PatternElement): 82 yield element 83 elif isinstance(element, FTL.Expression): 84 yield FTL.Placeable(element) 85 else: 86 raise RuntimeError( 87 'Expected Pattern, PatternElement or Expression') 88 89 90re_leading_ws = re.compile( 91 r'\A(?:(?P<whitespace> +)(?P<text>.*?)|(?P<block_text>\n.*?))\Z', 92 re.S, 93) 94re_trailing_ws = re.compile( 95 r'\A(?:(?P<text>.*?)(?P<whitespace> +)|(?P<block_text>.*\n))\Z', 96 re.S 97) 98 99 100def extract_whitespace(regex, element): 101 '''Extract leading or trailing whitespace from a TextElement. 102 103 Return a tuple of (Placeable, TextElement) in which the Placeable 104 encodes the extracted whitespace as a StringLiteral and the 105 TextElement has the same amount of whitespace removed. The 106 Placeable with the extracted whitespace is always returned first. 107 If the element starts or ends with a newline, add an empty 108 StringLiteral. 109 ''' 110 match = re.search(regex, element.value) 111 if match: 112 # If white-space is None, we're a newline. Add an 113 # empty { "" } 114 whitespace = match.group('whitespace') or '' 115 placeable = FTL.Placeable(FTL.StringLiteral(whitespace)) 116 if whitespace == element.value: 117 return placeable, None 118 else: 119 # Either text or block_text matched the rest. 120 text = match.group('text') or match.group('block_text') 121 return placeable, FTL.TextElement(text) 122 else: 123 return None, element 124 125 126class Transform(FTL.BaseNode): 127 def __call__(self, ctx): 128 raise NotImplementedError 129 130 @staticmethod 131 def pattern_of(*elements): 132 normalized = [] 133 134 # Normalize text content: convert text content to TextElements, join 135 # adjacent text and prune empty. Text content is either existing 136 # TextElements or whitespace-only StringLiterals. This may result in 137 # leading and trailing whitespace being put back into TextElements if 138 # the new Pattern is built from existing Patterns (CONCAT(COPY...)). 139 # The leading and trailing whitespace of the new Pattern will be 140 # extracted later into new StringLiterals. 141 for element in chain_elements(elements): 142 if isinstance(element, FTL.TextElement): 143 text_content = element.value 144 elif isinstance(element, FTL.Placeable) \ 145 and isinstance(element.expression, FTL.StringLiteral) \ 146 and re.match(r'^ *$', element.expression.value): 147 text_content = element.expression.value 148 else: 149 # The element does not contain text content which should be 150 # normalized. It may be a number, a reference, or 151 # a StringLiteral which should be preserved in the Pattern. 152 normalized.append(element) 153 continue 154 155 previous = normalized[-1] if len(normalized) else None 156 if isinstance(previous, FTL.TextElement): 157 # Join adjacent TextElements. 158 previous.value += text_content 159 elif len(text_content) > 0: 160 # Normalize non-empty text to a TextElement. 161 normalized.append(FTL.TextElement(text_content)) 162 else: 163 # Prune empty text. 164 pass 165 166 # Store empty values explicitly as {""}. 167 if len(normalized) == 0: 168 empty = FTL.Placeable(FTL.StringLiteral('')) 169 return FTL.Pattern([empty]) 170 171 # Extract explicit leading whitespace into a StringLiteral. 172 if isinstance(normalized[0], FTL.TextElement): 173 ws, text = extract_whitespace(re_leading_ws, normalized[0]) 174 normalized[:1] = [ws, text] 175 176 # Extract explicit trailing whitespace into a StringLiteral. 177 if isinstance(normalized[-1], FTL.TextElement): 178 ws, text = extract_whitespace(re_trailing_ws, normalized[-1]) 179 normalized[-1:] = [text, ws] 180 181 return FTL.Pattern([ 182 element 183 for element in normalized 184 if element is not None 185 ]) 186 187 188class Source(Transform): 189 """Base class for Transforms that get translations from source files. 190 191 The contract is that the first argument is the source path, and the 192 second is a key representing legacy string IDs, or Fluent id.attr. 193 """ 194 def __init__(self, path, key): 195 self.path = path 196 self.key = key 197 198 199class FluentSource(Source): 200 """Declare a Fluent source translation to be copied over. 201 202 When evaluated, it clones the Pattern of the parsed source. 203 """ 204 def __init__(self, path, key): 205 if not path.endswith('.ftl'): 206 raise NotSupportedError( 207 'Please use COPY to migrate from legacy files ' 208 '({})'.format(path) 209 ) 210 if key[0] == '-' and '.' in key: 211 raise NotSupportedError( 212 'Cannot migrate from Term Attributes, as they are' 213 'locale-dependent ({})'.format(path) 214 ) 215 super(FluentSource, self).__init__(path, key) 216 217 def __call__(self, ctx): 218 pattern = ctx.get_fluent_source_pattern(self.path, self.key) 219 return pattern.clone() 220 221 222class COPY_PATTERN(FluentSource): 223 """Create a Pattern with the translation value from the given source. 224 225 The given key can be a Message ID, Message ID.attribute_name, or 226 Term ID. Accessing Term attributes is not supported, as they're internal 227 to the localization. 228 """ 229 pass 230 231 232class TransformPattern(FluentSource, Transformer): 233 """Base class for modifying a Fluent pattern as part of a migration. 234 235 Implement visit_* methods of the Transformer pattern to do the 236 actual modifications. 237 """ 238 def __call__(self, ctx): 239 pattern = super(TransformPattern, self).__call__(ctx) 240 return self.visit(pattern) 241 242 def visit_Pattern(self, node): 243 # Make sure we're creating valid Patterns after restructuring 244 # transforms. 245 node = self.generic_visit(node) 246 pattern = Transform.pattern_of(*node.elements) 247 return pattern 248 249 def visit_Placeable(self, node): 250 # Ensure we have a Placeable with an expression still. 251 # Transforms could have replaced the expression with 252 # a Pattern or PatternElement, in which case we 253 # just pass that through. 254 # Patterns then get flattened by visit_Pattern. 255 node = self.generic_visit(node) 256 if isinstance(node.expression, (FTL.Pattern, FTL.PatternElement)): 257 return node.expression 258 return node 259 260 261class LegacySource(Source): 262 """Declare the source translation to be migrated with other transforms. 263 264 When evaluated, `Source` returns a TextElement with the content from the 265 source translation. Escaped characters are unescaped by the 266 compare-locales parser according to the file format: 267 268 - in properties files: \\uXXXX, 269 - in DTD files: known named, decimal, and hexadecimal HTML entities. 270 271 Consult the following files for the list of known named HTML entities: 272 273 https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py 274 https://github.com/python/cpython/blob/3.6/Lib/html/entities.py 275 276 By default, leading and trailing whitespace on each line as well as 277 leading and trailing empty lines will be stripped from the source 278 translation's content. Set `trim=False` to disable this behavior. 279 """ 280 281 def __init__(self, path, key, trim=None): 282 if path.endswith('.ftl'): 283 raise NotSupportedError( 284 'Please use COPY_PATTERN to migrate from Fluent files ' 285 '({})'.format(path)) 286 287 super(LegacySource, self).__init__(path, key) 288 self.trim = trim 289 290 def get_text(self, ctx): 291 return ctx.get_legacy_source(self.path, self.key) 292 293 @staticmethod 294 def trim_text(text): 295 # strip leading white-space from each line 296 text = re.sub('^[ \t]+', '', text, flags=re.M) 297 # strip trailing white-space from each line 298 text = re.sub('[ \t]+$', '', text, flags=re.M) 299 # strip leading and trailing empty lines 300 text = text.strip('\r\n') 301 return text 302 303 def __call__(self, ctx): 304 text = self.get_text(ctx) 305 if self.trim is not False: 306 text = self.trim_text(text) 307 return FTL.TextElement(text) 308 309 310class COPY(LegacySource): 311 """Create a Pattern with the translation value from the given source.""" 312 313 def __call__(self, ctx): 314 element = super(COPY, self).__call__(ctx) 315 return Transform.pattern_of(element) 316 317 318PRINTF = re.compile( 319 r'%(?P<good>%|' 320 r'(?:(?P<number>[1-9][0-9]*)\$)?' 321 r'(?P<width>\*|[0-9]+)?' 322 r'(?P<prec>\.(?:\*|[0-9]+)?)?' 323 r'(?P<spec>[duxXosScpfg]))' 324) 325 326 327def number(): 328 i = 1 329 while True: 330 yield i 331 i += 1 332 333 334def normalize_printf(text): 335 """Normalize printf arguments so that they're all numbered. 336 Gecko forbids mixing unnumbered and numbered ones, so 337 we just need to convert unnumbered to numbered ones. 338 Also remove ones that have zero width, as they're intended 339 to be removed from the output by the localizer. 340 """ 341 next_number = number() 342 343 def normalized(match): 344 if match.group('good') == '%': 345 return '%' 346 hidden = match.group('width') == '0' 347 if match.group('number'): 348 return '' if hidden else match.group() 349 num = next(next_number) 350 return '' if hidden else '%{}${}'.format(num, match.group('spec')) 351 352 return PRINTF.sub(normalized, text) 353 354 355class REPLACE_IN_TEXT(Transform): 356 """Create a Pattern from a TextElement and replace legacy placeables. 357 358 The original placeables are defined as keys on the `replacements` dict. 359 For each key the value must be defined as a FTL Pattern, Placeable, 360 TextElement or Expression to be interpolated. 361 """ 362 363 def __init__(self, element, replacements, normalize_printf=False): 364 self.element = element 365 self.replacements = replacements 366 self.normalize_printf = normalize_printf 367 368 def __call__(self, ctx): 369 # For each specified replacement, find all indices of the original 370 # placeable in the source translation. If missing, the list of indices 371 # will be empty. 372 value = self.element.value 373 if self.normalize_printf: 374 value = normalize_printf(value) 375 key_indices = { 376 key: [m.start() for m in re.finditer(re.escape(key), value)] 377 for key in self.replacements.keys() 378 } 379 380 # Build a dict of indices to replacement keys. 381 keys_indexed = {} 382 for key, indices in key_indices.items(): 383 for index in indices: 384 keys_indexed[index] = key 385 386 # Order the replacements by the position of the original placeable in 387 # the translation. 388 replacements = ( 389 (key, ctx.evaluate(self.replacements[key])) 390 for index, key 391 in sorted(keys_indexed.items(), key=lambda x: x[0]) 392 ) 393 394 # A list of PatternElements built from the legacy translation and the 395 # FTL replacements. It may contain empty or adjacent TextElements. 396 elements = [] 397 tail = value 398 399 # Convert original placeables and text into FTL Nodes. For each 400 # original placeable the translation will be partitioned around it and 401 # the text before it will be converted into an `FTL.TextElement` and 402 # the placeable will be replaced with its replacement. 403 for key, node in replacements: 404 before, key, tail = tail.partition(key) 405 elements.append(FTL.TextElement(before)) 406 elements.append(node) 407 408 # Don't forget about the tail after the loop ends. 409 elements.append(FTL.TextElement(tail)) 410 return Transform.pattern_of(*elements) 411 412 413class REPLACE(LegacySource): 414 """Create a Pattern with interpolations from given source. 415 416 Interpolations in the translation value from the given source will be 417 replaced with FTL placeables using the `REPLACE_IN_TEXT` transform. 418 """ 419 420 def __init__( 421 self, path, key, replacements, **kwargs 422 ): 423 # We default normalize_printf to False except for .properties files. 424 # We still allow the caller to override the default value. 425 normalize_printf = False 426 if 'normalize_printf' in kwargs: 427 normalize_printf = kwargs['normalize_printf'] 428 del kwargs['normalize_printf'] 429 elif path.endswith('.properties'): 430 normalize_printf = True 431 432 super(REPLACE, self).__init__(path, key, **kwargs) 433 self.replacements = replacements 434 self.normalize_printf = normalize_printf 435 436 def __call__(self, ctx): 437 element = super(REPLACE, self).__call__(ctx) 438 return REPLACE_IN_TEXT( 439 element, self.replacements, 440 normalize_printf=self.normalize_printf 441 )(ctx) 442 443 444class PLURALS(LegacySource): 445 """Create a Pattern with plurals from given source. 446 447 Build an `FTL.SelectExpression` with the supplied `selector` and variants 448 extracted from the source. The original translation should be a 449 semicolon-separated list of plural forms. Each form will be converted 450 into a TextElement and run through the `foreach` function, which should 451 return an `FTL.Node` or a `Transform`. By default, the `foreach` function 452 creates a valid Pattern from the TextElement passed into it. 453 """ 454 DEFAULT_ORDER = ('zero', 'one', 'two', 'few', 'many', 'other') 455 456 def __init__(self, path, key, selector, foreach=Transform.pattern_of, 457 **kwargs): 458 super(PLURALS, self).__init__(path, key, **kwargs) 459 self.selector = selector 460 self.foreach = foreach 461 462 def __call__(self, ctx): 463 element = super(PLURALS, self).__call__(ctx) 464 selector = ctx.evaluate(self.selector) 465 keys = ctx.plural_categories 466 forms = [ 467 FTL.TextElement(part) 468 for part in element.value.split(';') 469 ] 470 471 # The default CLDR form should be the last we have in DEFAULT_ORDER, 472 # usually `other`, but in some cases `many`. If we don't have a variant 473 # for that, we'll append one, using the, in CLDR order, last existing 474 # variant in the legacy translation. That may or may not be the last 475 # variant. 476 default_key = [ 477 key for key in reversed(self.DEFAULT_ORDER) if key in keys 478 ][0] 479 480 # Match keys to legacy forms in the order they are defined in Gecko's 481 # PluralForm.jsm. Filter out empty forms. 482 pairs = [ 483 (key, var) 484 for key, var in zip(keys, forms) 485 if var.value 486 ] 487 488 # A special case for legacy translations which don't define any 489 # plural forms. 490 if len(pairs) == 0: 491 return Transform.pattern_of() 492 493 # A special case for languages with one plural category or one legacy 494 # variant. We don't need to insert a SelectExpression for them. 495 if len(pairs) == 1: 496 _, only_form = pairs[0] 497 only_variant = ctx.evaluate(self.foreach(only_form)) 498 return Transform.pattern_of(only_variant) 499 500 # Make sure the default key is defined. If it's missing, use the last 501 # form (in CLDR order) found in the legacy translation. 502 pairs.sort(key=lambda pair: self.DEFAULT_ORDER.index(pair[0])) 503 last_key, last_form = pairs[-1] 504 if last_key != default_key: 505 pairs.append((default_key, last_form)) 506 507 def createVariant(key, form): 508 # Run the legacy plural form through `foreach` which returns an 509 # `FTL.Node` describing the transformation required for each 510 # variant. Then evaluate it to a migrated FTL node. 511 value = ctx.evaluate(self.foreach(form)) 512 return FTL.Variant( 513 key=FTL.Identifier(key), 514 value=value, 515 default=key == default_key 516 ) 517 518 select = FTL.SelectExpression( 519 selector=selector, 520 variants=[ 521 createVariant(key, form) 522 for key, form in pairs 523 ] 524 ) 525 526 return Transform.pattern_of(select) 527 528 529class CONCAT(Transform): 530 """Create a new Pattern from Patterns, PatternElements and Expressions. 531 532 When called with at least two elements, `CONCAT` disables the trimming 533 behavior of the elements which are subclasses of `LegacySource` by 534 setting `trim=False`, unless `trim` has already been set explicitly. The 535 following two `CONCAT` calls are equivalent: 536 537 CONCAT( 538 FTL.TextElement("Hello"), 539 COPY("file.properties", "hello") 540 ) 541 542 CONCAT( 543 FTL.TextElement("Hello"), 544 COPY("file.properties", "hello", trim=False) 545 ) 546 547 Set `trim=True` explicitly to force trimming: 548 549 CONCAT( 550 FTL.TextElement("Hello "), 551 COPY("file.properties", "hello", trim=True) 552 ) 553 554 When called with a single element and when the element is a subclass of 555 `LegacySource`, the trimming behavior is not changed. The following two 556 transforms are equivalent: 557 558 CONCAT(COPY("file.properties", "hello")) 559 560 COPY("file.properties", "hello") 561 """ 562 563 def __init__(self, *elements, **kwargs): 564 # We want to support both passing elements as *elements in the 565 # migration specs and as elements=[]. The latter is used by 566 # FTL.BaseNode.traverse when it recreates the traversed node using its 567 # attributes as kwargs. 568 self.elements = list(kwargs.get('elements', elements)) 569 570 # We want to make CONCAT(COPY()) equivalent to COPY() so that it's 571 # always safe (no-op) to wrap transforms in a CONCAT. This is used by 572 # the implementation of transforms_from. 573 if len(self.elements) > 1: 574 for elem in self.elements: 575 # Only change trim if it hasn't been set explicitly. 576 if isinstance(elem, LegacySource) and elem.trim is None: 577 elem.trim = False 578 579 def __call__(self, ctx): 580 return Transform.pattern_of(*self.elements) 581