# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'Merge resources across channels.'

from collections import OrderedDict, defaultdict
from codecs import encode
from functools import reduce


from compare_locales import parser as cl
from compare_locales.compare import AddRemove


class MergeNotSupportedError(ValueError):
    """Raised by merge_channels when no parser exists for the file name."""


def merge_channels(name, *resources):
    """Merge several versions of one resource into a single encoded blob.

    ``name`` is handed to ``cl.getParser`` to select the parser. Each item
    of ``resources`` is fed to the parser's ``readContents``. Resources are
    folded pairwise from left to right with ``merge_two``, whose first
    argument is treated as the newer version, so callers should pass the
    newest resource first.

    Returns the concatenated ``all`` text of the merged entities, encoded
    with ``parser.encoding``.

    Raises MergeNotSupportedError when ``cl.getParser`` raises UserWarning
    for ``name``.
    """
    try:
        parser = cl.getParser(name)
    except UserWarning:
        raise MergeNotSupportedError(
            'Unsupported file format ({}).'.format(name))

    # A map of comments to the keys of entities they belong to.
    comments = {}

    def parse_resource(resource):
        """Parse one resource into an OrderedDict of key -> entity."""
        # The counter dict keeps track of number of identical comments.
        counter = defaultdict(int)
        parser.readContents(resource)
        pairs = [get_key_value(entity, counter) for entity in parser.walk()]
        return OrderedDict(pairs)

    def get_key_value(entity, counter):
        """Return the (key, entity) pair used to index ``entity``."""
        if isinstance(entity, cl.Comment):
            counter[entity.val] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((entity.val, counter[entity.val]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer
            # one when merge_two prunes its result.
            return (entity, entity)

        # When comments change, AddRemove gives us one 'add' and one 'delete'
        # (because a comment's key is its content). In merge_two we'll try to
        # de-duplicate comments by looking at the entity they belong to. Set
        # up the back-reference from the comment to its entity here.
        if isinstance(entity, cl.Entity) and entity.pre_comment:
            comments[entity.pre_comment] = entity.key

        return (entity.key, entity)

    # Parse everything up front: the single parser instance is re-used for
    # every resource, and a lazy map() (Python 3) would interleave parsing
    # with merging instead of finishing all parsing first.
    parsed = [parse_resource(resource) for resource in resources]

    entities = reduce(
        lambda newer, older: merge_two(comments, newer, older),
        parsed)

    return encode(serialize_legacy_resource(entities), parser.encoding)


def merge_two(comments, newer, older):
    """Merge two parsed resources, preferring entries from ``newer``.

    ``comments`` maps comment entities to the key of the entity they are
    attached to. ``newer`` and ``older`` are mappings of key -> entity.
    Keys are visited in the order reported by AddRemove.

    Returns an OrderedDict of key -> entity in which superseded comments
    from ``older`` are dropped and runs of adjacent whitespace are folded
    into the longest one.
    """
    diff = AddRemove()
    diff.set_left(newer.keys())
    diff.set_right(older.keys())

    def get_entity(key):
        """Pick the entity for ``key``, or None if it should be pruned."""
        entity = newer.get(key)

        # Always prefer the newer version.
        if entity is not None:
            return entity

        entity = older.get(key)

        # If it's an old comment attached to an entity, try to find that
        # entity in newer and return None so that the newer entity's own
        # comment is used instead.
        if isinstance(entity, cl.Comment) and entity in comments:
            next_entity = newer.get(comments[entity])
            if next_entity is not None and next_entity.pre_comment:
                # Pruned below, before the merged result is returned.
                return None

        return entity

    # Create a flat sequence of all entities in order reported by AddRemove.
    contents = [(key, get_entity(key)) for _, key in diff]

    pruned = []
    for key, entity in contents:
        if entity is None:
            # Skip Nones, which stand for duplicated comments.
            continue

        if pruned and isinstance(entity, cl.Whitespace):
            _, prev_entity = pruned[-1]

            if isinstance(prev_entity, cl.Whitespace):
                # Fold adjacent whitespace, keeping the longer one.
                if len(entity.all) > len(prev_entity.all):
                    pruned[-1] = (entity, entity)
                continue

        pruned.append((key, entity))

    return OrderedDict(pruned)


def serialize_legacy_resource(entities):
    """Concatenate the ``all`` text of every entity, in mapping order."""
    return "".join(entity.all for entity in entities.values())