1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5'''Merge resources across channels.
6
7Merging resources is done over a series of parsed resources, or source
8strings.
9The nomenclature is that the resources are ordered from newest to oldest.
10The generated file structure is taken from the newest file, and then the
11next-newest, etc. The values of the returned entities are taken from the
12newest to the oldest resource, too.
13
14In merge_resources, there's an option to choose the values from oldest
15to newest instead.
16'''
17
18from collections import OrderedDict, defaultdict
19from codecs import encode
20import six
21
22
23from compare_locales import parser as cl
24from compare_locales.parser.base import StickyEntry
25from compare_locales.compare.utils import AddRemove
26
27
class MergeNotSupportedError(ValueError):
    '''Signal that a file cannot be merged.

    merge_channels raises this when no parser can be obtained for the
    given file name.
    '''
30
31
def merge_channels(name, resources):
    '''Merge `resources` for the file `name` and return the encoded result.

    The parser is looked up by `name`. The resources are merged with
    merge_resources, serialized with serialize_legacy_resource, and the
    resulting text is encoded with the parser's `encoding`.

    Raises MergeNotSupportedError if looking up the parser raises
    UserWarning.
    '''
    try:
        parser = cl.getParser(name)
    except UserWarning:
        message = 'Unsupported file format ({}).'.format(name)
        raise MergeNotSupportedError(message)

    merged = merge_resources(parser, resources)
    serialized = serialize_legacy_resource(merged)
    return encode(serialized, parser.encoding)
41
42
def merge_resources(parser, resources, keep_newest=True):
    '''Merge parsed or unparsed resources, returning an enumerable of Entities.

    Resources are ordered from newest to oldest in the input. The structure
    of the generated content is taken from the newest resource first, and
    then filled by the next etc.
    Values are also taken from the newest, unless keep_newest is False,
    then values are taken from the oldest first.

    Each resource is either a bytes object, which is parsed with `parser`,
    or an iterable of already parsed entities.

    If `resources` is empty, an empty collection of values is returned.
    '''

    def get_key_value(entity, counter):
        '''Return the (key, entity) pair under which `entity` is merged.'''
        if isinstance(entity, cl.Comment):
            counter[entity.val] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((entity.val, counter[entity.val]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer
            # one in merge_two.
            return (entity, entity)

        return (entity.key, entity)

    def parse_resource(resource):
        '''Turn one resource into an OrderedDict of key -> entity.'''
        # The counter dict keeps track of number of identical comments.
        counter = defaultdict(int)
        if isinstance(resource, bytes):
            # Unparsed content: let the parser produce the entities.
            parser.readContents(resource)
            resource = parser.walk()
        pairs = [get_key_value(entity, counter) for entity in resource]
        return OrderedDict(pairs)

    # Materialize the parsed resources so that emptiness can be detected,
    # also for iterators and generators.
    parsed = [parse_resource(resource) for resource in resources]
    if not parsed:
        # reduce() without an initial value raises a TypeError on an empty
        # sequence. There's nothing to merge, so return no entities.
        return OrderedDict().values()

    entities = six.moves.reduce(
        lambda x, y: merge_two(x, y, keep_newer=keep_newest),
        parsed)
    return entities.values()
81
82
def merge_two(newer, older, keep_newer=True):
    '''Combine two OrderedDicts of entities into a new OrderedDict.

    `newer` determines the order of the result.
    By default the values come from `newer` as well; pass
    `keep_newer=False` to take the values from `older` instead.
    Keys without an entity are dropped, and runs of adjacent Whitespace
    entities are collapsed into the longest one.
    '''
    if keep_newer:
        pick_entity = get_newer_entity
    else:
        pick_entity = get_older_entity

    differ = AddRemove()
    differ.set_left(newer.keys())
    differ.set_right(older.keys())

    # First pass: resolve the entity for every key, in the order in which
    # AddRemove reports the keys. Only the key of each reported pair is
    # used.
    resolved = []
    for _, key in differ:
        resolved.append((key, pick_entity(newer, older, key)))

    # Second pass: drop missing entities and fold adjacent whitespace.
    merged = []
    for item in resolved:
        entity = item[1]
        if entity is None:
            # Nones stand for duplicated comments, skip them.
            continue

        if merged and isinstance(entity, cl.Whitespace):
            previous = merged[-1][1]
            if isinstance(previous, cl.Whitespace):
                # Two whitespace entities in a row: keep only the longer
                # one, in the slot of the previous one.
                if len(previous.all) < len(entity.all):
                    merged[-1] = (entity, entity)
                continue

        merged.append(item)

    return OrderedDict(merged)
119
120
def get_newer_entity(newer, older, key):
    '''Return the entity for `key`, preferring `newer` over `older`.

    The entity from `older` is only used if `newer` has no entity
    (or None) for `key`. Returns None if neither has one.
    '''
    candidate = newer.get(key)
    if candidate is None:
        # Nothing in the newer resource, fall back to the older one.
        return older.get(key)

    return candidate
129
130
def get_older_entity(newer, older, key):
    '''Return the entity for `key`, preferring `older` over `newer`.

    The entity from `newer` is used if `older` has no entity (or None)
    for `key`, or if the older entity is a StickyEntry.
    '''
    candidate = older.get(key)

    # Keep the older version only if it exists and isn't a StickyEntry.
    if candidate is not None and not isinstance(candidate, StickyEntry):
        return candidate

    return newer.get(key)
140
141
def serialize_legacy_resource(entities):
    '''Return the concatenation of the `all` attribute of each entity.

    The entities are joined in iteration order, without a separator.
    '''
    pieces = [entity.all for entity in entities]
    return "".join(pieces)
144