1# coding: utf-8
2
3# Copyright (c) Jupyter Development Team.
4# Distributed under the terms of the Modified BSD License.
5
6import itertools
7import copy
8
9from .diff_format import DiffOp, DiffEntry, op_addrange, op_removerange
10from .log import NBDiffFormatError
11
12
13_addops = (DiffOp.ADD, DiffOp.ADDRANGE)
14
15
16if hasattr(itertools, "accumulate"):
17    _accum = itertools.accumulate
18else:
19    def _accum(seq):
20        total = 0
21        for x in seq:
22            total += x
23            yield total
24
25
26def offset_op(e, n):
27    """Recreate sequence diff entry with offset added to key."""
28    e = DiffEntry(e)
29    e.key += n
30    return e
31
32
33def count_consumed_symbols(e):
34    """Count how many symbols are consumed from each sequence by a single sequence diff entry."""
35    op = e.op
36    if op == DiffOp.ADDRANGE:
37        return (0, len(e.valuelist))
38    elif op == DiffOp.REMOVERANGE:
39        return (e.length, 0)
40    elif op == DiffOp.PATCH:
41        return (1, 1)
42    else:
43        raise NBDiffFormatError("Invalid op '{}'".format(op))
44
45
46def source_as_string(source):
47    """Return source as a single string, joined as lines if it's a list."""
48    if isinstance(source, list):
49        source = "\n".join(line.strip("\n") for line in source)
50    if not isinstance(source, str):
51        raise TypeError("Invalid argument type. Should be string or sequence of strings."
52                        "Got %r" % source)
53    return source
54
55
56def _overlaps(existing, new):
57    """Check whether existing diff op shares a key with the new diffop, and if
58    they also have the same op type.
59    """
60    if not existing:
61        return False
62    existing = existing[-1]  # Only need to check last op!
63    if existing.op == new.op:
64        if existing.key == new.key:
65            # Found a match, combine ops
66            return True
67        elif (existing.op == DiffOp.REMOVERANGE and
68              existing.key + existing.length >= new.key):
69            # Overlapping deletes
70            # Above check is open ended to allow sanity check here:
71            if existing.key + existing.length != new.key:
72                raise RuntimeError('Unexpected diff keys/lengths')
73            return True
74    elif (existing.op in _addops and
75          new.op in _addops and
76          existing.key == new.key):
77        # Addrange and single add can both point to same key
78        return True
79    return False
80
81
82def _combine_ops(existing, new):
83    """Combines two ops into a new one that does the same
84    """
85    if new.op in _addops:
86        if existing.op == DiffOp.ADD:
87            # Convert to range for compatibility
88            d = op_addrange(existing.key, [existing.value])
89        else:
90            d = copy.deepcopy(existing)
91        if new.op == DiffOp.ADDRANGE:
92            d.valuelist += new.valuelist
93        else:
94            if isinstance(d.valuelist, str):
95                d.valuelist += new.value
96            else:
97                d.valuelist.append(new.value)
98        return d
99    elif new.op == DiffOp.REMOVERANGE:
100        assert existing.op == DiffOp.REMOVERANGE, "Unexpect diff op. Invalid use of _combine_ops"
101        return op_removerange(existing.key, existing.length + new.length)
102
103
104def flatten_list_of_string_diff(a, linebased_diff):
105    """Translates a diff of strings split by str.splitlines(True) to a diff of
106    the joined multiline string.
107    """
108    if isinstance(a, str):
109        a = a.splitlines(True)
110
111    line_to_char = [0] + list(_accum(len(ia) for ia in a))
112    charbased_diff = []
113    for e in linebased_diff:
114        op = e.op
115        line_offset = line_to_char[e.key]
116        if op == DiffOp.PATCH:
117            # For patches, each entry applies to chars within a line,
118            # and will have keys (=char indices) relative to line start,
119            # so we simply need to offset each key with line offset
120            for p in e.diff:
121                d = copy.deepcopy(p)
122                d.key += line_offset
123                charbased_diff.append(d)
124        else:
125            # Other ops simply have keys which refer to lines
126            if op == DiffOp.ADDRANGE:
127                d = op_addrange(line_offset, "".join(e.valuelist))
128            elif op == DiffOp.REMOVERANGE:
129                d = op_removerange(
130                    line_offset, line_to_char[e.key + e.length] - line_offset)
131            else:
132                # Other ops simply need to adjust key as add/replace's value
133                # will already be a string
134                d = copy.deepcopy(e)
135                d.key = line_offset
136            charbased_diff.append(d)
137
138    # Combine overlapping diffs
139    combined = []
140    for d in charbased_diff:
141        # If it overlaps with an existing op, combine them to one
142        if _overlaps(combined, d):
143            combined[-1] = _combine_ops(combined[-1], d)
144        else:
145            combined.append(d)
146
147    combined.sort(key=lambda x: x.key)
148    return combined
149
150
151def to_clean_dicts(di):
152    "Recursively convert dict-like objects to straight python dicts."
153    if isinstance(di, dict):
154        return {k: to_clean_dicts(v) for k, v in di.items()}
155    elif isinstance(di, list):
156        return [to_clean_dicts(v) for v in di]
157    else:
158        return di
159
160
161def to_diffentry_dicts(di):  # TODO: Better name, validate_diff? as_diff?
162    "Recursively convert dict objects to DiffEntry objects with attribute access."
163    if isinstance(di, dict):
164        return DiffEntry(**{k: to_diffentry_dicts(v) for k, v in di.items()})
165    elif isinstance(di, list):
166        return [to_diffentry_dicts(v) for v in di]
167    else:
168        return di
169
170def as_dict_based_diff(di):
171    """Converting to dict-based diff format for dicts for convenience.
172
173    NB! Only one level, not recursive.
174
175    This step will be unnecessary if we change the diff format to work this way always.
176    """
177    return {e.key: e for e in di}
178
179
180def revert_as_dict_based_diff(di):
181    "Reverts as_dict_based_diff."
182    return [di[k] for k in sorted(di)]
183
184
185def to_json_patch(d, path=""):
186    """Convert nbdime diff object into the RFC6902 JSON Patch format.
187
188    This is untested and will need some details worked out.
189    """
190    print("Warning: to_json_patch is not thouroughly tested.")
191    jp = []
192    offset = 0
193    for e in d:
194        op = e.op
195        if op == DiffOp.ADD:
196            assert isinstance(e.key, str), "'add' diff op needs string key"
197            p = "/".join([path, e.key])
198            jp.append({"op": "add", "path": p, "value": e.value})
199        elif op == DiffOp.REPLACE:
200            assert isinstance(e.key, str), "'replace' diff op needs string key"
201            p = "/".join([path, e.key])
202            jp.append({"op": "replace", "path": p, "value": e.value})
203        elif op == DiffOp.REMOVE:
204            assert isinstance(e.key, str), "'remove' diff op needs string key"
205            p = "/".join([path, e.key])
206            jp.append({"op": "remove", "path": p})
207        elif op == DiffOp.ADDRANGE:
208            # JSONPatch only has single value add, no addrange,
209            # repeat addition after increasing index instead
210            assert isinstance(e.key, int), "'addrange' diff op needs integer key"
211            for value in e.valuelist:
212                p = "/".join([path, str(e.key + offset)])
213                jp.append({"op": "add", "path": p, "value": value})
214                offset += 1
215        elif op == DiffOp.REMOVERANGE:
216            assert isinstance(e.key, int), "'removerange' diff op needs integer key"
217            # JSONPatch only has single value remove, no removerange,
218            # repeat removal at same index instead
219            p = "/".join((path, str(e.key + offset)))
220            for _ in range(e.length):
221                jp.append({"op": "remove", "path": p})
222                offset -= 1
223        elif op == DiffOp.PATCH:
224            # JSONPatch has no recursion, recurse here to flatten diff
225            key = e.key
226            if isinstance(key, int):
227                key += offset
228            p = "/".join([path, str(key)])
229            jp.extend(to_json_patch(e.diff, p))
230    return jp
231