1# coding: utf-8 2 3# Copyright (c) Jupyter Development Team. 4# Distributed under the terms of the Modified BSD License. 5 6import itertools 7import copy 8 9from .diff_format import DiffOp, DiffEntry, op_addrange, op_removerange 10from .log import NBDiffFormatError 11 12 13_addops = (DiffOp.ADD, DiffOp.ADDRANGE) 14 15 16if hasattr(itertools, "accumulate"): 17 _accum = itertools.accumulate 18else: 19 def _accum(seq): 20 total = 0 21 for x in seq: 22 total += x 23 yield total 24 25 26def offset_op(e, n): 27 """Recreate sequence diff entry with offset added to key.""" 28 e = DiffEntry(e) 29 e.key += n 30 return e 31 32 33def count_consumed_symbols(e): 34 """Count how many symbols are consumed from each sequence by a single sequence diff entry.""" 35 op = e.op 36 if op == DiffOp.ADDRANGE: 37 return (0, len(e.valuelist)) 38 elif op == DiffOp.REMOVERANGE: 39 return (e.length, 0) 40 elif op == DiffOp.PATCH: 41 return (1, 1) 42 else: 43 raise NBDiffFormatError("Invalid op '{}'".format(op)) 44 45 46def source_as_string(source): 47 """Return source as a single string, joined as lines if it's a list.""" 48 if isinstance(source, list): 49 source = "\n".join(line.strip("\n") for line in source) 50 if not isinstance(source, str): 51 raise TypeError("Invalid argument type. Should be string or sequence of strings." 52 "Got %r" % source) 53 return source 54 55 56def _overlaps(existing, new): 57 """Check whether existing diff op shares a key with the new diffop, and if 58 they also have the same op type. 59 """ 60 if not existing: 61 return False 62 existing = existing[-1] # Only need to check last op! 63 if existing.op == new.op: 64 if existing.key == new.key: 65 # Found a match, combine ops 66 return True 67 elif (existing.op == DiffOp.REMOVERANGE and 68 existing.key + existing.length >= new.key): 69 # Overlapping deletes 70 # Above check is open ended to allow sanity check here: 71 if existing.key + existing.length != new.key: 72 raise RuntimeError('Unexpected diff keys/lengths') 73 return True 74 elif (existing.op in _addops and 75 new.op in _addops and 76 existing.key == new.key): 77 # Addrange and single add can both point to same key 78 return True 79 return False 80 81 82def _combine_ops(existing, new): 83 """Combines two ops into a new one that does the same 84 """ 85 if new.op in _addops: 86 if existing.op == DiffOp.ADD: 87 # Convert to range for compatibility 88 d = op_addrange(existing.key, [existing.value]) 89 else: 90 d = copy.deepcopy(existing) 91 if new.op == DiffOp.ADDRANGE: 92 d.valuelist += new.valuelist 93 else: 94 if isinstance(d.valuelist, str): 95 d.valuelist += new.value 96 else: 97 d.valuelist.append(new.value) 98 return d 99 elif new.op == DiffOp.REMOVERANGE: 100 assert existing.op == DiffOp.REMOVERANGE, "Unexpect diff op. Invalid use of _combine_ops" 101 return op_removerange(existing.key, existing.length + new.length) 102 103 104def flatten_list_of_string_diff(a, linebased_diff): 105 """Translates a diff of strings split by str.splitlines(True) to a diff of 106 the joined multiline string. 107 """ 108 if isinstance(a, str): 109 a = a.splitlines(True) 110 111 line_to_char = [0] + list(_accum(len(ia) for ia in a)) 112 charbased_diff = [] 113 for e in linebased_diff: 114 op = e.op 115 line_offset = line_to_char[e.key] 116 if op == DiffOp.PATCH: 117 # For patches, each entry applies to chars within a line, 118 # and will have keys (=char indices) relative to line start, 119 # so we simply need to offset each key with line offset 120 for p in e.diff: 121 d = copy.deepcopy(p) 122 d.key += line_offset 123 charbased_diff.append(d) 124 else: 125 # Other ops simply have keys which refer to lines 126 if op == DiffOp.ADDRANGE: 127 d = op_addrange(line_offset, "".join(e.valuelist)) 128 elif op == DiffOp.REMOVERANGE: 129 d = op_removerange( 130 line_offset, line_to_char[e.key + e.length] - line_offset) 131 else: 132 # Other ops simply need to adjust key as add/replace's value 133 # will already be a string 134 d = copy.deepcopy(e) 135 d.key = line_offset 136 charbased_diff.append(d) 137 138 # Combine overlapping diffs 139 combined = [] 140 for d in charbased_diff: 141 # If it overlaps with an existing op, combine them to one 142 if _overlaps(combined, d): 143 combined[-1] = _combine_ops(combined[-1], d) 144 else: 145 combined.append(d) 146 147 combined.sort(key=lambda x: x.key) 148 return combined 149 150 151def to_clean_dicts(di): 152 "Recursively convert dict-like objects to straight python dicts." 153 if isinstance(di, dict): 154 return {k: to_clean_dicts(v) for k, v in di.items()} 155 elif isinstance(di, list): 156 return [to_clean_dicts(v) for v in di] 157 else: 158 return di 159 160 161def to_diffentry_dicts(di): # TODO: Better name, validate_diff? as_diff? 162 "Recursively convert dict objects to DiffEntry objects with attribute access." 163 if isinstance(di, dict): 164 return DiffEntry(**{k: to_diffentry_dicts(v) for k, v in di.items()}) 165 elif isinstance(di, list): 166 return [to_diffentry_dicts(v) for v in di] 167 else: 168 return di 169 170def as_dict_based_diff(di): 171 """Converting to dict-based diff format for dicts for convenience. 172 173 NB! Only one level, not recursive. 174 175 This step will be unnecessary if we change the diff format to work this way always. 176 """ 177 return {e.key: e for e in di} 178 179 180def revert_as_dict_based_diff(di): 181 "Reverts as_dict_based_diff." 182 return [di[k] for k in sorted(di)] 183 184 185def to_json_patch(d, path=""): 186 """Convert nbdime diff object into the RFC6902 JSON Patch format. 187 188 This is untested and will need some details worked out. 189 """ 190 print("Warning: to_json_patch is not thouroughly tested.") 191 jp = [] 192 offset = 0 193 for e in d: 194 op = e.op 195 if op == DiffOp.ADD: 196 assert isinstance(e.key, str), "'add' diff op needs string key" 197 p = "/".join([path, e.key]) 198 jp.append({"op": "add", "path": p, "value": e.value}) 199 elif op == DiffOp.REPLACE: 200 assert isinstance(e.key, str), "'replace' diff op needs string key" 201 p = "/".join([path, e.key]) 202 jp.append({"op": "replace", "path": p, "value": e.value}) 203 elif op == DiffOp.REMOVE: 204 assert isinstance(e.key, str), "'remove' diff op needs string key" 205 p = "/".join([path, e.key]) 206 jp.append({"op": "remove", "path": p}) 207 elif op == DiffOp.ADDRANGE: 208 # JSONPatch only has single value add, no addrange, 209 # repeat addition after increasing index instead 210 assert isinstance(e.key, int), "'addrange' diff op needs integer key" 211 for value in e.valuelist: 212 p = "/".join([path, str(e.key + offset)]) 213 jp.append({"op": "add", "path": p, "value": value}) 214 offset += 1 215 elif op == DiffOp.REMOVERANGE: 216 assert isinstance(e.key, int), "'removerange' diff op needs integer key" 217 # JSONPatch only has single value remove, no removerange, 218 # repeat removal at same index instead 219 p = "/".join((path, str(e.key + offset))) 220 for _ in range(e.length): 221 jp.append({"op": "remove", "path": p}) 222 offset -= 1 223 elif op == DiffOp.PATCH: 224 # JSONPatch has no recursion, recurse here to flatten diff 225 key = e.key 226 if isinstance(key, int): 227 key += offset 228 p = "/".join([path, str(key)]) 229 jp.extend(to_json_patch(e.diff, p)) 230 return jp 231