from __future__ import print_function
from builtins import object
import binascii
from Codeville.bencode import bdecode, bencode
from Codeville.client_helpers import create_handle, gen_diff
from Codeville.DFS import DFS
from Codeville.history import sync_history, write_changeset
from Codeville.history import roothandle, rootnode
from Codeville.history import read_diff, write_diff, write_index
from Codeville.history import handle_contents_at_point
from Codeville.history import handle_name_at_point
from Codeville.history import HistoryError
from Codeville.old.history import handle_contents_at_point as old_handle_contents_at_point
from Codeville.old.history import handle_name_at_point as old_handle_name_at_point
import copy
# NOTE(review): the 'sha' module is Python 2 only (removed in Python 3);
# hashlib.sha1 is the drop-in replacement if this tool is ever ported.
import sha
from sys import stdout
import zlib


class UpgradeRepository(object):
    """Shared state for converting an old-format repository to a new one.

    Tracks the mapping from old changeset ids to new ones (point_map),
    from old file/dir handles to new ones (handle_map), and the static
    info of every file handle in the old repository (all_old_handles).
    """

    def __init__(self, old_repo, new_repo, txn):
        self.point_map = {}        # old changeset id -> new changeset id
        self.handle_map = {}       # old handle -> new handle
        self.all_old_handles = {}  # old file handle -> static info dict

        self.old_repo = old_repo
        self.new_repo = new_repo
        self.txn = txn

    def sort_history(self, handle_list):
        """Topologically sort changeset ids so every precursor appears
        before any changeset that depends on it."""
        history_dfs = DFS(self._history_deps, [self.old_repo])
        for point in handle_list:
            history_dfs.search(point)
        return history_dfs.result()

    @staticmethod
    def _history_deps(node, args):
        # DFS dependency callback: a changeset depends on its precursors.
        co = args[0]

        cset = bdecode(co.lcrepo.get(node))
        # reversed in place so the DFS visits precursors right-to-left
        cset['precursors'].reverse()

        return cset['precursors']

    def sort_names(self, handles):
        """Sort handles so a renamed parent directory is emitted before
        any child whose 'parent' pointer refers to it."""
        name_dfs = DFS(self._name_deps, [handles])
        for old_handle in list(handles.keys()):
            name_dfs.search(old_handle)
        return name_dfs.result()

    @staticmethod
    def _name_deps(node, args):
        # DFS dependency callback: a handle depends on its renamed parent.
        handles = args[0]

        if node in handles and 'parent' in handles[node]:
            parent = handles[node]['parent']
            if parent in handles and 'name' in handles[parent]:
                return [parent]

        return []

    def clean_merges(self, UR, dagdb, point):
        """Find handles which were merged cleanly (implicitly) at ``point``.

        A clean merge is a dag entry with more than one precursor and no
        explicit change recorded (no 'handle' key).  The dag databases are
        keyed by the concatenation of handle id and changeset id.

        NOTE(review): callers invoke this as ``UR.clean_merges(UR, ...)``,
        so ``self`` and ``UR`` are the same object; the redundant signature
        is kept for compatibility.

        Returns (clean_merges, handles): a dict mapping each such handle
        to 1, and the same handles as a list.
        """
        clean_merges = {}
        handles = []
        for handle in list(UR.all_old_handles.keys()):
            if handle + point not in dagdb:
                continue

            hinfo = bdecode(dagdb.get(handle + point))
            if 'handle' in hinfo:
                # explicit change recorded, not an implicit merge
                continue

            if len(hinfo['precursors']) <= 1:
                # not a merge at all
                continue

            clean_merges[handle] = 1
            handles.append(handle)

        return clean_merges, handles


def upgrade(old_repo, new_repo, changes, txn):
    """Convert the changesets in ``changes`` (plus their ancestry) from
    old_repo into new_repo, printing one dot per converted changeset.

    Returns the UpgradeRepository holding the old->new id mappings.
    """
    UR = UpgradeRepository(old_repo, new_repo, txn)

    # collect every file handle known to the old static db
    for old_handle in list(old_repo.staticdb.keys()):
        hinfo = bdecode(old_repo.staticdb.get(old_handle))
        if hinfo['type'] == 'file':
            UR.all_old_handles[old_handle] = hinfo

    # sort the history
    ordering = UR.sort_history(changes)

    # sort again for better dag construction
    ordering.reverse()
    ordering = UR.sort_history(ordering)

    # the root changeset must come first; precursor lookups depend on it
    assert rootnode == ordering[0]

    print("%d changesets to convert" % (len(ordering), ))

    for point in ordering:
        new_point = convert_cset(UR, point)

        # progress indicator
        stdout.write('.')
        stdout.flush()

    return UR


def convert_cset(UR, point):
    """Convert a single old changeset ``point`` into the new repository.

    Rewrites precursor and handle references through UR's maps, makes
    implicit (clean) content and name merges explicit, regenerates file
    diffs against the new repository, and writes the changeset, diffs and
    indices into UR.new_repo.  Returns the new changeset id.
    """
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    new_cset = {}
    new_cset['precursors'] = [UR.point_map[pre] for pre in old_cset['precursors']]

    if 'time' in old_cset:
        new_cset['time'] = old_cset['time']

    if 'user' in old_cset:
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if 'comment' in old_cset:
        clean_merge = False
        new_cset['comment'] = old_cset['comment'].rstrip()
        if len(new_cset['comment']):
            new_cset['comment'] = new_cset['comment'] + '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles so renamed parents precede their children
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR, UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR, UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        old_hinfo = None
        try:
            old_hinfo = old_cset['handles'][old_handle]
        except KeyError:
            # handle comes from a clean merge: no explicit cset entry
            old_hinfo = {}

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = None
        if old_handle in UR.handle_map:
            new_handle = UR.handle_map[old_handle]

        # make name changes explicit
        if old_handle in clean_nmerges:
            name = old_handle_name_at_point(UR.old_repo, old_handle, point, None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        if 'parent' in old_hinfo:
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if 'hash' in old_hinfo or old_handle in clean_merges:
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle, point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and 'add' not in old_hinfo:
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines, UR.txn)
            if 'add' in old_hinfo:
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = sha.new(diff).digest()

                # If this used to be a clean merge, we have to replace it.
                # BUG FIX: the original tested ``old_handle not in old_cset``,
                # but old_cset's keys are cset fields ('precursors',
                # 'handles', ...), never handle ids, so that test was always
                # true; the handle must be looked up in old_cset['handles'].
                if old_handle not in old_cset['handles'] or \
                       'hash' not in old_cset['handles'][old_handle]:
                    force_new_cset = True

            elif 'hash' in new_hinfo:
                del new_hinfo['hash']

            # sanity check: an unmapped handle must be an explicit file add
            if new_handle is None:
                assert 'add' in old_hinfo
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if 'add' in old_hinfo:
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff
            if 'hash' in new_hinfo:
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle, zdiff, UR.txn)

        elif 'add' in old_hinfo:
            # directory add: no contents, just create the handle
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = sha.new(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        index_point = forced_point

        # the published changeset becomes a trivial merge of the first
        # precursor and the forced changeset
        new_cset = {'precursors': [forced_cset['precursors'][0], forced_point],
                    'user':       forced_cset['user'],
                    'time':       forced_cset['time'],
                    'handles':    {}}

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = sha.new(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in list(indices.items()):
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point