"""Upgrade a repository from the old Codeville history format to the new
one, converting every changeset and rebuilding the diff indices."""
from __future__ import print_function
from builtins import object
import binascii
from Codeville.bencode import bdecode, bencode
from Codeville.client_helpers import create_handle, gen_diff
from Codeville.DFS import DFS
from Codeville.history import sync_history, write_changeset
from Codeville.history import roothandle, rootnode
from Codeville.history import read_diff, write_diff, write_index
from Codeville.history import handle_contents_at_point
from Codeville.history import handle_name_at_point
from Codeville.history import HistoryError
from Codeville.old.history import handle_contents_at_point as old_handle_contents_at_point
from Codeville.old.history import handle_name_at_point as old_handle_name_at_point
import copy
import hashlib
from sys import stdout
import zlib

class UpgradeRepository(object):
    """Conversion state: maps old changeset points and handles to their
    new names while an old-format repository is converted to a new one."""

    def __init__(self, old_repo, new_repo, txn):
        self.point_map       = {}
        self.handle_map      = {}
        self.all_old_handles = {}

        self.old_repo = old_repo
        self.new_repo = new_repo
        self.txn      = txn

    def sort_history(self, handle_list):
        # Topological sort: every changeset comes after all of its
        # precursors.
        history_dfs = DFS(self._history_deps, [self.old_repo])
        for point in handle_list:
            history_dfs.search(point)
        return history_dfs.result()

    @staticmethod
    def _history_deps(node, args):
        co = args[0]

        cset = bdecode(co.lcrepo.get(node))
        cset['precursors'].reverse()

        return cset['precursors']

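    # Example (sketch): for a history A <- B <- C with C newest,
    # sort_history([C]) returns [A, B, C]; ancestors always precede their
    # descendants, which is why upgrade() below can assert that the root
    # node sorts first.
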
    def sort_names(self, handles):
        # Order renamed handles so that a parent directory renamed in the
        # same changeset is processed before its children.
        name_dfs = DFS(self._name_deps, [handles])
        for old_handle in list(handles.keys()):
            name_dfs.search(old_handle)
        return name_dfs.result()

    @staticmethod
    def _name_deps(node, args):
        handles = args[0]

        if node in handles and 'parent' in handles[node]:
            parent = handles[node]['parent']
            if parent in handles and 'name' in handles[parent]:
                return [parent]

        return []

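    # Sketch: given handles {a: {'name': 'dir'}, b: {'parent': a, 'name': 'f'}},
    # sort_names returns [a, b], so the renamed directory is handled before
    # anything that lives inside it.
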
    def clean_merges(self, dagdb, point):
        # A handle was merged cleanly at this point if its dag entry has
        # more than one precursor but no explicit payload.
        clean_merges = {}
        handles      = []
        for handle in list(self.all_old_handles.keys()):
            if handle + point not in dagdb:
                continue

            hinfo = bdecode(dagdb.get(handle + point))
            if 'handle' in hinfo:
                continue

            if len(hinfo['precursors']) <= 1:
                continue

            clean_merges[handle] = 1
            handles.append(handle)

        return clean_merges, handles


def upgrade(old_repo, new_repo, changes, txn):
    UR = UpgradeRepository(old_repo, new_repo, txn)

    for old_handle in list(old_repo.staticdb.keys()):
        hinfo = bdecode(old_repo.staticdb.get(old_handle))
        if hinfo['type'] == 'file':
            UR.all_old_handles[old_handle] = hinfo

    # sort the history
    ordering = UR.sort_history(changes)

    # sort again for better dag construction
    ordering.reverse()
    ordering = UR.sort_history(ordering)

    assert rootnode == ordering[0]

    print("%d changesets to convert" % (len(ordering), ))

    for point in ordering:
        convert_cset(UR, point)

        stdout.write('.')
        stdout.flush()

    # terminate the progress line
    stdout.write('\n')
    return UR

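# Minimal driver sketch, not part of the converter proper. It assumes the
# caller (e.g. the cdvupgrade command-line wrapper) has already opened both
# repositories and an enclosing transaction; listing every stored point via
# lcrepo.keys() is likewise an assumption.
def _example_run(old_repo, new_repo, txn):
    changes = list(old_repo.lcrepo.keys())   # assumed to enumerate all points
    UR = upgrade(old_repo, new_repo, changes, txn)
    return UR.point_map
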
def convert_cset(UR, point):
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    # precursors were converted earlier; map them to their new names
    new_cset = {}
    new_cset['precursors'] = [UR.point_map[pre] for pre in old_cset['precursors']]

    if 'time' in old_cset:
        new_cset['time'] = old_cset['time']

    if 'user' in old_cset:
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if 'comment' in old_cset:
        clean_merge = False
        new_cset['comment'] = old_cset['comment'].rstrip()
        if new_cset['comment']:
            new_cset['comment'] += '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

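    # The old format recorded clean merges implicitly; the new format wants
    # them spelled out, so each handle collected above gets an explicit
    # entry (and, for content merges, a regenerated diff) below.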
    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        old_hinfo = old_cset['handles'].get(old_handle, {})

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = UR.handle_map.get(old_handle)

        # make name changes explicit
        if old_handle in clean_nmerges:
            name = old_handle_name_at_point(UR.old_repo, old_handle, point, None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        if 'parent' in old_hinfo:
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if 'hash' in old_hinfo or old_handle in clean_merges:
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle, point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and 'add' not in old_hinfo:
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines, UR.txn)
            if 'add' in old_hinfo:
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = hashlib.sha1(diff).digest()

                # if this used to be a clean merge, we have to replace it
                if old_handle not in old_cset['handles'] or \
                       'hash' not in old_cset['handles'][old_handle]:
                    force_new_cset = True

            elif 'hash' in new_hinfo:
                del new_hinfo['hash']

            # sanity check
            if new_handle is None:
                assert 'add' in old_hinfo
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if 'add' in old_hinfo:
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff now; its index entry waits in indices
            # until the new changeset's point name is known (see below)
            if 'hash' in new_hinfo:
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle, zdiff, UR.txn)

        elif 'add' in old_hinfo:
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = hashlib.sha1(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        index_point = forced_point

        new_cset = {'precursors': [forced_cset['precursors'][0], forced_point],
                    'user':       forced_cset['user'],
                    'time':       forced_cset['time'],
                    'handles':    {}}

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = hashlib.sha1(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in list(indices.items()):
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point

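# Post-conversion sanity pass (sketch): every converted changeset should
# round-trip through bdecode in the new repository. _verify_upgrade is a
# hypothetical helper, not part of the converter itself.
def _verify_upgrade(UR):
    for new_point in list(UR.point_map.values()):
        cset = bdecode(UR.new_repo.lcrepo.get(new_point))
        assert 'handles' in cset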