from __future__ import print_function
from builtins import object
import binascii
from Codeville.bencode import bdecode, bencode
from Codeville.client_helpers import create_handle, gen_diff
from Codeville.DFS import DFS
from Codeville.history import sync_history, write_changeset
from Codeville.history import roothandle, rootnode
from Codeville.history import read_diff, write_diff, write_index
from Codeville.history import handle_contents_at_point
from Codeville.history import handle_name_at_point
from Codeville.history import HistoryError
from Codeville.old.history import handle_contents_at_point as old_handle_contents_at_point
from Codeville.old.history import handle_name_at_point as old_handle_name_at_point
import copy
# NOTE(review): the 'sha' module is Python 2 only (removed in Python 3);
# hashlib.sha1 is the drop-in replacement if this tool is ever ported.
import sha
from sys import stdout
import zlib


class UpgradeRepository(object):
    """Shared state for converting an old-format repository to a new one.

    Tracks the mapping from old changeset ids to new ones (point_map),
    from old file/dir handles to new ones (handle_map), and the static
    info of every file handle in the old repository (all_old_handles).
    """

    def __init__(self, old_repo, new_repo, txn):
        self.point_map = {}        # old changeset id -> new changeset id
        self.handle_map = {}       # old handle -> new handle
        self.all_old_handles = {}  # old file handle -> static info dict

        self.old_repo = old_repo
        self.new_repo = new_repo
        self.txn = txn

    def sort_history(self, handle_list):
        """Topologically sort changeset ids so every precursor appears
        before any changeset that depends on it."""
        history_dfs = DFS(self._history_deps, [self.old_repo])
        for point in handle_list:
            history_dfs.search(point)
        return history_dfs.result()

    @staticmethod
    def _history_deps(node, args):
        # DFS dependency callback: a changeset depends on its precursors.
        co = args[0]

        cset = bdecode(co.lcrepo.get(node))
        # reversed in place so the DFS visits precursors right-to-left
        cset['precursors'].reverse()

        return cset['precursors']

    def sort_names(self, handles):
        """Sort handles so a renamed parent directory is emitted before
        any child whose 'parent' pointer refers to it."""
        name_dfs = DFS(self._name_deps, [handles])
        for old_handle in list(handles.keys()):
            name_dfs.search(old_handle)
        return name_dfs.result()

    @staticmethod
    def _name_deps(node, args):
        # DFS dependency callback: a handle depends on its renamed parent.
        handles = args[0]

        if node in handles and 'parent' in handles[node]:
            parent = handles[node]['parent']
            if parent in handles and 'name' in handles[parent]:
                return [parent]

        return []

    def clean_merges(self, UR, dagdb, point):
        """Find handles which were merged cleanly (implicitly) at ``point``.

        A clean merge is a dag entry with more than one precursor and no
        explicit change recorded (no 'handle' key).  The dag databases are
        keyed by the concatenation of handle id and changeset id.

        NOTE(review): callers invoke this as ``UR.clean_merges(UR, ...)``,
        so ``self`` and ``UR`` are the same object; the redundant signature
        is kept for compatibility.

        Returns (clean_merges, handles): a dict mapping each such handle
        to 1, and the same handles as a list.
        """
        clean_merges = {}
        handles = []
        for handle in list(UR.all_old_handles.keys()):
            if handle + point not in dagdb:
                continue

            hinfo = bdecode(dagdb.get(handle + point))
            if 'handle' in hinfo:
                # explicit change recorded, not an implicit merge
                continue

            if len(hinfo['precursors']) <= 1:
                # not a merge at all
                continue

            clean_merges[handle] = 1
            handles.append(handle)

        return clean_merges, handles


def upgrade(old_repo, new_repo, changes, txn):
    """Convert the changesets in ``changes`` (plus their ancestry) from
    old_repo into new_repo, printing one dot per converted changeset.

    Returns the UpgradeRepository holding the old->new id mappings.
    """
    UR = UpgradeRepository(old_repo, new_repo, txn)

    # collect every file handle known to the old static db
    for old_handle in list(old_repo.staticdb.keys()):
        hinfo = bdecode(old_repo.staticdb.get(old_handle))
        if hinfo['type'] == 'file':
            UR.all_old_handles[old_handle] = hinfo

    # sort the history
    ordering = UR.sort_history(changes)

    # sort again for better dag construction
    ordering.reverse()
    ordering = UR.sort_history(ordering)

    # the root changeset must come first; precursor lookups depend on it
    assert rootnode == ordering[0]

    print("%d changesets to convert" % (len(ordering), ))

    for point in ordering:
        new_point = convert_cset(UR, point)

        # progress indicator
        stdout.write('.')
        stdout.flush()

    return UR


def convert_cset(UR, point):
    """Convert a single old changeset ``point`` into the new repository.

    Rewrites precursor and handle references through UR's maps, makes
    implicit (clean) content and name merges explicit, regenerates file
    diffs against the new repository, and writes the changeset, diffs and
    indices into UR.new_repo.  Returns the new changeset id.
    """
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    new_cset = {}
    new_cset['precursors'] = [UR.point_map[pre] for pre in old_cset['precursors']]

    if 'time' in old_cset:
        new_cset['time'] = old_cset['time']

    if 'user' in old_cset:
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if 'comment' in old_cset:
        clean_merge = False
        new_cset['comment'] = old_cset['comment'].rstrip()
        if len(new_cset['comment']):
            new_cset['comment'] = new_cset['comment'] + '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles so renamed parents precede their children
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR, UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR, UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        old_hinfo = None
        try:
            old_hinfo = old_cset['handles'][old_handle]
        except KeyError:
            # handle comes from a clean merge: no explicit cset entry
            old_hinfo = {}

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = None
        if old_handle in UR.handle_map:
            new_handle = UR.handle_map[old_handle]

        # make name changes explicit
        if old_handle in clean_nmerges:
            name = old_handle_name_at_point(UR.old_repo, old_handle, point, None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        if 'parent' in old_hinfo:
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if 'hash' in old_hinfo or old_handle in clean_merges:
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle, point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and 'add' not in old_hinfo:
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines, UR.txn)
            if 'add' in old_hinfo:
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = sha.new(diff).digest()

                # If this used to be a clean merge, we have to replace it.
                # BUG FIX: the original tested ``old_handle not in old_cset``,
                # but old_cset's keys are cset fields ('precursors',
                # 'handles', ...), never handle ids, so that test was always
                # true; the handle must be looked up in old_cset['handles'].
                if old_handle not in old_cset['handles'] or \
                       'hash' not in old_cset['handles'][old_handle]:
                    force_new_cset = True

            elif 'hash' in new_hinfo:
                del new_hinfo['hash']

            # sanity check: an unmapped handle must be an explicit file add
            if new_handle is None:
                assert 'add' in old_hinfo
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if 'add' in old_hinfo:
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff
            if 'hash' in new_hinfo:
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle, zdiff, UR.txn)

        elif 'add' in old_hinfo:
            # directory add: no contents, just create the handle
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = sha.new(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        index_point = forced_point

        # the published changeset becomes a trivial merge of the first
        # precursor and the forced changeset
        new_cset = {'precursors': [forced_cset['precursors'][0], forced_point],
                    'user':       forced_cset['user'],
                    'time':       forced_cset['time'],
                    'handles':    {}}

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = sha.new(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in list(indices.items()):
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point