1# bzr.py - bzr support for the convert extension
2#
3#  Copyright 2008, 2009 Marek Kubica <marek@xivilization.net> and others
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7
# This module handles imports from Breezy (`brz`); it is also compatible with
# Bazaar (`bzr`), which was formerly known as Bazaar-NG.
# It cannot access `bar` repositories, but they were never used very much.
11from __future__ import absolute_import
12
13import os
14
15from mercurial.i18n import _
16from mercurial import (
17    demandimport,
18    error,
19    pycompat,
20    util,
21)
22from . import common
23
24
# These modules do not work with demandimport, so add them to its ignore
# list.
demandimport.IGNORES.update(
    [
        b'breezy.transactions',
        b'breezy.urlutils',
        b'ElementPath',
    ]
)
33
try:
    # breezy (bazaar) imports; these are optional at module import time
    import breezy.bzr.bzrdir
    import breezy.errors
    import breezy.revision
    import breezy.revisionspec

    # short module-level aliases used throughout this file
    bzrdir = breezy.bzr.bzrdir
    errors = breezy.errors
    revision = breezy.revision
    revisionspec = breezy.revisionspec
    # Bare attribute access with no other effect; presumably it forces the
    # demand-loaded module to really be imported while still inside this
    # try block -- TODO confirm.
    revisionspec.RevisionSpec
except ImportError:
    # Leave the aliases undefined: bzr_source.__init__ probes for `bzrdir`
    # and reports the missing modules as NoRepo.
    pass
48
# Entry kinds for which getfile() returns content; any other kind is
# reported like a missing file.
supportedkinds = ('file', 'symlink')
50
51
52class bzr_source(common.converter_source):
53    """Reads Bazaar repositories by using the Bazaar Python libraries"""
54
55    def __init__(self, ui, repotype, path, revs=None):
56        super(bzr_source, self).__init__(ui, repotype, path, revs=revs)
57
58        if not os.path.exists(os.path.join(path, b'.bzr')):
59            raise common.NoRepo(
60                _(b'%s does not look like a Bazaar repository') % path
61            )
62
63        try:
64            # access breezy stuff
65            bzrdir
66        except NameError:
67            raise common.NoRepo(_(b'Bazaar modules could not be loaded'))
68
69        path = util.abspath(path)
70        self._checkrepotype(path)
71        try:
72            bzr_dir = bzrdir.BzrDir.open(path.decode())
73            self.sourcerepo = bzr_dir.open_repository()
74        except errors.NoRepositoryPresent:
75            raise common.NoRepo(
76                _(b'%s does not look like a Bazaar repository') % path
77            )
78        self._parentids = {}
79        self._saverev = ui.configbool(b'convert', b'bzr.saverev')
80
81    def _checkrepotype(self, path):
82        # Lightweight checkouts detection is informational but probably
83        # fragile at API level. It should not terminate the conversion.
84        try:
85            dir = bzrdir.BzrDir.open_containing(path.decode())[0]
86            try:
87                tree = dir.open_workingtree(recommend_upgrade=False)
88                branch = tree.branch
89            except (errors.NoWorkingTree, errors.NotLocalUrl):
90                tree = None
91                branch = dir.open_branch()
92            if (
93                tree is not None
94                and tree.controldir.root_transport.base
95                != branch.controldir.root_transport.base
96            ):
97                self.ui.warn(
98                    _(
99                        b'warning: lightweight checkouts may cause '
100                        b'conversion failures, try with a regular '
101                        b'branch instead.\n'
102                    )
103                )
104        except Exception:
105            self.ui.note(_(b'bzr source type could not be determined\n'))
106
107    def before(self):
108        """Before the conversion begins, acquire a read lock
109        for all the operations that might need it. Fortunately
110        read locks don't block other reads or writes to the
111        repository, so this shouldn't have any impact on the usage of
112        the source repository.
113
114        The alternative would be locking on every operation that
115        needs locks (there are currently two: getting the file and
116        getting the parent map) and releasing immediately after,
117        but this approach can take even 40% longer."""
118        self.sourcerepo.lock_read()
119
120    def after(self):
121        self.sourcerepo.unlock()
122
123    def _bzrbranches(self):
124        return self.sourcerepo.find_branches(using=True)
125
126    def getheads(self):
127        if not self.revs:
128            # Set using=True to avoid nested repositories (see issue3254)
129            heads = sorted([b.last_revision() for b in self._bzrbranches()])
130        else:
131            revid = None
132            for branch in self._bzrbranches():
133                try:
134                    revspec = self.revs[0].decode()
135                    r = revisionspec.RevisionSpec.from_string(revspec)
136                    info = r.in_history(branch)
137                except errors.BzrError:
138                    pass
139                revid = info.rev_id
140            if revid is None:
141                raise error.Abort(
142                    _(b'%s is not a valid revision') % self.revs[0]
143                )
144            heads = [revid]
145        # Empty repositories return 'null:', which cannot be retrieved
146        heads = [h for h in heads if h != b'null:']
147        return heads
148
149    def getfile(self, name, rev):
150        name = name.decode()
151        revtree = self.sourcerepo.revision_tree(rev)
152
153        try:
154            kind = revtree.kind(name)
155        except breezy.errors.NoSuchFile:
156            return None, None
157        if kind not in supportedkinds:
158            # the file is not available anymore - was deleted
159            return None, None
160        mode = self._modecache[(name.encode(), rev)]
161        if kind == 'symlink':
162            target = revtree.get_symlink_target(name)
163            if target is None:
164                raise error.Abort(
165                    _(b'%s.%s symlink has no target') % (name, rev)
166                )
167            return target.encode(), mode
168        else:
169            sio = revtree.get_file(name)
170            return sio.read(), mode
171
172    def getchanges(self, version, full):
173        if full:
174            raise error.Abort(_(b"convert from cvs does not support --full"))
175        self._modecache = {}
176        self._revtree = self.sourcerepo.revision_tree(version)
177        # get the parentids from the cache
178        parentids = self._parentids.pop(version)
179        # only diff against first parent id
180        prevtree = self.sourcerepo.revision_tree(parentids[0])
181        files, changes = self._gettreechanges(self._revtree, prevtree)
182        return files, changes, set()
183
184    def getcommit(self, version):
185        rev = self.sourcerepo.get_revision(version)
186        # populate parent id cache
187        if not rev.parent_ids:
188            parents = []
189            self._parentids[version] = (revision.NULL_REVISION,)
190        else:
191            parents = self._filterghosts(rev.parent_ids)
192            self._parentids[version] = parents
193
194        branch = rev.properties.get('branch-nick', 'default')
195        if branch == 'trunk':
196            branch = 'default'
197        return common.commit(
198            parents=parents,
199            date=b'%d %d' % (rev.timestamp, -rev.timezone),
200            author=self.recode(rev.committer),
201            desc=self.recode(rev.message),
202            branch=branch.encode('utf8'),
203            rev=version,
204            saverev=self._saverev,
205        )
206
207    def gettags(self):
208        bytetags = {}
209        for branch in self._bzrbranches():
210            if not branch.supports_tags():
211                return {}
212            tagdict = branch.tags.get_tag_dict()
213            for name, rev in pycompat.iteritems(tagdict):
214                bytetags[self.recode(name)] = rev
215        return bytetags
216
217    def getchangedfiles(self, rev, i):
218        self._modecache = {}
219        curtree = self.sourcerepo.revision_tree(rev)
220        if i is not None:
221            parentid = self._parentids[rev][i]
222        else:
223            # no parent id, get the empty revision
224            parentid = revision.NULL_REVISION
225
226        prevtree = self.sourcerepo.revision_tree(parentid)
227        changes = [e[0] for e in self._gettreechanges(curtree, prevtree)[0]]
228        return changes
229
230    def _gettreechanges(self, current, origin):
231        revid = current._revision_id
232        changes = []
233        renames = {}
234        seen = set()
235
236        # Fall back to the deprecated attribute for legacy installations.
237        try:
238            inventory = origin.root_inventory
239        except AttributeError:
240            inventory = origin.inventory
241
242        # Process the entries by reverse lexicographic name order to
243        # handle nested renames correctly, most specific first.
244
245        def key(c):
246            return c.path[0] or c.path[1] or ""
247
248        curchanges = sorted(
249            current.iter_changes(origin),
250            key=key,
251            reverse=True,
252        )
253        for change in curchanges:
254            paths = change.path
255            kind = change.kind
256            executable = change.executable
257            if paths[0] == u'' or paths[1] == u'':
258                # ignore changes to tree root
259                continue
260
261            # bazaar tracks directories, mercurial does not, so
262            # we have to rename the directory contents
263            if kind[1] == 'directory':
264                if kind[0] not in (None, 'directory'):
265                    # Replacing 'something' with a directory, record it
266                    # so it can be removed.
267                    changes.append((self.recode(paths[0]), revid))
268
269                if kind[0] == 'directory' and None not in paths:
270                    renaming = paths[0] != paths[1]
271                    # neither an add nor an delete - a move
272                    # rename all directory contents manually
273                    subdir = inventory.path2id(paths[0])
274                    # get all child-entries of the directory
275                    for name, entry in inventory.iter_entries(subdir):
276                        # hg does not track directory renames
277                        if entry.kind == 'directory':
278                            continue
279                        frompath = self.recode(paths[0] + '/' + name)
280                        if frompath in seen:
281                            # Already handled by a more specific change entry
282                            # This is important when you have:
283                            # a => b
284                            # a/c => a/c
285                            # Here a/c must not be renamed into b/c
286                            continue
287                        seen.add(frompath)
288                        if not renaming:
289                            continue
290                        topath = self.recode(paths[1] + '/' + name)
291                        # register the files as changed
292                        changes.append((frompath, revid))
293                        changes.append((topath, revid))
294                        # add to mode cache
295                        mode = (
296                            (entry.executable and b'x')
297                            or (entry.kind == 'symlink' and b's')
298                            or b''
299                        )
300                        self._modecache[(topath, revid)] = mode
301                        # register the change as move
302                        renames[topath] = frompath
303
304                # no further changes, go to the next change
305                continue
306
307            # we got unicode paths, need to convert them
308            path, topath = paths
309            if path is not None:
310                path = self.recode(path)
311            if topath is not None:
312                topath = self.recode(topath)
313            seen.add(path or topath)
314
315            if topath is None:
316                # file deleted
317                changes.append((path, revid))
318                continue
319
320            # renamed
321            if path and path != topath:
322                renames[topath] = path
323                changes.append((path, revid))
324
325            # populate the mode cache
326            kind, executable = [e[1] for e in (kind, executable)]
327            mode = (executable and b'x') or (kind == 'symlink' and b'l') or b''
328            self._modecache[(topath, revid)] = mode
329            changes.append((topath, revid))
330
331        return changes, renames
332
333    def _filterghosts(self, ids):
334        """Filters out ghost revisions which hg does not support, see
335        <http://bazaar-vcs.org/GhostRevision>
336        """
337        parentmap = self.sourcerepo.get_parent_map(ids)
338        parents = tuple([parent for parent in ids if parent in parentmap])
339        return parents
340