1# gnuarch.py - GNU Arch support for the convert extension
2#
3#  Copyright 2008, 2009 Aleix Conchillo Flaque <aleix@member.fsf.org>
4#  and others
5#
6# This software may be used and distributed according to the terms of the
7# GNU General Public License version 2 or any later version.
8from __future__ import absolute_import
9
10import os
11import shutil
12import stat
13import tempfile
14
15from mercurial.i18n import _
16from mercurial import (
17    encoding,
18    error,
19    mail,
20    pycompat,
21    util,
22)
23from mercurial.utils import (
24    dateutil,
25    procutil,
26)
27from . import common
28
29
30class gnuarch_source(common.converter_source, common.commandline):
31    class gnuarch_rev(object):
32        def __init__(self, rev):
33            self.rev = rev
34            self.summary = b''
35            self.date = None
36            self.author = b''
37            self.continuationof = None
38            self.add_files = []
39            self.mod_files = []
40            self.del_files = []
41            self.ren_files = {}
42            self.ren_dirs = {}
43
44    def __init__(self, ui, repotype, path, revs=None):
45        super(gnuarch_source, self).__init__(ui, repotype, path, revs=revs)
46
47        if not os.path.exists(os.path.join(path, b'{arch}')):
48            raise common.NoRepo(
49                _(b"%s does not look like a GNU Arch repository") % path
50            )
51
52        # Could use checktool, but we want to check for baz or tla.
53        self.execmd = None
54        if procutil.findexe(b'baz'):
55            self.execmd = b'baz'
56        else:
57            if procutil.findexe(b'tla'):
58                self.execmd = b'tla'
59            else:
60                raise error.Abort(_(b'cannot find a GNU Arch tool'))
61
62        common.commandline.__init__(self, ui, self.execmd)
63
64        self.path = os.path.realpath(path)
65        self.tmppath = None
66
67        self.treeversion = None
68        self.lastrev = None
69        self.changes = {}
70        self.parents = {}
71        self.tags = {}
72        self.encoding = encoding.encoding
73        self.archives = []
74
75    def before(self):
76        # Get registered archives
77        self.archives = [
78            i.rstrip(b'\n') for i in self.runlines0(b'archives', b'-n')
79        ]
80
81        if self.execmd == b'tla':
82            output = self.run0(b'tree-version', self.path)
83        else:
84            output = self.run0(b'tree-version', b'-d', self.path)
85        self.treeversion = output.strip()
86
87        # Get name of temporary directory
88        version = self.treeversion.split(b'/')
89        self.tmppath = os.path.join(
90            pycompat.fsencode(tempfile.gettempdir()), b'hg-%s' % version[1]
91        )
92
93        # Generate parents dictionary
94        self.parents[None] = []
95        treeversion = self.treeversion
96        child = None
97        while treeversion:
98            self.ui.status(_(b'analyzing tree version %s...\n') % treeversion)
99
100            archive = treeversion.split(b'/')[0]
101            if archive not in self.archives:
102                self.ui.status(
103                    _(
104                        b'tree analysis stopped because it points to '
105                        b'an unregistered archive %s...\n'
106                    )
107                    % archive
108                )
109                break
110
111            # Get the complete list of revisions for that tree version
112            output, status = self.runlines(
113                b'revisions', b'-r', b'-f', treeversion
114            )
115            self.checkexit(
116                status, b'failed retrieving revisions for %s' % treeversion
117            )
118
119            # No new iteration unless a revision has a continuation-of header
120            treeversion = None
121
122            for l in output:
123                rev = l.strip()
124                self.changes[rev] = self.gnuarch_rev(rev)
125                self.parents[rev] = []
126
127                # Read author, date and summary
128                catlog, status = self.run(b'cat-log', b'-d', self.path, rev)
129                if status:
130                    catlog = self.run0(b'cat-archive-log', rev)
131                self._parsecatlog(catlog, rev)
132
133                # Populate the parents map
134                self.parents[child].append(rev)
135
136                # Keep track of the current revision as the child of the next
137                # revision scanned
138                child = rev
139
140                # Check if we have to follow the usual incremental history
141                # or if we have to 'jump' to a different treeversion given
142                # by the continuation-of header.
143                if self.changes[rev].continuationof:
144                    treeversion = b'--'.join(
145                        self.changes[rev].continuationof.split(b'--')[:-1]
146                    )
147                    break
148
149                # If we reached a base-0 revision w/o any continuation-of
150                # header, it means the tree history ends here.
151                if rev[-6:] == b'base-0':
152                    break
153
154    def after(self):
155        self.ui.debug(b'cleaning up %s\n' % self.tmppath)
156        shutil.rmtree(self.tmppath, ignore_errors=True)
157
158    def getheads(self):
159        return self.parents[None]
160
161    def getfile(self, name, rev):
162        if rev != self.lastrev:
163            raise error.Abort(_(b'internal calling inconsistency'))
164
165        if not os.path.lexists(os.path.join(self.tmppath, name)):
166            return None, None
167
168        return self._getfile(name, rev)
169
170    def getchanges(self, rev, full):
171        if full:
172            raise error.Abort(_(b"convert from arch does not support --full"))
173        self._update(rev)
174        changes = []
175        copies = {}
176
177        for f in self.changes[rev].add_files:
178            changes.append((f, rev))
179
180        for f in self.changes[rev].mod_files:
181            changes.append((f, rev))
182
183        for f in self.changes[rev].del_files:
184            changes.append((f, rev))
185
186        for src in self.changes[rev].ren_files:
187            to = self.changes[rev].ren_files[src]
188            changes.append((src, rev))
189            changes.append((to, rev))
190            copies[to] = src
191
192        for src in self.changes[rev].ren_dirs:
193            to = self.changes[rev].ren_dirs[src]
194            chgs, cps = self._rendirchanges(src, to)
195            changes += [(f, rev) for f in chgs]
196            copies.update(cps)
197
198        self.lastrev = rev
199        return sorted(set(changes)), copies, set()
200
201    def getcommit(self, rev):
202        changes = self.changes[rev]
203        return common.commit(
204            author=changes.author,
205            date=changes.date,
206            desc=changes.summary,
207            parents=self.parents[rev],
208            rev=rev,
209        )
210
211    def gettags(self):
212        return self.tags
213
214    def _execute(self, cmd, *args, **kwargs):
215        cmdline = [self.execmd, cmd]
216        cmdline += args
217        cmdline = [procutil.shellquote(arg) for arg in cmdline]
218        bdevnull = pycompat.bytestr(os.devnull)
219        cmdline += [b'>', bdevnull, b'2>', bdevnull]
220        cmdline = b' '.join(cmdline)
221        self.ui.debug(cmdline, b'\n')
222        return os.system(pycompat.rapply(procutil.tonativestr, cmdline))
223
224    def _update(self, rev):
225        self.ui.debug(b'applying revision %s...\n' % rev)
226        changeset, status = self.runlines(b'replay', b'-d', self.tmppath, rev)
227        if status:
228            # Something went wrong while merging (baz or tla
229            # issue?), get latest revision and try from there
230            shutil.rmtree(self.tmppath, ignore_errors=True)
231            self._obtainrevision(rev)
232        else:
233            old_rev = self.parents[rev][0]
234            self.ui.debug(
235                b'computing changeset between %s and %s...\n' % (old_rev, rev)
236            )
237            self._parsechangeset(changeset, rev)
238
239    def _getfile(self, name, rev):
240        mode = os.lstat(os.path.join(self.tmppath, name)).st_mode
241        if stat.S_ISLNK(mode):
242            data = util.readlink(os.path.join(self.tmppath, name))
243            if mode:
244                mode = b'l'
245            else:
246                mode = b''
247        else:
248            data = util.readfile(os.path.join(self.tmppath, name))
249            mode = (mode & 0o111) and b'x' or b''
250        return data, mode
251
252    def _exclude(self, name):
253        exclude = [b'{arch}', b'.arch-ids', b'.arch-inventory']
254        for exc in exclude:
255            if name.find(exc) != -1:
256                return True
257        return False
258
259    def _readcontents(self, path):
260        files = []
261        contents = os.listdir(path)
262        while len(contents) > 0:
263            c = contents.pop()
264            p = os.path.join(path, c)
265            # os.walk could be used, but here we avoid internal GNU
266            # Arch files and directories, thus saving a lot time.
267            if not self._exclude(p):
268                if os.path.isdir(p):
269                    contents += [os.path.join(c, f) for f in os.listdir(p)]
270                else:
271                    files.append(c)
272        return files
273
274    def _rendirchanges(self, src, dest):
275        changes = []
276        copies = {}
277        files = self._readcontents(os.path.join(self.tmppath, dest))
278        for f in files:
279            s = os.path.join(src, f)
280            d = os.path.join(dest, f)
281            changes.append(s)
282            changes.append(d)
283            copies[d] = s
284        return changes, copies
285
286    def _obtainrevision(self, rev):
287        self.ui.debug(b'obtaining revision %s...\n' % rev)
288        output = self._execute(b'get', rev, self.tmppath)
289        self.checkexit(output)
290        self.ui.debug(b'analyzing revision %s...\n' % rev)
291        files = self._readcontents(self.tmppath)
292        self.changes[rev].add_files += files
293
294    def _stripbasepath(self, path):
295        if path.startswith(b'./'):
296            return path[2:]
297        return path
298
299    def _parsecatlog(self, data, rev):
300        try:
301            catlog = mail.parsebytes(data)
302
303            # Commit date
304            self.changes[rev].date = dateutil.datestr(
305                dateutil.strdate(catlog['Standard-date'], b'%Y-%m-%d %H:%M:%S')
306            )
307
308            # Commit author
309            self.changes[rev].author = self.recode(catlog['Creator'])
310
311            # Commit description
312            self.changes[rev].summary = b'\n\n'.join(
313                (
314                    self.recode(catlog['Summary']),
315                    self.recode(catlog.get_payload()),
316                )
317            )
318            self.changes[rev].summary = self.recode(self.changes[rev].summary)
319
320            # Commit revision origin when dealing with a branch or tag
321            if 'Continuation-of' in catlog:
322                self.changes[rev].continuationof = self.recode(
323                    catlog['Continuation-of']
324                )
325        except Exception:
326            raise error.Abort(_(b'could not parse cat-log of %s') % rev)
327
328    def _parsechangeset(self, data, rev):
329        for l in data:
330            l = l.strip()
331            # Added file (ignore added directory)
332            if l.startswith(b'A') and not l.startswith(b'A/'):
333                file = self._stripbasepath(l[1:].strip())
334                if not self._exclude(file):
335                    self.changes[rev].add_files.append(file)
336            # Deleted file (ignore deleted directory)
337            elif l.startswith(b'D') and not l.startswith(b'D/'):
338                file = self._stripbasepath(l[1:].strip())
339                if not self._exclude(file):
340                    self.changes[rev].del_files.append(file)
341            # Modified binary file
342            elif l.startswith(b'Mb'):
343                file = self._stripbasepath(l[2:].strip())
344                if not self._exclude(file):
345                    self.changes[rev].mod_files.append(file)
346            # Modified link
347            elif l.startswith(b'M->'):
348                file = self._stripbasepath(l[3:].strip())
349                if not self._exclude(file):
350                    self.changes[rev].mod_files.append(file)
351            # Modified file
352            elif l.startswith(b'M'):
353                file = self._stripbasepath(l[1:].strip())
354                if not self._exclude(file):
355                    self.changes[rev].mod_files.append(file)
356            # Renamed file (or link)
357            elif l.startswith(b'=>'):
358                files = l[2:].strip().split(b' ')
359                if len(files) == 1:
360                    files = l[2:].strip().split(b'\t')
361                src = self._stripbasepath(files[0])
362                dst = self._stripbasepath(files[1])
363                if not self._exclude(src) and not self._exclude(dst):
364                    self.changes[rev].ren_files[src] = dst
365            # Conversion from file to link or from link to file (modified)
366            elif l.startswith(b'ch'):
367                file = self._stripbasepath(l[2:].strip())
368                if not self._exclude(file):
369                    self.changes[rev].mod_files.append(file)
370            # Renamed directory
371            elif l.startswith(b'/>'):
372                dirs = l[2:].strip().split(b' ')
373                if len(dirs) == 1:
374                    dirs = l[2:].strip().split(b'\t')
375                src = self._stripbasepath(dirs[0])
376                dst = self._stripbasepath(dirs[1])
377                if not self._exclude(src) and not self._exclude(dst):
378                    self.changes[rev].ren_dirs[src] = dst
379