1# common.py - common code for the convert extension
2#
3#  Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7from __future__ import absolute_import
8
9import base64
10import datetime
11import errno
12import os
13import re
14import shlex
15import subprocess
16
17from mercurial.i18n import _
18from mercurial.pycompat import open
19from mercurial import (
20    encoding,
21    error,
22    phases,
23    pycompat,
24    util,
25)
26from mercurial.utils import procutil
27
# Module-level shortcuts for helpers looked up on mercurial.util, so the rest
# of this file can refer to them by their short names.
pickle = util.pickle
propertycache = util.propertycache
30
31
def _encodeornone(d):
    """Encode ``d`` as latin1, passing ``None`` through unchanged."""
    return None if d is None else d.encode('latin1')
36
37
class _shlexpy3proxy(object):
    """Wrap a lexer object so that the tokens it produces are latin1 bytes.

    Tokens obtained by iterating or through get_token() are passed through
    _encodeornone(); ``infile`` and ``lineno`` are forwarded from the
    wrapped lexer.
    """

    def __init__(self, l):
        self._l = l

    def __iter__(self):
        for token in self._l:
            yield _encodeornone(token)

    def get_token(self):
        token = self._l.get_token()
        return _encodeornone(token)

    @property
    def infile(self):
        # fall back to a placeholder when the lexer has no file name
        name = self._l.infile
        return name if name else b'<unknown>'

    @property
    def lineno(self):
        return self._l.lineno
55
56
def shlexer(data=None, filepath=None, wordchars=None, whitespace=None):
    """Build a POSIX-mode shlex lexer over ``data`` or over a file.

    When ``data`` is None the file at ``filepath`` is opened and used as
    the input; passing both raises error.ProgrammingError. If
    ``whitespace`` is given, whitespace-only splitting is enabled and the
    characters are added to the lexer's whitespace set. ``wordchars``, if
    given, is added to the lexer's word characters.

    On Python 3 the input and the extra characters are handled as latin1
    text and the lexer is returned wrapped in _shlexpy3proxy.
    """
    py3 = pycompat.ispy3
    if data is not None:
        if filepath is not None:
            raise error.ProgrammingError(
                b'shlexer only accepts data or filepath, not both'
            )
        if py3:
            data = data.decode('latin1')
    elif py3:
        data = open(filepath, b'r', encoding='latin1')
    else:
        data = open(filepath, b'r')

    lexer = shlex.shlex(data, infile=filepath, posix=True)
    if whitespace is not None:
        lexer.whitespace_split = True
        lexer.whitespace += whitespace.decode('latin1') if py3 else whitespace
    if wordchars is not None:
        lexer.wordchars += wordchars.decode('latin1') if py3 else wordchars
    return _shlexpy3proxy(lexer) if py3 else lexer
85
86
# Pick the base64 helper names that match the running Python version.
base64_encodebytes = (
    base64.encodebytes if pycompat.ispy3 else base64.encodestring
)
base64_decodebytes = (
    base64.decodebytes if pycompat.ispy3 else base64.decodestring
)
93
94
def encodeargs(args):
    """Serialize ``args`` into a single-line base64 byte string.

    ``args`` is pickled and the pickle is base64-encoded without any line
    breaks, so the result is one contiguous token. decodeargs() reverses
    the operation.
    """
    # b64encode produces the same characters as encodebytes/encodestring
    # minus the newline those insert after every 76 output characters, so
    # no separate newline-stripping pass over the data is needed.
    return base64.b64encode(pickle.dumps(args))
103
104
def decodeargs(s):
    """Base64-decode ``s`` and unpickle the result (inverse of encodeargs)."""
    # NOTE(review): unpickling can execute arbitrary code for crafted input;
    # presumably ``s`` only ever comes from encodeargs() -- confirm callers.
    return pickle.loads(base64_decodebytes(s))
108
109
class MissingTool(Exception):
    """Raised by checktool() in non-abort mode when a tool is not found."""
112
113
def checktool(exe, name=None, abort=True):
    """Make sure the executable ``exe`` can be found.

    Returns None when procutil.findexe() locates it. Otherwise raises
    error.Abort if ``abort`` is true, else MissingTool. The message names
    the tool as ``name``, falling back to ``exe`` when ``name`` is empty.
    """
    if procutil.findexe(exe):
        return
    exc = error.Abort if abort else MissingTool
    raise exc(_(b'cannot find required "%s" tool') % (name or exe))
122
123
class NoRepo(Exception):
    """Raised by a source or sink when the path is not a valid repository."""
126
127
# Sentinel byte string. Presumably recorded in place of a converted revision
# id to mark a source revision as skipped -- confirm against the callers.
SKIPREV = b'SKIP'
129
130
class commit(object):
    """Plain record holding the metadata of one changeset being converted.

    Missing values are replaced with placeholders: ``author`` becomes
    b'unknown', ``date`` becomes b'0 0', ``extra`` becomes an empty dict
    and ``optparents`` becomes an empty list.
    """

    def __init__(
        self,
        author,
        date,
        desc,
        parents,
        branch=None,
        rev=None,
        extra=None,
        sortkey=None,
        saverev=True,
        phase=phases.draft,
        optparents=None,
        ctx=None,
    ):
        # descriptive metadata
        self.author = author if author else b'unknown'
        self.date = date if date else b'0 0'
        self.desc = desc

        # ancestry
        # will be converted and used as parents
        self.parents = parents
        # will be used if already converted
        self.optparents = optparents if optparents else []

        # remaining attributes are stored as given
        self.branch = branch
        self.rev = rev
        self.extra = extra if extra else {}
        self.sortkey = sortkey
        self.saverev = saverev
        self.phase = phase
        # for hg to hg conversions
        self.ctx = ctx
159
160
class converter_source(object):
    """Conversion source interface

    Base class for conversion sources. It stores the constructor
    arguments, supplies defaults for the optional hooks and raises
    NotImplementedError from the methods that have no default here.
    """

    def __init__(self, ui, repotype, path=None, revs=None):
        """Initialize conversion source (or raise NoRepo("message")
        exception if path is not a valid repository)"""
        self.ui = ui
        self.path = path
        self.revs = revs
        self.repotype = repotype

        # encoding recode() assumes when the caller does not pass one
        self.encoding = b'utf-8'

    def checkhexformat(self, revstr, mapname=b'splicemap'):
        """Abort unless revstr is exactly 40 hexadecimal digits.

        Mercurial and git both use this format for their revision
        identifiers. mapname only names the offending map in the error
        message.
        """
        # Anchor with \Z rather than $: $ also matches just before a
        # trailing newline, which would let a 41-byte string through.
        if not re.match(br'[0-9a-fA-F]{40}\Z', revstr):
            raise error.Abort(
                _(b'%s entry %s is not a valid revision identifier')
                % (mapname, revstr)
            )

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def targetfilebelongstosource(self, targetfilename):
        """Returns true if the given targetfile belongs to the source repo. This
        is useful when only a subdirectory of the target belongs to the source
        repo."""
        # For normal full repo converts, this is always True.
        return True

    def setrevmap(self, revmap):
        """set the map of already-converted revisions"""

    def getheads(self):
        """Return a list of this repository's heads"""
        raise NotImplementedError

    def getfile(self, name, rev):
        """Return a pair (data, mode) where data is the file content
        as a string and mode one of '', 'x' or 'l'. rev is the
        identifier returned by a previous call to getchanges().
        Data is None if file is missing/deleted in rev.
        """
        raise NotImplementedError

    def getchanges(self, version, full):
        """Returns a tuple of (files, copies, cleanp2).

        files is a sorted list of (filename, id) tuples for all files
        changed between version and its first parent returned by
        getcommit(). If full, all files in that revision are returned.
        id is the source revision id of the file.

        copies is a dictionary of dest: source

        cleanp2 is the set of filenames that are clean against p2.
        (Files that are clean against p1 are already not in files (unless
        full). This makes it possible to handle p2 clean files similarly.)
        """
        raise NotImplementedError

    def getcommit(self, version):
        """Return the commit object for version"""
        raise NotImplementedError

    def numcommits(self):
        """Return the number of commits in this source.

        If unknown, return None.
        """
        return None

    def gettags(self):
        """Return the tags as a dictionary of name: revision

        Tag names must be UTF-8 strings.
        """
        raise NotImplementedError

    def recode(self, s, encoding=None):
        """Return s converted to UTF-8 encoded bytes.

        Unicode input is simply encoded. Byte input is decoded with
        ``encoding`` (defaulting to self.encoding, then b'utf-8') and, if
        that fails with a UnicodeError, decoded as latin-1 instead.
        """
        if not encoding:
            encoding = self.encoding or b'utf-8'

        if isinstance(s, pycompat.unicode):
            return s.encode("utf-8")
        try:
            return s.decode(pycompat.sysstr(encoding)).encode("utf-8")
        except UnicodeError:
            # latin-1 assigns a character to every possible byte value, so
            # this fallback cannot raise UnicodeError itself and no further
            # fallback is needed.
            return s.decode("latin-1").encode("utf-8")

    def getchangedfiles(self, rev, i):
        """Return the files changed by rev compared to parent[i].

        i is an index selecting one of the parents of rev.  The return
        value should be the list of files that are different in rev and
        this parent.

        If rev has no parents, i is None.

        This function is only needed to support --filemap
        """
        raise NotImplementedError

    def converted(self, rev, sinkrev):
        """Notify the source that a revision has been converted."""

    def hasnativeorder(self):
        """Return true if this source has a meaningful, native revision
        order. For instance, Mercurial revisions are stored sequentially
        while there is no such global ordering with Darcs.
        """
        return False

    def hasnativeclose(self):
        """Return true if this source has the ability to close a branch."""
        return False

    def lookuprev(self, rev):
        """If rev is a meaningful revision reference in source, return
        the referenced identifier in the same format used by getcommit().
        Return None otherwise.
        """
        return None

    def getbookmarks(self):
        """Return the bookmarks as a dictionary of name: revision

        Bookmark names are to be UTF-8 strings.
        """
        return {}

    def checkrevformat(self, revstr, mapname=b'splicemap'):
        """revstr is a string that describes a revision in the given
        source control system.  Return true if revstr has correct
        format.
        """
        return True
309
310
class converter_sink(object):
    """Conversion sink (target) interface

    Base class for conversion destinations: optional hooks default to
    doing nothing, the remaining methods raise NotImplementedError.
    """

    def __init__(self, ui, repotype, path):
        """Set up the sink (or raise NoRepo("message") if path is not a
        valid repository).

        ``created`` starts out empty; it is a list of paths to remove if
        a fatal error occurs later.
        """
        self.ui = ui
        self.repotype = repotype
        self.path = path
        self.created = []

    def revmapfile(self):
        """Return the path of the revision map file.

        The file holds lines of the form
        source_rev_id sink_rev_id
        pairing equivalent revision identifiers of the two systems.
        """
        raise NotImplementedError

    def authorfile(self):
        """Return the path of the author map file, or None.

        The file holds lines of the form
        srcauthor=dstauthor
        pairing equivalent author identifiers of the two systems.
        """
        return None

    def putcommit(
        self, files, copies, parents, commit, source, revmap, full, cleanp2
    ):
        """Create a revision containing the changed files in 'files' with
        the given parents.

        'commit' is a commit object carrying at least the author, date and
        message of the changeset. 'files' is a list of (path, version)
        tuples and 'copies' a dictionary mapping destinations to sources.
        'source' is the source repository; only getfile() and lookuprev()
        should be called on it. 'revmap' is a mapfile of source revisions
        to converted revisions. 'full' means that 'files' is complete and
        all other files should be removed. 'cleanp2' is a set of the
        filenames that are unchanged from p2 (only in the common merge
        case where there are two parents).

        Note that the sink repository is not told to update itself to
        a particular revision (or even what that revision would be)
        before it receives the file data.
        """
        raise NotImplementedError

    def puttags(self, tags):
        """Store tags in the sink.

        tags: {tagname: sink_rev_id, ...} where tagname is a UTF-8 string.
        Return a pair (tag_revision, tag_parent_revision), or (None, None)
        if nothing was changed.
        """
        raise NotImplementedError

    def setbranch(self, branch, pbranches):
        """Set the current branch name; called before the first putcommit
        on the branch.

        branch: branch name for subsequent commits
        pbranches: (converted parent revision, parent branch) tuples
        """

    def setfilemapmode(self, active):
        """Tell the destination that a filemap is in use.

        Some converter_sources (svn in particular) can claim that a file
        was changed in a revision, even if there was no change.  This
        method tells the destination that we're using a filemap and that
        it should filter empty revisions.
        """

    def before(self):
        """Hook with no default behavior."""

    def after(self):
        """Hook with no default behavior."""

    def putbookmarks(self, bookmarks):
        """Store bookmarks in the sink.

        bookmarks: {bookmarkname: sink_rev_id, ...}
        where bookmarkname is a UTF-8 string.
        """

    def hascommitfrommap(self, rev):
        """Return False if a rev mentioned in a filemap is known to not be
        present."""
        raise NotImplementedError

    def hascommitforsplicemap(self, rev):
        """Special-purpose check for splicemap handling, not for general
        use. Returns True if the sink contains rev, aborts on some special
        cases."""
        raise NotImplementedError
405
406
class commandline(object):
    """Build and run shell command lines for an external tool.

    Every invocation has the form ``command cmd args... options...`` with
    each piece shell-quoted. prerun() and postrun() are no-op hooks that
    _dorun() calls around starting the process.
    """

    def __init__(self, ui, command):
        self.ui = ui
        self.command = command

    def prerun(self):
        """Hook called just before the process is started."""

    def postrun(self):
        """Hook called once starting the process has returned or failed."""

    def _cmdline(self, cmd, *args, **kwargs):
        """Return the shell command line (bytes) for ``cmd``.

        Keyword arguments become options: a one-character name ``k``
        yields ``-k value`` and a longer name yields ``--name=value`` with
        underscores replaced by dashes. Unless ui.debugflag is set, stderr
        is redirected to os.devnull.
        """
        kwargs = pycompat.byteskwargs(kwargs)
        cmdline = [self.command, cmd] + list(args)
        for k, v in pycompat.iteritems(kwargs):
            if len(k) == 1:
                cmdline.append(b'-' + k)
            else:
                cmdline.append(b'--' + k.replace(b'_', b'-'))
            try:
                if len(k) == 1:
                    cmdline.append(b'' + v)
                else:
                    cmdline[-1] += b'=' + v
            except TypeError:
                # The value cannot be concatenated to bytes, so the option
                # is emitted bare, without a value.
                pass
        cmdline = [procutil.shellquote(arg) for arg in cmdline]
        if not self.ui.debugflag:
            # appended after quoting so the shell sees a real redirection
            cmdline += [b'2>', pycompat.bytestr(os.devnull)]
        cmdline = b' '.join(cmdline)
        return cmdline

    def _run(self, cmd, *args, **kwargs):
        """Start the command through the shell with stdout on a pipe.

        Returns the subprocess.Popen object.
        """

        def popen(cmdline):
            p = subprocess.Popen(
                procutil.tonativestr(cmdline),
                shell=True,
                bufsize=-1,
                close_fds=procutil.closefds,
                stdout=subprocess.PIPE,
            )
            return p

        return self._dorun(popen, cmd, *args, **kwargs)

    def _run2(self, cmd, *args, **kwargs):
        """Start the command with procutil.popen2 and return its result."""
        return self._dorun(procutil.popen2, cmd, *args, **kwargs)

    def _run3(self, cmd, *args, **kwargs):
        """Start the command with procutil.popen3 and return its result."""
        return self._dorun(procutil.popen3, cmd, *args, **kwargs)

    def _dorun(self, openfunc, cmd, *args, **kwargs):
        """Build the command line, log it and hand it to ``openfunc``.

        prerun() is called before ``openfunc`` and postrun() afterwards,
        even when ``openfunc`` raises. Returns what ``openfunc`` returns.
        """
        cmdline = self._cmdline(cmd, *args, **kwargs)
        self.ui.debug(b'running: %s\n' % (cmdline,))
        self.prerun()
        try:
            return openfunc(cmdline)
        finally:
            self.postrun()

    def run(self, cmd, *args, **kwargs):
        """Run the command and return (stdout output, exit status)."""
        p = self._run(cmd, *args, **kwargs)
        output = p.communicate()[0]
        self.ui.debug(output)
        return output, p.returncode

    def runlines(self, cmd, *args, **kwargs):
        """Run the command and return (list of stdout lines, exit status)."""
        p = self._run(cmd, *args, **kwargs)
        try:
            output = p.stdout.readlines()
        finally:
            # Close our end of the pipe explicitly instead of leaving the
            # descriptor open until the Popen object is garbage collected.
            p.stdout.close()
        p.wait()
        self.ui.debug(b''.join(output))
        return output, p.returncode

    def checkexit(self, status, output=b''):
        """Raise error.Abort if ``status`` is non-zero.

        Any ``output`` is first shown to the user as a warning.
        """
        if status:
            if output:
                self.ui.warn(_(b'%s error:\n') % self.command)
                self.ui.warn(output)
            msg = procutil.explainexit(status)
            raise error.Abort(b'%s %s' % (self.command, msg))

    def run0(self, cmd, *args, **kwargs):
        """Like run(), but abort on a non-zero exit; returns the output."""
        output, status = self.run(cmd, *args, **kwargs)
        self.checkexit(status, output)
        return output

    def runlines0(self, cmd, *args, **kwargs):
        """Like runlines(), but abort on a non-zero exit; returns the lines."""
        output, status = self.runlines(cmd, *args, **kwargs)
        self.checkexit(status, b''.join(output))
        return output

    @propertycache
    def argmax(self):
        """Maximum command line length, in bytes, that xargs() aims for."""
        # POSIX requires at least 4096 bytes for ARG_MAX
        argmax = 4096
        try:
            argmax = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError):
            # os.sysconf is missing or does not know the name: keep the
            # POSIX minimum
            pass

        # Windows shells impose their own limits on command line length,
        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
        # details about cmd.exe limitations.

        # Since ARG_MAX is for command line _and_ environment, lower our limit
        # (and make happy Windows shells while doing this).
        return argmax // 2 - 1

    def _limit_arglist(self, arglist, cmd, *args, **kwargs):
        """Yield successive chunks of ``arglist`` that fit the length limit.

        The budget is argmax minus the length of the command line built
        from the fixed arguments. Each entry is charged its length plus 3.
        A chunk always holds at least one entry, even an oversized one.
        """
        cmdlen = len(self._cmdline(cmd, *args, **kwargs))
        limit = self.argmax - cmdlen
        numbytes = 0
        fl = []
        for fn in arglist:
            b = len(fn) + 3
            if numbytes + b < limit or not fl:
                fl.append(fn)
                numbytes += b
            else:
                yield fl
                fl = [fn]
                numbytes = b
        if fl:
            yield fl

    def xargs(self, arglist, cmd, *args, **kwargs):
        """Run the command once per chunk of ``arglist`` via run0().

        Each chunk is appended to the fixed positional arguments.
        """
        for l in self._limit_arglist(arglist, cmd, *args, **kwargs):
            self.run0(cmd, *(list(args) + l), **kwargs)
536
537
class mapfile(dict):
    """Dictionary backed by a text file of ``key value`` lines.

    Existing entries are loaded from ``path`` on construction. Every
    later item assignment is appended to the file as well as stored in
    the dictionary. ``order`` lists the keys in the order they were first
    seen while loading.
    """

    def __init__(self, ui, path):
        super(mapfile, self).__init__()
        self.ui = ui
        self.path = path
        # append-mode file handle, opened lazily by __setitem__
        self.fp = None
        self.order = []
        self._read()

    def _read(self):
        """Load the entries stored at self.path, if any.

        Nothing is loaded when there is no path or the file does not
        exist. Raises error.Abort on a non-blank line that does not hold a
        key/value pair.
        """
        if not self.path:
            return
        try:
            fp = open(self.path, b'rb')
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
        try:
            for i, line in enumerate(util.iterfile(fp)):
                line = line.splitlines()[0].rstrip()
                if not line:
                    # Ignore blank lines
                    continue
                try:
                    # split on the last space only: the key may itself
                    # contain spaces
                    key, value = line.rsplit(b' ', 1)
                except ValueError:
                    raise error.Abort(
                        _(b'syntax error in %s(%d): key/value pair expected')
                        % (self.path, i + 1)
                    )
                if key not in self:
                    self.order.append(key)
                super(mapfile, self).__setitem__(key, value)
        finally:
            # close the file even when a syntax error aborts the loop
            fp.close()

    def __setitem__(self, key, value):
        """Store the pair and append it to the map file.

        The file is opened in append mode on first use; error.Abort is
        raised if it cannot be opened. Each write is flushed immediately.
        """
        if self.fp is None:
            try:
                self.fp = open(self.path, b'ab')
            except IOError as err:
                raise error.Abort(
                    _(b'could not open map file %r: %s')
                    % (self.path, encoding.strtolocal(err.strerror))
                )
        self.fp.write(util.tonativeeol(b'%s %s\n' % (key, value)))
        self.fp.flush()
        super(mapfile, self).__setitem__(key, value)

    def close(self):
        """Close the append handle if it is open."""
        if self.fp:
            self.fp.close()
            self.fp = None
590
591
def makedatetimestamp(t):
    """Like dateutil.makedate() but for time t instead of current time

    Returns ``(t, tz)`` where ``tz`` is the number of seconds by which UTC
    is ahead of local time at ``t``.
    """
    utc = datetime.datetime.utcfromtimestamp(t)
    local = datetime.datetime.fromtimestamp(t)
    offset = utc - local
    return t, offset.days * 86400 + offset.seconds
599