1# Copyright (C) 2005-2011 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17import errno
18import os
19import re
20import stat
21import sys
22import time
23import codecs
24
25from .lazy_import import lazy_import
26lazy_import(globals(), """
27from datetime import datetime
28import getpass
29import locale
30import ntpath
31import posixpath
32import select
33# We need to import both shutil and rmtree as we export the later on posix
34# and need the former on windows
35import shutil
36from shutil import rmtree
37import socket
38import subprocess
39# We need to import both tempfile and mkdtemp as we export the later on posix
40# and need the former on windows
41import tempfile
42from tempfile import mkdtemp
43import unicodedata
44
45from breezy import (
46    config,
47    trace,
48    win32utils,
49    )
50from breezy.i18n import gettext
51""")
52
53from hashlib import (
54    md5,
55    sha1 as sha,
56    )
57
58
59import breezy
60from . import (
61    _fs_enc,
62    errors,
63    )
64
65
# On win32, O_BINARY is used to indicate the file should
# be opened in binary mode, rather than text mode.
# On other platforms, O_BINARY doesn't exist, because
# they always open in binary mode, so it is okay to
# OR with 0 on those platforms.
# O_NOINHERIT and O_TEXT exist only on win32 too, so they get the same
# "0 when missing" treatment.
O_BINARY = getattr(os, 'O_BINARY', 0)
O_TEXT = getattr(os, 'O_TEXT', 0)
O_NOINHERIT = getattr(os, 'O_NOINHERIT', 0)
75
76
class UnsupportedTimezoneFormat(errors.BzrError):
    """Error for a timezone display mode that is not supported.

    The rejected value is kept on ``self.timezone``, which is the name used
    by the ``%(timezone)s`` placeholder in ``_fmt``.
    """

    _fmt = ('Unsupported timezone format "%(timezone)s", '
            'options are "utc", "original", "local".')

    def __init__(self, timezone):
        # Only the offending value is recorded; the base-class initialiser
        # is not called here.
        self.timezone = timezone
84
85
def make_readonly(filename):
    """Strip the write permission bits from filename.

    Symbolic links are skipped: the mode is read with lstat and nothing is
    changed when the path itself is a link.
    """
    current_mode = os.lstat(filename).st_mode
    if stat.S_ISLNK(current_mode):
        return
    # 0o777555 keeps every bit except the user/group/other write bits.
    chmod_if_possible(filename, current_mode & 0o777555)
92
93
def make_writable(filename):
    """Add the owner write permission bit to filename.

    Symbolic links are skipped: the mode is read with lstat and nothing is
    changed when the path itself is a link.
    """
    current_mode = os.lstat(filename).st_mode
    if stat.S_ISLNK(current_mode):
        return
    chmod_if_possible(filename, current_mode | 0o200)
99
100
def chmod_if_possible(filename, mode):
    """Set the mode of filename, tolerating filesystems that refuse it.

    Sometimes even on unix the filesystem won't allow a chmod - see
    https://bugs.launchpad.net/bzr/+bug/606537

    EPERM and EACCES failures are logged with mutter and swallowed; any
    other error is re-raised.
    """
    try:
        # Just attempt the chmod; that is probably cheaper than a stat
        # followed by a comparison.
        os.chmod(filename, mode)
    except (IOError, OSError) as exc:
        if exc.errno not in (errno.EPERM, errno.EACCES):
            raise
        # Permission/access denied seems to commonly happen on smbfs;
        # there's probably no point warning about it.
        trace.mutter("ignore error on chmod of %r: %r" % (
            filename, exc))
118
119
def minimum_path_selection(paths):
    """Return the smallest subset of paths which are outside paths.

    :param paths: A container (and hence not None) of paths.
    :return: A set of paths sufficient to include everything in paths via
        is_inside, drawn from the paths parameter.
    """
    if len(paths) < 2:
        return set(paths)

    def component_key(path):
        # Sorting on path components places a directory immediately
        # before everything beneath it ('a' < 'a/b' < 'a-b').
        separator = b'/' if isinstance(path, bytes) else '/'
        return path.split(separator)

    ordered = sorted(paths, key=component_key)

    kept = ordered[:1]
    for candidate in ordered[1:]:
        if is_inside(kept[-1], candidate):
            # Already covered by the last path we kept.
            continue
        kept.append(candidate)

    return set(kept)
144
145
# Compiled lazily by quotefn() on its first call.
_QUOTE_RE = None


def quotefn(f):
    """Return f, wrapped in double quotes when it needs quoting.

    A name needs quoting when it contains any character other than ASCII
    letters, digits and ``. , : / \\ _ ~ -``.

    This previously used backslash quoting, but that works poorly on
    Windows."""
    # TODO: I'm not really sure this is the best format either.
    global _QUOTE_RE
    pattern = _QUOTE_RE
    if pattern is None:
        pattern = _QUOTE_RE = re.compile(r'([^a-zA-Z0-9.,:/\\_~-])')

    if pattern.search(f) is None:
        return f
    return '"' + f + '"'
163
164
165_directory_kind = 'directory'
166
167
def get_umask():
    """Return the current umask"""
    # The only way to read the umask is to set a new one, so set it to 0
    # and immediately put the previous value back.
    # XXX: This is not thread safe; we assume nobody is changing the umask
    #      while we run.
    previous = os.umask(0)
    os.umask(previous)
    return previous
176
177
# Suffix shown after a path for each known entry kind.
_kind_marker_map = {
    'file': '',
    _directory_kind: '/',
    'symlink': '@',
    'tree-reference': '+',
}


def kind_marker(kind):
    """Return the marker suffix for kind, or '' for an unknown kind."""
    try:
        marker = _kind_marker_map[kind]
    except KeyError:
        # Slightly faster than using .get(, '') when the common case is that
        # kind will be found
        marker = ''
    return marker
193
194
# Prefer the library implementation; define a fallback only when missing.
lexists = getattr(os.path, 'lexists', None)
if lexists is None:
    def lexists(f):
        """Return True if f exists, using lstat when the os module has it."""
        stat_func = getattr(os, 'lstat', os.stat)
        try:
            stat_func(f)
        except OSError as e:
            if e.errno == errno.ENOENT:
                return False
            raise errors.BzrError(
                gettext("lstat/stat of ({0!r}): {1!r}").format(f, e))
        return True
208
209
def fancy_rename(old, new, rename_func, unlink_func):
    """A fancy rename, when you don't have atomic rename.

    The existing target (if any) is first moved to a temporary name next to
    it, then ``old`` is renamed to ``new``.  On success the temporary copy
    is deleted; on failure it is renamed back to ``new``.

    :param old: The old path, to rename from
    :param new: The new path, to rename to
    :param rename_func: The potentially non-atomic rename function
    :param unlink_func: A way to delete the target file if the full rename
        succeeds
    :raises: whatever rename_func raises, except for the "target does not
        exist" and "source and target are the same file" cases handled
        below.
    """
    # sftp rename doesn't allow overwriting, so play tricks:
    base = os.path.basename(new)
    dirname = os.path.dirname(new)
    # callers use different encodings for the paths so the following MUST
    # respect that. We rely on python upcasting to unicode if new is unicode
    # and keeping a str if not.
    # NOTE(review): on Python 3, %-formatting a bytes ``base`` into this str
    # template embeds its repr rather than the raw name - confirm callers
    # only pass text paths.
    tmp_name = 'tmp.%s.%.9f.%d.%s' % (base, time.time(),
                                      os.getpid(), rand_chars(10))
    tmp_name = pathjoin(dirname, tmp_name)

    # Rename the file out of the way, but keep track if it didn't exist
    # We don't want to grab just any exception
    # something like EACCES should prevent us from continuing
    # The downside is that the rename_func has to throw an exception
    # with an errno = ENOENT, or NoSuchFile
    file_existed = False
    try:
        rename_func(new, tmp_name)
    except (errors.NoSuchFile,):
        # Target did not exist: nothing to move out of the way.
        pass
    except IOError as e:
        # RBC 20060103 abstraction leakage: the paramiko SFTP clients rename
        # function raises an IOError with errno is None when a rename fails.
        # This then gets caught here.
        if e.errno not in (None, errno.ENOENT, errno.ENOTDIR):
            raise
    except Exception as e:
        # Any other exception type is tolerated only when it carries an
        # errno meaning "no such file or directory".
        if (getattr(e, 'errno', None) is None
                or e.errno not in (errno.ENOENT, errno.ENOTDIR)):
            raise
    else:
        # The move succeeded, so there is a saved copy at tmp_name.
        file_existed = True

    success = False
    try:
        # This may throw an exception, in which case success will
        # not be set.
        rename_func(old, new)
        success = True
    except (IOError, OSError) as e:
        # source and target may be aliases of each other (e.g. on a
        # case-insensitive filesystem), so we may have accidentally renamed
        # source by when we tried to rename target
        if (file_existed and e.errno in (None, errno.ENOENT)
                and old.lower() == new.lower()):
            # source and target are the same file on a case-insensitive
            # filesystem, so we don't generate an exception
            # (success stays False, so the finally clause moves the saved
            # copy back to ``new``).
            pass
        else:
            raise
    finally:
        if file_existed:
            # If the file used to exist, rename it back into place
            # otherwise just delete it from the tmp location
            if success:
                unlink_func(tmp_name)
            else:
                rename_func(tmp_name, new)
277
278
# In Python 2.4.2 and older, os.path.abspath and os.path.realpath
# choke on a Unicode string containing a relative path if
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
# string, so abspath is reimplemented here on top of our own getcwd.
def _posix_abspath(path):
    """Return path made absolute against getcwd() and normalized."""
    # jam 20060426 rather than encoding to fsencoding, follow
    # posixpath.abspath but take the working directory from getcwd()
    if posixpath.isabs(path):
        absolute = path
    else:
        absolute = posixpath.join(getcwd(), path)
    return _posix_normpath(absolute)
289
290
def _posix_realpath(path):
    """Resolve path with posixpath.realpath, round-tripping via _fs_enc."""
    encoded = path.encode(_fs_enc)
    resolved = posixpath.realpath(encoded)
    return resolved.decode(_fs_enc)
293
294
def _posix_normpath(path):
    """Normalize path, also collapsing a leading double slash to one."""
    normalized = posixpath.normpath(path)
    # Bug 861008: posixpath.normpath() follows the POSIX standard, which
    # stipulates (for compatibility reasons) that exactly two leading
    # slashes must be preserved, while three or more collapse to one. We
    # deliberately break POSIX compatibility for this obscure feature and
    # drop the first slash. This is not a paranoid precaution: such paths
    # notably show up when the repo is hosted at the root of the
    # filesystem, i.e. in "/".
    if not normalized.startswith('//'):
        return normalized
    return normalized[1:]
308
309
def _posix_get_home_dir():
    """Get the home directory of the current user as a unicode path"""
    home = posixpath.expanduser("~")
    try:
        decoded = home.decode(_fs_enc)
    except AttributeError:
        # No decode method, so the value is used as it is.
        decoded = home
    except UnicodeDecodeError:
        raise errors.BadFilenameEncoding(home, _fs_enc)
    return decoded
319
320
def _posix_getuser_unicode():
    """Get username from environment or password database as unicode"""
    username = getpass.getuser()
    return username
324
325
def _win32_fixdrive(path):
    """Return path with its drive part forced to upper case.

    win32 is inconsistent about the case of the drive letter it reports
    (python.exe under cmd.exe gives a capital C:, win32 python inside a
    cygwin shell gives a lowercase c:), and the user might type either,
    so the drive is always upper-cased to keep paths comparable.
    """
    drive_part, remainder = ntpath.splitdrive(path)
    return drive_part.upper() + remainder
337
338
def _win32_abspath(path):
    """Return the absolute form of path with '/' separators and a fixed drive."""
    # Real ntpath.abspath doesn't have a problem with a unicode cwd
    forward_slashed = ntpath.abspath(path).replace('\\', '/')
    return _win32_fixdrive(forward_slashed)
342
343
def _win32_realpath(path):
    """Return ntpath.realpath(path) with '/' separators and a fixed drive."""
    # Real ntpath.realpath doesn't have a problem with a unicode cwd
    forward_slashed = ntpath.realpath(path).replace('\\', '/')
    return _win32_fixdrive(forward_slashed)
347
348
def _win32_pathjoin(*args):
    """Join path components with ntpath rules, returning '/' separators."""
    joined = ntpath.join(*args)
    return joined.replace('\\', '/')
351
352
def _win32_normpath(path):
    """Return ntpath.normpath(path) with '/' separators and a fixed drive."""
    forward_slashed = ntpath.normpath(path).replace('\\', '/')
    return _win32_fixdrive(forward_slashed)
355
356
def _win32_getcwd():
    """Return the working directory with '/' separators and a fixed drive."""
    forward_slashed = _getcwd().replace('\\', '/')
    return _win32_fixdrive(forward_slashed)
359
360
def _win32_mkdtemp(*args, **kwargs):
    """Call tempfile.mkdtemp and return its path with '/' and a fixed drive."""
    created = tempfile.mkdtemp(*args, **kwargs)
    return _win32_fixdrive(created.replace('\\', '/'))
363
364
def _win32_rename(old, new):
    """We expect to be able to atomically replace 'new' with old.

    On win32, if new exists, it must be moved out of the way first,
    and then deleted.
    """
    # Error codes that may be reported in place of ENOENT.
    ambiguous_errnos = (errno.EPERM, errno.EACCES, errno.EBUSY, errno.EINVAL)
    try:
        fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
    except OSError as e:
        if e.errno in ambiguous_errnos:
            # If we try to rename a non-existent file onto cwd, we get
            # EPERM or EACCES instead of ENOENT (on Linux we seem to get
            # EBUSY, on Mac EINVAL). The lstat raises ENOENT itself when
            # the old path really doesn't exist.
            os.lstat(old)
        raise
381
382
def _mac_getcwd():
    """Return the working directory normalized to Unicode form NFC."""
    cwd = _getcwd()
    return unicodedata.normalize('NFC', cwd)
385
386
def _rename_wrap_exception(rename_func):
    """Adds extra information to any exceptions that come from rename().

    The exception has an updated message and 'old_filename' and 'new_filename'
    attributes.

    :param rename_func: callable taking (old, new) that performs the rename.
    :return: a wrapper with the same (old, new) signature. It re-raises any
        OSError from rename_func as a new OSError carrying the same errno.
    """

    def _rename_wrapper(old, new):
        try:
            rename_func(old, new)
        except OSError as e:
            # strerror is None when the OSError was built without an
            # (errno, strerror) pair, e.g. OSError('message'). Fall back to
            # str(e) so building the message cannot raise TypeError and
            # hide the real failure.
            reason = e.strerror if e.strerror is not None else str(e)
            detailed_error = OSError(e.errno, reason +
                                     " [occurred when renaming '%s' to '%s']" %
                                     (old, new))
            detailed_error.old_filename = old
            detailed_error.new_filename = new
            raise detailed_error

    return _rename_wrapper
406
407
# Plain os.getcwd; the platform-specific _win32_getcwd and _mac_getcwd
# defined in this file call it and post-process the result.
_getcwd = os.getcwd


# Default rename wraps os.rename()
rename = _rename_wrap_exception(os.rename)

# Default is to just use the python builtins, but these can be rebound on
# particular platforms.
abspath = _posix_abspath
realpath = _posix_realpath
pathjoin = os.path.join
normpath = _posix_normpath
_get_home_dir = _posix_get_home_dir
getuser_unicode = _posix_getuser_unicode
getcwd = _getcwd
dirname = os.path.dirname
basename = os.path.basename
split = os.path.split
splitext = os.path.splitext
# These were already lazily imported into local scope
# mkdtemp = tempfile.mkdtemp
# rmtree = shutil.rmtree
lstat = os.lstat
fstat = os.fstat
432
433
def wrap_stat(st):
    """Return st unchanged; this is the default, pass-through wrapper."""
    return st
436
437
# Default value; raised to 3 in the win32 branch below.
MIN_ABS_PATHLENGTH = 1


# Rebind the module-level path helpers to their platform-specific variants.
if sys.platform == 'win32':
    abspath = _win32_abspath
    realpath = _win32_realpath
    pathjoin = _win32_pathjoin
    normpath = _win32_normpath
    getcwd = _win32_getcwd
    mkdtemp = _win32_mkdtemp
    rename = _rename_wrap_exception(_win32_rename)
    try:
        from . import _walkdirs_win32
    except ImportError:
        # Extension not available: keep the os.lstat/os.fstat defaults.
        pass
    else:
        lstat = _walkdirs_win32.lstat
        fstat = _walkdirs_win32.fstat
        wrap_stat = _walkdirs_win32.wrap_stat

    MIN_ABS_PATHLENGTH = 3

    def _win32_delete_readonly(function, path, excinfo):
        """Error handler for shutil.rmtree function [for win32]
        Helps to remove files and dirs marked as read-only.

        :param function: the function that failed on path.
        :param path: the path that could not be removed.
        :param excinfo: exception info tuple; index 1 is the exception.
        """
        exception = excinfo[1]
        if function in (os.remove, os.rmdir) \
                and isinstance(exception, OSError) \
                and exception.errno == errno.EACCES:
            # Access denied on a delete: make the path writable and retry.
            make_writable(path)
            function(path)
        else:
            # NOTE(review): a bare raise needs an exception to be currently
            # handled by the caller - confirm shutil.rmtree invokes the
            # handler from inside its except block.
            raise

    def rmtree(path, ignore_errors=False, onerror=_win32_delete_readonly):
        """Replacer for shutil.rmtree: could remove readonly dirs/files"""
        return shutil.rmtree(path, ignore_errors, onerror)

    _get_home_dir = win32utils.get_home_location
    getuser_unicode = win32utils.get_user_name

elif sys.platform == 'darwin':
    getcwd = _mac_getcwd
482
483
def get_terminal_encoding(trace=False):
    """Find the best encoding for printing to the screen.

    The encoding of sys.stdout is preferred, then that of sys.stdin, and
    if neither is set the result of osutils.get_user_encoding() is used.
    The streams are consulted first because on Windows
    locale.getpreferredencoding() is not the same encoding as that used
    by the console:
    http://mail.python.org/pipermail/python-list/2003-May/162357.html

    On my standard US Windows XP, the preferred encoding is
    cp1252, but the console is cp437

    :param trace: If True trace the selected encoding via mutter().
    """
    from .trace import mutter
    output_encoding = getattr(sys.stdout, 'encoding', None)
    if output_encoding:
        if trace:
            mutter('encoding stdout as sys.stdout encoding %r', output_encoding)
    else:
        input_encoding = getattr(sys.stdin, 'encoding', None)
        if input_encoding:
            output_encoding = input_encoding
            if trace:
                mutter('encoding stdout as sys.stdin encoding %r',
                       output_encoding)
        else:
            output_encoding = get_user_encoding()
            if trace:
                mutter('encoding stdout as osutils.get_user_encoding() %r',
                       output_encoding)
    if output_encoding == 'cp0':
        # invalid encoding (cp0 means 'no codepage' on Windows)
        output_encoding = get_user_encoding()
        if trace:
            mutter('cp0 is invalid encoding.'
                   ' encoding stdout as osutils.get_user_encoding() %r',
                   output_encoding)
    # Make sure Python actually knows the codec before handing it back.
    try:
        codecs.lookup(output_encoding)
    except LookupError:
        sys.stderr.write('brz: warning:'
                         ' unknown terminal encoding %s.\n'
                         '  Using encoding %s instead.\n'
                         % (output_encoding, get_user_encoding())
                         )
        output_encoding = get_user_encoding()

    return output_encoding
535
536
def normalizepath(f):
    """Return f with its parent directory resolved.

    The final component is left unresolved unless it is empty, '.' or
    '..', in which case the whole path is resolved.
    """
    if getattr(os.path, 'realpath', None) is None:
        resolve = abspath
    else:
        resolve = realpath
    parent, leaf = os.path.split(f)
    if leaf in ("", ".", ".."):
        return resolve(f)
    return pathjoin(resolve(parent), leaf)
547
548
def isdir(f):
    """True if f is an accessible directory."""
    try:
        mode = os.lstat(f).st_mode
    except OSError:
        return False
    return stat.S_ISDIR(mode)
555
556
def isfile(f):
    """True if f is a regular file."""
    try:
        mode = os.lstat(f).st_mode
    except OSError:
        return False
    return stat.S_ISREG(mode)
563
564
def islink(f):
    """True if f is a symlink."""
    try:
        mode = os.lstat(f).st_mode
    except OSError:
        return False
    return stat.S_ISLNK(mode)
571
572
def is_inside(dir, fname):
    """True if fname is inside dir.

    The parameters should typically be passed to osutils.normpath first, so
    that . and .. and repeated slashes are eliminated, and the separators
    are canonical for the platform.

    The empty string as a dir name is taken as top-of-tree and matches
    everything.
    """
    # XXX: Most callers of this can actually do something smarter by
    # looking at the inventory
    if dir == fname or dir in ('', b''):
        return True

    # Compare against "dir/" so that 'src' does not match 'srccontrol'.
    slash = b'/' if isinstance(dir, bytes) else '/'
    prefix = dir if dir.endswith(slash) else dir + slash
    return fname.startswith(prefix)
599
600
def is_inside_any(dir_list, fname):
    """True if fname is inside any of given dirs."""
    return any(is_inside(candidate, fname) for candidate in dir_list)
607
608
def is_inside_or_parent_of_any(dir_list, fname):
    """True if fname is a child or a parent of any of the given files."""
    return any(
        is_inside(candidate, fname) or is_inside(fname, candidate)
        for candidate in dir_list)
615
616
def pumpfile(from_file, to_file, read_length=-1, buff_size=32768,
             report_activity=None, direction='read'):
    """Copy contents of one file to another.

    The read_length can either be -1 to read to end-of-file (EOF) or
    it can specify the maximum number of bytes to read.

    The buff_size represents the maximum size for each read operation
    performed on from_file.

    :param report_activity: Call this as bytes are read, see
        Transport._report_activity
    :param direction: Will be passed to report_activity

    :return: The number of bytes copied.
    """
    copied = 0
    # A negative read_length means "no limit": keep going until EOF.
    bounded = read_length >= 0
    remaining = read_length
    while not bounded or remaining > 0:
        want = min(remaining, buff_size) if bounded else buff_size
        chunk = from_file.read(want)
        if not chunk:
            # EOF reached
            break
        chunk_len = len(chunk)
        if report_activity is not None:
            report_activity(chunk_len, direction)
        to_file.write(chunk)
        copied += chunk_len
        if bounded:
            remaining -= chunk_len
    return copied
663
664
def pump_string_file(bytes, file_handle, segment_size=None):
    """Write bytes to file_handle in many smaller writes.

    :param bytes: The string to write.
    :param file_handle: The file to write to.
    :param segment_size: Largest size of a single write; 5MB when not
        given (or given as 0).
    """
    # Very large single writes fail on some platforms (e.g. Windows with
    # SMB mounted drives), so the data goes out in slices instead.
    chunk = segment_size or 5242880  # 5MB
    total = len(bytes)
    view = memoryview(bytes)
    emit = file_handle.write
    for start in range(0, total, chunk):
        emit(view[start:start + chunk])
681
682
def file_iterator(input_file, readsize=32768):
    """Yield successive reads of up to readsize from input_file until empty."""
    chunk = input_file.read(readsize)
    while len(chunk) != 0:
        yield chunk
        chunk = input_file.read(readsize)
689
690
# GZ 2017-09-16: Makes sense in general for hexdigest() result to be text, but
# used as bytes through most interfaces so encode with this wrapper.
def _hexdigest(hashobj):
    """Return the hex digest of hashobj as bytes."""
    text_digest = hashobj.hexdigest()
    return text_digest.encode()
695
696
def sha_file(f):
    """Calculate the hexdigest of an open file.

    The file cursor should be already at the start.
    """
    digest = sha()
    chunk_size = 128 << 10
    chunk = f.read(chunk_size)
    while chunk:
        digest.update(chunk)
        chunk = f.read(chunk_size)
    return _hexdigest(digest)
710
711
def size_sha_file(f):
    """Calculate the size and hexdigest of an open file.

    The file cursor should be already at the start and
    the caller is responsible for closing the file afterwards.
    """
    total = 0
    digest = sha()
    chunk_size = 128 << 10
    chunk = f.read(chunk_size)
    while chunk:
        total += len(chunk)
        digest.update(chunk)
        chunk = f.read(chunk_size)
    return total, _hexdigest(digest)
728
729
def sha_file_by_name(fname):
    """Calculate the SHA1 of a file by reading the full text"""
    digest = sha()
    fd = os.open(fname, os.O_RDONLY | O_BINARY | O_NOINHERIT)
    try:
        chunk = os.read(fd, 1 << 16)
        while chunk:
            digest.update(chunk)
            chunk = os.read(fd, 1 << 16)
        return _hexdigest(digest)
    finally:
        # Always release the descriptor, even if a read fails.
        os.close(fd)
742
743
def sha_strings(strings, _factory=sha):
    """Return the sha-1 of concatenation of strings"""
    digest = _factory()
    for piece in strings:
        digest.update(piece)
    return _hexdigest(digest)
750
751
def sha_string(f, _factory=sha):
    """Return the hex digest, as bytes, of the single string f."""
    # GZ 2017-09-16: Dodgy if factory is ever not sha, probably shouldn't be.
    digest = _factory(f)
    return _hexdigest(digest)
755
756
def fingerprint_file(f):
    """Read all of f and return a dict with its 'size' and 'sha1'."""
    content = f.read()
    fingerprint = {'size': len(content)}
    fingerprint['sha1'] = _hexdigest(sha(content))
    return fingerprint
761
762
def compare_files(a, b):
    """Returns true if equal in contents"""
    chunk_size = 4096
    while True:
        left, right = a.read(chunk_size), b.read(chunk_size)
        if left == right:
            if not left:
                # Both files ended together without any difference.
                return True
        else:
            return False
773
774
def local_time_offset(t=None):
    """Return offset of local zone from GMT, either at present or at time t."""
    when = time.time() if t is None else t
    local_naive = datetime.fromtimestamp(when)
    utc_naive = datetime.utcfromtimestamp(when)
    delta = local_naive - utc_naive
    return delta.seconds + 86400 * delta.days
781
782
# English weekday abbreviations, indexed by struct_time.tm_wday.
weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
_default_format_by_weekday_num = [wd + " %Y-%m-%d %H:%M:%S" for wd in weekdays]


def format_date(t, offset=0, timezone='original', date_fmt=None,
                show_offset=True):
    """Return a formatted date string.

    Any ``%a`` in the format is replaced with the English weekday name
    from ``weekdays`` before strftime is called.

    :param t: Seconds since the epoch.
    :param offset: Timezone offset in seconds east of utc.
    :param timezone: How to display the time: 'utc', 'original' for the
         timezone specified by offset, or 'local' for the process's current
         timezone.
    :param date_fmt: strftime format.
    :param show_offset: Whether to append the timezone.
    """
    date_fmt, tt, offset_str = _format_date(
        t, offset, timezone, date_fmt, show_offset)
    english_fmt = date_fmt.replace('%a', weekdays[tt[6]])
    return time.strftime(english_fmt, tt) + offset_str
804
805
# Cache of formatted offset strings
_offset_cache = {}


def format_date_with_offset_in_original_timezone(t, offset=0,
                                                 _cache=_offset_cache):
    """Return a formatted date string in the original timezone.

    This routine may be faster then format_date.

    :param t: Seconds since the epoch.
    :param offset: Timezone offset in seconds east of utc.
    :param _cache: Mapping of offset -> formatted offset string; shared
        between calls on purpose so each offset is only formatted once.
    :return: A string such as 'Thu 1970-01-01 00:00:00 +0000'.
    """
    if offset is None:
        offset = 0
    tt = time.gmtime(t + offset)
    date_fmt = _default_format_by_weekday_num[tt[6]]
    date_str = time.strftime(date_fmt, tt)
    offset_str = _cache.get(offset, None)
    if offset_str is None:
        # Work on whole minutes truncated toward zero and format the sign
        # separately. Feeding true-division results straight into %d lost
        # the sign for offsets between -1h and 0 and produced wrong minutes
        # for other negative offsets (e.g. -4500s came out as -0145).
        total_minutes = int(offset / 60)
        sign = '-' if total_minutes < 0 else '+'
        hours, minutes = divmod(abs(total_minutes), 60)
        offset_str = ' %s%02d%02d' % (sign, hours, minutes)
        _cache[offset] = offset_str
    return date_str + offset_str
829
830
def format_local_date(t, offset=0, timezone='original', date_fmt=None,
                      show_offset=True):
    """Return an unicode date string formatted according to the current locale.

    :param t: Seconds since the epoch.
    :param offset: Timezone offset in seconds east of utc.
    :param timezone: How to display the time: 'utc', 'original' for the
         timezone specified by offset, or 'local' for the process's current
         timezone.
    :param date_fmt: strftime format.
    :param show_offset: Whether to append the timezone.
    """
    date_fmt, tt, offset_str = _format_date(
        t, offset, timezone, date_fmt, show_offset)
    formatted = time.strftime(date_fmt, tt)
    if isinstance(formatted, str):
        return formatted + offset_str
    # Non-text result: decode it with the user's encoding.
    return formatted.decode(get_user_encoding(), 'replace') + offset_str
849
850
def _format_date(t, offset, timezone, date_fmt, show_offset):
    """Work out the pieces needed to format a date.

    :param t: Seconds since the epoch.
    :param offset: Timezone offset in seconds east of utc; None is treated
        as 0 for the 'original' timezone.
    :param timezone: 'utc', 'original' or 'local'.
    :param date_fmt: strftime format, or None for the default
        "%a %Y-%m-%d %H:%M:%S".
    :param show_offset: Whether to produce the ' +HHMM' suffix.
    :return: A (date_fmt, time_tuple, offset_str) tuple; offset_str is ''
        when show_offset is false.
    :raises UnsupportedTimezoneFormat: for any other timezone value.
    """
    if timezone == 'utc':
        tt = time.gmtime(t)
        offset = 0
    elif timezone == 'original':
        if offset is None:
            offset = 0
        tt = time.gmtime(t + offset)
    elif timezone == 'local':
        tt = time.localtime(t)
        offset = local_time_offset(t)
    else:
        raise UnsupportedTimezoneFormat(timezone)
    if date_fmt is None:
        date_fmt = "%a %Y-%m-%d %H:%M:%S"
    if show_offset:
        # Work on whole minutes truncated toward zero and format the sign
        # separately. Feeding true-division results straight into %d lost
        # the sign for offsets between -1h and 0 and produced wrong minutes
        # for other negative offsets (e.g. -4500s came out as -0145).
        total_minutes = int(offset / 60)
        sign = '-' if total_minutes < 0 else '+'
        hours, minutes = divmod(abs(total_minutes), 60)
        offset_str = ' %s%02d%02d' % (sign, hours, minutes)
    else:
        offset_str = ''
    return (date_fmt, tt, offset_str)
871
872
def compact_date(when):
    """Return when (seconds since the epoch) as a UTC 'YYYYMMDDHHMMSS' string."""
    utc_tuple = time.gmtime(when)
    return time.strftime('%Y%m%d%H%M%S', utc_tuple)
875
876
def format_delta(delta):
    """Get a nice looking string for a time delta.

    :param delta: The time difference in seconds, can be positive or negative.
        positive indicates time in the past, negative indicates time in the
        future. (usually time.time() - stored_time)
    :return: String formatted to show approximate resolution
    """
    delta = int(delta)
    direction = 'ago' if delta >= 0 else 'in the future'
    seconds = abs(delta)

    # Up to 90 seconds: seconds only.
    if seconds < 90:
        template = '%d second %s' if seconds == 1 else '%d seconds %s'
        return template % (seconds, direction)

    minutes = int(seconds / 60)
    seconds -= 60 * minutes
    plural_seconds = '' if seconds == 1 else 's'

    # Up to 90 minutes: minutes and seconds.
    if minutes < 90:
        if minutes == 1:
            template = '%d minute, %d second%s %s'
        else:
            template = '%d minutes, %d second%s %s'
        return template % (minutes, seconds, plural_seconds, direction)

    # Beyond that: hours and minutes.
    hours = int(minutes / 60)
    minutes -= 60 * hours
    plural_minutes = '' if minutes == 1 else 's'
    if hours == 1:
        template = '%d hour, %d minute%s %s'
    else:
        template = '%d hours, %d minute%s %s'
    return template % (hours, minutes, plural_minutes, direction)
925
926
def filesize(f):
    """Return size of given open file."""
    descriptor = f.fileno()
    return os.fstat(descriptor).st_size
930
931
# Alias os.urandom to support platforms (which?) without /dev/urandom and
# override if it doesn't work. Avoid checking on windows where there is
# significant initialisation cost that can be avoided for some bzr calls.

rand_bytes = os.urandom


def _insecure_rand_bytes(n):
    """Return n pseudo-random bytes from the random module.

    Used only when os.urandom is not implemented. It returns bytes, like
    os.urandom does, so callers that index the result (rand_chars takes
    each byte modulo 36) behave the same with either source.
    """
    import random
    return bytes(random.randint(0, 255) for _ in range(n))


if rand_bytes.__module__ != "nt":
    try:
        rand_bytes(1)
    except NotImplementedError:
        # not well seeded, but better than nothing
        rand_bytes = _insecure_rand_bytes
950
951
# Alphabet for rand_chars: digits followed by lowercase ASCII letters.
ALNUM = '0123456789abcdefghijklmnopqrstuvwxyz'


def rand_chars(num):
    """Return a random string of num alphanumeric characters

    The result only contains lowercase chars because it may be used on
    case-insensitive filesystems.
    """
    return ''.join([ALNUM[raw_byte % 36] for raw_byte in rand_bytes(num)])
965
966
967# TODO: We could later have path objects that remember their list
968# decomposition (might be too tricksy though.)
969
def splitpath(p):
    """Turn a path string into the list of its components.

    Empty components and '.' are dropped. Both str and bytes paths are
    accepted; the components have the same type as the input.

    :param p: The path to split.
    :return: A list of path components.
    :raises BzrError: if any component is '..'.
    """
    if isinstance(p, bytes):
        parent_marker = b'..'
        ignorable = (b'.', b'')
        either_separator = b'[\\\\/]'
        slash = b'/'
    else:
        parent_marker = '..'
        ignorable = ('.', '')
        either_separator = r'[\\/]'
        slash = '/'

    if os.path.sep == '\\':
        # split on either delimiter because people might use either on
        # Windows
        pieces = re.split(either_separator, p)
    else:
        pieces = p.split(slash)

    kept = []
    for piece in pieces:
        if piece == parent_marker:
            raise errors.BzrError(
                gettext("sorry, %r not allowed in path") % piece)
        if piece not in ignorable:
            kept.append(piece)
    return kept
1002
1003
def joinpath(p):
    """Join a sequence of path components with pathjoin.

    :param p: The components to join.
    :return: The result of ``pathjoin(*p)``.
    :raises BzrError: if a component is '..', None or the empty string.
    """
    for component in p:
        rejected = (component == '..') or (component is None) \
            or (component == '')
        if rejected:
            raise errors.BzrError(
                gettext("sorry, %r not allowed in path") % component)
    return pathjoin(*p)
1009
1010
def parent_directories(filename):
    """Return the list of parent directories, deepest first.

    For example, parent_directories("a/b/c") -> ["a/b", "a"].

    :param filename: The path whose ancestors are wanted.
    """
    parts = splitpath(dirname(filename))
    # Join successively shorter prefixes of the directory components.
    return [joinpath(parts[:depth]) for depth in range(len(parts), 0, -1)]
1022
1023
_extension_load_failures = []


def failed_to_load_extension(exception):
    """Record that a binary extension could not be loaded.

    This should be called from the ImportError block guarding the attempt to
    import the native extension.  If this function returns, the pure-Python
    implementation should be loaded instead::

        try:
            import breezy._fictional_extension_pyx
        except ImportError as e:
            breezy.osutils.failed_to_load_extension(e)
            import breezy._fictional_extension_py

    :param exception: The exception raised by the failed import.
    """
    # NB: The docstring above is just an example, not a doctest, because
    # doctest currently can't cope with the use of lazy imports in this
    # namespace -- mbp 20090729

    # The failure is not reported at the time it occurs, because these tend
    # to happen very early in startup when we can't check config files etc,
    # and also we want to report all failures but not spam the user with 10
    # warnings.
    message = str(exception)
    if message in _extension_load_failures:
        # Already recorded; each distinct message is logged only once.
        return
    trace.mutter("failed to load compiled extension: %s" % message)
    _extension_load_failures.append(message)
1052
1053
def report_extension_load_failures():
    """Warn that some compiled extensions could not be loaded.

    Nothing is reported when no failure has been recorded, or when the
    'missing_extensions' warning is suppressed in the global configuration.
    """
    nothing_failed = not _extension_load_failures
    if nothing_failed or config.GlobalConfig().suppress_warning(
            'missing_extensions'):
        return
    # the warnings framework should by default show this only once
    from .trace import warning
    message = (
        "brz: warning: some compiled extensions could not be loaded; "
        "see ``brz help missing-extensions``")
    warning(message)
    # we no longer show the specific missing extensions here, because it makes
    # the message too long and scary - see
    # https://bugs.launchpad.net/bzr/+bug/430529
1067
1068
# Prefer the compiled implementation of chunks_to_lines. If it cannot be
# imported, record the failure (reported later, if at all, by
# report_extension_load_failures) and use the _chunks_to_lines_py module
# instead.
try:
    from ._chunks_to_lines_pyx import chunks_to_lines
except ImportError as e:
    failed_to_load_extension(e)
    from ._chunks_to_lines_py import chunks_to_lines
1074
1075
def split_lines(s):
    """Split s into lines, keeping the newline characters.

    :param s: The text to split, as bytes or as a unicode string.
    :return: A list of lines.
    """
    if not isinstance(s, bytes):
        # chunks_to_lines only supports 8-bit strings
        return _split_lines(s)
    # Trivially convert a fulltext into a 'chunked' representation, and let
    # chunks_to_lines do the heavy lifting.
    return chunks_to_lines([s])
1085
1086
def _split_lines(s):
    """Split s into lines, keeping the newline characters.

    Only '\\n' is treated as a line terminator. Both bytes and unicode
    strings are supported; the lines have the same type as s.

    :param s: The text to split.
    :return: A list of lines; a final line without a newline is included
        only if it is non-empty.
    """
    newline = b'\n' if isinstance(s, bytes) else u'\n'
    pieces = s.split(newline)
    # Whatever follows the last newline (possibly nothing) is the final piece.
    tail = pieces.pop()
    result = [piece + newline for piece in pieces]
    if tail:
        result.append(tail)
    return result
1098
1099
def hardlinks_good():
    """Return True unless running on win32, cygwin or darwin."""
    platforms_without_good_hardlinks = ('win32', 'cygwin', 'darwin')
    return sys.platform not in platforms_without_good_hardlinks
1102
1103
def link_or_copy(src, dest):
    """Hardlink a file, or copy it if it can't be hardlinked.

    The file is copied when hardlinks_good() is false, or when linking
    fails with EXDEV. Any other linking error is propagated.

    :param src: Path of the existing file.
    :param dest: Path of the link or copy to create.
    """
    if hardlinks_good():
        try:
            os.link(src, dest)
            return
        except (OSError, IOError) as e:
            if e.errno != errno.EXDEV:
                raise
    shutil.copyfile(src, dest)
1115
1116
def delete_any(path):
    """Delete a file, symlink or directory.

    Will delete even if readonly: when the first attempt fails with EPERM
    or EACCES, the path is made writable (best effort) and the deletion is
    retried once.

    :param path: The path to delete.
    """
    try:
        _delete_file_or_dir(path)
    except (OSError, IOError) as e:
        if e.errno not in (errno.EPERM, errno.EACCES):
            raise
        # make writable and try again
        try:
            make_writable(path)
        except (OSError, IOError):
            pass
        _delete_file_or_dir(path)
1134
1135
def _delete_file_or_dir(path):
    """Remove path with os.rmdir if it is a directory, else with os.unlink."""
    # Look Before You Leap (LBYL) is appropriate here instead of Easier to Ask
    # for Forgiveness than Permission (EAFP) because:
    # - root can damage a solaris file system by using unlink,
    # - unlink raises different exceptions on different OSes (linux: EISDIR,
    #   win32: EACCES, OSX: EPERM) when invoked on a directory.
    remover = os.rmdir if isdir(path) else os.unlink  # Takes care of symlinks
    remover(path)
1146
1147
def has_symlinks():
    """Return True if the os module provides a symlink function."""
    return getattr(os, 'symlink', None) is not None
1153
1154
def has_hardlinks():
    """Return True if the os module provides a link function."""
    return getattr(os, 'link', None) is not None
1160
1161
def host_os_dereferences_symlinks():
    """Return True if symlinks exist and the platform is not cygwin/win32."""
    if not has_symlinks():
        return False
    return sys.platform not in ('cygwin', 'win32')
1165
1166
def readlink(abspath):
    """Return a string representing the path to which the symbolic link points.

    :param abspath: The link absolute unicode path.

    This is guaranteed to return the link target as unicode: the path is
    encoded with the filesystem encoding before calling os.readlink, and the
    result is decoded with the same encoding.
    """
    encoded_link = abspath.encode(_fs_enc)
    return os.readlink(encoded_link).decode(_fs_enc)
1179
1180
def contains_whitespace(s):
    """True if there are any whitespace characters in s.

    :param s: A str or bytes value to inspect.
    """
    # string.whitespace can include '\xa0' in certain locales, because it is
    # considered "non-breaking-space" as part of ISO-8859-1. But it
    # 1) Isn't a breaking whitespace
    # 2) Isn't one of ' \t\r\n' which are characters we sometimes use as
    #    separators
    # 3) '\xa0' isn't unicode safe since it is >128.
    if isinstance(s, str):
        candidates = ' \t\n\r\v\f'
    else:
        candidates = (b' ', b'\t', b'\n', b'\r', b'\v', b'\f')
    return any(candidate in s for candidate in candidates)
1199
1200
def contains_linebreaks(s):
    """True if there is any vertical whitespace in s.

    The characters checked are form feed, newline and carriage return.

    :param s: A str or bytes value to inspect. Bytes input is matched
        against the corresponding byte values, in the same way as
        contains_whitespace.
    """
    if isinstance(s, bytes):
        # A str needle cannot be searched for in bytes, so use byte needles.
        breaks = (b'\f', b'\n', b'\r')
    else:
        breaks = '\f\n\r'
    return any(ch in s for ch in breaks)
1208
1209
def relpath(base, path):
    """Return path relative to base, or raise PathNotChild exception.

    The path may be either an absolute path or a path relative to the
    current working directory.

    os.path.commonprefix (python2.4) has a bad bug that it works just
    on string prefixes, assuming that '/u' is a prefix of '/u2'.  This
    avoids that problem.

    NOTE: `base` should not have a trailing slash otherwise you'll get
    PathNotChild exceptions regardless of `path`.

    :param base: The directory the result should be relative to.
    :param path: The path to convert.
    :return: The relative path, or '' when path is base itself.
    :raises ValueError: if base is shorter than MIN_ABS_PATHLENGTH.
    :raises PathNotChild: if path is not located under base.
    """
    if len(base) < MIN_ABS_PATHLENGTH:
        # must have space for e.g. a drive letter
        raise ValueError(gettext('%r is too short to calculate a relative path')
                         % (base,))

    rp = abspath(path)

    # Peel components off the end of the path until only base remains.
    tails = []
    remaining = rp
    while remaining != base:
        if len(remaining) <= len(base):
            raise errors.PathNotChild(rp, base)
        remaining, tail = split(remaining)
        if tail:
            tails.append(tail)

    if not tails:
        return ''
    return pathjoin(*reversed(tails))
1246
1247
def _cicp_canonical_relpath(base, path):
    """Return the canonical path relative to base.

    Like relpath, but on case-insensitive-case-preserving file-systems, this
    will return the relpath as stored on the file-system rather than in the
    case specified in the input string, for all existing portions of the path.

    This will cause O(N) behaviour if called for every path in a tree; if you
    have a number of paths to convert, you should use canonical_relpaths().

    :param base: The directory the result should be relative to.
    :param path: The path to convert; handled as in relpath().
    :return: The relative path, with each existing component replaced by the
        name found by scanning its parent directory. Components that cannot
        be found are returned as they were given.
    """
    # TODO: it should be possible to optimize this for Windows by using the
    # win32 API FindFiles function to look for the specified name - but using
    # os.listdir() still gives us the correct, platform agnostic semantics in
    # the short term.

    rel = relpath(base, path)
    # '.' will have been turned into ''
    if not rel:
        return rel

    abs_base = abspath(base)
    current = abs_base

    # use an explicit iterator so we can easily consume the rest on early exit.
    bit_iter = iter(rel.split('/'))
    for bit in bit_iter:
        # Names are compared after lower-casing both sides.
        lbit = bit.lower()
        try:
            next_entries = scandir(current)
        except OSError:  # enoent, eperm, etc
            # We can't find this in the filesystem, so just append the
            # remaining bits.
            current = pathjoin(current, bit, *list(bit_iter))
            break
        for entry in next_entries:
            if lbit == entry.name.lower():
                # Use the path reported by the directory scan for this
                # component, then move on to the next one.
                current = entry.path
                break
        else:
            # got to the end, nothing matched, so we just return the
            # non-existing bits as they were specified (the filename may be
            # the target of a move, for example).
            current = pathjoin(current, bit, *list(bit_iter))
            break
    # Drop the absolute base prefix and any leading '/' to make the result
    # relative again.
    return current[len(abs_base):].lstrip('/')
1293
1294
1295# XXX - TODO - we need better detection/integration of case-insensitive
1296# file-systems; Linux often sees FAT32 devices (or NFS-mounted OSX
1297# filesystems), for example, so could probably benefit from the same basic
1298# support there.  For now though, only Windows and OSX get that support, and
1299# they get it for *all* file-systems!
# Only win32 and darwin get the case-canonicalising implementation.
canonical_relpath = (
    _cicp_canonical_relpath if sys.platform in ('win32', 'darwin')
    else relpath)
1304
1305
def canonical_relpaths(base, paths):
    """Canonicalize a sequence of paths relative to base.

    The intent is for this implementation to use a cache, vastly speeding
    up multiple transformations in the same directory.

    :param base: The directory the results should be relative to.
    :param paths: An iterable of paths to convert.
    :return: A list with canonical_relpath(base, p) for each p in paths, in
        the same order.
    """
    # but for now, we haven't optimized...
    return [canonical_relpath(base, each_path) for each_path in paths]
1314
1315
def decode_filename(filename):
    """Decode the filename using the filesystem encoding.

    If it is unicode, it is returned.
    Otherwise it is decoded from the filesystem's encoding. If decoding
    fails, an errors.BadFilenameEncoding exception is raised.

    :param filename: A unicode or filesystem-encoded filename.
    """
    if not isinstance(filename, str):
        try:
            filename = filename.decode(_fs_enc)
        except UnicodeDecodeError:
            raise errors.BadFilenameEncoding(filename, _fs_enc)
    return filename
1329
1330
def safe_unicode(unicode_or_utf8_string):
    """Coerce unicode_or_utf8_string into unicode.

    If it is unicode, it is returned.
    Otherwise it is decoded from utf-8. If decoding fails, the exception is
    wrapped in a BzrBadParameterNotUnicode exception.

    :param unicode_or_utf8_string: A unicode string or utf-8 encoded bytes.
    """
    value = unicode_or_utf8_string
    if not isinstance(value, str):
        try:
            value = value.decode('utf8')
        except UnicodeDecodeError:
            raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
    return value
1344
1345
def safe_utf8(unicode_or_utf8_string):
    """Coerce unicode_or_utf8_string to a utf8 string.

    If it is bytes, it is checked to be valid utf-8 and returned unchanged.
    If it is Unicode, it is encoded into a utf-8 string.

    :raises BzrBadParameterNotUnicode: if bytes input is not valid utf-8.
    """
    if not isinstance(unicode_or_utf8_string, bytes):
        return unicode_or_utf8_string.encode('utf-8')
    # TODO: jam 20070209 This is overkill, and probably has an impact on
    #       performance if we are dealing with lots of apis that want a
    #       utf-8 revision id
    try:
        # Make sure it is a valid utf-8 string
        unicode_or_utf8_string.decode('utf-8')
    except UnicodeDecodeError:
        raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
    return unicode_or_utf8_string
1363
1364
_platform_normalizes_filenames = (sys.platform == 'darwin')


def normalizes_filenames():
    """Return True if this platform normalizes unicode filenames.

    Only Mac OSX (sys.platform == 'darwin') is treated as doing so.
    """
    return _platform_normalizes_filenames
1376
1377
def _accessible_normalized_filename(path):
    """Get the unicode normalized path, and if you can access the file.

    On platforms where the system normalizes filenames (Mac OSX),
    you can access a file by any path which will normalize correctly.
    On platforms where the system does not normalize filenames
    (everything else), you have to access a file by its exact path.

    Internally, bzr only supports NFC normalization, since that is
    the standard for XML documents.

    So return the normalized path, and a flag indicating if the file
    can be accessed by that path. This variant always reports True.

    :param path: A unicode path, or bytes in the encoding returned by
        sys.getfilesystemencoding().
    """
    if isinstance(path, bytes):
        text = path.decode(sys.getfilesystemencoding())
    else:
        text = path
    return unicodedata.normalize('NFC', text), True
1396
1397
def _inaccessible_normalized_filename(path):
    """Get the unicode normalized path, and if you can access the file.

    This is the variant for platforms where the system does not normalize
    filenames, so a file has to be accessed by its exact path.

    Internally, bzr only supports NFC normalization, since that is
    the standard for XML documents.

    :param path: A unicode path, or bytes in the encoding returned by
        sys.getfilesystemencoding().
    :return: A tuple (normalized_path, accessible), where accessible is True
        only if the (decoded) path was already NFC-normalized.
    """
    # Note: assigning to a local named __doc__ inside the body does not set
    # the function's docstring, so the documentation is written out above.
    if isinstance(path, bytes):
        path = path.decode(sys.getfilesystemencoding())
    normalized = unicodedata.normalize('NFC', path)
    return normalized, normalized == path
1405
1406
# Bind the implementation matching this platform's filename behaviour.
normalized_filename = (
    _accessible_normalized_filename if _platform_normalizes_filenames
    else _inaccessible_normalized_filename)
1411
1412
def set_signal_handler(signum, handler, restart_syscall=True):
    """A wrapper for signal.signal that also calls siginterrupt(signum, False)
    on platforms that support that.

    :param signum: The signal number to install the handler for.
    :param handler: The handler to install, as accepted by signal.signal.
    :param restart_syscall: if set, allow syscalls interrupted by a signal to
        automatically restart (by calling `signal.siginterrupt(signum,
        False)`).  May be ignored if the feature is not available on this
        platform or Python version.
    :return: The previously installed handler, or None if the signal module
        cannot be imported.
    """
    try:
        import signal
    except ImportError:
        # This python implementation doesn't provide signal support, hence no
        # handler exists
        return None
    try:
        siginterrupt = signal.siginterrupt
    except AttributeError:
        # siginterrupt doesn't exist on this platform, or for this version
        # of Python.
        def siginterrupt(signum, flag):
            return None

    if not restart_syscall:
        return signal.signal(signum, handler)

    def sig_handler(*args):
        # Python resets the siginterrupt flag when a signal is
        # received.  <http://bugs.python.org/issue8354>
        # As a workaround for some cases, set it back the way we want it.
        siginterrupt(signum, False)
        # Now run the handler function passed to set_signal_handler.
        handler(*args)

    previous = signal.signal(signum, sig_handler)
    siginterrupt(signum, False)
    return previous
1447
1448
default_terminal_width = 80
"""The default terminal width for ttys.

This is defined so that higher levels can share a common fallback value when
terminal_width() returns None.
"""

# Keep some state so that terminal_width can detect if _terminal_size has
# returned a different size since the process started.  See docstring and
# comments of terminal_width for details.
# _terminal_size_state has 3 possible values: no_data, unchanged, and changed.
#  - 'no_data': terminal_width has not queried _terminal_size yet.
#  - 'unchanged': every size queried so far equals the first one.
#  - 'changed': a queried size differed from the first one; terminal_width
#    never moves the state back from this value.
_terminal_size_state = 'no_data'
# The (width, height) pair returned by the first _terminal_size query made by
# terminal_width; None until that query happens.
_first_terminal_size = None
1462
1463
def terminal_width():
    """Return terminal width.

    None is returned if the width can't be established precisely.

    The rules are:
    - if BRZ_COLUMNS is set to an integer, returns its value (or None when
      that value is not positive)
    - if there is no controlling terminal, returns None
    - query the OS, if the queried size has changed since the first query,
      return its value,
    - if COLUMNS is set, returns its value,
    - if the OS has a value (even though it's never changed), return its value.

    From there, we need to query the OS to get the size of the controlling
    terminal.

    On Unices we query the OS by:
    - get termios.TIOCGWINSZ
    - if an error occurs or a non-positive value is obtained, returns None

    On Windows we query the OS by:
    - win32utils.get_console_size() decides,
    - returns None on error (provided default value)
    """
    global _first_terminal_size, _terminal_size_state

    # Note to implementors: if changing the rules for determining the width,
    # make sure you've considered the behaviour in these cases:
    #  - M-x shell in emacs, where $COLUMNS is set and TIOCGWINSZ returns 0,0.
    #  - brz log | less, in bash, where $COLUMNS not set and TIOCGWINSZ returns
    #    0,0.
    #  - (add more interesting cases here, if you find any)
    # Some programs implement "Use $COLUMNS (if set) until SIGWINCH occurs",
    # but we don't want to register a signal handler because it is impossible
    # to do so without risking EINTR errors in Python <= 2.6.5 (see
    # <http://bugs.python.org/issue8354>).  Instead we check TIOCGWINSZ every
    # time so we can notice if the reported size has changed, which should have
    # a similar effect.

    # If BRZ_COLUMNS is set, take it, user is always right
    # Except if they specified 0 in which case, impose no limit here
    try:
        forced_width = int(os.environ['BRZ_COLUMNS'])
    except (KeyError, ValueError):
        forced_width = None
    if forced_width is not None:
        return forced_width if forced_width > 0 else None

    isatty = getattr(sys.stdout, 'isatty', None)
    if isatty is None or not isatty():
        # Don't guess, setting BRZ_COLUMNS is the recommended way to override.
        return None

    # Query the OS
    os_size = _terminal_size(None, None)
    width, height = os_size
    if _terminal_size_state == 'no_data':
        _first_terminal_size = os_size
        _terminal_size_state = 'unchanged'
    elif (_terminal_size_state == 'unchanged'
          and _first_terminal_size != os_size):
        _terminal_size_state = 'changed'

    # If the OS claims to know how wide the terminal is, and this value has
    # ever changed, use that.
    if (_terminal_size_state == 'changed'
            and width is not None and width > 0):
        return width

    # If COLUMNS is set, use it.
    try:
        return int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        pass

    # Finally, use an unchanged size from the OS, if we have one.
    if (_terminal_size_state == 'unchanged'
            and width is not None and width > 0):
        return width

    # The width could not be determined.
    return None
1547
1548
def _win32_terminal_size(width, height):
    """Return the console size as (width, height), using win32utils.

    :param width: Default passed to win32utils.get_console_size as defaultx.
    :param height: Default passed to win32utils.get_console_size as defaulty.
    """
    console_width, console_height = win32utils.get_console_size(
        defaultx=width, defaulty=height)
    return (console_width, console_height)
1553
1554
def _ioctl_terminal_size(width, height):
    """Return the terminal size as (width, height), using TIOCGWINSZ.

    The size is queried with an ioctl on file descriptor 1.

    :param width: Value returned as the width if the query fails.
    :param height: Value returned as the height if the query fails.
    """
    try:
        import struct
        import fcntl
        import termios
        request = struct.pack('HHHH', 0, 0, 0, 0)
        reply = fcntl.ioctl(1, termios.TIOCGWINSZ, request)
        # The first two fields of the reply are the rows, then the columns.
        rows, columns = struct.unpack('HHHH', reply)[0:2]
    except (IOError, AttributeError):
        return width, height
    return columns, rows
1566
1567
# Placeholder so the name exists (and carries the description below) before
# the platform-specific implementation is bound.
_terminal_size = None
"""Returns the terminal size as (width, height).

:param width: Default value for width.
:param height: Default value for height.

This is defined specifically for each OS and query the size of the controlling
terminal. If any error occurs, the provided default values should be returned.
"""
# win32 queries the console through win32utils; every other platform uses the
# TIOCGWINSZ ioctl.
if sys.platform == 'win32':
    _terminal_size = _win32_terminal_size
else:
    _terminal_size = _ioctl_terminal_size
1581
1582
def supports_executable(path):
    """Return if filesystem at path supports executable bit.

    :param path: Path for which to check the file system
    :return: boolean indicating whether executable bit can be stored/relied
        upon. This is False on win32 and on 'vfat'/'ntfs' filesystems, and
        True otherwise (including when the filesystem type is unavailable).
    """
    if sys.platform == 'win32':
        return False
    try:
        fs_type = get_fs_type(path)
    except errors.DependencyNotPresent as e:
        trace.mutter('Unable to get fs type for %r: %s', path, e)
        return True
    # filesystems known to not support executable bit
    return fs_type not in ('vfat', 'ntfs')
1600
1601
def supports_symlinks(path):
    """Return if the filesystem at path supports the creation of symbolic links.

    :param path: Path for which to check the file system
    :return: False when has_symlinks() is false or the filesystem is 'vfat'
        or 'ntfs'; True otherwise (including when the filesystem type is
        unavailable).
    """
    if not has_symlinks():
        return False
    try:
        fs_type = get_fs_type(path)
    except errors.DependencyNotPresent as e:
        trace.mutter('Unable to get fs type for %r: %s', path, e)
        return True
    # filesystems known to not support symlinks
    return fs_type not in ('vfat', 'ntfs')
1617
1618
def supports_posix_readonly():
    """Return True if 'readonly' has POSIX semantics, False otherwise.

    Notably, a win32 readonly file cannot be deleted, unlike POSIX where the
    directory controls creation/deletion, etc.

    And under win32, readonly means that the directory itself cannot be
    deleted.  The contents of a readonly directory can be changed, unlike POSIX
    where files in readonly directories cannot be added, deleted or renamed.

    :return: False on win32, True on every other platform.
    """
    on_win32 = (sys.platform == "win32")
    return not on_win32
1630
1631
def set_or_unset_env(env_variable, value):
    """Modify the environment, setting or removing the env_variable.

    :param env_variable: The environment variable in question
    :param value: The value to set the environment to. If None, then
        the variable will be removed.
    :return: The original value of the environment variable, or None if it
        was not set.
    """
    previous = os.environ.get(env_variable)
    if value is not None:
        os.environ[env_variable] = value
    elif previous is not None:
        # Only delete when there is something to remove.
        del os.environ[env_variable]
    return previous
1647
1648
_validWin32PathRE = re.compile(r'^([A-Za-z]:[/\\])?[^:<>*"?\|]*$')


def check_legal_path(path):
    """Check whether the supplied path is legal.

    This is only required on Windows, so we don't test on other platforms
    right now.

    :param path: The path to check.
    :raises IllegalPath: on win32, if path does not match _validWin32PathRE.
    """
    if sys.platform == "win32" and _validWin32PathRE.match(path) is None:
        raise errors.IllegalPath(path)
1661
1662
# Windows error code compared against errno/winerror in _is_error_enotdir.
_WIN32_ERROR_DIRECTORY = 267  # Similar to errno.ENOTDIR


# Use os.scandir where the os module provides it; otherwise lazily import
# scandir from the separate 'scandir' module.
try:
    scandir = os.scandir
except AttributeError:  # os.scandir is missing (added in Python 3.5)
    lazy_import(globals(), """\
from scandir import scandir
""")
1672
1673
def _is_error_enotdir(e):
    """Check if this exception represents ENOTDIR.

    Unfortunately, python is very inconsistent about the exception
    here. The cases are:
      1) Linux, Mac OSX all versions seem to set errno == ENOTDIR
      2) Windows, Python2.4, uses errno == ERROR_DIRECTORY (267)
         which is the windows error code.
      3) Windows, Python2.5 uses errno == EINVAL and
         winerror == ERROR_DIRECTORY

    :param e: An Exception object (expected to be OSError with an errno
        attribute, but we should be able to cope with anything)
    :return: True if this represents an ENOTDIR error. False otherwise.
    """
    code = getattr(e, 'errno', None)
    if code == errno.ENOTDIR:
        return True
    if sys.platform != 'win32':
        return False
    # The remaining forms are only recognised on win32.
    if code == _WIN32_ERROR_DIRECTORY:
        return True
    return (code == errno.EINVAL
            and getattr(e, 'winerror', None) == _WIN32_ERROR_DIRECTORY)
1698
1699
def walkdirs(top, prefix=""):
    """Yield data about all the directories in a tree.

    This yields all the data about the contents of a directory at a time.
    After each directory has been yielded, if the caller has mutated the list
    to exclude some directories, they are then not descended into.

    The data yielded is of the form:
    ((directory-relpath, directory-path-from-top),
    [(relpath, basename, kind, lstat, path-from-top), ...]),
     - directory-relpath is the relative path of the directory being returned
       with respect to top. prefix is prepended to this.
     - directory-path-from-top is the path including top for this directory.
       It is suitable for use with os functions.
     - relpath is the relative path within the subtree being walked.
     - basename is the basename of the path
     - kind is the kind of the file now. If unknown then the file is not
       present within the tree - but it may be recorded as versioned. See
       versioned_kind.
     - lstat is the stat data of the entry, obtained without following
       symlinks.
     - planned, not implemented:
       path_from_tree_root is the path from the root of the tree.

    Each directory's entries are sorted before being yielded. A top that
    turns out not to be a directory yields an empty list of entries.

    :param top: The directory to start walking from.
    :param prefix: Prefix the relpaths that are yielded with 'prefix'. This
        allows one to walk a subtree but get paths that are relative to a tree
        rooted higher up.
    :return: an iterator over the dirs.
    :raises BadFilenameEncoding: if a filename cannot be decoded.
    """
    # TODO there is a bit of a smell where the results of the directory-
    # summary in this, and the path from the root, may not agree
    # depending on top and prefix - i.e. ./foo and foo as a pair leads to
    # potentially confusing output. We should make this more robust - but
    # not at a speed cost. RBC 20060731
    _directory = _directory_kind
    pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
    while pending:
        # 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
        relroot, _, _, _, top = pending.pop()
        if relroot:
            relprefix = relroot + u'/'
        else:
            relprefix = ''

        dirblock = []
        try:
            for entry in scandir(top):
                name = decode_filename(entry.name)
                statvalue = entry.stat(follow_symlinks=False)
                kind = file_kind_from_stat_mode(statvalue.st_mode)
                dirblock.append(
                    (relprefix + name, name, kind, statvalue, entry.path))
        except OSError as e:
            # Scanning something that is not a directory just yields an
            # empty block; any other error is propagated.
            if not _is_error_enotdir(e):
                raise
        except UnicodeDecodeError as e:
            raise errors.BadFilenameEncoding(e.object, _fs_enc)
        dirblock.sort()
        yield (relroot, top), dirblock

        # push the user specified dirs from dirblock; this runs after the
        # yield so that entries removed by the caller are not descended into.
        pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1761
1762
class DirReader(object):
    """An interface for reading directories.

    Both methods raise NotImplementedError here; concrete readers are
    expected to override them.
    """

    def top_prefix_to_starting_dir(self, top, prefix=""):
        """Convert top and prefix to a starting dir entry.

        :param top: A utf8 path
        :param prefix: An optional utf8 path to prefix output relative paths
            with.
        :return: A tuple starting with prefix, and ending with the native
            encoding of top.
        """
        raise NotImplementedError(self.top_prefix_to_starting_dir)

    def read_dir(self, prefix, top):
        """Read the contents of a single directory.

        :param prefix: A utf8 prefix to be prepended to the path basenames.
        :param top: A natively encoded path to read.
        :return: A list of the directory's contents. Each item contains:
            (utf8_relpath, utf8_name, kind, lstatvalue, native_abspath)
        """
        raise NotImplementedError(self.read_dir)
1786
1787
_selected_dir_reader = None


def _walkdirs_utf8(top, prefix=""):
    """Yield data about all the directories in a tree.

    This yields the same information as walkdirs() only each entry is yielded
    in utf-8. On platforms which have a filesystem encoding of utf8 the paths
    are returned as exact byte-strings.

    The directory reader is chosen on the first call and kept in
    _selected_dir_reader for later calls.

    :return: yields a tuple of (dir_info, [file_info])
        dir_info is (utf8_relpath, path-from-top)
        file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
        if top is an absolute path, path-from-top is also an absolute path.
        path-from-top might be unicode or utf8, but it is the correct path to
        pass to os functions to affect the file in question. (such as os.lstat)
    """
    global _selected_dir_reader
    if _selected_dir_reader is None:
        if sys.platform == "win32":
            try:
                from ._walkdirs_win32 import Win32ReadDir
                _selected_dir_reader = Win32ReadDir()
            except ImportError:
                pass
        elif _fs_enc in ('utf-8', 'ascii'):
            try:
                from ._readdir_pyx import UTF8DirReader
                _selected_dir_reader = UTF8DirReader()
            except ImportError as e:
                failed_to_load_extension(e)
        if _selected_dir_reader is None:
            # Fallback to the python version
            _selected_dir_reader = UnicodeDirReader()

    reader = _selected_dir_reader
    read_dir = reader.read_dir
    _directory = _directory_kind
    # Each stack level holds the directories of one dirblock still to visit.
    # 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
    # But we don't actually uses 1-3 in pending, so set them to None
    stack = [[reader.top_prefix_to_starting_dir(top, prefix)]]
    while stack:
        level = stack[-1]
        relroot, _, _, _, top = level.pop()
        if not level:
            stack.pop()
        dirblock = sorted(read_dir(relroot, top))
        yield (relroot, top), dirblock
        # push the user specified dirs from dirblock
        subdirs = [d for d in reversed(dirblock) if d[2] == _directory]
        if subdirs:
            stack.append(subdirs)
1840
1841
class UnicodeDirReader(DirReader):
    """Dir reader that transcodes names, for non-utf8 file systems."""

    __slots__ = ['_utf8_encode']

    def __init__(self):
        self._utf8_encode = codecs.getencoder('utf8')

    def top_prefix_to_starting_dir(self, top, prefix=""):
        """See DirReader.top_prefix_to_starting_dir."""
        utf8_prefix = safe_utf8(prefix)
        unicode_top = safe_unicode(top)
        return (utf8_prefix, None, None, None, unicode_top)

    def read_dir(self, prefix, top):
        """Read one directory from a non-utf8 file system.

        ``top`` and the abspath element of each result are unicode; every
        other path is utf8. Entry names returned by scandir() are decoded
        using the filesystem encoding.

        This is currently the fallback code path when the filesystem encoding
        is not UTF-8. It may be better to implement an alternative so that we
        can safely handle paths that are not properly decodable in the
        current encoding.

        See DirReader.read_dir for details.

        :raises errors.BadFilenameEncoding: If an entry name cannot be
            decoded with the filesystem encoding.
        """
        encode_utf8 = self._utf8_encode
        relprefix = prefix + b'/' if prefix else b''
        top_slash = top + '/'

        entries = []
        for entry in scandir(safe_utf8(top)):
            raw_name = entry.name
            try:
                name = raw_name.decode(_fs_enc)
            except UnicodeDecodeError:
                raise errors.BadFilenameEncoding(
                    relprefix + raw_name, _fs_enc)
            name_utf8 = encode_utf8(name)[0]
            # lstat semantics: do not follow symlinks.
            statvalue = entry.stat(follow_symlinks=False)
            entries.append((
                relprefix + name_utf8,
                name_utf8,
                file_kind_from_stat_mode(statvalue.st_mode),
                statvalue,
                top_slash + name,
                ))
        entries.sort()
        return entries
1893
1894
def copy_tree(from_path, to_path, handlers=None):
    """Copy all of the entries in from_path into to_path.

    :param from_path: The base directory to copy.
    :param to_path: The target directory. If it does not exist, it will
        be created.
    :param handlers: An optional dictionary of functions, which takes a
        source and destinations for files, directories, etc.
        It is keyed on the file kind, such as 'directory', 'symlink', or 'file'
        'file', 'directory', and 'symlink' should always exist.
        If they are missing, they will be replaced with 'os.mkdir()',
        'os.readlink() + os.symlink()', and 'shutil.copy2()', respectively.
        If omitted (or None), only those defaults are used.
    """
    # Now, just copy the existing cached tree to the new location
    # We use a cheap trick here.
    # Absolute paths are prefixed with the first parameter
    # relative paths are prefixed with the second.
    # So we can get both the source and target returned
    # without any extra work.

    def copy_dir(source, dest):
        os.mkdir(dest)

    def copy_link(source, dest):
        """Copy the contents of a symlink"""
        link_to = os.readlink(source)
        os.symlink(link_to, dest)

    real_handlers = {'file': shutil.copy2,
                     'symlink': copy_link,
                     'directory': copy_dir,
                     }
    # The default is None rather than a shared mutable dict, so that no
    # state can ever leak between calls through the default argument.
    if handlers is not None:
        real_handlers.update(handlers)

    if not os.path.exists(to_path):
        real_handlers['directory'](from_path, to_path)

    for dir_info, entries in walkdirs(from_path, prefix=to_path):
        for relpath, name, kind, st, abspath in entries:
            # abspath is the source, relpath (prefixed with to_path) is the
            # destination.
            real_handlers[kind](abspath, relpath)
1935
1936
def copy_ownership_from_path(dst, src=None):
    """Give dst the same user/group ownership as src.

    When src is None the directory containing dst is used as the source
    ('.' if dst has no directory part). Nothing is done on platforms without
    os.chown. If the stat or chown call fails with OSError, the error is not
    propagated: a warning is emitted and the exception is logged quietly.
    """
    chown = getattr(os, 'chown', None)
    if chown is None:
        return

    if src is None:
        parent = os.path.dirname(dst)
        src = '.' if parent == '' else parent

    try:
        owner = os.stat(src)
        chown(dst, owner.st_uid, owner.st_gid)
    except OSError:
        trace.warning(
            'Unable to copy ownership from "%s" to "%s". '
            'You may want to set it manually.', src, dst)
        trace.log_exception_quietly()
1960
1961
def path_prefix_key(path):
    """Build the sort key that orders paths the way walkdirs does.

    The key is the pair (containing directory, path), so sorting with it
    gives prefix order.
    """
    parent = dirname(path)
    return (parent, path)
1968
1969
def compare_paths_prefix_order(path_a, path_b):
    """Three-way compare of two paths in the order walkdirs uses.

    :return: -1, 0 or 1 when path_a sorts before, equal to, or after path_b.
    """
    left = path_prefix_key(path_a)
    right = path_prefix_key(path_b)
    if left > right:
        return 1
    if left < right:
        return -1
    return 0
1975
1976
_cached_user_encoding = None


def get_user_encoding():
    """Work out the user's preferred encoding.

    This is generally the encoding that is used for command line parameters
    and file contents. This may be different from the terminal encoding
    or the filesystem encoding.

    The answer is computed once and then served from
    ``_cached_user_encoding``.

    :return: A string defining the preferred user encoding
    """
    global _cached_user_encoding
    if _cached_user_encoding is not None:
        return _cached_user_encoding

    if os.name != 'posix' or getattr(locale, 'CODESET', None) is None:
        # GZ 2011-12-19: On windows could call GetACP directly instead.
        reported = locale.getpreferredencoding(False)
    else:
        # Use the existing locale settings and call nl_langinfo directly
        # rather than going through getpreferredencoding. This avoids
        # <http://bugs.python.org/issue6202> on OSX Python 2.6 and the
        # possibility of the setlocale call throwing an error.
        reported = locale.nl_langinfo(locale.CODESET)

    try:
        # Normalise to the codec's canonical name.
        resolved = codecs.lookup(reported).name
    except LookupError:
        if reported not in ("", "cp0"):
            sys.stderr.write('brz: warning:'
                             ' unknown encoding %s.'
                             ' Continuing with ascii encoding.\n'
                             % reported
                             )
        resolved = 'ascii'
    else:
        # Get 'ascii' when setlocale has not been called or LANG=C or unset.
        if resolved == 'ascii' and sys.platform == 'darwin':
            # OSX is special-cased in Python to have a UTF-8 filesystem
            # encoding and previously had LANG set here if not present.
            resolved = 'utf-8'
        # GZ 2011-12-19: Maybe UTF-8 should be the default in this case
        #                for some other posix platforms as well.

    _cached_user_encoding = resolved
    return resolved
2025
2026
def get_diff_header_encoding():
    """Return the encoding to use for diff headers.

    This simply returns whatever get_terminal_encoding() returns.
    """
    return get_terminal_encoding()
2029
2030
def get_host_name():
    """Return the current unicode host name.

    Use this rather than calling socket.gethostname() directly, because that
    behaves inconsistently on different platforms. On win32 the name comes
    from win32utils; elsewhere from socket.gethostname().
    """
    if sys.platform != "win32":
        import socket
        return socket.gethostname()
    return win32utils.get_host_name()
2042
2043
# Never read/write more than 64k at a time from/to a socket, so that we do
# not risk "no buffer space available" errors on some platforms.  Windows in
# particular is likely to throw WSAECONNABORTED or WSAENOBUFS if given too
# much data at once.
MAX_SOCKET_CHUNK = 64 * 1024

# errno values that are treated as "the other end went away".  The WSA*
# codes are only added when the errno module defines them.
_end_of_stream_errors = [errno.ECONNRESET, errno.EPIPE, errno.EINVAL]
_end_of_stream_errors.extend(
    _code for _code in (
        getattr(errno, _name, None)
        for _name in ['WSAECONNRESET', 'WSAECONNABORTED'])
    if _code is not None)
2056
2057
def read_bytes_from_socket(sock, report_activity=None,
                           max_read_size=MAX_SOCKET_CHUNK):
    """Read up to max_read_size of bytes from sock and notify of progress.

    Errors listed in ``_end_of_stream_errors`` (e.g. "Connection reset by
    peer") are translated into file-like EOF: an empty byte string is
    returned rather than raising. A recv interrupted by a signal (EINTR) is
    retried. Any other socket error propagates.

    :param report_activity: Optional callable, invoked as
        report_activity(byte_count, 'read') after a successful recv.
    """
    while True:
        try:
            chunk = sock.recv(max_read_size)
        except socket.error as e:
            code = e.args[0]
            if code in _end_of_stream_errors:
                # The connection was closed by the other side.  Callers expect
                # an empty string to signal end-of-stream.
                return b""
            if code != errno.EINTR:
                raise
            # Interrupted by a signal: go round again and retry the recv.
            continue
        if report_activity is not None:
            report_activity(len(chunk), 'read')
        return chunk
2083
2084
def recv_all(socket, count):
    """Receive an exact number of bytes.

    Regular Socket.recv() may return less than the requested number of bytes,
    depending on what's in the OS buffer.  MSG_WAITALL is not available
    on all platforms, but this should work everywhere.  Fewer than ``count``
    bytes are returned if the remote end closes first.

    This isn't optimized and is intended mostly for use in testing.
    """
    pieces = []
    received = 0
    while received < count:
        piece = read_bytes_from_socket(socket, None, count - received)
        if piece == b'':
            # eof
            break
        pieces.append(piece)
        received += len(piece)
    return b''.join(pieces)
2102
2103
def send_all(sock, bytes, report_activity=None):
    """Send all bytes on a socket.

    The data is sent in pieces of at most MAX_SOCKET_CHUNK bytes to avoid
    buffering limitations on some platforms, and a send interrupted by a
    signal (EINTR) is retried.

    This is preferred to socket.sendall(), because it avoids portability bugs
    and provides activity reporting.

    :param report_activity: Optional callable, invoked as
        report_activity(sent_count, 'write') after each successful send, see
        Transport._report_activity
    :raises errors.ConnectionReset: If the send fails with one of the
        end-of-stream errors, or reports that 0 bytes were sent.
    """
    total = len(bytes)
    # Slicing a memoryview avoids copying the payload for every chunk.
    view = memoryview(bytes)
    offset = 0
    while offset < total:
        try:
            sent = sock.send(view[offset:offset + MAX_SOCKET_CHUNK])
        except (socket.error, IOError) as e:
            code = e.args[0]
            if code in _end_of_stream_errors:
                raise errors.ConnectionReset(
                    "Error trying to write to socket", e)
            if code != errno.EINTR:
                raise
            # EINTR: retry the same chunk.
            continue
        if sent == 0:
            raise errors.ConnectionReset('Sending to %s returned 0 bytes'
                                         % (sock,))
        offset += sent
        if report_activity is not None:
            report_activity(sent, 'write')
2136
2137
def connect_socket(address):
    """Connect a stream socket to address, trying each resolved candidate.

    :param address: A (host, port) pair.
    :return: The first socket that connects successfully.
    :raises socket.error: The error from the last failed attempt, or a
        'getaddrinfo returns an empty list' error if there was nothing to try.
    """
    # Slight variation of the socket.create_connection() function (provided by
    # python-2.6) that can fail if getaddrinfo returns an empty list. We also
    # provide it for previous python versions. Also, we don't use the timeout
    # parameter (provided by the python implementation) so we don't implement
    # it either).
    last_error = socket.error('getaddrinfo returns an empty list')
    host, port = address
    candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
    for af, socktype, proto, canonname, sa in candidates:
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)
            sock.connect(sa)
        except socket.error as e:
            # Remember the most recent error and release the failed socket.
            last_error = e
            if sock is not None:
                sock.close()
        else:
            return sock
    raise last_error
2160
2161
def dereference_path(path):
    """Determine the real path to a file.

    Every parent element is dereferenced, but the final component (the file
    itself) is left as it is.

    :param path: The original path.  May be absolute or relative.
    :return: the real path *to* the file
    """
    parent, base = os.path.split(path)
    # The pathjoin for '.' is a workaround for Python bug #1213894.
    # (initial path components aren't dereferenced)
    anchored_parent = pathjoin('.', parent)
    real_parent = realpath(anchored_parent)
    return pathjoin(real_parent, base)
2174
2175
def supports_mapi():
    """Tell whether MAPI can be used to launch a mail client.

    :return: True on win32, False on every other platform.
    """
    on_windows = (sys.platform == "win32")
    return on_windows
2179
2180
def resource_string(package, resource_name):
    """Load a resource from a package and return it as a string.

    Note: Only packages that start with breezy are currently supported.

    This is designed to be a lightweight implementation of resource
    loading in a way which is API compatible with the same API from
    pkg_resources. See
    http://peak.telecommunity.com/DevCenter/PkgResources#basic-resource-access.
    If and when pkg_resources becomes a standard library, this routine
    can delegate to it.

    :raises errors.BzrError: If package is not breezy or a breezy
        sub-package.
    """
    subpackage_prefix = "breezy."

    # Check package name is within breezy
    if package != "breezy" and not package.startswith(subpackage_prefix):
        raise errors.BzrError('resource package %s not in breezy' % package)

    if package == "breezy":
        resource_relpath = resource_name
    else:
        # Turn the dotted sub-package name into a relative directory.
        subdir = package[len(subpackage_prefix):].replace('.', os.sep)
        resource_relpath = pathjoin(subdir, resource_name)

    # Map the resource to a file and read its contents
    base = dirname(breezy.__file__)
    if getattr(sys, 'frozen', None):    # bzr.exe
        base = abspath(pathjoin(base, '..', '..'))
    resource_path = pathjoin(base, resource_relpath)
    with open(resource_path, "rt") as f:
        return f.read()
2208
2209
def file_kind_from_stat_mode_thunk(mode):
    """Resolve the real file_kind_from_stat_mode on first use, then call it.

    The module-level name ``file_kind_from_stat_mode`` initially points at
    this thunk. While that is still the case, a call rebinds the global to
    either ``UTF8DirReader().kind_from_mode`` from the ``_readdir_pyx``
    extension or, if that import fails, ``_kind_from_mode`` from
    ``_readdir_py``.

    :param mode: Passed through unchanged to the selected implementation.
    :return: Whatever the selected implementation returns for ``mode``.
    """
    global file_kind_from_stat_mode
    # Only rebind while the global is still this thunk.
    if file_kind_from_stat_mode is file_kind_from_stat_mode_thunk:
        try:
            from ._readdir_pyx import UTF8DirReader
            file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
        except ImportError:
            # This is one time where we won't warn that an extension failed to
            # load. The extension is never available on Windows anyway.
            # Because of the ``global`` declaration above, this import
            # rebinds the module-level name.
            from ._readdir_py import (
                _kind_from_mode as file_kind_from_stat_mode
                )
    return file_kind_from_stat_mode(mode)


# Start out pointing at the thunk; the first call swaps in the real function.
file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
2226
2227
def file_stat(f, _lstat=os.lstat):
    """Return the result of _lstat(f).

    :param f: The path to stat.
    :param _lstat: The stat callable to use (os.lstat by default).
    :raises errors.NoSuchFile: If the call fails with ENOENT or ENOTDIR.
        Any other OSError propagates unchanged.
    """
    try:
        # XXX cache?
        result = _lstat(f)
    except OSError as e:
        if getattr(e, 'errno', None) not in (errno.ENOENT, errno.ENOTDIR):
            raise
        raise errors.NoSuchFile(f)
    return result
2236
2237
def file_kind(f, _lstat=os.lstat):
    """Return the kind of f, derived from the st_mode of its stat value.

    :param f: The path to inspect.
    :param _lstat: The stat callable handed on to file_stat().
    """
    mode = file_stat(f, _lstat).st_mode
    return file_kind_from_stat_mode(mode)
2241
2242
def until_no_eintr(f, *a, **kw):
    """Call f(*a, **kw), calling it again for as long as it fails with EINTR.

    WARNING: you must be certain that it is safe to retry the call repeatedly
    if EINTR does occur.  This is typically only true for low-level operations
    like os.read.  If in any doubt, don't use this.

    Keep in mind that this is not a complete solution to EINTR.  There is
    probably code in the Python standard library and other dependencies that
    may encounter EINTR if a signal arrives (and there is signal handler for
    that signal).  So this function can reduce the impact for IO that breezy
    directly controls, but it is not a complete solution.

    :return: Whatever f returns once it completes.
    :raises: Any IOError/OSError from f whose errno is not EINTR, and any
        other exception f raises.
    """
    # Borrowed from Twisted's twisted.python.util.untilConcludes function.
    while True:
        try:
            outcome = f(*a, **kw)
        except (IOError, OSError) as e:
            if e.errno != errno.EINTR:
                raise
        else:
            return outcome
2264
2265
if sys.platform == "win32":
    def getchar():
        """Read a single character using msvcrt.getch()."""
        import msvcrt
        return msvcrt.getch()
else:
    def getchar():
        """Read a single character from stdin with the terminal in raw mode.

        The terminal attributes in effect beforehand are restored afterwards,
        even if the read fails.
        """
        import termios
        import tty
        fd = sys.stdin.fileno()
        saved_attrs = termios.tcgetattr(fd)
        try:
            tty.setraw(fd)
            return sys.stdin.read(1)
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, saved_attrs)
2282
# Platform-specific probe for the number of processors.  The value is
# returned raw (an int, a string or bytes, or None); local_concurrency()
# is responsible for converting it to an int.
if sys.platform.startswith('linux'):
    def _local_concurrency():
        """Ask sysconf for SC_NPROCESSORS_ONLN; None if that fails."""
        try:
            online = os.sysconf('SC_NPROCESSORS_ONLN')
        except (ValueError, OSError, AttributeError):
            return None
        return online
elif sys.platform == 'darwin':
    def _local_concurrency():
        """Return the output of ``sysctl -n hw.availcpu``."""
        proc = subprocess.Popen(['sysctl', '-n', 'hw.availcpu'],
                                stdout=subprocess.PIPE)
        return proc.communicate()[0]
elif "bsd" in sys.platform:
    def _local_concurrency():
        """Return the output of ``sysctl -n hw.ncpu``."""
        proc = subprocess.Popen(['sysctl', '-n', 'hw.ncpu'],
                                stdout=subprocess.PIPE)
        return proc.communicate()[0]
elif sys.platform == 'sunos5':
    def _local_concurrency():
        """Return the output of ``psrinfo -p``."""
        proc = subprocess.Popen(['psrinfo', '-p', ],
                                stdout=subprocess.PIPE)
        return proc.communicate()[0]
elif sys.platform == "win32":
    def _local_concurrency():
        """Return the NUMBER_OF_PROCESSORS environment variable, if set."""
        # This appears to return the number of cores.
        return os.environ.get('NUMBER_OF_PROCESSORS')
else:
    def _local_concurrency():
        """Unknown platform: no answer."""
        # Who knows ?
        return None
2309
2310
_cached_local_concurrency = None


def local_concurrency(use_cache=True):
    """Return how many processes can be run concurrently.

    The BRZ_CONCURRENCY environment variable takes precedence. Otherwise
    multiprocessing.cpu_count() is consulted, falling back to the platform
    specific _local_concurrency() when that is not implemented. If no usable
    integer comes out of this, 1 (one) is returned.

    :param use_cache: When true, return the previously cached value if there
        is one, and cache the newly computed value.
    """
    global _cached_local_concurrency

    if use_cache and _cached_local_concurrency is not None:
        return _cached_local_concurrency

    raw = os.environ.get('BRZ_CONCURRENCY', None)
    if raw is None:
        import multiprocessing
        try:
            raw = multiprocessing.cpu_count()
        except NotImplementedError:
            # multiprocessing.cpu_count() isn't implemented on all platforms
            try:
                raw = _local_concurrency()
            except (OSError, IOError):
                # Leave raw as None; it becomes 1 below.
                pass

    try:
        result = int(raw)
    except (TypeError, ValueError):
        result = 1

    if use_cache:
        _cached_local_concurrency = result
    return result
2343
2344
class UnicodeOrBytesToBytesWriter(codecs.StreamWriter):
    """A stream writer that passes bytes through and encodes text.

    Byte strings are written to the underlying stream untouched; anything
    else is run through the supplied encoder first, so the stream only ever
    receives bytes.
    """

    def __init__(self, encode, stream, errors='strict'):
        """Create the writer.

        :param encode: An encoder callable, as returned by
            codecs.getencoder(): called as encode(text, errors) and returning
            a (data, length) pair.
        :param stream: The stream to write the resulting bytes to.
        :param errors: The error handling scheme passed to the encoder.
        """
        codecs.StreamWriter.__init__(self, stream, errors)
        self.encode = encode

    def write(self, object):
        """Write object to the stream, encoding it unless it is bytes."""
        # The pass-through case must test for bytes.  Testing for str (as
        # inherited from the Python 2 code, where str was the byte type)
        # writes text unencoded and hands bytes to the encoder.
        if isinstance(object, bytes):
            self.stream.write(object)
        else:
            data, _ = self.encode(object, self.errors)
            self.stream.write(data)
2358
2359
if sys.platform == 'win32':
    def open_file(filename, mode='r', bufsize=-1):
        """This function is used to override the ``open`` builtin.

        But it uses O_NOINHERIT flag so the file handle is not inherited by
        child processes.  Deleting or renaming a closed file opened with this
        function is not blocking child processes.
        """
        writing = 'w' in mode
        appending = 'a' in mode
        updating = '+' in mode

        # see http://msdn.microsoft.com/en-us/library/yeby3zcb%28VS.71%29.aspx
        # for flags for each modes.
        flags = O_NOINHERIT | (O_BINARY if 'b' in mode else O_TEXT)

        # Access mode: '+' always means read/write; otherwise it depends on
        # whether the file is opened for output.
        if updating:
            flags |= os.O_RDWR
        elif writing or appending:
            flags |= os.O_WRONLY
        else:
            flags |= os.O_RDONLY

        # Creation behaviour: 'w' truncates and takes precedence over 'a',
        # which appends; plain reading adds nothing.
        if writing:
            flags |= os.O_CREAT | os.O_TRUNC
        elif appending:
            flags |= os.O_CREAT | os.O_APPEND

        fd = os.open(filename, flags)
        return os.fdopen(fd, mode, bufsize)
else:
    open_file = open
2402
2403
def available_backup_name(base, exists):
    """Find a non-existing backup file name.

    Nothing is created: the first 'free' name of the form ``base.~N~``
    (N counting up from 1) is merely returned.  This should be used for
    checking names in a directory below a locked tree/branch/repo to avoid
    race conditions. This is LBYL (Look Before You Leap) and generally
    discouraged.

    :param base: The base name.

    :param exists: A callable returning True if the path parameter exists.
    """
    counter = 0
    while True:
        counter += 1
        candidate = "%s.~%d~" % (base, counter)
        if not exists(candidate):
            return candidate
2422
2423
def set_fd_cloexec(fd):
    """Turn on the FD_CLOEXEC flag of a Unix file descriptor.

    The descriptor's other flags are kept. This silently does nothing when
    platform support is missing, i.e. the fcntl module cannot be imported or
    lacks the names needed.
    """
    try:
        import fcntl
        current_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
        fcntl.fcntl(fd, fcntl.F_SETFD, current_flags | fcntl.FD_CLOEXEC)
    except (AttributeError, ImportError):
        # Either the fcntl module or specific constants are not present
        return
2435
2436
def find_executable_on_path(name):
    """Finds an executable on the PATH.

    On Windows, this will try to append each extension in the PATHEXT
    environment variable to the name, if it cannot be found with the name
    as given. If nothing is found on the PATH, win32utils.get_app_path() is
    consulted as well.

    Directories are never returned, even though the operating system
    reports them as "executable" (searchable).

    :param name: The base name of the executable.
    :return: The path to the executable found or None.
    """
    if sys.platform == 'win32':
        exts = os.environ.get('PATHEXT', '').split(os.pathsep)
        exts = [ext.lower() for ext in exts]
        base, ext = os.path.splitext(name)
        if ext != '':
            # An explicit extension must be one of the executable ones, and
            # is then the only one tried.
            if ext.lower() not in exts:
                return None
            name = base
            exts = [ext]
    else:
        exts = ['']
    path = os.environ.get('PATH')
    if path is not None:
        path = path.split(os.pathsep)
        for ext in exts:
            for d in path:
                f = os.path.join(d, name) + ext
                # os.access(X_OK) also succeeds for a searchable directory,
                # so a directory that merely shares the name must be ruled
                # out explicitly.
                if os.access(f, os.X_OK) and not os.path.isdir(f):
                    return f
    if sys.platform == 'win32':
        app_path = win32utils.get_app_path(name)
        if app_path != name:
            return app_path
    return None
2471
2472
def _posix_is_local_pid_dead(pid):
    """True if pid doesn't correspond to live process on this machine

    Only a definite "no such process" answer counts as dead; in every other
    case (including when we cannot tell) the result is False.
    """
    try:
        # Special meaning of unix kill: just check if it's there.
        os.kill(pid, 0)
    except OSError as e:
        if e.errno == errno.ESRCH:
            # On this machine, and really not found: as sure as we can be
            # that it's dead.
            return True
        if e.errno != errno.EPERM:
            # Don't really know.
            trace.mutter("os.kill(%d, 0) failed: %s" % (pid, e))
        # Either it exists though it is not ours (EPERM), or we cannot tell.
        return False
    # Exists and our process: not dead.
    return False


# Pick the implementation that matches the platform.
if sys.platform == "win32":
    is_local_pid_dead = win32utils.is_local_pid_dead
else:
    is_local_pid_dead = _posix_is_local_pid_dead
2499
# Names of errno codes that fdatasync() tolerates; only those that the errno
# module actually defines on this platform end up in _fdatasync_ignored.
_maybe_ignored = ['EAGAIN', 'EINTR', 'ENOTSUP', 'EOPNOTSUPP', 'EACCES']
_fdatasync_ignored = [
    _code for _code in (getattr(errno, _name, None)
                        for _name in _maybe_ignored)
    if _code is not None]


def fdatasync(fileno):
    """Flush file contents to disk if possible.

    os.fdatasync is used when available, otherwise os.fsync; if neither
    exists nothing is done. A failure whose errno is listed in
    ``_fdatasync_ignored`` is suppressed; any other failure is re-raised.
    Every failure is noted with trace.mutter.

    :param fileno: Integer OS file handle.
    """
    sync = getattr(os, 'fdatasync', getattr(os, 'fsync', None))
    if sync is None:
        return
    try:
        sync(fileno)
    except IOError as e:
        # See bug #1075108, on some platforms fdatasync exists, but can
        # raise ENOTSUP. However, we are calling fdatasync to be helpful
        # and reduce the chance of corruption-on-powerloss situations. It
        # is not a mandatory call, so it is ok to suppress failures.
        trace.mutter("ignoring error calling fdatasync: %s" % (e,))
        if getattr(e, 'errno', None) not in _fdatasync_ignored:
            raise
2523
2524
def ensure_empty_directory_exists(path, exception_class):
    """Make sure a local directory exists and is empty.

    The directory is created if it is missing. If it is already there and
    has any content, exception_class(path) is raised.

    :param path: The directory to check or create.
    :param exception_class: Called with path to build the exception raised
        for a non-empty directory.
    """
    try:
        os.mkdir(path)
    except OSError as e:
        already_there = (e.errno == errno.EEXIST)
        if not already_there:
            raise
        if os.listdir(path):
            raise exception_class(path)
2538
2539
def read_mtab(path):
    """Read an fstab-style file and extract mountpoint+filesystem information.

    Lines starting with '#' and lines with fewer than three
    whitespace-separated columns are skipped. The second column is taken as
    the mountpoint and the third as the filesystem name.

    :param path: Path to read from
    :yield: Tuples with mountpoints (as bytestrings) and filesystem names
    """
    with open(path, 'rb') as mtab:
        for raw_line in mtab:
            if raw_line.startswith(b'#'):
                continue
            fields = raw_line.split()
            if len(fields) >= 3:
                mountpoint = fields[1]
                fs_name = fields[2].decode('ascii', 'replace')
                yield mountpoint, fs_name
2554
2555
MTAB_PATH = '/etc/mtab'


class FilesystemFinder(object):
    """Find the filesystem for a particular path."""

    def __init__(self, mountpoints):
        """Create a finder.

        :param mountpoints: An iterable of (mountpoint, filesystem) pairs.
        """
        # Longest mountpoint first, so that find() reaches the most specific
        # mountpoint before any shorter one that also contains the path.
        self._mountpoints = sorted(
            mountpoints, key=lambda entry: len(entry[0]), reverse=True)

    @classmethod
    def from_mtab(cls):
        """Create a FilesystemFinder from an mtab-style file.

        Note that this will silently ignore mtab if it doesn't exist or can
        not be opened; the finder then knows about no mountpoints.
        """
        # TODO(jelmer): Use inotify to be notified when /etc/mtab changes and
        # we need to re-read it.
        try:
            finder = cls(read_mtab(MTAB_PATH))
        except EnvironmentError as e:
            trace.mutter('Unable to read mtab: %s', e)
            finder = cls([])
        return finder

    def find(self, path):
        """Find the filesystem used by a particular path.

        :param path: Path to find (bytestring or text type)
        :return: Filesystem name (as text type) or None, if the filesystem is
            unknown.
        """
        matches = (filesystem
                   for mountpoint, filesystem in self._mountpoints
                   if is_inside(mountpoint, path))
        return next(matches, None)
2592
2593
_FILESYSTEM_FINDER = None


def get_fs_type(path):
    """Return the filesystem type for the partition a path is in.

    The FilesystemFinder is built from mtab on first use and then kept in
    the module-level ``_FILESYSTEM_FINDER``. A path that is not bytes is
    encoded with the filesystem encoding before the lookup.

    :param path: Path to search filesystem type for
    :return: A FS type, as string. E.g. "ext2"
    """
    global _FILESYSTEM_FINDER
    finder = _FILESYSTEM_FINDER
    if finder is None:
        finder = _FILESYSTEM_FINDER = FilesystemFinder.from_mtab()

    encoded = path if isinstance(path, bytes) else path.encode(_fs_enc)
    return finder.find(encoded)
2611
2612
# Module-level alias for time.perf_counter.
perf_counter = time.perf_counter
2614