1# -*- coding: utf-8 -*-
2# This file is part of beets.
3# Copyright 2016, Adrian Sampson.
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15
16"""Miscellaneous utility functions."""
17
18from __future__ import division, absolute_import, print_function
19import os
20import sys
21import errno
22import locale
23import re
24import shutil
25import fnmatch
26import functools
27from collections import Counter
28from multiprocessing.pool import ThreadPool
29import traceback
30import subprocess
31import platform
32import shlex
33from beets.util import hidden
34import six
35from unidecode import unidecode
36from enum import Enum
37
38
# Default per-component length limit (in characters for Unicode paths,
# bytes for bytestring paths) used by `truncate_path`, and the default
# cap applied by `max_filename_length`.
MAX_FILENAME_LENGTH = 200
# The literal characters ``\\?\``. `syspath` prepends this to Windows
# paths and `bytestring_path` strips it off again.
WINDOWS_MAGIC_PREFIX = u'\\\\?\\'
# True when the running interpreter is Python 2.7.9 or newer.
# NOTE(review): presumably the first version whose `ssl` module
# supports SNI; nothing in the code visible here reads this flag.
SNI_SUPPORTED = sys.version_info >= (2, 7, 9)
42
43
class HumanReadableException(Exception):
    """Base class for errors that carry a user-presentable message.

    The message can be logged on its own, without a traceback, while a
    pre-formatted traceback may optionally be kept for debugging.

    Instances expose at least these attributes:

    - `reason`: the underlying exception, or a string describing the
      problem.
    - `verb`: the action that was being performed when the error
      occurred.
    - `tb`: an optional string containing a traceback for the
      associated exception. (This is not necessary on Python 3.x and
      should be removed once the transition is made.)

    Subclasses must implement `get_message`; its return value becomes
    the exception's first argument.
    """
    error_kind = 'Error'  # Human-readable description of error type.

    def __init__(self, reason, verb, tb=None):
        self.reason, self.verb, self.tb = reason, verb, tb
        super(HumanReadableException, self).__init__(self.get_message())

    def _gerund(self):
        """Guess the gerund ("-ing") form of the English `verb`.

        Verbs containing a space are returned untouched; otherwise a
        trailing "e" is dropped before "ing" is appended.
        """
        verb = self.verb
        if u' ' in verb:
            return verb
        stem = verb[:-1] if verb.endswith(u'e') else verb
        return stem + u'ing'

    def _reasonstr(self):
        """Render `reason` as text."""
        reason = self.reason
        if isinstance(reason, six.text_type):
            return reason
        if isinstance(reason, bytes):
            return reason.decode('utf-8', 'ignore')
        if hasattr(reason, 'strerror'):  # i.e., EnvironmentError
            return reason.strerror
        # Anything else: quote its text representation.
        return u'"{0}"'.format(six.text_type(reason))

    def get_message(self):
        """Build the human-readable description of the error, without
        any introduction. Must be overridden by subclasses.
        """
        raise NotImplementedError

    def log(self, logger):
        """Report the error on `logger`: the stored traceback (if any)
        as a debug message, then the human-readable message as an
        error.
        """
        traceback_text = self.tb
        if traceback_text:
            logger.debug(traceback_text)
        logger.error(u'{0}: {1}', self.error_kind, self.args[0])
98
99
class FilesystemError(HumanReadableException):
    """Raised when a filesystem manipulation performed by a function in
    this module fails. The `paths` attribute holds the sequence of
    pathnames that took part in the operation.
    """
    def __init__(self, reason, verb, paths, tb=None):
        # `paths` must be set first: the base initializer calls
        # `get_message`, which reads it.
        self.paths = paths
        super(FilesystemError, self).__init__(reason, verb, tb)

    def get_message(self):
        """Describe the failure as "<reason> <what was being done>"."""
        verb = self.verb
        paths = self.paths

        # Some verbs get a nicer English phrasing.
        if verb in ('move', 'copy', 'rename'):
            # Two-path operations: source and destination.
            gerund = self._gerund()
            source = displayable_path(paths[0])
            target = displayable_path(paths[1])
            clause = u'while {0} {1} to {2}'.format(gerund, source, target)
        elif verb in ('delete', 'write', 'create', 'read'):
            # Single-path operations.
            gerund = self._gerund()
            clause = u'while {0} {1}'.format(
                gerund, displayable_path(paths[0])
            )
        else:
            # Generic fallback listing every path involved.
            shown = [displayable_path(p) for p in paths]
            clause = u'during {0} of paths {1}'.format(
                verb, u', '.join(shown)
            )

        return u'{0} {1}'.format(self._reasonstr(), clause)
128
129
class MoveOperation(Enum):
    """The file operations that e.g. various move functions can carry out.

    NOTE(review): the members appear to mirror the `move`, `copy`,
    `link` and `hardlink` helpers defined in this module; no code
    visible here consumes the enum, so confirm against its callers.
    """
    MOVE = 0
    COPY = 1
    LINK = 2
    HARDLINK = 3
137
138
def normpath(path):
    """Return `path` in the canonical form used for storing it in the
    database.

    The path is passed through `syspath` (without the Windows magic
    prefix), has a leading ``~`` expanded, is made absolute and
    normalized, and is returned as a bytestring.
    """
    expanded = os.path.expanduser(syspath(path, prefix=False))
    canonical = os.path.normpath(os.path.abspath(expanded))
    return bytestring_path(canonical)
146
147
def ancestry(path):
    """List the directories enclosing `path`, outermost first: its
    parent, grandparent, and so on. For instance:

       >>> ancestry('/a/b/c')
       ['/', '/a', '/a/b']

    The argument should *not* be the result of a call to `syspath`.
    """
    parents = []
    previous = None
    current = path
    while current:
        current = os.path.dirname(current)

        # `dirname` of a root returns the root again: stop there.
        if current == previous:
            break
        previous = current

        # Never record the empty string.
        if current:
            parents.append(current)

    # Collected innermost-first; callers expect outermost-first.
    parents.reverse()
    return parents
170
171
def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None):
    """Like `os.walk`, but yields ``(path, dirs, files)`` tuples of
    bytestrings with the names sorted case-insensitively. Traversal is
    top-down: a directory is yielded before the directories beneath it.

    Directory and file names matching any glob pattern in `ignore` are
    skipped, as are hidden entries when `ignore_hidden` is set. If
    `logger` is provided, a warning is logged there when a directory
    cannot be listed; such a directory yields nothing.
    """
    # Work with bytestrings throughout, never Unicode strings.
    path = bytestring_path(path)
    ignore = [bytestring_path(pattern) for pattern in ignore]

    # List the entries at this level.
    try:
        entries = os.listdir(syspath(path))
    except OSError as exc:
        if logger:
            logger.warning(u'could not list directory {0}: {1}'.format(
                displayable_path(path), exc.strerror
            ))
        return

    subdirs, filenames = [], []
    for entry in entries:
        entry = bytestring_path(entry)

        # Drop names matching any of the ignore patterns.
        if any(fnmatch.fnmatch(entry, pattern) for pattern in ignore):
            continue

        # Drop hidden entries when asked to.
        full = os.path.join(path, entry)
        if ignore_hidden and hidden.is_hidden(full):
            continue

        # Classify as directory or file.
        if os.path.isdir(syspath(full)):
            subdirs.append(entry)
        else:
            filenames.append(entry)

    # Sort case-insensitively and report this level.
    subdirs.sort(key=bytes.lower)
    filenames.sort(key=bytes.lower)
    yield (path, subdirs, filenames)

    # Descend into each subdirectory in turn (a manual `yield from`).
    for entry in subdirs:
        child = os.path.join(path, entry)
        for result in sorted_walk(child, ignore, ignore_hidden, logger):
            yield result
224
225
def mkdirall(path):
    """Create every missing directory that encloses `path` (like
    ``mkdir -p`` on the parent). `path` itself is not created.

    Raises a FilesystemError if a directory cannot be created.
    """
    for parent in ancestry(path):
        sys_parent = syspath(parent)
        if os.path.isdir(sys_parent):
            continue
        try:
            os.mkdir(sys_parent)
        except (OSError, IOError) as exc:
            raise FilesystemError(exc, 'create', (parent,),
                                  traceback.format_exc())
237
238
def fnmatch_all(names, patterns):
    """Return True when every string in `names` matches at least one
    of the shell glob expressions in `patterns`. An empty `names` is
    trivially True.
    """
    return all(
        any(fnmatch.fnmatch(name, pattern) for pattern in patterns)
        for name in names
    )
252
253
def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')):
    """If path is an empty directory, then remove it. Recursively remove
    path's ancestry up to root (which is never removed) where there are
    empty directories. If path is not contained in root, then nothing is
    removed. Glob patterns in clutter are ignored when determining
    emptiness. If root is not provided, then only path may be removed
    (i.e., no recursive removal).

    Pruning is best-effort: it stops silently at the first directory
    that is not empty, cannot be listed, or cannot be removed.
    """
    path = normpath(path)
    if root is not None:
        root = normpath(root)

    ancestors = ancestry(path)
    if root is None:
        # Only remove the top directory.
        ancestors = []
    elif root in ancestors:
        # Only remove directories below the root.
        ancestors = ancestors[ancestors.index(root) + 1:]
    else:
        # Remove nothing.
        return

    # The patterns do not change between directories: convert once.
    clutter = [bytestring_path(c) for c in clutter]

    # Traverse upward from path.
    ancestors.append(path)
    ancestors.reverse()
    for directory in ancestors:
        directory = syspath(directory)
        if not os.path.exists(directory):
            # Directory gone already.
            continue
        try:
            # Listing is inside the `try` so that a path that is not a
            # directory, or cannot be read, ends the pruning quietly
            # just like a failed removal does.
            match_paths = [bytestring_path(d)
                           for d in os.listdir(directory)]
            if fnmatch_all(match_paths, clutter):
                # Directory contains only clutter (or nothing).
                shutil.rmtree(directory)
            else:
                break
        except OSError:
            break
295
296
def components(path):
    """Split `path` into a list of its components. For an absolute
    path the root itself is the first component:

       >>> components('/a/b/c')
       ['/', 'a', 'b', 'c']

    The argument should *not* be the result of a call to `syspath`.
    """
    # Each ancestor contributes its own basename; a root has an empty
    # basename, so the ancestor itself is used instead.
    parts = [os.path.basename(anc) or anc for anc in ancestry(path)]

    # Finally the path's own last component, if it has one.
    tail = os.path.basename(path)
    if tail:
        parts.append(tail)

    return parts
319
320
def arg_encoding():
    """Return the encoding to use for command-line arguments (and
    other OS locale-sensitive strings), defaulting to UTF-8 when the
    locale does not provide one.
    """
    try:
        encoding = locale.getdefaultlocale()[1]
    except ValueError:
        # Invalid locale environment variable setting. Rather than
        # failing entirely for no good reason, assume UTF-8.
        return 'utf-8'
    return encoding or 'utf-8'
331
332
def _fsencoding():
    """Return the encoding to use for filesystem paths. On Windows,
    this is always UTF-8 (not MBCS).
    """
    detected = sys.getfilesystemencoding() or sys.getdefaultencoding()
    # Windows reports a broken encoding that Python calls "MBCS".
    # Since only the Unicode API is used for Windows paths, the
    # encoding is immaterial there, so UTF-8 is chosen arbitrarily to
    # sidestep the problem.
    return 'utf-8' if detected == 'mbcs' else detected
346
347
def bytestring_path(path):
    """Return `path`, given as either bytes or Unicode, as a bytestring
    (so that Unicode pathnames are never handled internally).
    """
    # Bytestrings are passed through unchanged.
    if isinstance(path, bytes):
        return path

    # On Windows, strip the magic prefix added by `syspath`, so that
    # ``bytestring_path(syspath(X)) == X`` and paths can safely be
    # round-tripped through `syspath`.
    on_windows = os.path.__name__ == 'ntpath'
    if on_windows and path.startswith(WINDOWS_MAGIC_PREFIX):
        path = path[len(WINDOWS_MAGIC_PREFIX):]

    # Prefer the filesystem encoding; fall back to UTF-8 if it cannot
    # represent the path or is unknown.
    try:
        encoded = path.encode(_fsencoding())
    except (UnicodeError, LookupError):
        encoded = path.encode('utf-8')
    return encoded
367
368
# The platform's path separator (`os.sep`) as a bytestring, matching
# the bytestring paths produced by `bytestring_path`.
PATH_SEP = bytestring_path(os.sep)
370
371
def displayable_path(path, separator=u'; '):
    """Turn `path` into a Unicode string suitable for showing to the
    user. A list or tuple is converted element by element and joined
    with `separator`. Bytes that cannot be decoded are dropped.
    """
    if isinstance(path, (list, tuple)):
        return separator.join([displayable_path(item) for item in path])
    if isinstance(path, six.text_type):
        return path
    if not isinstance(path, bytes):
        # A non-string object: just use its Unicode representation.
        return six.text_type(path)

    # Bytes: decode with the filesystem encoding, or UTF-8 as a
    # fallback.
    try:
        text = path.decode(_fsencoding(), 'ignore')
    except (UnicodeError, LookupError):
        text = path.decode('utf-8', 'ignore')
    return text
389
390
def syspath(path, prefix=True):
    """Convert a path for use by the operating system.

    Outside Windows the path is returned unchanged. On Windows, paths
    must be Unicode and carry a magic prefix before they are handed to
    the OS. Set `prefix` to False to leave the magic prefix off---but
    only do this if you *really* know what you're doing.
    """
    # Nothing to do unless we are on Windows.
    if os.path.__name__ != 'ntpath':
        return path

    if not isinstance(path, six.text_type):
        # Beets currently represents Windows paths internally with
        # UTF-8 arbitrarily, but earlier versions used MBCS because
        # Windows reports it as the FS encoding. Try both.
        try:
            path = path.decode('utf-8')
        except UnicodeError:
            # The encoding should always be MBCS, Windows' broken
            # Unicode representation.
            fallback = sys.getfilesystemencoding() or \
                sys.getdefaultencoding()
            path = path.decode(fallback, 'replace')

    # Done if no prefix is wanted or it is already present.
    # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx
    if not prefix or path.startswith(WINDOWS_MAGIC_PREFIX):
        return path

    if path.startswith(u'\\\\'):
        # UNC path. Final path should look like \\?\UNC\...
        path = u'UNC' + path[1:]
    return WINDOWS_MAGIC_PREFIX + path
423
424
def samefile(p1, p2):
    """Safer equality for paths: equal values are the same file;
    otherwise the decision is left to `shutil._samefile` on the
    system paths.
    """
    return p1 == p2 or shutil._samefile(syspath(p1), syspath(p2))
430
431
def remove(path, soft=True):
    """Delete the file at `path`. When `soft` is set, a file that does
    not exist is silently ignored.

    Raises a FilesystemError if the deletion fails.
    """
    target = syspath(path)
    if soft and not os.path.exists(target):
        return
    try:
        os.remove(target)
    except (OSError, IOError) as exc:
        raise FilesystemError(exc, 'delete', (target,),
                              traceback.format_exc())
443
444
def copy(path, dest, replace=False):
    """Copy a plain file from `path` to `dest`. Permissions are not
    copied. Does nothing if `path` and `dest` are the same file. Both
    paths are translated to system paths before the syscall.

    Raises a FilesystemError if `dest` already exists (unless `replace`
    is True) or if the copy itself fails.
    """
    if samefile(path, dest):
        return

    source = syspath(path)
    target = syspath(dest)
    if not replace and os.path.exists(target):
        raise FilesystemError(u'file exists', 'copy', (source, target))

    try:
        shutil.copyfile(source, target)
    except (OSError, IOError) as exc:
        raise FilesystemError(exc, 'copy', (source, target),
                              traceback.format_exc())
462
463
def move(path, dest, replace=False):
    """Rename the file `path` to `dest`. `dest` may not be a directory.
    Does nothing if `path` and `dest` are the same file. Both paths are
    translated to system paths.

    If the rename fails (for example because the paths are on different
    filesystems), the file is copied and the original deleted instead,
    in which case metadata will *not* be preserved.

    Raises a FilesystemError if `dest` already exists (unless `replace`
    is True) or if the copy-and-delete fallback fails.
    """
    if samefile(path, dest):
        return

    source = syspath(path)
    target = syspath(dest)
    if os.path.exists(target) and not replace:
        raise FilesystemError(u'file exists', 'rename', (source, target))

    try:
        # The cheap route: a plain rename.
        os.rename(source, target)
    except OSError:
        # The rename was refused; copy and delete the original.
        try:
            shutil.copyfile(source, target)
            os.remove(source)
        except (OSError, IOError) as exc:
            raise FilesystemError(exc, 'move', (source, target),
                                  traceback.format_exc())
490
491
def link(path, dest, replace=False):
    """Create a symbolic link at `dest` that points to `path`. Does
    nothing if `path` and `dest` are the same file.

    Raises a FilesystemError if `dest` already exists and `replace` is
    false, if the OS does not support symbolic links, or if creating
    the link fails.
    """
    if samefile(path, dest):
        return

    if os.path.exists(syspath(dest)) and not replace:
        raise FilesystemError(u'file exists', 'rename', (path, dest))
    try:
        os.symlink(syspath(path), syspath(dest))
    except NotImplementedError:
        # raised on python >= 3.2 and Windows versions before Vista
        # The comma after the message matters: without it the literal
        # fuses with 'link' and every argument shifts by one position.
        raise FilesystemError(u'OS does not support symbolic links.',
                              'link', (path, dest), traceback.format_exc())
    except OSError as exc:
        # TODO: Windows version checks can be removed for python 3
        # Test the `sys` module itself, not the string 'sys'.
        if hasattr(sys, 'getwindowsversion'):
            if sys.getwindowsversion()[0] < 6:  # is before Vista
                exc = u'OS does not support symbolic links.'
        raise FilesystemError(exc, 'link', (path, dest),
                              traceback.format_exc())
515
516
def hardlink(path, dest, replace=False):
    """Create a hard link at `dest` that refers to `path`. Does nothing
    if `path` and `dest` are the same file.

    Raises a FilesystemError if `dest` already exists and `replace` is
    false, if the OS does not support hard links, if the two paths are
    on different devices, or if creating the link otherwise fails.
    """
    if samefile(path, dest):
        return

    if os.path.exists(syspath(dest)) and not replace:
        raise FilesystemError(u'file exists', 'rename', (path, dest))
    try:
        os.link(syspath(path), syspath(dest))
    except NotImplementedError:
        # The comma after the message matters: without it the literal
        # fuses with 'link' and every argument shifts by one position.
        raise FilesystemError(u'OS does not support hard links.',
                              'link', (path, dest), traceback.format_exc())
    except OSError as exc:
        if exc.errno == errno.EXDEV:
            # Hard links cannot span filesystems; say so explicitly.
            raise FilesystemError(u'Cannot hard link across devices.',
                                  'link', (path, dest),
                                  traceback.format_exc())
        else:
            raise FilesystemError(exc, 'link', (path, dest),
                                  traceback.format_exc())
539
540
def unique_path(path):
    """Returns a version of ``path`` that does not exist on the
    filesystem. Specifically, if ``path`` itself already exists, then
    a numeric suffix (``.1``, ``.2``, ...) is inserted before the
    extension; an existing numeric suffix is incremented instead.

    ``path`` is expected to be a bytestring.
    """
    if not os.path.exists(syspath(path)):
        return path

    base, ext = os.path.splitext(path)
    # Capture the *whole* trailing number. With the `+` outside the
    # group only its last digit would be captured, so ``foo.12`` would
    # continue counting from 2 instead of 12.
    match = re.search(br'\.(\d+)$', base)
    if match:
        num = int(match.group(1))
        base = base[:match.start()]
    else:
        num = 0
    while True:
        num += 1
        suffix = u'.{}'.format(num).encode() + ext
        new_path = base + suffix
        # Check through `syspath`, like the initial check above.
        if not os.path.exists(syspath(new_path)):
            return new_path
562
# Default replacement rules for `sanitize_path`: a list of (compiled
# regex, replacement string) pairs that are applied, in order, to each
# path component.
#
# Note: The Windows "reserved characters" are, of course, allowed on
# Unix. They are forbidden here because they cause problems on Samba
# shares, which are sufficiently common as to cause frequent problems.
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx
CHAR_REPLACE = [
    (re.compile(r'[\\/]'), u'_'),  # / and \ -- forbidden everywhere.
    (re.compile(r'^\.'), u'_'),  # Leading dot (hidden files on Unix).
    (re.compile(r'[\x00-\x1f]'), u''),  # Control characters.
    (re.compile(r'[<>:"\?\*\|]'), u'_'),  # Windows "reserved characters".
    (re.compile(r'\.$'), u'_'),  # Trailing dots.
    (re.compile(r'\s+$'), u''),  # Trailing whitespace.
]
575
576
def sanitize_path(path, replacements=None):
    """Return a legal version of `path` (a Unicode string) by applying
    substitution rules to each of its components.

    Only works with fragments; won't work reliably on Windows when a
    path begins with a drive letter. Path separators (including
    altsep!) should already be cleaned from the path components.

    If `replacements` is given (and non-empty), it is used *instead* of
    the default rules in `CHAR_REPLACE`; it must be a list of (compiled
    regex, replacement string) pairs. A path without components yields
    the empty string.
    """
    rules = replacements or CHAR_REPLACE

    parts = components(path)
    if not parts:
        return ''

    cleaned = []
    for part in parts:
        # Rules are applied in order, each seeing the previous result.
        for regex, repl in rules:
            part = regex.sub(repl, part)
        cleaned.append(part)
    return os.path.join(*cleaned)
596
597
def truncate_path(path, length=MAX_FILENAME_LENGTH):
    """Given a bytestring path or a Unicode path fragment, truncate the
    components to a legal length. In the last component, the extension
    is preserved.

    A path without any components (i.e., an empty path) is returned
    unchanged.
    """
    comps = components(path)
    if not comps:
        # Nothing to truncate; indexing `comps[-1]` below would
        # otherwise raise an IndexError.
        return path

    out = [c[:length] for c in comps]
    base, ext = os.path.splitext(comps[-1])
    if ext:
        # Last component has an extension: shorten only the base so
        # that base plus extension fits within `length`.
        base = base[:length - len(ext)]
        out[-1] = base + ext

    return os.path.join(*out)
613
614
def _legalize_stage(path, replacements, length, extension, fragment):
    """Run one round of path legalization: sanitation/replacement,
    encoding from Unicode to bytes (skipped when `fragment` is set),
    appending the lower-cased `extension`, and truncation to `length`.

    Return a pair: the resulting path (Unicode if `fragment` is set,
    `bytes` otherwise) and whether truncation changed anything.
    """
    # Initial sanitization, including user replacements.
    sanitized = sanitize_path(path, replacements)

    # Encode for the filesystem unless a Unicode fragment is wanted.
    if not fragment:
        sanitized = bytestring_path(sanitized)

    # Put the extension back on.
    with_ext = sanitized + extension.lower()

    # Shorten over-long components and report whether that mattered.
    truncated = truncate_path(with_ext, length)
    return truncated, truncated != with_ext
637
638
def legalize_path(path, replacements, length, extension, fragment):
    """Given a path-like Unicode string, produce a legal path. Return
    the path and a flag indicating whether some replacements had to be
    ignored (see below).

    One legalization round (see `_legalize_stage`) applies the
    sanitation rules in `replacements`, encodes the string to bytes
    (unless `fragment` is set), appends the `extension`, and truncates
    components to `length`.

    Up to three rounds are performed, because truncation can conflict
    with the replacements (for example, when truncation leaves
    whitespace at the end of the string). Limiting the number of rounds
    rules out an endless cycle of sanitation and truncation, which
    replacement rules that make the string longer could otherwise
    cause. The returned flag is set when the second round had to
    truncate again (meaning the replacements lengthened the already
    truncated string); in that case the final round runs without the
    user replacements, and the application should probably log some
    sort of warning.
    """
    if fragment:
        # Outputting Unicode.
        extension = extension.decode('utf-8', 'ignore')

    stage_args = (length, extension, fragment)

    # Round one: sanitize and truncate.
    initial, _ = _legalize_stage(path, replacements, *stage_args)

    # Back to Unicode, with the extension taken off again.
    stem = os.path.splitext(displayable_path(initial))[0]

    # Round two: re-sanitize what truncation left behind (including
    # user replacements).
    result, retruncated = _legalize_stage(stem, replacements, *stage_args)

    # Round three, only if round two truncated again: drop the user
    # replacements and legalize one last time.
    if retruncated:
        result = _legalize_stage(stem, None, *stage_args)[0]

    return result, retruncated
685
686
def py3_path(path):
    """Return a bytestring path as Unicode on Python 3; on Python 2,
    return it unchanged. Unicode input is always returned as is.

    This helps deal with APIs on Python 3 that *only* accept Unicode
    (i.e., `str` objects). I philosophically disagree with this
    decision, because paths are sadly bytes on Unix, but that's the way
    it is. So this function helps us "smuggle" the true bytes data
    through APIs that took Python 3's Unicode mandate too seriously.
    """
    if isinstance(path, six.text_type):
        return path
    assert isinstance(path, bytes)
    return path if six.PY2 else os.fsdecode(path)
703
704
def str2bool(value):
    """Interpret a human-entered string as a boolean: True for "yes",
    "1", "true", "t" or "y" in any letter case, False for anything
    else.
    """
    affirmative = (u'yes', u'1', u'true', u't', u'y')
    return value.lower() in affirmative
708
709
def as_string(value):
    """Convert a value to a Unicode object for matching with a query.
    None becomes the empty string; bytestrings and buffers are decoded
    as UTF-8, silently dropping undecodable bytes; anything else is
    converted with the Unicode constructor.
    """
    # `buffer` only exists on Python 2.
    buffer_types = memoryview
    if six.PY2:
        buffer_types = (buffer, memoryview)  # noqa: F821

    if value is None:
        return u''
    if isinstance(value, buffer_types):
        return bytes(value).decode('utf-8', 'ignore')
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return six.text_type(value)
727
728
def text_string(value, encoding='utf-8'):
    """Return `value` as Unicode text.

    Bytes are decoded using `encoding`; any other value (in
    particular, text that is already Unicode) is returned untouched.
    This is useful to convert from a "native string" (bytes on Python
    2, str on Python 3) to a consistently Unicode value.
    """
    return value.decode(encoding) if isinstance(value, bytes) else value
740
741
def plurality(objs):
    """Given a sequence of hashable objects, return a pair of the
    object that occurs most often and its number of appearances.

    Raises a ValueError if the sequence is empty.
    """
    tally = Counter(objs)
    if len(tally) == 0:
        raise ValueError(u'sequence must be non-empty')
    (winner, count), = tally.most_common(1)
    return winner, count
751
752
def cpu_count():
    """Return the number of hardware thread contexts (cores or SMT
    threads) in the system, or 1 if it cannot be determined.
    """
    # Adapted from the soundconverter project:
    # https://github.com/kassoulet/soundconverter
    num = 0  # Stays 0 ("unknown") if the platform query fails.
    if sys.platform == 'win32':
        try:
            num = int(os.environ['NUMBER_OF_PROCESSORS'])
        except (ValueError, KeyError):
            pass
    elif sys.platform == 'darwin':
        try:
            num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu']))
        except (ValueError, OSError, subprocess.CalledProcessError):
            pass
    else:
        try:
            num = os.sysconf('SC_NPROCESSORS_ONLN')
        except (ValueError, OSError, AttributeError):
            pass

    # Never report fewer than one processor.
    return num if num >= 1 else 1
778
779
def convert_command_args(args):
    """Return a copy of the list `args` with each command argument
    converted to a bytestring on Python 2 or to a surrogate-escaped
    string on Python 3. Arguments already of the right type are kept
    as they are.
    """
    assert isinstance(args, list)

    converted = []
    for arg in args:
        if six.PY2:
            # Python 2: Unicode -> bytes.
            if isinstance(arg, six.text_type):
                arg = arg.encode(arg_encoding())
        elif isinstance(arg, bytes):
            # Python 3: bytes -> str, smuggling undecodable bytes
            # through as surrogates.
            arg = arg.decode(arg_encoding(), 'surrogateescape')
        converted.append(arg)
    return converted
795
796
def command_output(cmd, shell=False):
    """Runs the command and returns its standard output after it has
    exited.

    ``cmd`` is a list of arguments starting with the command name; it
    is passed through `convert_command_args` before being handed to
    ``subprocess.Popen``. ``shell`` is forwarded to ``Popen`` unchanged.
    The child's standard input is connected to the null device.

    NOTE(review): a string ``cmd`` (as one would normally use together
    with ``shell=True``) is rejected by the list assertion in
    `convert_command_args`---confirm whether that is intended.

    If the process exits with a non-zero return code
    ``subprocess.CalledProcessError`` is raised, carrying the
    concatenated stdout and stderr as its output. May also raise
    ``OSError``.

    This replaces `subprocess.check_output` which can have problems if lots of
    output is sent to stderr.
    """
    cmd = convert_command_args(cmd)

    # Only a file we open ourselves needs closing afterwards;
    # `subprocess.DEVNULL` is a constant, not a file object.
    opened_devnull = None
    try:  # python >= 3.3
        devnull = subprocess.DEVNULL
    except AttributeError:
        devnull = opened_devnull = open(os.devnull, 'r+b')

    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=devnull,
            close_fds=platform.system() != 'Windows',
            shell=shell
        )
        stdout, stderr = proc.communicate()
    finally:
        # Release the fallback handle whether or not the command ran.
        if opened_devnull is not None:
            opened_devnull.close()

    if proc.returncode:
        raise subprocess.CalledProcessError(
            returncode=proc.returncode,
            cmd=' '.join(cmd),
            output=stdout + stderr,
        )
    return stdout
835
836
def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
    """Try to determine the maximum filename length for the filesystem
    containing `path`, capped at `limit` (to prevent errors when a
    filesystem misreports its capacity). If the value cannot be
    determined (e.g., on Windows, or when `path` cannot be queried),
    return `limit`.
    """
    if not hasattr(os, 'statvfs'):
        return limit
    try:
        stats = os.statvfs(path)
    except OSError:
        return limit
    # Field 9 of the statvfs result is `f_namemax`.
    return min(stats[9], limit)
852
853
def open_anything():
    """Return the name of the system command that opens a file with
    whatever program is appropriate for it.
    """
    openers = {
        'Darwin': 'open',
        'Windows': 'start',
    }
    # Anything else is assumed to be Unix.
    return openers.get(platform.system(), 'xdg-open')
866
867
def editor_command():
    """Return a command for opening a text file.

    The `EDITOR` environment variable is used when it is set to a
    non-empty value; otherwise the result of `open_anything()`, the
    platform-specific tool for opening files in general, is returned.
    """
    return os.environ.get('EDITOR') or open_anything()
879
880
def shlex_split(s):
    """Split a Unicode or bytes string according to shell lexing rules.

    Raise `ValueError` if the string is not a well-formed shell string,
    and `TypeError` (on Python 2) if `s` is not a string at all. This
    is a workaround for a bug in some versions of Python.
    """
    if not six.PY2 or isinstance(s, bytes):
        # Shlex works fine here.
        return shlex.split(s)

    if isinstance(s, six.text_type):
        # Work around a Python bug by lexing the UTF-8 bytes and
        # decoding each token afterwards.
        # http://bugs.python.org/issue6988
        encoded = s.encode('utf-8')
        return [token.decode('utf-8') for token in shlex.split(encoded)]

    raise TypeError(u'shlex_split called with non-string')
898
899
def interactive_open(targets, command):
    """Replace the current process with `command` (a Unicode string),
    passing the files in `targets` as extra arguments.

    The command is `exec`ed rather than run as a subprocess, so on
    success Python execution ends here and the new program takes over.

    Can raise `OSError`.
    """
    assert command

    try:
        argv = shlex_split(command)
    except ValueError:
        # Malformed shell tokens: use the whole string as the program.
        argv = [command]

    # The program name is both the file to look up and argv[0].
    program = argv[0]
    argv.extend(targets)

    return os.execlp(program, *argv)
920
921
def _windows_long_path_name(short_path):
    """Resolve a short filename to its canonical long path by calling
    Windows' `GetLongPathNameW` through ctypes.

    Bytes input is first decoded with the `_fsencoding()` codec. If the
    API call reports a failure, the (decoded) input is returned as is.
    """
    if not isinstance(short_path, six.text_type):
        short_path = short_path.decode(_fsencoding())

    import ctypes
    capacity = 260
    buf = ctypes.create_unicode_buffer(capacity)
    length = ctypes.windll.kernel32.GetLongPathNameW(
        short_path, buf, capacity
    )

    # Zero, or a length that does not fit the buffer, is treated as an
    # error: fall back to the path we were given.
    if length == 0 or length > capacity:
        return short_path

    resolved = buf.value
    # GetLongPathNameW leaves the drive letter's case untouched, so
    # upper-case it ourselves.
    if len(resolved) > 1 and resolved[1] == ':':
        resolved = resolved[0].upper() + resolved[1:]
    return resolved
944
945
def case_sensitive(path):
    """Tell whether the filesystem holding `path` distinguishes case.

    This works best when `path` names an existing file or directory.
    For a path that does not exist, every platform except Windows is
    assumed to be case sensitive.
    """
    if not os.path.exists(syspath(path)):
        # Nothing to probe, so fall back to the platform's default.
        return platform.system() != 'Windows'

    lowered = path.lower()
    uppered = path.upper()

    # Unless both case variants exist, the filesystem told them apart,
    # so it must be case sensitive.
    if not os.path.exists(syspath(lowered)) or \
            not os.path.exists(syspath(uppered)):
        return True

    # Both variants exist; decide whether they are the same file.
    if platform.system() == 'Windows':
        # `os.path.samefile` is not usable here (Unix-only on
        # Python 2), so compare the canonical long filenames instead.
        lower_long = _windows_long_path_name(lowered)
        upper_long = _windows_long_path_name(uppered)
        return lower_long != upper_long

    # Elsewhere, distinct files mean a case-sensitive filesystem.
    return not os.path.samefile(syspath(lowered), syspath(uppered))
977
978
def raw_seconds_short(string):
    """Convert a human-readable M:SS string to a number of seconds,
    returned as a float.

    Raises ValueError when `string` is not in M:SS format (any number
    of minute digits, then exactly two second digits from 00 to 59).
    """
    parsed = re.match(r'^(\d+):([0-5]\d)$', string)
    if parsed is None:
        raise ValueError(u'String not in M:SS format')

    minutes = int(parsed.group(1))
    seconds = int(parsed.group(2))
    return float(60 * minutes + seconds)
990
991
def asciify_path(path, sep_replace):
    """Decodes all unicode characters in a path into ASCII equivalents.

    Substitutions are provided by the unidecode module. Path separators in the
    input are preserved (an `os.altsep` in the input is normalized to
    `os.sep`).

    Keyword arguments:
    path -- The path to be asciified.
    sep_replace -- the string to be used to replace extraneous path separators,
        i.e. separator characters that appear in a component only after
        transliteration.
    """
    # Every separator character this platform recognises.
    separators = [os.sep]
    if os.altsep:
        # Normalize first so that splitting on os.sep finds every component.
        path = path.replace(os.altsep, os.sep)
        separators.append(os.altsep)

    components = []
    for item in path.split(os.sep):
        ascii_item = unidecode(item)
        # Replace *all* kinds of separator in the transliterated text.
        # The replacements are chained on the same string so that handling
        # os.altsep does not throw away the os.sep substitution.
        for sep in separators:
            ascii_item = ascii_item.replace(sep, sep_replace)
        components.append(ascii_item)
    return os.sep.join(components)
1014
1015
def par_map(transform, items):
    """Apply the function `transform` to all the elements in the
    iterable `items`, like `map(transform, items)` but with no return
    value. The map *might* happen in parallel: it's parallel on Python 3
    and sequential on Python 2.

    The parallelism uses threads (not processes), so this is only useful
    for IO-bound `transform`s.

    An exception raised by `transform` propagates to the caller; the
    thread pool is shut down either way.
    """
    if sys.version_info[0] < 3:
        # multiprocessing.pool.ThreadPool does not seem to work on
        # Python 2. We could consider switching to futures instead.
        for item in items:
            transform(item)
        return

    pool = ThreadPool()
    try:
        pool.map(transform, items)
    finally:
        # Always release the worker threads, even when `transform`
        # raised and `pool.map` re-raised the error here.
        pool.close()
        pool.join()
1035
1036
def lazy_property(func):
    """Decorator that turns a method into a read-only property whose
    value is computed once per instance.

    The first access calls `func` and stores the result on the instance
    under the method's name prefixed with an underscore; later accesses
    return the stored value without calling `func` again.

    Handy when `func` is expensive and its result may never be needed.
    """
    cache_attr = '_' + func.__name__

    def getter(self):
        if not hasattr(self, cache_attr):
            # First access: compute and remember the value.
            result = func(self)
            setattr(self, cache_attr, result)
            return result
        return getattr(self, cache_attr)

    return property(functools.wraps(func)(getter))
1058