1# -*- coding: utf-8 -*- 2# This file is part of beets. 3# Copyright 2016, Adrian Sampson. 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15 16"""Miscellaneous utility functions.""" 17 18from __future__ import division, absolute_import, print_function 19import os 20import sys 21import errno 22import locale 23import re 24import shutil 25import fnmatch 26import functools 27from collections import Counter 28from multiprocessing.pool import ThreadPool 29import traceback 30import subprocess 31import platform 32import shlex 33from beets.util import hidden 34import six 35from unidecode import unidecode 36from enum import Enum 37 38 39MAX_FILENAME_LENGTH = 200 40WINDOWS_MAGIC_PREFIX = u'\\\\?\\' 41SNI_SUPPORTED = sys.version_info >= (2, 7, 9) 42 43 44class HumanReadableException(Exception): 45 """An Exception that can include a human-readable error message to 46 be logged without a traceback. Can preserve a traceback for 47 debugging purposes as well. 48 49 Has at least two fields: `reason`, the underlying exception or a 50 string describing the problem; and `verb`, the action being 51 performed during the error. 52 53 If `tb` is provided, it is a string containing a traceback for the 54 associated exception. (Note that this is not necessary in Python 3.x 55 and should be removed when we make the transition.) 56 """ 57 error_kind = 'Error' # Human-readable description of error type. 58 59 def __init__(self, reason, verb, tb=None): 60 self.reason = reason 61 self.verb = verb 62 self.tb = tb 63 super(HumanReadableException, self).__init__(self.get_message()) 64 65 def _gerund(self): 66 """Generate a (likely) gerund form of the English verb. 67 """ 68 if u' ' in self.verb: 69 return self.verb 70 gerund = self.verb[:-1] if self.verb.endswith(u'e') else self.verb 71 gerund += u'ing' 72 return gerund 73 74 def _reasonstr(self): 75 """Get the reason as a string.""" 76 if isinstance(self.reason, six.text_type): 77 return self.reason 78 elif isinstance(self.reason, bytes): 79 return self.reason.decode('utf-8', 'ignore') 80 elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError 81 return self.reason.strerror 82 else: 83 return u'"{0}"'.format(six.text_type(self.reason)) 84 85 def get_message(self): 86 """Create the human-readable description of the error, sans 87 introduction. 88 """ 89 raise NotImplementedError 90 91 def log(self, logger): 92 """Log to the provided `logger` a human-readable message as an 93 error and a verbose traceback as a debug message. 94 """ 95 if self.tb: 96 logger.debug(self.tb) 97 logger.error(u'{0}: {1}', self.error_kind, self.args[0]) 98 99 100class FilesystemError(HumanReadableException): 101 """An error that occurred while performing a filesystem manipulation 102 via a function in this module. The `paths` field is a sequence of 103 pathnames involved in the operation. 104 """ 105 def __init__(self, reason, verb, paths, tb=None): 106 self.paths = paths 107 super(FilesystemError, self).__init__(reason, verb, tb) 108 109 def get_message(self): 110 # Use a nicer English phrasing for some specific verbs. 111 if self.verb in ('move', 'copy', 'rename'): 112 clause = u'while {0} {1} to {2}'.format( 113 self._gerund(), 114 displayable_path(self.paths[0]), 115 displayable_path(self.paths[1]) 116 ) 117 elif self.verb in ('delete', 'write', 'create', 'read'): 118 clause = u'while {0} {1}'.format( 119 self._gerund(), 120 displayable_path(self.paths[0]) 121 ) 122 else: 123 clause = u'during {0} of paths {1}'.format( 124 self.verb, u', '.join(displayable_path(p) for p in self.paths) 125 ) 126 127 return u'{0} {1}'.format(self._reasonstr(), clause) 128 129 130class MoveOperation(Enum): 131 """The file operations that e.g. various move functions can carry out. 132 """ 133 MOVE = 0 134 COPY = 1 135 LINK = 2 136 HARDLINK = 3 137 138 139def normpath(path): 140 """Provide the canonical form of the path suitable for storing in 141 the database. 142 """ 143 path = syspath(path, prefix=False) 144 path = os.path.normpath(os.path.abspath(os.path.expanduser(path))) 145 return bytestring_path(path) 146 147 148def ancestry(path): 149 """Return a list consisting of path's parent directory, its 150 grandparent, and so on. For instance: 151 152 >>> ancestry('/a/b/c') 153 ['/', '/a', '/a/b'] 154 155 The argument should *not* be the result of a call to `syspath`. 156 """ 157 out = [] 158 last_path = None 159 while path: 160 path = os.path.dirname(path) 161 162 if path == last_path: 163 break 164 last_path = path 165 166 if path: 167 # don't yield '' 168 out.insert(0, path) 169 return out 170 171 172def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): 173 """Like `os.walk`, but yields things in case-insensitive sorted, 174 breadth-first order. Directory and file names matching any glob 175 pattern in `ignore` are skipped. If `logger` is provided, then 176 warning messages are logged there when a directory cannot be listed. 177 """ 178 # Make sure the pathes aren't Unicode strings. 179 path = bytestring_path(path) 180 ignore = [bytestring_path(i) for i in ignore] 181 182 # Get all the directories and files at this level. 183 try: 184 contents = os.listdir(syspath(path)) 185 except OSError as exc: 186 if logger: 187 logger.warning(u'could not list directory {0}: {1}'.format( 188 displayable_path(path), exc.strerror 189 )) 190 return 191 dirs = [] 192 files = [] 193 for base in contents: 194 base = bytestring_path(base) 195 196 # Skip ignored filenames. 197 skip = False 198 for pat in ignore: 199 if fnmatch.fnmatch(base, pat): 200 skip = True 201 break 202 if skip: 203 continue 204 205 # Add to output as either a file or a directory. 206 cur = os.path.join(path, base) 207 if (ignore_hidden and not hidden.is_hidden(cur)) or not ignore_hidden: 208 if os.path.isdir(syspath(cur)): 209 dirs.append(base) 210 else: 211 files.append(base) 212 213 # Sort lists (case-insensitive) and yield the current level. 214 dirs.sort(key=bytes.lower) 215 files.sort(key=bytes.lower) 216 yield (path, dirs, files) 217 218 # Recurse into directories. 219 for base in dirs: 220 cur = os.path.join(path, base) 221 # yield from sorted_walk(...) 222 for res in sorted_walk(cur, ignore, ignore_hidden, logger): 223 yield res 224 225 226def mkdirall(path): 227 """Make all the enclosing directories of path (like mkdir -p on the 228 parent). 229 """ 230 for ancestor in ancestry(path): 231 if not os.path.isdir(syspath(ancestor)): 232 try: 233 os.mkdir(syspath(ancestor)) 234 except (OSError, IOError) as exc: 235 raise FilesystemError(exc, 'create', (ancestor,), 236 traceback.format_exc()) 237 238 239def fnmatch_all(names, patterns): 240 """Determine whether all strings in `names` match at least one of 241 the `patterns`, which should be shell glob expressions. 242 """ 243 for name in names: 244 matches = False 245 for pattern in patterns: 246 matches = fnmatch.fnmatch(name, pattern) 247 if matches: 248 break 249 if not matches: 250 return False 251 return True 252 253 254def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): 255 """If path is an empty directory, then remove it. Recursively remove 256 path's ancestry up to root (which is never removed) where there are 257 empty directories. If path is not contained in root, then nothing is 258 removed. Glob patterns in clutter are ignored when determining 259 emptiness. If root is not provided, then only path may be removed 260 (i.e., no recursive removal). 261 """ 262 path = normpath(path) 263 if root is not None: 264 root = normpath(root) 265 266 ancestors = ancestry(path) 267 if root is None: 268 # Only remove the top directory. 269 ancestors = [] 270 elif root in ancestors: 271 # Only remove directories below the root. 272 ancestors = ancestors[ancestors.index(root) + 1:] 273 else: 274 # Remove nothing. 275 return 276 277 # Traverse upward from path. 278 ancestors.append(path) 279 ancestors.reverse() 280 for directory in ancestors: 281 directory = syspath(directory) 282 if not os.path.exists(directory): 283 # Directory gone already. 284 continue 285 clutter = [bytestring_path(c) for c in clutter] 286 match_paths = [bytestring_path(d) for d in os.listdir(directory)] 287 try: 288 if fnmatch_all(match_paths, clutter): 289 # Directory contains only clutter (or nothing). 290 shutil.rmtree(directory) 291 else: 292 break 293 except OSError: 294 break 295 296 297def components(path): 298 """Return a list of the path components in path. For instance: 299 300 >>> components('/a/b/c') 301 ['a', 'b', 'c'] 302 303 The argument should *not* be the result of a call to `syspath`. 304 """ 305 comps = [] 306 ances = ancestry(path) 307 for anc in ances: 308 comp = os.path.basename(anc) 309 if comp: 310 comps.append(comp) 311 else: # root 312 comps.append(anc) 313 314 last = os.path.basename(path) 315 if last: 316 comps.append(last) 317 318 return comps 319 320 321def arg_encoding(): 322 """Get the encoding for command-line arguments (and other OS 323 locale-sensitive strings). 324 """ 325 try: 326 return locale.getdefaultlocale()[1] or 'utf-8' 327 except ValueError: 328 # Invalid locale environment variable setting. To avoid 329 # failing entirely for no good reason, assume UTF-8. 330 return 'utf-8' 331 332 333def _fsencoding(): 334 """Get the system's filesystem encoding. On Windows, this is always 335 UTF-8 (not MBCS). 336 """ 337 encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() 338 if encoding == 'mbcs': 339 # On Windows, a broken encoding known to Python as "MBCS" is 340 # used for the filesystem. However, we only use the Unicode API 341 # for Windows paths, so the encoding is actually immaterial so 342 # we can avoid dealing with this nastiness. We arbitrarily 343 # choose UTF-8. 344 encoding = 'utf-8' 345 return encoding 346 347 348def bytestring_path(path): 349 """Given a path, which is either a bytes or a unicode, returns a str 350 path (ensuring that we never deal with Unicode pathnames). 351 """ 352 # Pass through bytestrings. 353 if isinstance(path, bytes): 354 return path 355 356 # On Windows, remove the magic prefix added by `syspath`. This makes 357 # ``bytestring_path(syspath(X)) == X``, i.e., we can safely 358 # round-trip through `syspath`. 359 if os.path.__name__ == 'ntpath' and path.startswith(WINDOWS_MAGIC_PREFIX): 360 path = path[len(WINDOWS_MAGIC_PREFIX):] 361 362 # Try to encode with default encodings, but fall back to utf-8. 363 try: 364 return path.encode(_fsencoding()) 365 except (UnicodeError, LookupError): 366 return path.encode('utf-8') 367 368 369PATH_SEP = bytestring_path(os.sep) 370 371 372def displayable_path(path, separator=u'; '): 373 """Attempts to decode a bytestring path to a unicode object for the 374 purpose of displaying it to the user. If the `path` argument is a 375 list or a tuple, the elements are joined with `separator`. 376 """ 377 if isinstance(path, (list, tuple)): 378 return separator.join(displayable_path(p) for p in path) 379 elif isinstance(path, six.text_type): 380 return path 381 elif not isinstance(path, bytes): 382 # A non-string object: just get its unicode representation. 383 return six.text_type(path) 384 385 try: 386 return path.decode(_fsencoding(), 'ignore') 387 except (UnicodeError, LookupError): 388 return path.decode('utf-8', 'ignore') 389 390 391def syspath(path, prefix=True): 392 """Convert a path for use by the operating system. In particular, 393 paths on Windows must receive a magic prefix and must be converted 394 to Unicode before they are sent to the OS. To disable the magic 395 prefix on Windows, set `prefix` to False---but only do this if you 396 *really* know what you're doing. 397 """ 398 # Don't do anything if we're not on windows 399 if os.path.__name__ != 'ntpath': 400 return path 401 402 if not isinstance(path, six.text_type): 403 # Beets currently represents Windows paths internally with UTF-8 404 # arbitrarily. But earlier versions used MBCS because it is 405 # reported as the FS encoding by Windows. Try both. 406 try: 407 path = path.decode('utf-8') 408 except UnicodeError: 409 # The encoding should always be MBCS, Windows' broken 410 # Unicode representation. 411 encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() 412 path = path.decode(encoding, 'replace') 413 414 # Add the magic prefix if it isn't already there. 415 # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx 416 if prefix and not path.startswith(WINDOWS_MAGIC_PREFIX): 417 if path.startswith(u'\\\\'): 418 # UNC path. Final path should look like \\?\UNC\... 419 path = u'UNC' + path[1:] 420 path = WINDOWS_MAGIC_PREFIX + path 421 422 return path 423 424 425def samefile(p1, p2): 426 """Safer equality for paths.""" 427 if p1 == p2: 428 return True 429 return shutil._samefile(syspath(p1), syspath(p2)) 430 431 432def remove(path, soft=True): 433 """Remove the file. If `soft`, then no error will be raised if the 434 file does not exist. 435 """ 436 path = syspath(path) 437 if soft and not os.path.exists(path): 438 return 439 try: 440 os.remove(path) 441 except (OSError, IOError) as exc: 442 raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) 443 444 445def copy(path, dest, replace=False): 446 """Copy a plain file. Permissions are not copied. If `dest` already 447 exists, raises a FilesystemError unless `replace` is True. Has no 448 effect if `path` is the same as `dest`. Paths are translated to 449 system paths before the syscall. 450 """ 451 if samefile(path, dest): 452 return 453 path = syspath(path) 454 dest = syspath(dest) 455 if not replace and os.path.exists(dest): 456 raise FilesystemError(u'file exists', 'copy', (path, dest)) 457 try: 458 shutil.copyfile(path, dest) 459 except (OSError, IOError) as exc: 460 raise FilesystemError(exc, 'copy', (path, dest), 461 traceback.format_exc()) 462 463 464def move(path, dest, replace=False): 465 """Rename a file. `dest` may not be a directory. If `dest` already 466 exists, raises an OSError unless `replace` is True. Has no effect if 467 `path` is the same as `dest`. If the paths are on different 468 filesystems (or the rename otherwise fails), a copy is attempted 469 instead, in which case metadata will *not* be preserved. Paths are 470 translated to system paths. 471 """ 472 if samefile(path, dest): 473 return 474 path = syspath(path) 475 dest = syspath(dest) 476 if os.path.exists(dest) and not replace: 477 raise FilesystemError(u'file exists', 'rename', (path, dest)) 478 479 # First, try renaming the file. 480 try: 481 os.rename(path, dest) 482 except OSError: 483 # Otherwise, copy and delete the original. 484 try: 485 shutil.copyfile(path, dest) 486 os.remove(path) 487 except (OSError, IOError) as exc: 488 raise FilesystemError(exc, 'move', (path, dest), 489 traceback.format_exc()) 490 491 492def link(path, dest, replace=False): 493 """Create a symbolic link from path to `dest`. Raises an OSError if 494 `dest` already exists, unless `replace` is True. Does nothing if 495 `path` == `dest`. 496 """ 497 if samefile(path, dest): 498 return 499 500 if os.path.exists(syspath(dest)) and not replace: 501 raise FilesystemError(u'file exists', 'rename', (path, dest)) 502 try: 503 os.symlink(syspath(path), syspath(dest)) 504 except NotImplementedError: 505 # raised on python >= 3.2 and Windows versions before Vista 506 raise FilesystemError(u'OS does not support symbolic links.' 507 'link', (path, dest), traceback.format_exc()) 508 except OSError as exc: 509 # TODO: Windows version checks can be removed for python 3 510 if hasattr('sys', 'getwindowsversion'): 511 if sys.getwindowsversion()[0] < 6: # is before Vista 512 exc = u'OS does not support symbolic links.' 513 raise FilesystemError(exc, 'link', (path, dest), 514 traceback.format_exc()) 515 516 517def hardlink(path, dest, replace=False): 518 """Create a hard link from path to `dest`. Raises an OSError if 519 `dest` already exists, unless `replace` is True. Does nothing if 520 `path` == `dest`. 521 """ 522 if samefile(path, dest): 523 return 524 525 if os.path.exists(syspath(dest)) and not replace: 526 raise FilesystemError(u'file exists', 'rename', (path, dest)) 527 try: 528 os.link(syspath(path), syspath(dest)) 529 except NotImplementedError: 530 raise FilesystemError(u'OS does not support hard links.' 531 'link', (path, dest), traceback.format_exc()) 532 except OSError as exc: 533 if exc.errno == errno.EXDEV: 534 raise FilesystemError(u'Cannot hard link across devices.' 535 'link', (path, dest), traceback.format_exc()) 536 else: 537 raise FilesystemError(exc, 'link', (path, dest), 538 traceback.format_exc()) 539 540 541def unique_path(path): 542 """Returns a version of ``path`` that does not exist on the 543 filesystem. Specifically, if ``path` itself already exists, then 544 something unique is appended to the path. 545 """ 546 if not os.path.exists(syspath(path)): 547 return path 548 549 base, ext = os.path.splitext(path) 550 match = re.search(br'\.(\d)+$', base) 551 if match: 552 num = int(match.group(1)) 553 base = base[:match.start()] 554 else: 555 num = 0 556 while True: 557 num += 1 558 suffix = u'.{}'.format(num).encode() + ext 559 new_path = base + suffix 560 if not os.path.exists(new_path): 561 return new_path 562 563# Note: The Windows "reserved characters" are, of course, allowed on 564# Unix. They are forbidden here because they cause problems on Samba 565# shares, which are sufficiently common as to cause frequent problems. 566# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx 567CHAR_REPLACE = [ 568 (re.compile(r'[\\/]'), u'_'), # / and \ -- forbidden everywhere. 569 (re.compile(r'^\.'), u'_'), # Leading dot (hidden files on Unix). 570 (re.compile(r'[\x00-\x1f]'), u''), # Control characters. 571 (re.compile(r'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". 572 (re.compile(r'\.$'), u'_'), # Trailing dots. 573 (re.compile(r'\s+$'), u''), # Trailing whitespace. 574] 575 576 577def sanitize_path(path, replacements=None): 578 """Takes a path (as a Unicode string) and makes sure that it is 579 legal. Returns a new path. Only works with fragments; won't work 580 reliably on Windows when a path begins with a drive letter. Path 581 separators (including altsep!) should already be cleaned from the 582 path components. If replacements is specified, it is used *instead* 583 of the default set of replacements; it must be a list of (compiled 584 regex, replacement string) pairs. 585 """ 586 replacements = replacements or CHAR_REPLACE 587 588 comps = components(path) 589 if not comps: 590 return '' 591 for i, comp in enumerate(comps): 592 for regex, repl in replacements: 593 comp = regex.sub(repl, comp) 594 comps[i] = comp 595 return os.path.join(*comps) 596 597 598def truncate_path(path, length=MAX_FILENAME_LENGTH): 599 """Given a bytestring path or a Unicode path fragment, truncate the 600 components to a legal length. In the last component, the extension 601 is preserved. 602 """ 603 comps = components(path) 604 605 out = [c[:length] for c in comps] 606 base, ext = os.path.splitext(comps[-1]) 607 if ext: 608 # Last component has an extension. 609 base = base[:length - len(ext)] 610 out[-1] = base + ext 611 612 return os.path.join(*out) 613 614 615def _legalize_stage(path, replacements, length, extension, fragment): 616 """Perform a single round of path legalization steps 617 (sanitation/replacement, encoding from Unicode to bytes, 618 extension-appending, and truncation). Return the path (Unicode if 619 `fragment` is set, `bytes` otherwise) and whether truncation was 620 required. 621 """ 622 # Perform an initial sanitization including user replacements. 623 path = sanitize_path(path, replacements) 624 625 # Encode for the filesystem. 626 if not fragment: 627 path = bytestring_path(path) 628 629 # Preserve extension. 630 path += extension.lower() 631 632 # Truncate too-long components. 633 pre_truncate_path = path 634 path = truncate_path(path, length) 635 636 return path, path != pre_truncate_path 637 638 639def legalize_path(path, replacements, length, extension, fragment): 640 """Given a path-like Unicode string, produce a legal path. Return 641 the path and a flag indicating whether some replacements had to be 642 ignored (see below). 643 644 The legalization process (see `_legalize_stage`) consists of 645 applying the sanitation rules in `replacements`, encoding the string 646 to bytes (unless `fragment` is set), truncating components to 647 `length`, appending the `extension`. 648 649 This function performs up to three calls to `_legalize_stage` in 650 case truncation conflicts with replacements (as can happen when 651 truncation creates whitespace at the end of the string, for 652 example). The limited number of iterations iterations avoids the 653 possibility of an infinite loop of sanitation and truncation 654 operations, which could be caused by replacement rules that make the 655 string longer. The flag returned from this function indicates that 656 the path has to be truncated twice (indicating that replacements 657 made the string longer again after it was truncated); the 658 application should probably log some sort of warning. 659 """ 660 661 if fragment: 662 # Outputting Unicode. 663 extension = extension.decode('utf-8', 'ignore') 664 665 first_stage_path, _ = _legalize_stage( 666 path, replacements, length, extension, fragment 667 ) 668 669 # Convert back to Unicode with extension removed. 670 first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path)) 671 672 # Re-sanitize following truncation (including user replacements). 673 second_stage_path, retruncated = _legalize_stage( 674 first_stage_path, replacements, length, extension, fragment 675 ) 676 677 # If the path was once again truncated, discard user replacements 678 # and run through one last legalization stage. 679 if retruncated: 680 second_stage_path, _ = _legalize_stage( 681 first_stage_path, None, length, extension, fragment 682 ) 683 684 return second_stage_path, retruncated 685 686 687def py3_path(path): 688 """Convert a bytestring path to Unicode on Python 3 only. On Python 689 2, return the bytestring path unchanged. 690 691 This helps deal with APIs on Python 3 that *only* accept Unicode 692 (i.e., `str` objects). I philosophically disagree with this 693 decision, because paths are sadly bytes on Unix, but that's the way 694 it is. So this function helps us "smuggle" the true bytes data 695 through APIs that took Python 3's Unicode mandate too seriously. 696 """ 697 if isinstance(path, six.text_type): 698 return path 699 assert isinstance(path, bytes) 700 if six.PY2: 701 return path 702 return os.fsdecode(path) 703 704 705def str2bool(value): 706 """Returns a boolean reflecting a human-entered string.""" 707 return value.lower() in (u'yes', u'1', u'true', u't', u'y') 708 709 710def as_string(value): 711 """Convert a value to a Unicode object for matching with a query. 712 None becomes the empty string. Bytestrings are silently decoded. 713 """ 714 if six.PY2: 715 buffer_types = buffer, memoryview # noqa: F821 716 else: 717 buffer_types = memoryview 718 719 if value is None: 720 return u'' 721 elif isinstance(value, buffer_types): 722 return bytes(value).decode('utf-8', 'ignore') 723 elif isinstance(value, bytes): 724 return value.decode('utf-8', 'ignore') 725 else: 726 return six.text_type(value) 727 728 729def text_string(value, encoding='utf-8'): 730 """Convert a string, which can either be bytes or unicode, to 731 unicode. 732 733 Text (unicode) is left untouched; bytes are decoded. This is useful 734 to convert from a "native string" (bytes on Python 2, str on Python 735 3) to a consistently unicode value. 736 """ 737 if isinstance(value, bytes): 738 return value.decode(encoding) 739 return value 740 741 742def plurality(objs): 743 """Given a sequence of hashble objects, returns the object that 744 is most common in the set and the its number of appearance. The 745 sequence must contain at least one object. 746 """ 747 c = Counter(objs) 748 if not c: 749 raise ValueError(u'sequence must be non-empty') 750 return c.most_common(1)[0] 751 752 753def cpu_count(): 754 """Return the number of hardware thread contexts (cores or SMT 755 threads) in the system. 756 """ 757 # Adapted from the soundconverter project: 758 # https://github.com/kassoulet/soundconverter 759 if sys.platform == 'win32': 760 try: 761 num = int(os.environ['NUMBER_OF_PROCESSORS']) 762 except (ValueError, KeyError): 763 num = 0 764 elif sys.platform == 'darwin': 765 try: 766 num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu'])) 767 except (ValueError, OSError, subprocess.CalledProcessError): 768 num = 0 769 else: 770 try: 771 num = os.sysconf('SC_NPROCESSORS_ONLN') 772 except (ValueError, OSError, AttributeError): 773 num = 0 774 if num >= 1: 775 return num 776 else: 777 return 1 778 779 780def convert_command_args(args): 781 """Convert command arguments to bytestrings on Python 2 and 782 surrogate-escaped strings on Python 3.""" 783 assert isinstance(args, list) 784 785 def convert(arg): 786 if six.PY2: 787 if isinstance(arg, six.text_type): 788 arg = arg.encode(arg_encoding()) 789 else: 790 if isinstance(arg, bytes): 791 arg = arg.decode(arg_encoding(), 'surrogateescape') 792 return arg 793 794 return [convert(a) for a in args] 795 796 797def command_output(cmd, shell=False): 798 """Runs the command and returns its output after it has exited. 799 800 ``cmd`` is a list of arguments starting with the command names. The 801 arguments are bytes on Unix and strings on Windows. 802 If ``shell`` is true, ``cmd`` is assumed to be a string and passed to a 803 shell to execute. 804 805 If the process exits with a non-zero return code 806 ``subprocess.CalledProcessError`` is raised. May also raise 807 ``OSError``. 808 809 This replaces `subprocess.check_output` which can have problems if lots of 810 output is sent to stderr. 811 """ 812 cmd = convert_command_args(cmd) 813 814 try: # python >= 3.3 815 devnull = subprocess.DEVNULL 816 except AttributeError: 817 devnull = open(os.devnull, 'r+b') 818 819 proc = subprocess.Popen( 820 cmd, 821 stdout=subprocess.PIPE, 822 stderr=subprocess.PIPE, 823 stdin=devnull, 824 close_fds=platform.system() != 'Windows', 825 shell=shell 826 ) 827 stdout, stderr = proc.communicate() 828 if proc.returncode: 829 raise subprocess.CalledProcessError( 830 returncode=proc.returncode, 831 cmd=' '.join(cmd), 832 output=stdout + stderr, 833 ) 834 return stdout 835 836 837def max_filename_length(path, limit=MAX_FILENAME_LENGTH): 838 """Attempt to determine the maximum filename length for the 839 filesystem containing `path`. If the value is greater than `limit`, 840 then `limit` is used instead (to prevent errors when a filesystem 841 misreports its capacity). If it cannot be determined (e.g., on 842 Windows), return `limit`. 843 """ 844 if hasattr(os, 'statvfs'): 845 try: 846 res = os.statvfs(path) 847 except OSError: 848 return limit 849 return min(res[9], limit) 850 else: 851 return limit 852 853 854def open_anything(): 855 """Return the system command that dispatches execution to the correct 856 program. 857 """ 858 sys_name = platform.system() 859 if sys_name == 'Darwin': 860 base_cmd = 'open' 861 elif sys_name == 'Windows': 862 base_cmd = 'start' 863 else: # Assume Unix 864 base_cmd = 'xdg-open' 865 return base_cmd 866 867 868def editor_command(): 869 """Get a command for opening a text file. 870 871 Use the `EDITOR` environment variable by default. If it is not 872 present, fall back to `open_anything()`, the platform-specific tool 873 for opening files in general. 874 """ 875 editor = os.environ.get('EDITOR') 876 if editor: 877 return editor 878 return open_anything() 879 880 881def shlex_split(s): 882 """Split a Unicode or bytes string according to shell lexing rules. 883 884 Raise `ValueError` if the string is not a well-formed shell string. 885 This is a workaround for a bug in some versions of Python. 886 """ 887 if not six.PY2 or isinstance(s, bytes): # Shlex works fine. 888 return shlex.split(s) 889 890 elif isinstance(s, six.text_type): 891 # Work around a Python bug. 892 # http://bugs.python.org/issue6988 893 bs = s.encode('utf-8') 894 return [c.decode('utf-8') for c in shlex.split(bs)] 895 896 else: 897 raise TypeError(u'shlex_split called with non-string') 898 899 900def interactive_open(targets, command): 901 """Open the files in `targets` by `exec`ing a new `command`, given 902 as a Unicode string. (The new program takes over, and Python 903 execution ends: this does not fork a subprocess.) 904 905 Can raise `OSError`. 906 """ 907 assert command 908 909 # Split the command string into its arguments. 910 try: 911 args = shlex_split(command) 912 except ValueError: # Malformed shell tokens. 913 args = [command] 914 915 args.insert(0, args[0]) # for argv[0] 916 917 args += targets 918 919 return os.execlp(*args) 920 921 922def _windows_long_path_name(short_path): 923 """Use Windows' `GetLongPathNameW` via ctypes to get the canonical, 924 long path given a short filename. 925 """ 926 if not isinstance(short_path, six.text_type): 927 short_path = short_path.decode(_fsencoding()) 928 929 import ctypes 930 buf = ctypes.create_unicode_buffer(260) 931 get_long_path_name_w = ctypes.windll.kernel32.GetLongPathNameW 932 return_value = get_long_path_name_w(short_path, buf, 260) 933 934 if return_value == 0 or return_value > 260: 935 # An error occurred 936 return short_path 937 else: 938 long_path = buf.value 939 # GetLongPathNameW does not change the case of the drive 940 # letter. 941 if len(long_path) > 1 and long_path[1] == ':': 942 long_path = long_path[0].upper() + long_path[1:] 943 return long_path 944 945 946def case_sensitive(path): 947 """Check whether the filesystem at the given path is case sensitive. 948 949 To work best, the path should point to a file or a directory. If the path 950 does not exist, assume a case sensitive file system on every platform 951 except Windows. 952 """ 953 # A fallback in case the path does not exist. 954 if not os.path.exists(syspath(path)): 955 # By default, the case sensitivity depends on the platform. 956 return platform.system() != 'Windows' 957 958 # If an upper-case version of the path exists but a lower-case 959 # version does not, then the filesystem must be case-sensitive. 960 # (Otherwise, we have more work to do.) 961 if not (os.path.exists(syspath(path.lower())) and 962 os.path.exists(syspath(path.upper()))): 963 return True 964 965 # Both versions of the path exist on the file system. Check whether 966 # they refer to different files by their inodes. Alas, 967 # `os.path.samefile` is only available on Unix systems on Python 2. 968 if platform.system() != 'Windows': 969 return not os.path.samefile(syspath(path.lower()), 970 syspath(path.upper())) 971 972 # On Windows, we check whether the canonical, long filenames for the 973 # files are the same. 974 lower = _windows_long_path_name(path.lower()) 975 upper = _windows_long_path_name(path.upper()) 976 return lower != upper 977 978 979def raw_seconds_short(string): 980 """Formats a human-readable M:SS string as a float (number of seconds). 981 982 Raises ValueError if the conversion cannot take place due to `string` not 983 being in the right format. 984 """ 985 match = re.match(r'^(\d+):([0-5]\d)$', string) 986 if not match: 987 raise ValueError(u'String not in M:SS format') 988 minutes, seconds = map(int, match.groups()) 989 return float(minutes * 60 + seconds) 990 991 992def asciify_path(path, sep_replace): 993 """Decodes all unicode characters in a path into ASCII equivalents. 994 995 Substitutions are provided by the unidecode module. Path separators in the 996 input are preserved. 997 998 Keyword arguments: 999 path -- The path to be asciified. 1000 sep_replace -- the string to be used to replace extraneous path separators. 1001 """ 1002 # if this platform has an os.altsep, change it to os.sep. 1003 if os.altsep: 1004 path = path.replace(os.altsep, os.sep) 1005 path_components = path.split(os.sep) 1006 for index, item in enumerate(path_components): 1007 path_components[index] = unidecode(item).replace(os.sep, sep_replace) 1008 if os.altsep: 1009 path_components[index] = unidecode(item).replace( 1010 os.altsep, 1011 sep_replace 1012 ) 1013 return os.sep.join(path_components) 1014 1015 1016def par_map(transform, items): 1017 """Apply the function `transform` to all the elements in the 1018 iterable `items`, like `map(transform, items)` but with no return 1019 value. The map *might* happen in parallel: it's parallel on Python 3 1020 and sequential on Python 2. 1021 1022 The parallelism uses threads (not processes), so this is only useful 1023 for IO-bound `transform`s. 1024 """ 1025 if sys.version_info[0] < 3: 1026 # multiprocessing.pool.ThreadPool does not seem to work on 1027 # Python 2. We could consider switching to futures instead. 1028 for item in items: 1029 transform(item) 1030 else: 1031 pool = ThreadPool() 1032 pool.map(transform, items) 1033 pool.close() 1034 pool.join() 1035 1036 1037def lazy_property(func): 1038 """A decorator that creates a lazily evaluated property. On first access, 1039 the property is assigned the return value of `func`. This first value is 1040 stored, so that future accesses do not have to evaluate `func` again. 1041 1042 This behaviour is useful when `func` is expensive to evaluate, and it is 1043 not certain that the result will be needed. 1044 """ 1045 field_name = '_' + func.__name__ 1046 1047 @property 1048 @functools.wraps(func) 1049 def wrapper(self): 1050 if hasattr(self, field_name): 1051 return getattr(self, field_name) 1052 1053 value = func(self) 1054 setattr(self, field_name, value) 1055 return value 1056 1057 return wrapper 1058