"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

__revision__ = "$Id: util.py 83588 2010-08-02 21:35:06Z ezio.melotti $"

import sys, os, string, re
from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log
from distutils.errors import DistutilsByteCompileError

def get_platform ():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    On Mac OS X, raises ValueError if the '-arch' flags found in the
    configured CFLAGS form a combination that has no known machine name.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        from distutils.sysconfig import get_config_vars
        cfgvars = get_config_vars()

        macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
        if not macver:
            macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            # Close the file even if reading it fails.
            try:
                m = re.search(
                        r'<key>ProductUserVisibleVersion</key>\s*' +
                        r'<string>(.*?)</string>', f.read())
            finally:
                f.close()
            if m is not None:
                macrelease = '.'.join(m.group(1).split('.')[:2])
            # else: fall back to the default behaviour

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            # Compare the release numerically: as strings, '10.10.' sorts
            # before '10.4.', which would wrongly disable universal-build
            # detection on 10.10 and later.
            try:
                release_info = tuple(
                    [int(part) for part in (macrelease or '').split('.')[:2]])
            except ValueError:
                # unparseable release: assume no universal support
                release_info = (10, 0)

            cflags = cfgvars.get('CFLAGS', '')

            if release_info >= (10, 4) and '-arch' in cflags.strip():
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.

                machine = 'fat'

                archs = re.findall(r'-arch\s+(\S+)', cflags)
                archs = tuple(sorted(set(archs)))

                if len(archs) == 1:
                    machine = archs[0]
                elif archs == ('i386', 'ppc'):
                    machine = 'fat'
                elif archs == ('i386', 'x86_64'):
                    machine = 'intel'
                elif archs == ('i386', 'ppc', 'x86_64'):
                    machine = 'fat3'
                elif archs == ('ppc64', 'x86_64'):
                    machine = 'fat64'
                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
                    machine = 'universal'
                else:
                    raise ValueError(
                       "Don't know machine value for archs=%r"%(archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                machine = 'ppc'

                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'

    return "%s-%s-%s" % (osname, release, machine)

# get_platform ()


def convert_path (pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.  An empty 'pathname' is returned unchanged.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop '.' components; if nothing is left the path is the current dir.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)

# convert_path ()


def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError if the current platform is not one of
    'posix', 'nt', 'os2' or 'mac'.
    """
    # When sys.platform starts with 'java', the platform name is taken
    # from 'os._name' instead of 'os.name'.
    os_name = os._name if sys.platform.startswith('java') else os.name
    if os_name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os_name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index: 'path' is empty for e.g. "C:" or "".
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os_name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    elif os_name == 'mac':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Chop off volume name from start of path
            elements = pathname.split(":", 1)
            pathname = ":" + elements[1]
            return os.path.join(new_root, pathname)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os_name)


_environ_checked = 0
def check_environ ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done only once per process; later calls return immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # No 'pwd' module, or the current uid has no passwd entry:
            # leave HOME unset rather than crash.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1


def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.  Every
    occurrence of '$' followed by a name is considered a variable, and
    variable is substituted by the value found in the 'local_vars'
    dictionary, or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()
    def _subst (match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # Use the bare key: formatting the KeyError itself would wrap the
        # name in an extra pair of quotes.
        raise ValueError("invalid variable '$%s'" % var.args[0])

# subst_vars ()


def grok_environment_error (exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Use the last constructor argument; fall back to str(exc) when
        # the exception carries no arguments at all.
        args = getattr(exc, 'args', ())
        if args:
            error = prefix + str(args[-1])
        else:
            error = prefix + str(exc)

    return error


# Needed by 'split_quoted()'
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the module-level regexes used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quoted string is not terminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # Splice the quoted text back in without its quote characters;
            # 'pos' then points just past the unquoted text.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words

# split_quoted ()


def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    print.  'verbose' is accepted but not used here; the message is
    always passed to 'log.info()'.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)


def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
    'val' is anything else.  The comparison is case-insensitive.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))


def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given but a filename doesn't start
    with it.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created and opened the file; release
            # both so a dry run leaves nothing behind.
            if script_fd is not None:
                os.close(script_fd)
                os.remove(script_name)
        else:
            if script_fd is not None:
                script = os.fdopen(script_fd, "w")
            else:
                script = open(script_name, "w")

            # Close the script even if one of the writes fails.
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                #py_files = map(os.path.abspath, py_files)
                #if prefix:
                #    prefix = os.path.abspath(prefix)

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
            finally:
                script.close()

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        # Remove the temporary script even if the child process fails.
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for src in py_files:
            if src[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'src' by default)
            if sys.platform.startswith('java'):
                cfile = src[:-3] + '$py.class'
            else:
                cfile = src + ("c" if __debug__ else "o")
            dfile = src
            if prefix:
                if src[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %r doesn't start with %r"
                          % (src, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(src, cfile):
                log.info("byte-compiling %s to %s", src, cfile_base)
                if not dry_run:
                    compile(src, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          src, cfile_base)

# byte_compile ()

def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    header = ('\n' + 8*' ').join(lines)
    return header