1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7__revision__ = "$Id: util.py 83588 2010-08-02 21:35:06Z ezio.melotti $"
8
9import sys, os, string, re
10from distutils.errors import DistutilsPlatformError
11from distutils.dep_util import newer
12from distutils.spawn import spawn
13from distutils import log
14from distutils.errors import DistutilsByteCompileError
15
def get_platform ():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    Raises ValueError on Mac OS X if CFLAGS names a combination of
    '-arch' values for which no machine name is known.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        m = re.match(r'[\d.]+', release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        from distutils.sysconfig import get_config_vars
        cfgvars = get_config_vars()

        macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
        if not macver:
            macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            try:
                m = re.search(
                        r'<key>ProductUserVisibleVersion</key>\s*' +
                        r'<string>(.*?)</string>', f.read())
            finally:
                # Close the plist even if reading it fails.
                f.close()
            if m is not None:
                macrelease = '.'.join(m.group(1).split('.')[:2])
            # else: fall back to the default behaviour

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            # Compare releases numerically: as strings, "10.10" would sort
            # before "10.4" and universal builds would go undetected.
            try:
                macrelease_info = tuple(
                    [int(part) for part in str(macrelease).split('.')[:2]])
            except ValueError:
                # Unparsable release: assume no universal support.
                macrelease_info = (10, 3)

            cflags = cfgvars.get('CFLAGS', '')
            if macrelease_info >= (10, 4) and '-arch' in cflags.strip():
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.
                archs = tuple(sorted(set(re.findall(r'-arch\s+(\S+)', cflags))))

                known_combinations = {
                    ('i386', 'ppc'): 'fat',
                    ('i386', 'x86_64'): 'intel',
                    ('i386', 'ppc', 'x86_64'): 'fat3',
                    ('ppc64', 'x86_64'): 'fat64',
                    ('i386', 'ppc', 'ppc64', 'x86_64'): 'universal',
                }
                if len(archs) == 1:
                    machine = archs[0]
                elif archs in known_combinations:
                    machine = known_combinations[archs]
                else:
                    raise ValueError(
                       "Don't know machine value for archs=%r"%(archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                machine = 'ppc'

                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'

    return "%s-%s-%s" % (osname, release, machine)
181
182# get_platform ()
183
184
def convert_path (pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.

    An empty 'pathname' is returned unchanged; a path made up only of
    '.' components converts to 'os.curdir'.
    """
    # Nothing to convert when the native separator is already '/'.
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component in a single pass.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)
209
210# convert_path ()
211
212
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError if the current platform is not one of
    'posix', 'nt', 'os2' or 'mac'.
    """
    # Under Jython, os.name is 'java'; os._name is used there instead.
    os_name = os._name if sys.platform.startswith('java') else os.name
    if os_name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os_name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index so a bare drive ("C:") or an empty
        # pathname does not raise IndexError.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os_name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        # Same empty-path guard as the 'nt' branch.
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    elif os_name == 'mac':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Chop off volume name from start of path
            elements = pathname.split(":", 1)
            pathname = ":" + elements[1]
            return os.path.join(new_root, pathname)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os_name)
250
251
# Set once check_environ() has run, so later calls return immediately.
_environ_checked = False
def check_environ ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Only the first call does any work.  HOME is left unset when the
    current user has no entry in the password database.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # getpwuid() raises KeyError when the current uid is not in
            # the password database; in that case leave HOME unset
            # rather than fail.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = True
273
274
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.
    Every occurrence of '$' followed by a name is considered a variable,
    and the variable is substituted by the value found in the 'local_vars'
    dictionary, or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()
    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        # A missing name raises KeyError, translated to ValueError below.
        return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # Format the missing name itself; formatting the KeyError object
        # would add a second pair of quotes around it.
        if var.args:
            missing = var.args[0]
        else:
            missing = var
        raise ValueError("invalid variable '$%s'" % missing)
296
297# subst_vars ()
298
299
def grok_environment_error (exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    elif exc.args:
        # Use the last constructor argument as the message.
        error = prefix + str(exc.args[-1])
    else:
        # No arguments at all: fall back to the exception's own text.
        error = prefix + str(exc)

    return error
320
321
# Needed by 'split_quoted()'; compiled lazily on first use.
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the regular expressions used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # A run of characters that are not a backslash, quote or whitespace.
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete single- or double-quoted string, allowing backslash escapes.
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.

    Raises ValueError if a quoted string is not terminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    pos = 0                             # where to resume scanning in 's'

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # Remove the two quote characters but keep the quoted text as
            # part of the current word; resume just after that text.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words
389
390# split_quoted ()
391
392
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Run an action that has an effect on the outside world (such as
    writing to the filesystem), honouring the 'dry_run' flag.

    'func' is called with the argument tuple 'args' unless 'dry_run' is
    true.  'msg' is logged at info level either way; when it is None, a
    message is built from the function's name and its arguments.
    'verbose' is accepted but not used here.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        # repr() of a one-element tuple ends in ',)'; show it as a
        # plain call instead.
        if msg.endswith(',)'):
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
410
411
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison is
    case-insensitive.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))
426
427
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if sys.dont_write_bytecode is set,
    and ValueError if a filename does not start with 'prefix'.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created the file and opened it; in a
            # dry run nothing else would close or delete it, so do both
            # here.
            os.close(script_fd)
            os.remove(script_name)

        try:
            if not dry_run:
                script = os.fdopen(script_fd, "w")
                try:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but I'm treating it as a dumb string, so
                    # trailing slashes and so forth matter.

                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
                finally:
                    script.close()

            cmd = [sys.executable, script_name]
            if optimize == 1:
                cmd.insert(1, "-O")
            elif optimize == 2:
                cmd.insert(1, "-OO")
            spawn(cmd, dry_run=dry_run)
        finally:
            # Remove the script even if writing or running it failed.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        import py_compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file' by
            #           default)
            if sys.platform.startswith('java'):
                cfile = py_file[:-3] + '$py.class'
            else:
                # .pyc in debug mode, .pyo when running under -O/-OO
                if __debug__:
                    cfile = py_file + "c"
                else:
                    cfile = py_file + "o"
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %r doesn't start with %r"
                          % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    py_compile.compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)
571
572# byte_compile ()
573
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    # Rejoining with a newline plus 8 spaces indents every continuation line.
    return ('\n' + 8*' ').join(lines)
581