1#
2# Copyright (C) 2012-2017 The Python Software Foundation.
3# See LICENSE.txt and CONTRIBUTORS.txt.
4#
5import codecs
6from collections import deque
7import contextlib
8import csv
9from glob import iglob as std_iglob
10import io
11import json
12import logging
13import os
14import py_compile
15import re
16import socket
17try:
18    import ssl
19except ImportError:  # pragma: no cover
20    ssl = None
21import subprocess
22import sys
23import tarfile
24import tempfile
25import textwrap
26
27try:
28    import threading
29except ImportError:  # pragma: no cover
30    import dummy_threading as threading
31import time
32
33from . import DistlibException
34from .compat import (string_types, text_type, shutil, raw_input, StringIO,
35                     cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
36                     splittype, HTTPHandler, BaseConfigurator, valid_ident,
37                     Container, configparser, URLError, ZipFile, fsdecode,
38                     unquote, urlparse)
39
40logger = logging.getLogger(__name__)
41
42#
43# Requirement parsing code as per PEP 508
44#
45
# Regular expressions used by the PEP 508 requirement/marker parsers below.
# All anchored patterns consume any trailing whitespace so the parsers can
# advance by m.end() without a separate lstrip().
IDENTIFIER = re.compile(r'^([\w\.-]+)\s*')  # distribution name / marker variable
VERSION_IDENTIFIER = re.compile(r'^([\w\.*+-]+)\s*')  # version (allows '*', '+', local versions)
COMPARE_OP = re.compile(r'^(<=?|>=?|={2,3}|[~!]=)\s*')  # version comparison operators
MARKER_OP = re.compile(r'^((<=?)|(>=?)|={2,3}|[~!]=|in|not\s+in)\s*')  # marker ops (adds in / not in)
OR = re.compile(r'^or\b\s*')
AND = re.compile(r'^and\b\s*')
NON_SPACE = re.compile(r'(\S+)\s*')  # a run of non-whitespace (e.g. a URI after '@')
STRING_CHUNK = re.compile(r'([\s\w\.{}()*+#:;,/?!~`@$%^&=|<>\[\]-]+)')  # chars allowed inside a quoted marker string
54
55
def parse_marker(marker_string):
    """
    Parse a marker string and return a dictionary containing a marker expression.

    The dictionary will contain keys "op", "lhs" and "rhs" for non-terminals in
    the expression grammar, or strings. A string contained in quotes is to be
    interpreted as a literal string, and a string not contained in quotes is a
    variable (such as os_name).

    The return value is actually a 2-tuple: (expression, unparsed remainder
    of the input string).
    """
    # Grammar (recursive descent, lowest precedence first):
    #   marker      := marker_and ('or' marker_and)*
    #   marker_and  := marker_expr ('and' marker_expr)*
    #   marker_expr := '(' marker ')' | marker_var (MARKER_OP marker_var)*
    #   marker_var  := IDENTIFIER | quoted string
    def marker_var(remaining):
        # either identifier, or literal string
        m = IDENTIFIER.match(remaining)
        if m:
            result = m.groups()[0]
            remaining = remaining[m.end():]
        elif not remaining:
            raise SyntaxError('unexpected end of input')
        else:
            q = remaining[0]
            if q not in '\'"':
                raise SyntaxError('invalid expression: %s' % remaining)
            oq = '\'"'.replace(q, '')  # the "other" quote character
            remaining = remaining[1:]
            parts = [q]  # the result keeps its surrounding quotes
            while remaining:
                # either a string chunk, or oq, or q to terminate
                if remaining[0] == q:
                    break
                elif remaining[0] == oq:
                    # the other quote char may appear freely inside the literal
                    parts.append(oq)
                    remaining = remaining[1:]
                else:
                    m = STRING_CHUNK.match(remaining)
                    if not m:
                        raise SyntaxError('error in string literal: %s' % remaining)
                    parts.append(m.groups()[0])
                    remaining = remaining[m.end():]
            else:
                # while-else: input exhausted without hitting the closing quote
                s = ''.join(parts)
                raise SyntaxError('unterminated string: %s' % s)
            parts.append(q)
            result = ''.join(parts)
            remaining = remaining[1:].lstrip() # skip past closing quote
        return result, remaining

    def marker_expr(remaining):
        # A parenthesised sub-marker, or a chain of comparisons.
        if remaining and remaining[0] == '(':
            result, remaining = marker(remaining[1:].lstrip())
            if remaining[0] != ')':
                raise SyntaxError('unterminated parenthesis: %s' % remaining)
            remaining = remaining[1:].lstrip()
        else:
            lhs, remaining = marker_var(remaining)
            while remaining:
                m = MARKER_OP.match(remaining)
                if not m:
                    break
                op = m.groups()[0]
                remaining = remaining[m.end():]
                rhs, remaining = marker_var(remaining)
                # left-associative: fold each comparison into the lhs
                lhs = {'op': op, 'lhs': lhs, 'rhs': rhs}
            result = lhs
        return result, remaining

    def marker_and(remaining):
        # One or more marker_exprs joined by 'and' (binds tighter than 'or').
        lhs, remaining = marker_expr(remaining)
        while remaining:
            m = AND.match(remaining)
            if not m:
                break
            remaining = remaining[m.end():]
            rhs, remaining = marker_expr(remaining)
            lhs = {'op': 'and', 'lhs': lhs, 'rhs': rhs}
        return lhs, remaining

    def marker(remaining):
        # One or more marker_ands joined by 'or' (lowest precedence).
        lhs, remaining = marker_and(remaining)
        while remaining:
            m = OR.match(remaining)
            if not m:
                break
            remaining = remaining[m.end():]
            rhs, remaining = marker_and(remaining)
            lhs = {'op': 'or', 'lhs': lhs, 'rhs': rhs}
        return lhs, remaining

    return marker(marker_string)
143
144
def parse_requirement(req):
    """
    Parse a requirement passed in as a string. Return a Container
    whose attributes contain the various parts of the requirement.

    The Container has attributes: name, extras, constraints, marker, url
    and requirement (a normalised "name op version, ..." string). Returns
    None for blank lines and comment lines.
    """
    remaining = req.strip()
    # Blank lines and '#' comment lines yield no requirement.
    if not remaining or remaining.startswith('#'):
        return None
    m = IDENTIFIER.match(remaining)
    if not m:
        raise SyntaxError('name expected: %s' % remaining)
    distname = m.groups()[0]
    remaining = remaining[m.end():]
    extras = mark_expr = versions = uri = None
    # Optional extras immediately after the name: '[extra1, extra2, ...]'
    if remaining and remaining[0] == '[':
        i = remaining.find(']', 1)
        if i < 0:
            raise SyntaxError('unterminated extra: %s' % remaining)
        s = remaining[1:i]
        remaining = remaining[i + 1:].lstrip()
        extras = []
        while s:
            m = IDENTIFIER.match(s)
            if not m:
                raise SyntaxError('malformed extra: %s' % s)
            extras.append(m.groups()[0])
            s = s[m.end():]
            if not s:
                break
            if s[0] != ',':
                raise SyntaxError('comma expected in extras: %s' % s)
            s = s[1:].lstrip()
        if not extras:
            extras = None
    if remaining:
        if remaining[0] == '@':
            # it's a URI (PEP 508 direct reference: 'name @ url')
            remaining = remaining[1:].lstrip()
            m = NON_SPACE.match(remaining)
            if not m:
                raise SyntaxError('invalid URI: %s' % remaining)
            uri = m.groups()[0]
            t = urlparse(uri)
            # there are issues with Python and URL parsing, so this test
            # is a bit crude. See bpo-20271, bpo-23505. Python doesn't
            # always parse invalid URLs correctly - it should raise
            # exceptions for malformed URLs
            if not (t.scheme and t.netloc):
                raise SyntaxError('Invalid URL: %s' % uri)
            remaining = remaining[m.end():].lstrip()
        else:

            def get_versions(ver_remaining):
                """
                Return a list of operator, version tuples if any are
                specified, else None.
                """
                m = COMPARE_OP.match(ver_remaining)
                versions = None
                if m:
                    versions = []
                    # Parse a comma-separated list of 'op version' clauses.
                    while True:
                        op = m.groups()[0]
                        ver_remaining = ver_remaining[m.end():]
                        m = VERSION_IDENTIFIER.match(ver_remaining)
                        if not m:
                            raise SyntaxError('invalid version: %s' % ver_remaining)
                        v = m.groups()[0]
                        versions.append((op, v))
                        ver_remaining = ver_remaining[m.end():]
                        if not ver_remaining or ver_remaining[0] != ',':
                            break
                        ver_remaining = ver_remaining[1:].lstrip()
                        # after a comma, another operator is mandatory
                        m = COMPARE_OP.match(ver_remaining)
                        if not m:
                            raise SyntaxError('invalid constraint: %s' % ver_remaining)
                    if not versions:
                        versions = None
                return versions, ver_remaining

            if remaining[0] != '(':
                versions, remaining = get_versions(remaining)
            else:
                # Parenthesised constraint list, e.g. 'name (>= 1.0)'
                i = remaining.find(')', 1)
                if i < 0:
                    raise SyntaxError('unterminated parenthesis: %s' % remaining)
                s = remaining[1:i]
                remaining = remaining[i + 1:].lstrip()
                # As a special diversion from PEP 508, allow a version number
                # a.b.c in parentheses as a synonym for ~= a.b.c (because this
                # is allowed in earlier PEPs)
                if COMPARE_OP.match(s):
                    versions, _ = get_versions(s)
                else:
                    m = VERSION_IDENTIFIER.match(s)
                    if not m:
                        raise SyntaxError('invalid constraint: %s' % s)
                    v = m.groups()[0]
                    s = s[m.end():].lstrip()
                    if s:
                        raise SyntaxError('invalid constraint: %s' % s)
                    versions = [('~=', v)]

    if remaining:
        # An environment marker may follow after ';'
        if remaining[0] != ';':
            raise SyntaxError('invalid requirement: %s' % remaining)
        remaining = remaining[1:].lstrip()

        mark_expr, remaining = parse_marker(remaining)

    # Anything left must be a trailing comment.
    if remaining and remaining[0] != '#':
        raise SyntaxError('unexpected trailing data: %s' % remaining)

    if not versions:
        rs = distname
    else:
        rs = '%s %s' % (distname, ', '.join(['%s %s' % con for con in versions]))
    return Container(name=distname, extras=extras, constraints=versions,
                     marker=mark_expr, url=uri, requirement=rs)
264
265
def get_resources_dests(resources_root, rules):
    """Find destinations for resources files"""

    def relative_to(root, path):
        # Normalise to '/' separators and strip the root prefix.
        root = root.replace(os.path.sep, '/')
        path = path.replace(os.path.sep, '/')
        assert path.startswith(root)
        return path[len(root):].lstrip('/')

    destinations = {}
    for base, suffix, dest in rules:
        prefix = os.path.join(resources_root, base)
        for abs_base in iglob(prefix):
            for abs_path in iglob(os.path.join(abs_base, suffix)):
                resource_file = relative_to(resources_root, abs_path)
                if dest is None:
                    # A None destination removes any earlier mapping.
                    destinations.pop(resource_file, None)
                else:
                    tail = relative_to(abs_base, abs_path)
                    head = dest.replace(os.path.sep, '/').rstrip('/')
                    destinations[resource_file] = head + '/' + tail
    return destinations
290
291
def in_venv():
    """Return True when running inside a virtual environment."""
    # virtualenv sets sys.real_prefix; PEP 405 venvs differ in base_prefix.
    if hasattr(sys, 'real_prefix'):
        return True
    return sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
300
301
def get_executable():
    """Return the normalised path of the Python interpreter as text."""
    # Historical note: a __PYVENV_LAUNCHER__ workaround for OS X used to
    # live here; it is no longer needed because sys.executable now points
    # at the stub launcher.
    executable = os.path.normcase(sys.executable)
    if isinstance(executable, text_type):
        return executable
    return fsdecode(executable)
316
317
def proceed(prompt, allowed_chars, error_prompt=None, default=None):
    """
    Prompt the user repeatedly until the first character of the response
    (lower-cased) is one of allowed_chars; return that character.

    An empty response falls back to default (if given); error_prompt is
    prepended to the prompt after an invalid response.
    """
    p = prompt
    while True:
        s = raw_input(p)
        p = prompt
        if not s and default:
            s = default
        if not s:
            continue
        c = s[0].lower()
        if c in allowed_chars:
            return c
        if error_prompt:
            p = '%c: %s\n%s' % (c, error_prompt, prompt)
332
333
def extract_by_key(d, keys):
    """
    Return a new dict containing only the given keys of d.

    keys may be an iterable of keys or a single space-separated string.
    """
    if isinstance(keys, string_types):
        keys = keys.split()
    return {key: d[key] for key in keys if key in d}
342
def read_exports(stream):
    """
    Read exports data from a (binary) stream and return a dict mapping
    group name -> {export name -> ExportEntry}.

    The data is first tried as JSON (looking under
    extensions -> python.exports -> exports); on any failure it falls
    back to the legacy INI format.
    """
    if sys.version_info[0] >= 3:
        # needs to be a text stream
        stream = codecs.getreader('utf-8')(stream)
    # Try to load as JSON, falling back on legacy format
    data = stream.read()
    # Re-wrap in a StringIO so we can rewind for the fallback path.
    stream = StringIO(data)
    try:
        jdata = json.load(stream)
        result = jdata['extensions']['python.exports']['exports']
        # Convert each "name = value" pair into an ExportEntry in place.
        for group, entries in result.items():
            for k, v in entries.items():
                s = '%s = %s' % (k, v)
                entry = get_export_entry(s)
                assert entry is not None
                entries[k] = entry
        return result
    except Exception:
        # Not JSON (or malformed) - rewind and parse as INI below.
        stream.seek(0, 0)

    def read_stream(cp, stream):
        # ConfigParser.readfp was renamed read_file in Python 3.2.
        if hasattr(cp, 'read_file'):
            cp.read_file(stream)
        else:
            cp.readfp(stream)

    cp = configparser.ConfigParser()
    try:
        read_stream(cp, stream)
    except configparser.MissingSectionHeaderError:
        # Possibly indented legacy data - dedent and retry once.
        stream.close()
        data = textwrap.dedent(data)
        stream = StringIO(data)
        read_stream(cp, stream)

    result = {}
    for key in cp.sections():
        result[key] = entries = {}
        for name, value in cp.items(key):
            s = '%s = %s' % (name, value)
            entry = get_export_entry(s)
            assert entry is not None
            #entry.dist = self
            entries[name] = entry
    return result
388
389
def write_exports(exports, stream):
    """
    Write exports (a dict of group -> {name -> ExportEntry}) to the given
    binary stream in INI format.
    """
    if sys.version_info[0] >= 3:
        # ConfigParser wants a text stream; wrap the binary one.
        stream = codecs.getwriter('utf-8')(stream)
    cp = configparser.ConfigParser()
    for section, entries in exports.items():
        # TODO check section, entries for valid values
        cp.add_section(section)
        for entry in entries.values():
            if entry.suffix is None:
                value = entry.prefix
            else:
                value = '%s:%s' % (entry.prefix, entry.suffix)
            if entry.flags:
                value = '%s [%s]' % (value, ', '.join(entry.flags))
            cp.set(section, entry.name, value)
    cp.write(stream)
407
408
@contextlib.contextmanager
def tempdir():
    """Context manager yielding a fresh temporary directory, deleted on exit."""
    location = tempfile.mkdtemp()
    try:
        yield location
    finally:
        shutil.rmtree(location)
416
@contextlib.contextmanager
def chdir(d):
    """Context manager which changes directory to d and restores it on exit."""
    saved = os.getcwd()
    try:
        os.chdir(d)
        yield
    finally:
        os.chdir(saved)
425
426
@contextlib.contextmanager
def socket_timeout(seconds=15):
    """Context manager which temporarily sets the default socket timeout."""
    saved = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(seconds)
        yield
    finally:
        socket.setdefaulttimeout(saved)
435
436
class cached_property(object):
    """
    Descriptor which computes the decorated function once per instance and
    then caches the result on the instance, shadowing the descriptor so
    later accesses skip the function entirely.
    """
    def __init__(self, func):
        self.func = func

    def __get__(self, obj, cls=None):
        # Class-level access returns the descriptor itself.
        if obj is None:
            return self
        result = self.func(obj)
        # Cache on the instance under the function's name; subsequent
        # attribute lookups find the value before the descriptor.
        object.__setattr__(obj, self.func.__name__, result)
        return result
450
def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem.

    The path is split on '/' and put back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    # Nothing to do when '/' already is the native separator, or for ''.
    if os.sep == '/' or not pathname:
        return pathname
    if pathname.startswith('/'):
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname.endswith('/'):
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop any '.' components before rejoining with the native separator.
    components = [p for p in pathname.split('/') if p != os.curdir]
    if not components:
        return os.curdir
    return os.path.join(*components)
476
477
class FileOperator(object):
    """
    Perform filesystem operations (copying, writing, byte-compiling,
    removal), honouring a dry-run flag and optionally recording the files
    written and directories created so the changes can be committed or
    rolled back.
    """
    def __init__(self, dry_run=False):
        # When dry_run is true, operations are logged but not performed.
        self.dry_run = dry_run
        # Absolute directory paths already checked/created by ensure_dir.
        self.ensured = set()
        self._init_record()

    def _init_record(self):
        # Reset recording state: recording off, nothing recorded yet.
        self.record = False
        self.files_written = set()
        self.dirs_created = set()

    def record_as_written(self, path):
        # Remember a written file, but only while recording is enabled.
        if self.record:
            self.files_written.add(path)

    def newer(self, source, target):
        """Tell if the target is newer than the source.

        Returns true if 'source' exists and is more recently modified than
        'target', or if 'source' exists and 'target' doesn't.

        Returns false if both exist and 'target' is the same age or younger
        than 'source'. Raise DistlibException if 'source' does not exist.

        Note that this test is not very accurate: files created in the same
        second will have the same "age".
        """
        if not os.path.exists(source):
            raise DistlibException("file '%r' does not exist" %
                                   os.path.abspath(source))
        if not os.path.exists(target):
            return True

        return os.stat(source).st_mtime > os.stat(target).st_mtime

    def copy_file(self, infile, outfile, check=True):
        """Copy a file respecting dry-run and force flags.

        If check is true, refuse to overwrite a symlink or a non-regular
        file at the destination.
        """
        self.ensure_dir(os.path.dirname(outfile))
        logger.info('Copying %s to %s', infile, outfile)
        if not self.dry_run:
            msg = None
            if check:
                if os.path.islink(outfile):
                    msg = '%s is a symlink' % outfile
                elif os.path.exists(outfile) and not os.path.isfile(outfile):
                    msg = '%s is a non-regular file' % outfile
            if msg:
                raise ValueError(msg + ' which would be overwritten')
            shutil.copyfile(infile, outfile)
        self.record_as_written(outfile)

    def copy_stream(self, instream, outfile, encoding=None):
        """Copy the contents of a stream to the file at outfile.

        If encoding is None, instream must yield bytes; otherwise it must
        yield text, which is encoded on the way out.
        """
        assert not os.path.isdir(outfile)
        self.ensure_dir(os.path.dirname(outfile))
        logger.info('Copying stream %s to %s', instream, outfile)
        if not self.dry_run:
            if encoding is None:
                outstream = open(outfile, 'wb')
            else:
                outstream = codecs.open(outfile, 'w', encoding=encoding)
            try:
                shutil.copyfileobj(instream, outstream)
            finally:
                outstream.close()
        self.record_as_written(outfile)

    def write_binary_file(self, path, data):
        """Write bytes to path, replacing any existing file."""
        self.ensure_dir(os.path.dirname(path))
        if not self.dry_run:
            if os.path.exists(path):
                os.remove(path)
            with open(path, 'wb') as f:
                f.write(data)
        self.record_as_written(path)

    def write_text_file(self, path, data, encoding):
        """Write text to path using the given encoding."""
        self.write_binary_file(path, data.encode(encoding))

    def set_mode(self, bits, mask, files):
        """OR the given bits (limited by mask) into each file's mode.

        Only effective on POSIX (or Jython-on-POSIX) platforms.
        """
        if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
            # Set the executable bits (owner, group, and world) on
            # all the files specified.
            for f in files:
                if self.dry_run:
                    logger.info("changing mode of %s", f)
                else:
                    mode = (os.stat(f).st_mode | bits) & mask
                    logger.info("changing mode of %s to %o", f, mode)
                    os.chmod(f, mode)

    # Convenience wrapper: make files executable (adds r-x for all).
    set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)

    def ensure_dir(self, path):
        """Create path (and any missing ancestors), respecting dry-run."""
        path = os.path.abspath(path)
        if path not in self.ensured and not os.path.exists(path):
            self.ensured.add(path)
            d, f = os.path.split(path)
            self.ensure_dir(d)
            logger.info('Creating %s' % path)
            if not self.dry_run:
                os.mkdir(path)
            if self.record:
                self.dirs_created.add(path)

    def byte_compile(self, path, optimize=False, force=False, prefix=None, hashed_invalidation=False):
        """
        Byte-compile the Python source at path to its cache location and
        return that location. Compilation is skipped when the cached file
        is already up to date, unless force is true.
        """
        dpath = cache_from_source(path, not optimize)
        logger.info('Byte-compiling %s to %s', path, dpath)
        if not self.dry_run:
            # Bug fix: previously py_compile.compile ran unconditionally and
            # referenced 'diagpath', which was unbound (UnboundLocalError)
            # whenever the target was up to date and force was false. The
            # compile call now lives inside the staleness check.
            if force or self.newer(path, dpath):
                if not prefix:
                    diagpath = None
                else:
                    assert path.startswith(prefix)
                    # Report errors relative to the prefix for readability.
                    diagpath = path[len(prefix):]
                compile_kwargs = {}
                if hashed_invalidation and hasattr(py_compile, 'PycInvalidationMode'):
                    compile_kwargs['invalidation_mode'] = py_compile.PycInvalidationMode.CHECKED_HASH
                py_compile.compile(path, dpath, diagpath, True, **compile_kwargs)     # raise error
        self.record_as_written(dpath)
        return dpath

    def ensure_removed(self, path):
        """Remove the file, link or directory tree at path, if it exists."""
        if os.path.exists(path):
            if os.path.isdir(path) and not os.path.islink(path):
                logger.debug('Removing directory tree at %s', path)
                if not self.dry_run:
                    shutil.rmtree(path)
                if self.record:
                    if path in self.dirs_created:
                        self.dirs_created.remove(path)
            else:
                if os.path.islink(path):
                    s = 'link'
                else:
                    s = 'file'
                logger.debug('Removing %s %s', s, path)
                if not self.dry_run:
                    os.remove(path)
                if self.record:
                    if path in self.files_written:
                        self.files_written.remove(path)

    def is_writable(self, path):
        """Return True if path, or its nearest existing ancestor, is writable."""
        result = False
        while not result:
            if os.path.exists(path):
                result = os.access(path, os.W_OK)
                break
            parent = os.path.dirname(path)
            if parent == path:
                # reached the filesystem root without finding anything
                break
            path = parent
        return result

    def commit(self):
        """
        Commit recorded changes, turn off recording, return
        changes.
        """
        assert self.record
        result = self.files_written, self.dirs_created
        self._init_record()
        return result

    def rollback(self):
        """Undo recorded changes: delete written files and created dirs."""
        if not self.dry_run:
            for f in list(self.files_written):
                if os.path.exists(f):
                    os.remove(f)
            # dirs should all be empty now, except perhaps for
            # __pycache__ subdirs
            # reverse so that subdirs appear before their parents
            dirs = sorted(self.dirs_created, reverse=True)
            for d in dirs:
                flist = os.listdir(d)
                if flist:
                    assert flist == ['__pycache__']
                    sd = os.path.join(d, flist[0])
                    os.rmdir(sd)
                os.rmdir(d)     # should fail if non-empty
        self._init_record()
660
def resolve(module_name, dotted_path):
    """
    Resolve and return the object named by dotted_path relative to the
    module module_name; return the module itself if dotted_path is None.
    """
    try:
        mod = sys.modules[module_name]
    except KeyError:
        mod = __import__(module_name)
    if dotted_path is None:
        return mod
    result = mod
    for part in dotted_path.split('.'):
        result = getattr(result, part)
    return result
674
675
class ExportEntry(object):
    """
    A single export entry of the form 'name = prefix:suffix [flags]',
    as parsed by get_export_entry().
    """
    def __init__(self, name, prefix, suffix, flags):
        self.name = name      # the exported name
        self.prefix = prefix  # module path
        self.suffix = suffix  # dotted attribute path within the module, or None
        self.flags = flags    # list of flag strings

    @cached_property
    def value(self):
        # Import and return the object this entry refers to (cached).
        return resolve(self.prefix, self.suffix)

    def __repr__(self):  # pragma: no cover
        return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
                                                self.suffix, self.flags)

    def __eq__(self, other):
        return (isinstance(other, ExportEntry) and
                self.name == other.name and
                self.prefix == other.prefix and
                self.suffix == other.suffix and
                self.flags == other.flags)

    __hash__ = object.__hash__
702
703
# Matches 'name = module[:attr.path] [flag, flag=value, ...]'.
ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+)
                      \s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
                      \s*(\[\s*(?P<flags>[\w-]+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
                      ''', re.VERBOSE)

def get_export_entry(specification):
    """
    Parse an export specification 'name = prefix:suffix [flags]' and
    return an ExportEntry, or None when the string does not look like an
    export at all. Raises DistlibException on malformed specifications.
    """
    m = ENTRY_RE.search(specification)
    if m is None:
        # Brackets without a parseable entry indicate a malformed spec.
        if '[' in specification or ']' in specification:
            raise DistlibException("Invalid specification "
                                   "'%s'" % specification)
        return None
    d = m.groupdict()
    path = d['callable']
    colons = path.count(':')
    if colons == 0:
        prefix, suffix = path, None
    elif colons == 1:
        prefix, suffix = path.split(':')
    else:
        raise DistlibException("Invalid specification "
                               "'%s'" % specification)
    flags = d['flags']
    if flags is None:
        if '[' in specification or ']' in specification:
            raise DistlibException("Invalid specification "
                                   "'%s'" % specification)
        flags = []
    else:
        flags = [f.strip() for f in flags.split(',')]
    return ExportEntry(d['name'], prefix, suffix, flags)
738
739
def get_cache_base(suffix=None):
    """
    Return the default base location for distlib caches. If the directory does
    not exist, it is created. Use the suffix provided for the base directory,
    and default to '.distlib' if it isn't provided.

    On Windows, if LOCALAPPDATA is defined in the environment, then it is
    assumed to be a directory, and will be the parent directory of the result.
    On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
    directory - using os.expanduser('~') - will be the parent directory of
    the result.

    The result is just the directory '.distlib' in the parent directory as
    determined above, or with the name specified with ``suffix``.
    """
    if suffix is None:
        suffix = '.distlib'
    if os.name == 'nt' and 'LOCALAPPDATA' in os.environ:
        parent = os.path.expandvars('$localappdata')
    else:
        # Assume posix, or old Windows
        parent = os.path.expanduser('~')
    # we use 'isdir' instead of 'exists', because we want to
    # fail if there's a file with that name
    if os.path.isdir(parent):
        usable = os.access(parent, os.W_OK)
        if not usable:
            logger.warning('Directory exists but is not writable: %s', parent)
    else:
        try:
            os.makedirs(parent)
            usable = True
        except OSError:
            logger.warning('Unable to create %s', parent, exc_info=True)
            usable = False
    if not usable:
        # Last resort: a throwaway temporary directory.
        parent = tempfile.mkdtemp()
        logger.warning('Default location unusable, using %s', parent)
    return os.path.join(parent, suffix)
779
780
def path_to_cache_dir(path):
    """
    Convert an absolute path to a directory name for use in a cache.

    The algorithm used is:

    #. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
    #. Any occurrence of ``os.sep`` is replaced with ``'--'``.
    #. ``'.cache'`` is appended.
    """
    drive, tail = os.path.splitdrive(os.path.abspath(path))
    if drive:
        drive = drive.replace(':', '---')
    return drive + tail.replace(os.sep, '--') + '.cache'
796
797
def ensure_slash(s):
    """Return s, guaranteed to end with a single trailing '/'."""
    return s if s.endswith('/') else s + '/'
802
803
def parse_credentials(netloc):
    """
    Split a 'user:password@host' network location into a tuple of
    (username, password, host); absent parts are returned as None.
    Username and password are percent-decoded.
    """
    username = password = None
    if '@' in netloc:
        # rsplit: the credentials themselves may contain '@' when quoted.
        credentials, netloc = netloc.rsplit('@', 1)
        if ':' in credentials:
            username, password = credentials.split(':', 1)
        else:
            username = credentials
    if username:
        username = unquote(username)
    if password:
        password = unquote(password)
    return username, password, netloc
817
818
def get_process_umask():
    """Return the current process umask without permanently changing it."""
    # os.umask can only read by setting; set a dummy value, then restore.
    current = os.umask(0o22)
    os.umask(current)
    return current
823
def is_string_sequence(seq):
    """
    Return True if every element of seq is a string, False otherwise.

    An empty sequence returns True (vacuously). The previous
    implementation asserted non-emptiness and so crashed with
    AssertionError on an empty input.
    """
    for item in seq:
        if not isinstance(item, string_types):
            return False
    return True
833
# 'name-version' where the version may contain dots, '+' and '-'.
PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
                                      '([a-z0-9_.+-]+)', re.I)
# Trailing '-pyX' / '-pyX.Y' Python-version tag.
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')


def split_filename(filename, project_name=None):
    """
    Extract name, version, python version from a filename (no extension)

    Return name, version, pyver or None
    """
    pyver = None
    filename = unquote(filename).replace(' ', '-')
    m = PYTHON_VERSION.search(filename)
    if m:
        # Strip and remember the '-pyX.Y' tag.
        pyver = m.group(1)
        filename = filename[:m.start()]
    if project_name and len(filename) > len(project_name) + 1:
        # Prefer an exact project-name prefix when one is supplied.
        m = re.match(re.escape(project_name) + r'\b', filename)
        if m:
            n = m.end()
            return filename[:n], filename[n + 1:], pyver
    # Fall back to the generic name-version pattern.
    m = PROJECT_NAME_AND_VERSION.match(filename)
    if m:
        return m.group(1), m.group(3), pyver
    return None
862
# Allow spaces in name because of legacy dists like "Twisted Core"
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*'
                             r'\(\s*(?P<ver>[^\s)]+)\)$')

def parse_name_and_version(p):
    """
    A utility method used to get name and version from a string.

    From e.g. a Provides-Dist value.

    :param p: A value in a form 'foo (1.0)'
    :return: The name and version as a tuple.
    """
    m = NAME_VERSION_RE.match(p)
    if m is None:
        raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
    groups = m.groupdict()
    # Names are normalised to lower case with surrounding space stripped.
    return groups['name'].strip().lower(), groups['ver']
881
def get_extras(requested, available):
    """
    Resolve a set of requested extras against the declared ones.

    '*' selects every available extra, and '-name' removes a previously
    selected one ('-' on its own is kept literally).  Undeclared extras
    are kept but logged as warnings.

    :param requested: Iterable of requested extra names (or None).
    :param available: Iterable of declared extra names (or None).
    :return: The resolved set of extras.
    """
    result = set()
    requested = set(requested or [])
    available = set(available or [])
    if '*' in requested:
        requested.remove('*')
        result |= available
    for extra in requested:
        if extra == '-':
            result.add(extra)
        elif extra.startswith('-'):
            removed = extra[1:]
            if removed not in available:
                logger.warning('undeclared extra: %s' % removed)
            result.discard(removed)
        else:
            if extra not in available:
                logger.warning('undeclared extra: %s' % extra)
            result.add(extra)
    return result
903#
904# Extended metadata functionality
905#
906
def _get_external_data(url):
    """
    Fetch JSON metadata from *url*, returning {} on any failure.

    A response without an 'application/json' Content-Type is logged
    and treated as no data.
    """
    result = {}
    try:
        # urlopen might fail if it runs into redirections,
        # because of Python issue #13696. Fixed in locators
        # using a custom redirect handler.
        resp = urlopen(url)
        content_type = resp.info().get('Content-Type')
        if content_type.startswith('application/json'):
            result = json.load(codecs.getreader('utf-8')(resp))
        else:
            logger.debug('Unexpected response for JSON request: %s',
                         content_type)
    except Exception as e:
        logger.exception('Failed to get external data for %s: %s', url, e)
    return result
926
# Base URL of the externally-hosted per-project JSON metadata consumed
# by get_project_data() and get_package_data() below.
_external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
928
def get_project_data(name):
    """
    Fetch the externally-hosted JSON metadata for a project.

    :param name: The project name.
    :return: A dict of project data ({} on failure).
    """
    relative = '%s/%s/project.json' % (name[0].upper(), name)
    return _get_external_data(urljoin(_external_data_base_url, relative))
934
def get_package_data(name, version):
    """
    Fetch the externally-hosted JSON metadata for a specific release.

    :param name: The project name.
    :param version: The release version.
    :return: A dict of package data ({} on failure).
    """
    relative = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
    return _get_external_data(urljoin(_external_data_base_url, relative))
939
940
class Cache(object):
    """
    File-system cache for resources that must exist on disk (for
    example shared libraries).  Kept here rather than in resources so
    that other modules (e.g. the wheel module) can use it too.
    """

    def __init__(self, base):
        """
        Initialise an instance.

        :param base: The base directory where the cache should be located.
        """
        # 'isdir' rather than 'exists': a plain file with the cache's
        # name should cause a failure here.
        if not os.path.isdir(base):  # pragma: no cover
            os.makedirs(base)
        # Warn when group/other permission bits are set on the directory.
        if (os.stat(base).st_mode & 0o77) != 0:
            logger.warning('Directory \'%s\' is not private', base)
        self.base = os.path.abspath(os.path.normpath(base))

    def prefix_to_dir(self, prefix):
        """
        Converts a resource prefix to a directory name in the cache.
        """
        return path_to_cache_dir(prefix)

    def clear(self):
        """
        Remove everything under the cache directory.

        :return: A list of paths that could not be removed.
        """
        failures = []
        for entry in os.listdir(self.base):
            entry = os.path.join(self.base, entry)
            try:
                if os.path.islink(entry) or os.path.isfile(entry):
                    os.remove(entry)
                elif os.path.isdir(entry):
                    shutil.rmtree(entry)
            except Exception:
                failures.append(entry)
        return failures
983
984
class EventMixin(object):
    """
    A minimal publish/subscribe mixin: named events, each with an
    ordered list of subscriber callables.
    """
    def __init__(self):
        self._subscribers = {}

    def add(self, event, subscriber, append=True):
        """
        Register *subscriber* for *event*.

        :param event: The name of an event.
        :param subscriber: A callable invoked when the event is
                           published.
        :param append: If true, run after existing subscribers;
                       otherwise before them.
        """
        queue = self._subscribers.get(event)
        if queue is None:
            self._subscribers[event] = deque([subscriber])
        elif append:
            queue.append(subscriber)
        else:
            queue.appendleft(subscriber)

    def remove(self, event, subscriber):
        """
        Deregister *subscriber* from *event*.

        :raises ValueError: if the event has no subscribers, or the
                            subscriber is not among them.
        """
        subscribers = self._subscribers
        if event not in subscribers:
            raise ValueError('No subscribers: %r' % event)
        subscribers[event].remove(subscriber)

    def get_subscribers(self, event):
        """
        Return an iterator over the subscribers for *event* (empty
        when there are none).
        """
        return iter(self._subscribers.get(event, ()))

    def publish(self, event, *args, **kwargs):
        """
        Call every subscriber of *event* and collect their return
        values.

        A subscriber that raises contributes None to the result; its
        exception is logged rather than propagated.

        :param event: The event to publish.
        :param args: Positional arguments passed to each subscriber.
        :param kwargs: Keyword arguments passed to each subscriber.
        """
        results = []
        for handler in self.get_subscribers(event):
            try:
                outcome = handler(event, *args, **kwargs)
            except Exception:
                logger.exception('Exception during event publication')
                outcome = None
            results.append(outcome)
        logger.debug('publish %s: args = %s, kwargs = %s, result = %s',
                     event, args, kwargs, results)
        return results
1053
1054#
1055# Simple sequencing
1056#
class Sequencer(object):
    """
    Orders steps according to pairwise predecessor/successor
    constraints, with support for isolated (unconstrained) steps.
    """
    def __init__(self):
        self._preds = {}
        self._succs = {}
        self._nodes = set()     # nodes with no preds/succs

    def add_node(self, node):
        """Register *node* as a step with no ordering constraints."""
        self._nodes.add(node)

    def remove_node(self, node, edges=False):
        """
        Forget *node*; with edges=True, also drop every constraint
        mentioning it.
        """
        self._nodes.discard(node)
        if edges:
            for pred in set(self._preds.get(node, ())):
                self.remove(pred, node)
            for succ in set(self._succs.get(node, ())):
                self.remove(node, succ)
            # Prune constraint sets emptied by the removals above.
            self._preds = {k: v for k, v in self._preds.items() if v}
            self._succs = {k: v for k, v in self._succs.items() if v}

    def add(self, pred, succ):
        """Require *pred* to come before *succ*."""
        assert pred != succ
        self._preds.setdefault(succ, set()).add(pred)
        self._succs.setdefault(pred, set()).add(succ)

    def remove(self, pred, succ):
        """Drop the requirement that *pred* precedes *succ*."""
        assert pred != succ
        try:
            preds = self._preds[succ]
            succs = self._succs[pred]
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of anything' % succ)
        try:
            preds.remove(pred)
            succs.remove(succ)
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of %r' % (succ, pred))

    def is_step(self, step):
        """Return True if *step* is known to this sequencer."""
        return (step in self._preds or step in self._succs or
                step in self._nodes)

    def get_steps(self, final):
        """
        Return the steps needed to reach *final*, dependencies first
        (as an iterator).
        """
        if not self.is_step(final):
            raise ValueError('Unknown: %r' % final)
        ordered = []
        pending = [final]
        visited = set()
        while pending:
            current = pending.pop(0)
            if current not in visited:
                visited.add(current)
                ordered.append(current)
                pending.extend(self._preds.get(current, ()))
            elif current != final:
                # Seen again: push to the end so it comes out earlier
                # once the list is reversed on return.  The final step
                # stays put to avoid confusing users.
                ordered.remove(current)
                ordered.append(current)
        return reversed(ordered)

    @property
    def strong_connections(self):
        # Tarjan's strongly connected components algorithm:
        # http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
        counter = [0]
        stack = []
        lowlinks = {}
        index = {}
        result = []

        graph = self._succs

        def strongconnect(node):
            # Depth index for this node: the smallest unused index.
            index[node] = lowlinks[node] = counter[0]
            counter[0] += 1
            stack.append(node)

            # Visit successors (a node absent from the graph has none).
            for successor in graph.get(node, ()):
                if successor not in lowlinks:
                    # Not yet visited: recurse.
                    strongconnect(successor)
                    lowlinks[node] = min(lowlinks[node], lowlinks[successor])
                elif successor in stack:
                    # On the stack, hence in the current SCC.
                    lowlinks[node] = min(lowlinks[node], index[successor])

            # A root node closes off one SCC: pop its members.
            if lowlinks[node] == index[node]:
                component = []
                while True:
                    member = stack.pop()
                    component.append(member)
                    if member == node:
                        break
                result.append(tuple(component))

        for node in graph:
            if node not in lowlinks:
                strongconnect(node)

        return result

    @property
    def dot(self):
        """Render the constraint graph in Graphviz dot format."""
        lines = ['digraph G {']
        for succ, preds in self._preds.items():
            for pred in preds:
                lines.append('  %s -> %s;' % (pred, succ))
        for node in self._nodes:
            lines.append('  %s;' % node)
        lines.append('}')
        return '\n'.join(lines)
1191
1192#
1193# Unarchiving functionality for zip, tar, tgz, tbz, whl
1194#
1195
# File suffixes recognised as unpackable archives (zip/wheel and the
# tar variants supported by unarchive() below).
ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip',
                      '.tgz', '.tbz', '.whl')
1198
def unarchive(archive_filename, dest_dir, format=None, check=True):
    """
    Unpack an archive (zip/wheel or a tar variant) into a directory.

    :param archive_filename: Path of the archive to unpack.
    :param dest_dir: The directory to extract into.
    :param format: One of 'zip', 'tgz', 'tbz', 'tar', or None to infer
                   the format from the filename extension.
    :param check: If true, reject member paths that would escape
                  *dest_dir* (directory traversal protection).
    :raises ValueError: for an unknown format or an unsafe member path.
    """

    def check_path(path):
        # Normalise the member path and ensure it stays strictly
        # inside dest_dir.
        if not isinstance(path, text_type):
            path = path.decode('utf-8')
        p = os.path.abspath(os.path.join(dest_dir, path))
        # Slicing (rather than p[plen]) avoids an IndexError when a
        # member resolves to dest_dir itself; such a member is rejected
        # with ValueError like any other unsafe path.
        if not p.startswith(dest_dir) or p[plen:plen + 1] != os.sep:
            raise ValueError('path outside destination: %r' % p)

    dest_dir = os.path.abspath(dest_dir)
    plen = len(dest_dir)
    archive = None
    if format is None:
        # Infer the format from the filename extension.
        if archive_filename.endswith(('.zip', '.whl')):
            format = 'zip'
        elif archive_filename.endswith(('.tar.gz', '.tgz')):
            format = 'tgz'
        elif archive_filename.endswith(('.tar.bz2', '.tbz')):
            format = 'tbz'
        elif archive_filename.endswith('.tar'):
            format = 'tar'
        else:  # pragma: no cover
            raise ValueError('Unknown format for %r' % archive_filename)
    try:
        if format == 'zip':
            archive = ZipFile(archive_filename, 'r')
            if check:
                for name in archive.namelist():
                    check_path(name)
        else:
            # Map the format to a tarfile mode here rather than only in
            # the sniffing branch above, so that an explicitly supplied
            # format= also works; previously 'mode' was unbound (and a
            # NameError resulted) when the caller passed format itself.
            try:
                mode = {'tgz': 'r:gz', 'tbz': 'r:bz2', 'tar': 'r'}[format]
            except KeyError:
                raise ValueError('Unknown format for %r' % archive_filename)
            archive = tarfile.open(archive_filename, mode)
            if check:
                for name in archive.getnames():
                    check_path(name)
            if sys.version_info[0] < 3:
                # See Python issue 17153. If the dest path contains Unicode,
                # tarfile extraction fails on Python 2.x if a member path name
                # contains non-ASCII characters - it leads to an implicit
                # bytes -> unicode conversion using ASCII to decode.
                for tarinfo in archive.getmembers():
                    if not isinstance(tarinfo.name, text_type):
                        tarinfo.name = tarinfo.name.decode('utf-8')
        archive.extractall(dest_dir)

    finally:
        if archive:
            archive.close()
1251
1252
def zip_dir(directory):
    """Return a BytesIO holding a zip of the whole directory tree."""
    buf = io.BytesIO()
    prefix_len = len(directory)
    with ZipFile(buf, "w") as zf:
        for root, dirs, files in os.walk(directory):
            # Member names are relative to the directory being zipped.
            rel = root[prefix_len:]
            for name in files:
                zf.write(os.path.join(root, name), os.path.join(rel, name))
    return buf
1265
1266#
1267# Simple progress bar
1268#
1269
# Unit prefixes for the human-readable speed display.
UNITS = ('', 'K', 'M', 'G', 'T', 'P')


class Progress(object):
    """
    Tracks progress of an operation between a minimum and an optional
    maximum value, deriving percentage, ETA and transfer speed.
    """
    unknown = 'UNKNOWN'

    def __init__(self, minval=0, maxval=100):
        # maxval may be None for open-ended (unknown total) progress.
        assert maxval is None or maxval >= minval
        self.min = self.cur = minval
        self.max = maxval
        self.started = None
        self.elapsed = 0
        self.done = False

    def update(self, curval):
        """Record the current value and refresh the elapsed time."""
        assert self.min <= curval
        assert self.max is None or curval <= self.max
        self.cur = curval
        now = time.time()
        if self.started is None:
            self.started = now
        else:
            self.elapsed = now - self.started

    def increment(self, incr):
        """Advance the current value by a non-negative amount."""
        assert incr >= 0
        self.update(self.cur + incr)

    def start(self):
        """Reset to the minimum value; returns self for chaining."""
        self.update(self.min)
        return self

    def stop(self):
        """Mark the operation finished (jumping to max when known)."""
        if self.max is not None:
            self.update(self.max)
        self.done = True

    @property
    def maximum(self):
        """The maximum value, or 'UNKNOWN' when open-ended."""
        return self.unknown if self.max is None else self.max

    @property
    def percentage(self):
        """Progress as a fixed-width percentage string."""
        if self.done:
            return '100 %'
        if self.max is None:
            return ' ?? %'
        fraction = 100.0 * (self.cur - self.min) / (self.max - self.min)
        return '%3d %%' % fraction

    def format_duration(self, duration):
        """Format seconds as HH:MM:SS, or placeholders when unknown."""
        if (duration <= 0) and self.max is None or self.cur == self.min:
            return '??:??:??'
        return time.strftime('%H:%M:%S', time.gmtime(duration))

    @property
    def ETA(self):
        """Estimated time remaining (or total time once done)."""
        if self.done:
            prefix = 'Done'
            t = self.elapsed
        else:
            prefix = 'ETA '
            if self.max is None:
                t = -1
            elif self.elapsed == 0 or (self.cur == self.min):
                t = 0
            else:
                # Linear extrapolation from progress so far.
                t = float(self.max - self.min)
                t /= self.cur - self.min
                t = (t - 1) * self.elapsed
        return '%s: %s' % (prefix, self.format_duration(t))

    @property
    def speed(self):
        """Average rate since start, scaled to a human-readable unit."""
        if self.elapsed == 0:
            rate = 0.0
        else:
            rate = (self.cur - self.min) / self.elapsed
        for unit in UNITS:
            if rate < 1000:
                break
            rate /= 1000.0
        return '%d %sB/s' % (rate, unit)
1361
1362#
1363# Glob functionality
1364#
1365
RICH_GLOB = re.compile(r'\{([^}]*)\}')
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')


def iglob(path_glob):
    """
    Extended globbing supporting '**' (recursive, must stand alone as
    a path component) and '{opt1,opt2,opt3}' alternation.

    :raises ValueError: for a malformed pattern.
    """
    if _CHECK_RECURSIVE_GLOB.search(path_glob):
        msg = """invalid glob %r: recursive glob "**" must be used alone"""
        raise ValueError(msg % path_glob)
    if _CHECK_MISMATCH_SET.search(path_glob):
        msg = """invalid glob %r: mismatching set marker '{' or '}'"""
        raise ValueError(msg % path_glob)
    return _iglob(path_glob)


def _iglob(path_glob):
    # Expand one {a,b,c} alternation, recursing for each choice.
    pieces = RICH_GLOB.split(path_glob, 1)
    if len(pieces) > 1:
        assert len(pieces) == 3, pieces
        head, choices, tail = pieces
        for choice in choices.split(','):
            for path in _iglob(''.join((head, choice, tail))):
                yield path
    elif '**' not in path_glob:
        for match in std_iglob(path_glob):
            yield match
    else:
        # Recursive component: walk from the prefix and glob the
        # remainder under every directory found.
        prefix, radical = path_glob.split('**', 1)
        if not prefix:
            prefix = '.'
        if not radical:
            radical = '*'
        else:
            # we support both '/' and '\' separators after '**'
            radical = radical.lstrip('/')
            radical = radical.lstrip('\\')
        for walked, _dirs, _files in os.walk(prefix):
            walked = os.path.normpath(walked)
            for found in _iglob(os.path.join(walked, radical)):
                yield found
1408
if ssl:
    from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
                         CertificateError)


#
# HTTPSConnection which verifies certificates/matches domains
#

    class HTTPSConnection(httplib.HTTPSConnection):
        # Path to a PEM bundle of CA certificates; verification only
        # happens when this is set.
        ca_certs = None # set this to the path to the certs file (.pem)
        # Whether to match the server certificate against the hostname.
        check_domain = True # only used if ca_certs is not None

        # noinspection PyPropertyAccess
        def connect(self):
            """
            Connect the socket, wrap it in SSL and, when ca_certs is
            set, verify the server certificate (and, optionally, that
            it matches the requested host).
            """
            sock = socket.create_connection((self.host, self.port), self.timeout)
            if getattr(self, '_tunnel_host', False):
                # CONNECT through a proxy before the SSL handshake.
                self.sock = sock
                self._tunnel()

            if not hasattr(ssl, 'SSLContext'):
                # For 2.x
                if self.ca_certs:
                    cert_reqs = ssl.CERT_REQUIRED
                else:
                    cert_reqs = ssl.CERT_NONE
                # NOTE(review): ssl.wrap_socket is deprecated on modern
                # Pythons, but this branch only runs where SSLContext
                # is unavailable (old 2.x interpreters).
                self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
                                            cert_reqs=cert_reqs,
                                            ssl_version=ssl.PROTOCOL_SSLv23,
                                            ca_certs=self.ca_certs)
            else:  # pragma: no cover
                context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
                if hasattr(ssl, 'OP_NO_SSLv2'):
                    # Disable the insecure SSLv2 protocol where supported.
                    context.options |= ssl.OP_NO_SSLv2
                if self.cert_file:
                    context.load_cert_chain(self.cert_file, self.key_file)
                kwargs = {}
                if self.ca_certs:
                    context.verify_mode = ssl.CERT_REQUIRED
                    context.load_verify_locations(cafile=self.ca_certs)
                    if getattr(ssl, 'HAS_SNI', False):
                        # Pass the hostname for SNI where supported.
                        kwargs['server_hostname'] = self.host
                self.sock = context.wrap_socket(sock, **kwargs)
            if self.ca_certs and self.check_domain:
                try:
                    match_hostname(self.sock.getpeercert(), self.host)
                    logger.debug('Host verified: %s', self.host)
                except CertificateError:  # pragma: no cover
                    # Tear the connection down on a hostname mismatch.
                    self.sock.shutdown(socket.SHUT_RDWR)
                    self.sock.close()
                    raise

    class HTTPSHandler(BaseHTTPSHandler):
        """urllib handler that opens connections via the verifying
        HTTPSConnection above."""
        def __init__(self, ca_certs, check_domain=True):
            BaseHTTPSHandler.__init__(self)
            self.ca_certs = ca_certs
            self.check_domain = check_domain

        def _conn_maker(self, *args, **kwargs):
            """
            This is called to create a connection instance. Normally you'd
            pass a connection class to do_open, but it doesn't actually check for
            a class, and just expects a callable. As long as we behave just as a
            constructor would have, we should be OK. If it ever changes so that
            we *must* pass a class, we'll create an UnsafeHTTPSConnection class
            which just sets check_domain to False in the class definition, and
            choose which one to pass to do_open.
            """
            result = HTTPSConnection(*args, **kwargs)
            if self.ca_certs:
                result.ca_certs = self.ca_certs
                result.check_domain = self.check_domain
            return result

        def https_open(self, req):
            """Open an HTTPS request, translating verification failures
            into CertificateError."""
            try:
                return self.do_open(self._conn_maker, req)
            except URLError as e:
                if 'certificate verify failed' in str(e.reason):
                    raise CertificateError('Unable to verify server certificate '
                                           'for %s' % req.host)
                else:
                    raise

    #
    # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
    # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
    # HTML containing a http://xyz link when it should be https://xyz),
    # you can use the following handler class, which does not allow HTTP traffic.
    #
    # It works by inheriting from HTTPHandler - so build_opener won't add a
    # handler for HTTP itself.
    #
    class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
        def http_open(self, req):
            """Refuse plain-HTTP requests outright."""
            raise URLError('Unexpected HTTP request on what should be a secure '
                           'connection: %s' % req)
1506
1507#
1508# XML-RPC with timeouts
1509#
1510
# Major/minor version of the running interpreter; used below to select
# Python-2.6-specific compatibility shims.
_ver_info = sys.version_info[:2]

if _ver_info == (2, 6):
    class HTTP(httplib.HTTP):
        # Shim that forwards extra keyword arguments (e.g. timeout, as
        # used by Transport below) to the underlying connection class.
        def __init__(self, host='', port=None, **kwargs):
            if port == 0:   # 0 means use port 0, not the default port
                port = None
            self._setup(self._connection_class(host, port, **kwargs))


    if ssl:
        class HTTPS(httplib.HTTPS):
            # HTTPS counterpart of the HTTP shim above.
            def __init__(self, host='', port=None, **kwargs):
                if port == 0:   # 0 means use port 0, not the default port
                    port = None
                self._setup(self._connection_class(host, port, **kwargs))
1527
1528
class Transport(xmlrpclib.Transport):
    """xmlrpclib Transport that stores a timeout (applied via the
    2.6 HTTP shim on that version)."""
    def __init__(self, timeout, use_datetime=0):
        self.timeout = timeout
        xmlrpclib.Transport.__init__(self, use_datetime)

    def make_connection(self, host):
        """Create (or reuse the cached) HTTP connection for *host*."""
        h, eh, x509 = self.get_host_info(host)
        if _ver_info == (2, 6):
            connection = HTTP(h, timeout=self.timeout)
        else:
            if not self._connection or host != self._connection[0]:
                # Cache one connection, keyed by the original host value.
                self._extra_headers = eh
                self._connection = host, httplib.HTTPConnection(h)
            connection = self._connection[1]
        return connection
1544
if ssl:
    class SafeTransport(xmlrpclib.SafeTransport):
        # HTTPS counterpart of Transport above: stores a timeout and
        # passes it to the connection via keyword arguments.
        def __init__(self, timeout, use_datetime=0):
            self.timeout = timeout
            xmlrpclib.SafeTransport.__init__(self, use_datetime)

        def make_connection(self, host):
            # get_host_info returns per-host x509 keyword arguments
            # for HTTPS connections.
            h, eh, kwargs = self.get_host_info(host)
            if not kwargs:
                kwargs = {}
            kwargs['timeout'] = self.timeout
            if _ver_info == (2, 6):
                result = HTTPS(host, None, **kwargs)
            else:
                if not self._connection or host != self._connection[0]:
                    # Cache one connection per host.
                    self._extra_headers = eh
                    self._connection = host, httplib.HTTPSConnection(h, None,
                                                                     **kwargs)
                result = self._connection[1]
            return result
1565
1566
class ServerProxy(xmlrpclib.ServerProxy):
    """ServerProxy accepting an optional timeout= keyword argument."""
    def __init__(self, uri, **kwargs):
        self.timeout = timeout = kwargs.pop('timeout', None)
        # The timeout-aware transports defined above are only needed
        # when a timeout was actually requested.
        if timeout is not None:
            scheme, _ = splittype(uri)
            use_datetime = kwargs.get('use_datetime', 0)
            transport_cls = SafeTransport if scheme == 'https' else Transport
            kwargs['transport'] = t = transport_cls(
                timeout, use_datetime=use_datetime)
            self.transport = t
        xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
1582
1583#
1584# CSV functionality. This is provided because on 2.x, the csv module can't
1585# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
1586#
1587
def _csv_open(fn, mode, **kwargs):
    """
    Open *fn* for csv use: binary mode on 2.x, utf-8 text with
    newline translation disabled on 3.x.
    """
    if sys.version_info[0] < 3:
        mode += 'b'
    else:
        kwargs['newline'] = ''
        # Python 3 determines encoding from locale. Force 'utf-8'
        # file encoding to match other forced utf-8 encoding
        kwargs['encoding'] = 'utf-8'
    return open(fn, mode, **kwargs)


class CSVBase(object):
    """Shared csv dialect settings plus context-manager support."""
    defaults = {
        'delimiter': str(','),      # The strs are used because we need native
        'quotechar': str('"'),      # str in the csv API (2.x won't take
        'lineterminator': str('\n') # Unicode)
    }

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.stream.close()


class CSVReader(CSVBase):
    """Unicode-aware csv reader working from a path or a byte stream."""
    def __init__(self, **kwargs):
        if 'stream' in kwargs:
            stream = kwargs['stream']
            if sys.version_info[0] >= 3:
                # needs to be a text stream
                stream = codecs.getreader('utf-8')(stream)
            self.stream = stream
        else:
            self.stream = _csv_open(kwargs['path'], 'r')
        self.reader = csv.reader(self.stream, **self.defaults)

    def __iter__(self):
        return self

    def next(self):
        """Return the next row, decoding byte cells to text on 2.x."""
        row = next(self.reader)
        if sys.version_info[0] < 3:
            row = [cell if isinstance(cell, text_type)
                   else cell.decode('utf-8') for cell in row]
        return row

    __next__ = next


class CSVWriter(CSVBase):
    """Unicode-aware csv writer; encodes text cells to utf-8 on 2.x."""
    def __init__(self, fn, **kwargs):
        self.stream = _csv_open(fn, 'w')
        self.writer = csv.writer(self.stream, **self.defaults)

    def writerow(self, row):
        if sys.version_info[0] < 3:
            row = [cell.encode('utf-8') if isinstance(cell, text_type)
                   else cell for cell in row]
        self.writer.writerow(row)
1652
1653#
1654#   Configurator functionality
1655#
1656
class Configurator(BaseConfigurator):
    """
    BaseConfigurator extension that can instantiate objects described
    by dicts (via a '()' callable key) and resolve 'inc://' includes.
    """

    value_converters = dict(BaseConfigurator.value_converters)
    value_converters['inc'] = 'inc_convert'

    def __init__(self, config, base=None):
        super(Configurator, self).__init__(config)
        # Base directory used to resolve relative inc:// paths.
        self.base = base or os.getcwd()

    def configure_custom(self, config):
        """Instantiate the object described by *config* (a dict whose
        '()' key names or is the factory callable)."""
        def convert(o):
            # Recursively convert containers, instantiate nested '()'
            # dicts and apply the standard value converters.
            if isinstance(o, (list, tuple)):
                return type(o)([convert(i) for i in o])
            if isinstance(o, dict):
                if '()' in o:
                    return self.configure_custom(o)
                return dict((k, convert(o[k])) for k in o)
            return self.convert(o)

        factory = config.pop('()')
        if not callable(factory):
            factory = self.resolve(factory)
        props = config.pop('.', None)
        # Positional args live under '[]'; remaining keys that are
        # valid identifiers become keyword arguments.
        args = tuple(convert(o) for o in config.pop('[]', ()))
        kwargs = dict((k, convert(config[k])) for k in config
                      if valid_ident(k))
        result = factory(*args, **kwargs)
        if props:
            # '.' holds attributes to set on the new instance.
            for name, value in props.items():
                setattr(result, name, convert(value))
        return result

    def __getitem__(self, key):
        value = self.config[key]
        if isinstance(value, dict) and '()' in value:
            # Instantiate lazily, caching the result back into config.
            self.config[key] = value = self.configure_custom(value)
        return value

    def inc_convert(self, value):
        """Default converter for the inc:// protocol."""
        path = value if os.path.isabs(value) else os.path.join(self.base,
                                                               value)
        with codecs.open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
1710
1711
class SubprocessMixin(object):
    """
    Mixin for running subprocesses and capturing their output.
    """
    def __init__(self, verbose=False, progress=None):
        self.verbose = verbose
        self.progress = progress

    def reader(self, stream, context):
        """
        Drain *stream* line by line, forwarding each line to the
        progress callable if one was given, otherwise echoing progress
        to sys.stderr ('.' per line, or the full text when verbose).
        """
        progress = self.progress
        verbose = self.verbose
        while True:
            line = stream.readline()
            if not line:
                break
            if progress is not None:
                progress(line, context)
            else:
                if verbose:
                    sys.stderr.write(line.decode('utf-8'))
                else:
                    sys.stderr.write('.')
                sys.stderr.flush()
        stream.close()

    def run_command(self, cmd, **kwargs):
        """
        Run *cmd*, streaming its stdout/stderr through self.reader in
        two background threads.

        :return: The finished Popen instance.
        """
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, **kwargs)
        stdout_thread = threading.Thread(target=self.reader,
                                         args=(p.stdout, 'stdout'))
        stderr_thread = threading.Thread(target=self.reader,
                                         args=(p.stderr, 'stderr'))
        stdout_thread.start()
        stderr_thread.start()
        p.wait()
        stdout_thread.join()
        stderr_thread.join()
        if self.progress is not None:
            self.progress('done.', 'main')
        elif self.verbose:
            sys.stderr.write('done.\n')
        return p
1756
1757
def normalize_name(name):
    """Return *name* normalized per PEP 503: lower-cased, with runs of
    '-', '_' and '.' collapsed to a single '-'."""
    # https://www.python.org/dev/peps/pep-0503/#normalized-names
    collapsed = re.sub('[-_.]+', '-', name)
    return collapsed.lower()
1762