1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31
32from urlparse import urljoin as basejoin
33
34__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
35           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
36           "urlencode", "url2pathname", "pathname2url", "splittag",
37           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
38           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
39           "splitnport", "splitquery", "splitattr", "splitvalue",
40           "getproxies"]
41
42__version__ = '1.17'    # XXX This version is not always updated :-(
43
44MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
45
46# Helper for non-unix systems
# On Java-based platforms the underlying OS name is read from os._name;
# everywhere else os.name is used directly.
if (os._name if sys.platform.startswith('java') else os.name) == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert the path part of a 'file' URL into a file system path.

        On this platform the conversion is just percent-decoding; not
        recommended for general use."""
        fs_path = unquote(pathname)
        return fs_path

    def pathname2url(pathname):
        """Convert a file system path into the path part of a 'file' URL.

        On this platform the conversion is just percent-encoding; not
        recommended for general use."""
        url_path = quote(pathname)
        return url_path
61
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64#     (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    When `proxies` is given a fresh FancyURLopener is built for this call
    only; otherwise a single module-wide opener is created lazily and
    reused.  `data`, when not None, is passed on to the opener's open().
    """
    global _urlopener
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        # Explicit proxies: never touch the shared opener.
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve `url` through the shared module-level opener.

    The shared FancyURLopener is created on first use; all arguments are
    forwarded unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Clean up the shared opener (if any) and empty the module caches."""
    shared_opener = _urlopener
    if shared_opener:
        shared_opener.cleanup()
    # Both module-level caches are emptied in place.
    _safe_quoters.clear()
    ftpcache.clear()
99
100# check for SSL
# Probe for SSL support.  Only a failed import means "no SSL"; anything
# else (KeyboardInterrupt, SystemExit, ...) must not be swallowed here.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
107
108# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying an extra `content` payload.

    `message` becomes the regular exception argument; `content` is stored
    unchanged on the instance for the caller to inspect."""
    def __init__(self, message, content):
        self.content = content
        IOError.__init__(self, message)
113
# Module-wide FTP cache.  URLopener instances use this same dict as their
# .ftpcache by default, so entries are shared between openers.
ftpcache = {}
115class URLopener:
116    """Class to open URLs.
117    This is a class rather than just a subroutine because we may need
118    more than one set of global protocol-specific options.
119    Note -- this is a base class for those who don't want the
120    automatic handling of errors type 302 (relocated) and 401
121    (authorization needed)."""
122
123    __tempfiles = None
124
125    version = "Python-urllib/%s" % __version__
126
127    # Constructor
128    def __init__(self, proxies=None, **x509):
129        if proxies is None:
130            proxies = getproxies()
131        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
132        self.proxies = proxies
133        self.key_file = x509.get('key_file')
134        self.cert_file = x509.get('cert_file')
135        self.addheaders = [('User-Agent', self.version)]
136        self.__tempfiles = []
137        self.__unlink = os.unlink # See cleanup()
138        self.tempcache = None
139        # Undocumented feature: if you assign {} to tempcache,
140        # it is used to cache files retrieved with
141        # self.retrieve().  This is not enabled by default
142        # since it does not work for changing documents (and I
143        # haven't got the logic to check expiration headers
144        # yet).
145        self.ftpcache = ftpcache
146        # Undocumented feature: you can use a different
147        # ftp cache by assigning to the .ftpcache member;
148        # in case you want logically independent URL openers
149        # XXX This is not threadsafe.  Bah.
150
151    def __del__(self):
152        self.close()
153
154    def close(self):
155        self.cleanup()
156
157    def cleanup(self):
158        # This code sometimes runs when the rest of this module
159        # has already been deleted, so it can't use any globals
160        # or import anything.
161        if self.__tempfiles:
162            for file in self.__tempfiles:
163                try:
164                    self.__unlink(file)
165                except OSError:
166                    pass
167            del self.__tempfiles[:]
168        if self.tempcache:
169            self.tempcache.clear()
170
171    def addheader(self, *args):
172        """Add a header to be used by the HTTP interface only
173        e.g. u.addheader('Accept', 'sound/basic')"""
174        self.addheaders.append(args)
175
176    # External interface
177    def open(self, fullurl, data=None):
178        """Use URLopener().open(file) instead of open(file, 'r')."""
179        fullurl = unwrap(toBytes(fullurl))
180        # percent encode url, fixing lame server errors for e.g, like space
181        # within url paths.
182        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
183        if self.tempcache and fullurl in self.tempcache:
184            filename, headers = self.tempcache[fullurl]
185            fp = open(filename, 'rb')
186            return addinfourl(fp, headers, fullurl)
187        urltype, url = splittype(fullurl)
188        if not urltype:
189            urltype = 'file'
190        if urltype in self.proxies:
191            proxy = self.proxies[urltype]
192            urltype, proxyhost = splittype(proxy)
193            host, selector = splithost(proxyhost)
194            url = (host, fullurl) # Signal special case to open_*()
195        else:
196            proxy = None
197        name = 'open_' + urltype
198        self.type = urltype
199        name = name.replace('-', '_')
200        if not hasattr(self, name):
201            if proxy:
202                return self.open_unknown_proxy(proxy, fullurl, data)
203            else:
204                return self.open_unknown(fullurl, data)
205        try:
206            if data is None:
207                return getattr(self, name)(url)
208            else:
209                return getattr(self, name)(url, data)
210        except socket.error, msg:
211            raise IOError, ('socket error', msg), sys.exc_info()[2]
212
213    def open_unknown(self, fullurl, data=None):
214        """Overridable interface to open unknown URL type."""
215        type, url = splittype(fullurl)
216        raise IOError, ('url error', 'unknown url type', type)
217
218    def open_unknown_proxy(self, proxy, fullurl, data=None):
219        """Overridable interface to open unknown URL type."""
220        type, url = splittype(fullurl)
221        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222
223    # External interface
224    def retrieve(self, url, filename=None, reporthook=None, data=None):
225        """retrieve(url) returns (filename, headers) for a local object
226        or (tempfilename, headers) for a remote object."""
227        url = unwrap(toBytes(url))
228        if self.tempcache and url in self.tempcache:
229            return self.tempcache[url]
230        type, url1 = splittype(url)
231        if filename is None and (not type or type == 'file'):
232            try:
233                fp = self.open_local_file(url1)
234                hdrs = fp.info()
235                fp.close()
236                return url2pathname(splithost(url1)[1]), hdrs
237            except IOError:
238                pass
239        fp = self.open(url, data)
240        try:
241            headers = fp.info()
242            if filename:
243                tfp = open(filename, 'wb')
244            else:
245                import tempfile
246                garbage, path = splittype(url)
247                garbage, path = splithost(path or "")
248                path, garbage = splitquery(path or "")
249                path, garbage = splitattr(path or "")
250                suffix = os.path.splitext(path)[1]
251                (fd, filename) = tempfile.mkstemp(suffix)
252                self.__tempfiles.append(filename)
253                tfp = os.fdopen(fd, 'wb')
254            try:
255                result = filename, headers
256                if self.tempcache is not None:
257                    self.tempcache[url] = result
258                bs = 1024*8
259                size = -1
260                read = 0
261                blocknum = 0
262                if "content-length" in headers:
263                    size = int(headers["Content-Length"])
264                if reporthook:
265                    reporthook(blocknum, bs, size)
266                while 1:
267                    block = fp.read(bs)
268                    if block == "":
269                        break
270                    read += len(block)
271                    tfp.write(block)
272                    blocknum += 1
273                    if reporthook:
274                        reporthook(blocknum, bs, size)
275            finally:
276                tfp.close()
277        finally:
278            fp.close()
279
280        # raise exception if actual size does not match content-length header
281        if size >= 0 and read < size:
282            raise ContentTooShortError("retrieval incomplete: got only %i out "
283                                       "of %i bytes" % (read, size), result)
284
285        return result
286
287    # Each method named open_<type> knows how to open that type of URL
288
289    def open_http(self, url, data=None):
290        """Use HTTP protocol."""
291        import httplib
292        user_passwd = None
293        proxy_passwd= None
294        if isinstance(url, str):
295            host, selector = splithost(url)
296            if host:
297                user_passwd, host = splituser(host)
298                host = unquote(host)
299            realhost = host
300        else:
301            host, selector = url
302            # check whether the proxy contains authorization information
303            proxy_passwd, host = splituser(host)
304            # now we proceed with the url we want to obtain
305            urltype, rest = splittype(selector)
306            url = rest
307            user_passwd = None
308            if urltype.lower() != 'http':
309                realhost = None
310            else:
311                realhost, rest = splithost(rest)
312                if realhost:
313                    user_passwd, realhost = splituser(realhost)
314                if user_passwd:
315                    selector = "%s://%s%s" % (urltype, realhost, rest)
316                if proxy_bypass(realhost):
317                    host = realhost
318
319            #print "proxy via http:", host, selector
320        if not host: raise IOError, ('http error', 'no host given')
321
322        if proxy_passwd:
323            proxy_passwd = unquote(proxy_passwd)
324            proxy_auth = base64.b64encode(proxy_passwd).strip()
325        else:
326            proxy_auth = None
327
328        if user_passwd:
329            user_passwd = unquote(user_passwd)
330            auth = base64.b64encode(user_passwd).strip()
331        else:
332            auth = None
333        h = httplib.HTTP(host)
334        if data is not None:
335            h.putrequest('POST', selector)
336            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
337            h.putheader('Content-Length', '%d' % len(data))
338        else:
339            h.putrequest('GET', selector)
340        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
341        if auth: h.putheader('Authorization', 'Basic %s' % auth)
342        if realhost: h.putheader('Host', realhost)
343        for args in self.addheaders: h.putheader(*args)
344        h.endheaders(data)
345        errcode, errmsg, headers = h.getreply()
346        fp = h.getfile()
347        if errcode == -1:
348            if fp: fp.close()
349            # something went wrong with the HTTP status line
350            raise IOError, ('http protocol error', 0,
351                            'got a bad status line', None)
352        # According to RFC 2616, "2xx" code indicates that the client's
353        # request was successfully received, understood, and accepted.
354        if (200 <= errcode < 300):
355            return addinfourl(fp, headers, "http:" + url, errcode)
356        else:
357            if data is None:
358                return self.http_error(url, fp, errcode, errmsg, headers)
359            else:
360                return self.http_error(url, fp, errcode, errmsg, headers, data)
361
362    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
363        """Handle http errors.
364        Derived class can override this, or provide specific handlers
365        named http_error_DDD where DDD is the 3-digit error code."""
366        # First check if there's a specific handler for this error
367        name = 'http_error_%d' % errcode
368        if hasattr(self, name):
369            method = getattr(self, name)
370            if data is None:
371                result = method(url, fp, errcode, errmsg, headers)
372            else:
373                result = method(url, fp, errcode, errmsg, headers, data)
374            if result: return result
375        return self.http_error_default(url, fp, errcode, errmsg, headers)
376
377    def http_error_default(self, url, fp, errcode, errmsg, headers):
378        """Default error handler: close the connection and raise IOError."""
379        fp.close()
380        raise IOError, ('http error', errcode, errmsg, headers)
381
382    if _have_ssl:
383        def open_https(self, url, data=None):
384            """Use HTTPS protocol."""
385
386            import httplib
387            user_passwd = None
388            proxy_passwd = None
389            if isinstance(url, str):
390                host, selector = splithost(url)
391                if host:
392                    user_passwd, host = splituser(host)
393                    host = unquote(host)
394                realhost = host
395            else:
396                host, selector = url
397                # here, we determine, whether the proxy contains authorization information
398                proxy_passwd, host = splituser(host)
399                urltype, rest = splittype(selector)
400                url = rest
401                user_passwd = None
402                if urltype.lower() != 'https':
403                    realhost = None
404                else:
405                    realhost, rest = splithost(rest)
406                    if realhost:
407                        user_passwd, realhost = splituser(realhost)
408                    if user_passwd:
409                        selector = "%s://%s%s" % (urltype, realhost, rest)
410                #print "proxy via https:", host, selector
411            if not host: raise IOError, ('https error', 'no host given')
412            if proxy_passwd:
413                proxy_passwd = unquote(proxy_passwd)
414                proxy_auth = base64.b64encode(proxy_passwd).strip()
415            else:
416                proxy_auth = None
417            if user_passwd:
418                user_passwd = unquote(user_passwd)
419                auth = base64.b64encode(user_passwd).strip()
420            else:
421                auth = None
422            h = httplib.HTTPS(host, 0,
423                              key_file=self.key_file,
424                              cert_file=self.cert_file)
425            if data is not None:
426                h.putrequest('POST', selector)
427                h.putheader('Content-Type',
428                            'application/x-www-form-urlencoded')
429                h.putheader('Content-Length', '%d' % len(data))
430            else:
431                h.putrequest('GET', selector)
432            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
433            if auth: h.putheader('Authorization', 'Basic %s' % auth)
434            if realhost: h.putheader('Host', realhost)
435            for args in self.addheaders: h.putheader(*args)
436            h.endheaders(data)
437            errcode, errmsg, headers = h.getreply()
438            fp = h.getfile()
439            if errcode == -1:
440                if fp: fp.close()
441                # something went wrong with the HTTP status line
442                raise IOError, ('http protocol error', 0,
443                                'got a bad status line', None)
444            # According to RFC 2616, "2xx" code indicates that the client's
445            # request was successfully received, understood, and accepted.
446            if (200 <= errcode < 300):
447                return addinfourl(fp, headers, "https:" + url, errcode)
448            else:
449                if data is None:
450                    return self.http_error(url, fp, errcode, errmsg, headers)
451                else:
452                    return self.http_error(url, fp, errcode, errmsg, headers,
453                                           data)
454
455    def open_file(self, url):
456        """Use local file or FTP depending on form of URL."""
457        if not isinstance(url, str):
458            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
459        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
460            return self.open_ftp(url)
461        else:
462            return self.open_local_file(url)
463
464    def open_local_file(self, url):
465        """Use local file."""
466        import mimetypes, mimetools, email.utils
467        try:
468            from cStringIO import StringIO
469        except ImportError:
470            from StringIO import StringIO
471        host, file = splithost(url)
472        localname = url2pathname(file)
473        try:
474            stats = os.stat(localname)
475        except OSError, e:
476            raise IOError(e.errno, e.strerror, e.filename)
477        size = stats.st_size
478        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
479        mtype = mimetypes.guess_type(url)[0]
480        headers = mimetools.Message(StringIO(
481            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
482            (mtype or 'text/plain', size, modified)))
483        if not host:
484            urlfile = file
485            if file[:1] == '/':
486                urlfile = 'file://' + file
487            elif file[:2] == './':
488                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
489            return addinfourl(open(localname, 'rb'),
490                              headers, urlfile)
491        host, port = splitport(host)
492        if not port \
493           and socket.gethostbyname(host) in (localhost(), thishost()):
494            urlfile = file
495            if file[:1] == '/':
496                urlfile = 'file://' + file
497            return addinfourl(open(localname, 'rb'),
498                              headers, urlfile)
499        raise IOError, ('local file error', 'not on local host')
500
501    def open_ftp(self, url):
502        """Use FTP protocol."""
503        if not isinstance(url, str):
504            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
505        import mimetypes, mimetools
506        try:
507            from cStringIO import StringIO
508        except ImportError:
509            from StringIO import StringIO
510        host, path = splithost(url)
511        if not host: raise IOError, ('ftp error', 'no host given')
512        host, port = splitport(host)
513        user, host = splituser(host)
514        if user: user, passwd = splitpasswd(user)
515        else: passwd = None
516        host = unquote(host)
517        user = user or ''
518        passwd = passwd or ''
519        host = socket.gethostbyname(host)
520        if not port:
521            import ftplib
522            port = ftplib.FTP_PORT
523        else:
524            port = int(port)
525        path, attrs = splitattr(path)
526        path = unquote(path)
527        dirs = path.split('/')
528        dirs, file = dirs[:-1], dirs[-1]
529        if dirs and not dirs[0]: dirs = dirs[1:]
530        if dirs and not dirs[0]: dirs[0] = '/'
531        key = user, host, port, '/'.join(dirs)
532        # XXX thread unsafe!
533        if len(self.ftpcache) > MAXFTPCACHE:
534            # Prune the cache, rather arbitrarily
535            for k in self.ftpcache.keys():
536                if k != key:
537                    v = self.ftpcache[k]
538                    del self.ftpcache[k]
539                    v.close()
540        try:
541            if not key in self.ftpcache:
542                self.ftpcache[key] = \
543                    ftpwrapper(user, passwd, host, port, dirs)
544            if not file: type = 'D'
545            else: type = 'I'
546            for attr in attrs:
547                attr, value = splitvalue(attr)
548                if attr.lower() == 'type' and \
549                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
550                    type = value.upper()
551            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
552            mtype = mimetypes.guess_type("ftp:" + url)[0]
553            headers = ""
554            if mtype:
555                headers += "Content-Type: %s\n" % mtype
556            if retrlen is not None and retrlen >= 0:
557                headers += "Content-Length: %d\n" % retrlen
558            headers = mimetools.Message(StringIO(headers))
559            return addinfourl(fp, headers, "ftp:" + url)
560        except ftperrors(), msg:
561            raise IOError, ('ftp error', msg), sys.exc_info()[2]
562
563    def open_data(self, url, data=None):
564        """Use "data" URL."""
565        if not isinstance(url, str):
566            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
567        # ignore POSTed data
568        #
569        # syntax of data URLs:
570        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
571        # mediatype := [ type "/" subtype ] *( ";" parameter )
572        # data      := *urlchar
573        # parameter := attribute "=" value
574        import mimetools
575        try:
576            from cStringIO import StringIO
577        except ImportError:
578            from StringIO import StringIO
579        try:
580            [type, data] = url.split(',', 1)
581        except ValueError:
582            raise IOError, ('data error', 'bad data URL')
583        if not type:
584            type = 'text/plain;charset=US-ASCII'
585        semi = type.rfind(';')
586        if semi >= 0 and '=' not in type[semi:]:
587            encoding = type[semi+1:]
588            type = type[:semi]
589        else:
590            encoding = ''
591        msg = []
592        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
593                                            time.gmtime(time.time())))
594        msg.append('Content-type: %s' % type)
595        if encoding == 'base64':
596            data = base64.decodestring(data)
597        else:
598            data = unquote(data)
599        msg.append('Content-Length: %d' % len(data))
600        msg.append('')
601        msg.append(data)
602        msg = '\n'.join(msg)
603        f = StringIO(msg)
604        headers = mimetools.Message(f, 0)
605        #f.fileno = None     # needed for addinfourl
606        return addinfourl(f, headers, url)
607
608
609class FancyURLopener(URLopener):
610    """Derived class with handlers for errors we can handle (perhaps)."""
611
612    def __init__(self, *args, **kwargs):
613        URLopener.__init__(self, *args, **kwargs)
614        self.auth_cache = {}
615        self.tries = 0
616        self.maxtries = 10
617
618    def http_error_default(self, url, fp, errcode, errmsg, headers):
619        """Default error handling -- don't raise an exception."""
620        return addinfourl(fp, headers, "http:" + url, errcode)
621
622    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
623        """Error 302 -- relocated (temporarily)."""
624        self.tries += 1
625        if self.maxtries and self.tries >= self.maxtries:
626            if hasattr(self, "http_error_500"):
627                meth = self.http_error_500
628            else:
629                meth = self.http_error_default
630            self.tries = 0
631            return meth(url, fp, 500,
632                        "Internal Server Error: Redirect Recursion", headers)
633        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
634                                        data)
635        self.tries = 0
636        return result
637
638    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
639        if 'location' in headers:
640            newurl = headers['location']
641        elif 'uri' in headers:
642            newurl = headers['uri']
643        else:
644            return
645        fp.close()
646        # In case the server sent a relative URL, join with original:
647        newurl = basejoin(self.type + ":" + url, newurl)
648
649        # For security reasons we do not allow redirects to protocols
650        # other than HTTP, HTTPS or FTP.
651        newurl_lower = newurl.lower()
652        if not (newurl_lower.startswith('http://') or
653                newurl_lower.startswith('https://') or
654                newurl_lower.startswith('ftp://')):
655            raise IOError('redirect error', errcode,
656                          errmsg + " - Redirection to url '%s' is not allowed" %
657                          newurl,
658                          headers)
659
660        return self.open(newurl)
661
662    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
663        """Error 301 -- also relocated (permanently)."""
664        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
665
666    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
667        """Error 303 -- also relocated (essentially identical to 302)."""
668        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
669
670    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
671        """Error 307 -- relocated, but turn POST into error."""
672        if data is None:
673            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
674        else:
675            return self.http_error_default(url, fp, errcode, errmsg, headers)
676
677    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
678        """Error 401 -- authentication required.
679        This function supports Basic authentication only."""
680        if not 'www-authenticate' in headers:
681            URLopener.http_error_default(self, url, fp,
682                                         errcode, errmsg, headers)
683        stuff = headers['www-authenticate']
684        import re
685        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
686        if not match:
687            URLopener.http_error_default(self, url, fp,
688                                         errcode, errmsg, headers)
689        scheme, realm = match.groups()
690        if scheme.lower() != 'basic':
691            URLopener.http_error_default(self, url, fp,
692                                         errcode, errmsg, headers)
693        name = 'retry_' + self.type + '_basic_auth'
694        if data is None:
695            return getattr(self,name)(url, realm)
696        else:
697            return getattr(self,name)(url, realm, data)
698
699    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
700        """Error 407 -- proxy authentication required.
701        This function supports Basic authentication only."""
702        if not 'proxy-authenticate' in headers:
703            URLopener.http_error_default(self, url, fp,
704                                         errcode, errmsg, headers)
705        stuff = headers['proxy-authenticate']
706        import re
707        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
708        if not match:
709            URLopener.http_error_default(self, url, fp,
710                                         errcode, errmsg, headers)
711        scheme, realm = match.groups()
712        if scheme.lower() != 'basic':
713            URLopener.http_error_default(self, url, fp,
714                                         errcode, errmsg, headers)
715        name = 'retry_proxy_' + self.type + '_basic_auth'
716        if data is None:
717            return getattr(self,name)(url, realm)
718        else:
719            return getattr(self,name)(url, realm, data)
720
721    def retry_proxy_http_basic_auth(self, url, realm, data=None):
722        host, selector = splithost(url)
723        newurl = 'http://' + host + selector
724        proxy = self.proxies['http']
725        urltype, proxyhost = splittype(proxy)
726        proxyhost, proxyselector = splithost(proxyhost)
727        i = proxyhost.find('@') + 1
728        proxyhost = proxyhost[i:]
729        user, passwd = self.get_user_passwd(proxyhost, realm, i)
730        if not (user or passwd): return None
731        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
732        self.proxies['http'] = 'http://' + proxyhost + proxyselector
733        if data is None:
734            return self.open(newurl)
735        else:
736            return self.open(newurl, data)
737
738    def retry_proxy_https_basic_auth(self, url, realm, data=None):
739        host, selector = splithost(url)
740        newurl = 'https://' + host + selector
741        proxy = self.proxies['https']
742        urltype, proxyhost = splittype(proxy)
743        proxyhost, proxyselector = splithost(proxyhost)
744        i = proxyhost.find('@') + 1
745        proxyhost = proxyhost[i:]
746        user, passwd = self.get_user_passwd(proxyhost, realm, i)
747        if not (user or passwd): return None
748        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
749        self.proxies['https'] = 'https://' + proxyhost + proxyselector
750        if data is None:
751            return self.open(newurl)
752        else:
753            return self.open(newurl, data)
754
755    def retry_http_basic_auth(self, url, realm, data=None):
756        host, selector = splithost(url)
757        i = host.find('@') + 1
758        host = host[i:]
759        user, passwd = self.get_user_passwd(host, realm, i)
760        if not (user or passwd): return None
761        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
762        newurl = 'http://' + host + selector
763        if data is None:
764            return self.open(newurl)
765        else:
766            return self.open(newurl, data)
767
768    def retry_https_basic_auth(self, url, realm, data=None):
769        host, selector = splithost(url)
770        i = host.find('@') + 1
771        host = host[i:]
772        user, passwd = self.get_user_passwd(host, realm, i)
773        if not (user or passwd): return None
774        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
775        newurl = 'https://' + host + selector
776        if data is None:
777            return self.open(newurl)
778        else:
779            return self.open(newurl, data)
780
781    def get_user_passwd(self, host, realm, clear_cache=0):
782        key = realm + '@' + host.lower()
783        if key in self.auth_cache:
784            if clear_cache:
785                del self.auth_cache[key]
786            else:
787                return self.auth_cache[key]
788        user, passwd = self.prompt_user_passwd(host, realm)
789        if user or passwd: self.auth_cache[key] = (user, passwd)
790        return user, passwd
791
792    def prompt_user_passwd(self, host, realm):
793        """Override this in a GUI environment!"""
794        import getpass
795        try:
796            user = raw_input("Enter username for %s at %s: " % (realm,
797                                                                host))
798            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
799                (user, realm, host))
800            return user, passwd
801        except KeyboardInterrupt:
802            print
803            return None, None
804
805
806# Utility functions
807
_localhost = None
def localhost():
    """Return the IP address of 'localhost', resolving it on first use only."""
    global _localhost
    address = _localhost
    if address is None:
        address = _localhost = socket.gethostbyname('localhost')
    return address
815
_thishost = None
def thishost():
    """Return the IP address of the current host, resolving it on first use only."""
    global _thishost
    address = _thishost
    if address is None:
        address = _thishost = socket.gethostbyname(socket.gethostname())
    return address
823
_ftperrors = None
def ftperrors():
    """Return ftplib.all_errors, importing ftplib only on the first call."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
832
_noheaders = None
def noheaders():
    """Return a shared, empty mimetools.Message object (built on first call)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    # Parse an empty stream to get a Message with no headers.
    _noheaders = mimetools.Message(StringIO(), 0)
    # The backing file object is closed right after parsing.
    _noheaders.fp.close()
    return _noheaders
846
847
848# Utility classes
849
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Wraps one ftplib.FTP control connection together with the login and
    directory information needed to re-establish it.  `refcount` counts
    the file objects handed out by retrfile() that have not been closed
    yet; `keepalive` says whether the control connection should stay
    open once that count drops to zero.
    """

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=True):
        """Remember the connection parameters and connect immediately.

        dirs is the sequence of directories to cwd into after login.
        """
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)open the control connection: connect, log in, cwd into dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start a retrieval and return (file object, length).

        type 'd' or 'D' requests a directory listing; any other value
        is sent as the argument of the TYPE command.  When `file` cannot
        be retrieved because of a 550 reply, a LIST of it is tried
        instead.  Other permanent errors are re-raised as IOError.  The
        length is whatever ntransfercmd() reported.
        """
        import ftplib
        # Finish any transfer still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply falls through to the listing attempt below;
                # every other permanent error is reported to the caller.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the directory we started from.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # NOTE(review): presumably the makefile() object keeps the data
        # channel readable after the socket object is closed - confirm.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """If a transfer is marked busy, clear the flag and read its reply.

        Errors from reading the reply are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Stop keeping the connection alive; close it now if unreferenced."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for objects returned by retrfile().

        Ends the transfer, drops one reference, and closes the control
        connection when nothing references it and keepalive is off.
        """
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """End any pending transfer and close the FTP connection.

        Errors raised while closing are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
944
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    instance attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        # read() and readline() are mandatory on the wrapped object.
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Objects without a descriptor get a fileno() that returns None.
        self.fileno = fp.fileno if hasattr(fp, "fileno") else (lambda: None)
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped object."""
        for attr in ('read', 'readline', 'readlines', 'fileno'):
            setattr(self, attr, None)
        if self.fp:
            self.fp.close()
        self.fp = None
973
class addclosehook(addbase):
    """File wrapper that calls closehook(*hookargs) once when it is closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        hook = self.closehook
        if hook:
            # Run the hook first, then forget it so a second close()
            # does not call it again.
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
        addbase.close(self)
988
class addinfo(addbase):
    """File wrapper that also carries a headers object, returned by info()."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
998
class addinfourl(addbase):
    """File wrapper carrying headers, the URL and an optional status code."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url

    def getcode(self):
        """Return the code given to the constructor (None by default)."""
        return self.code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
1016
1017
1018# Utilities to parse URLs (most of these return None for missing parts):
1019# unwrap('<URL:type://host/path>') --> 'type://host/path'
1020# splittype('type:opaquestring') --> 'type', 'opaquestring'
1021# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1022# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1023# splitpasswd('user:passwd') -> 'user', 'passwd'
1024# splitport('host:port') --> 'host', 'port'
1025# splitquery('/path?query') --> '/path', 'query'
1026# splittag('/path#tag') --> '/path', 'tag'
1027# splitattr('/path;attr1=value1;attr2=value2;...') ->
1028#   '/path', ['attr1=value1', 'attr2=value2', ...]
1029# splitvalue('attr=value') --> 'attr', 'value'
1030# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1032
def _is_unicode(x):
    """Tell whether x is a unicode object; 0 when there is no unicode type."""
    try:
        return isinstance(x, unicode)
    except NameError:
        # This interpreter has no `unicode` builtin.
        return 0
1041
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Unicode input is encoded
    as ASCII; UnicodeError is raised when that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1053
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing <...> pair and a
    leading 'URL:' marker, in that order.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
1061
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case; (None, url) is returned when
    url does not start with a scheme.
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match is anchored at 0, so end() is the index after the colon.
    return found.group(1).lower(), url[found.end():]
1075
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  Whatever follows
    is returned as the path, with a '/' prepended when it does not
    already start with one.  Returns (None, url) when url does not start
    with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must terminate the authority as well: otherwise
        # '//127.0.0.1#@evil.com/' would yield '127.0.0.1#@evil.com' as
        # the host, which later user@host splitting reads as evil.com.
        _hostprog = re.compile('^//([^/#?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
1092
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; (None, host) is returned when
    the pattern does not match.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return found.groups()
1104
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; (user, None) is returned when
    there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S lets the password part contain newlines.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
1116
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When there is no port,
    or the port is empty ('host:'), the port is None; in the empty-port
    case the trailing ':' is removed from the returned host.  When the
    text after the last ':' is not all digits, host is returned
    unchanged with a None port.
    """
    global _portprog
    if _portprog is None:
        import re
        # [0-9]* rather than [0-9]+ so that an empty port still matches
        # and the dangling ':' does not stay glued to the host name.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
1129
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found or if nothing follows the
    last ':'; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' is followed by something that is not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.groups()
        # An empty port ('host:') means "use the default", so fall
        # through to the defport return with the ':' already stripped.
        if port:
            try:
                nport = int(port)
            except ValueError:
                nport = None
            return host, nport
    return host, defport
1151
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; (url, None) is returned when the
    pattern does not match.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' in a plain literal is an invalid string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
1163
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; (url, None) is returned when the
    pattern does not match.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1175
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = pieces.pop(0)
    return path, pieces
1181
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; (attr, None) is returned when
    the pattern does not match.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
1193
1194# urlparse contains a duplicate of this method to avoid a circular import.  If
1195# you update this method, also update the copy in urlparse.  This code
1196# duplication does not exist in Python3.
1197
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-character hex string (either case) to its character.
_hextochr = {a + b: chr(int(a + b, 16))
             for a in _hexdig for b in _hexdig}

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left untouched.
    """
    pieces = s.split('%')
    # fastpath: nothing to decode
    if len(pieces) == 1:
        return s
    s = pieces[0]
    for chunk in pieces[1:]:
        code, tail = chunk[:2], chunk[2:]
        try:
            s += _hextochr[code] + tail
        except KeyError:
            # Not a valid escape: put the '%' back as it was.
            s += '%' + chunk
        except UnicodeDecodeError:
            s += unichr(int(code, 16)) + tail
    return s
1217
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # '+' becomes a space before the %XX escapes are decoded.
    return unquote(s.replace('+', ' '))
1222
# Characters that quote() never escapes.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Maps each of the 256 byte characters to itself when it is always
# safe, otherwise to its '%XX' escape.
_safe_map = {}
for i, c in enumerate(str(bytearray(xrange(256)))):
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{:02X}'.format(i)
# Cache used by quote(): (safe, always_safe) -> (quoter, safe characters).
_safe_quoters = {}
1230
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    The characters in `safe` are left unescaped in addition to the
    always-safe set.  The default keeps '/' unescaped because quote()
    is intended for the path section of a URL, where existing slashes
    act as separators.

    Raises TypeError when s is None; any other false value is returned
    unchanged.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    # fastpath
    if not s:
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First use of this `safe` set: build its lookup table.
        table = _safe_map.copy()
        for ch in safe:
            table[ch] = ch
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[cachekey] = cached
    quoter, unescaped = cached
    # Nothing left after stripping unescaped characters from the right
    # end means there is nothing to quote.
    if not s.rstrip(unescaped):
        return s
    return ''.join(map(quoter, s))
1269
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unescaped while quoting, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1276
1277def urlencode(query, doseq=0):
1278    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1279
1280    If any values in the query arg are sequences and doseq is true, each
1281    sequence element is converted to a separate parameter.
1282
1283    If the query arg is a sequence of two-element tuples, the order of the
1284    parameters in the output will match the order of parameters in the
1285    input.
1286    """
1287
1288    if hasattr(query,"items"):
1289        # mapping objects
1290        query = query.items()
1291    else:
1292        # it's a bother at times that strings and string-like objects are
1293        # sequences...
1294        try:
1295            # non-sequence items should not work with len()
1296            # non-empty strings will fail this
1297            if len(query) and not isinstance(query[0], tuple):
1298                raise TypeError
1299            # zero-length sequences of all types will get here and succeed,
1300            # but that's a minor nit - since the original implementation
1301            # allowed empty dicts that type of behavior probably should be
1302            # preserved for consistency
1303        except TypeError:
1304            ty,va,tb = sys.exc_info()
1305            raise TypeError, "not a valid non-string sequence or mapping object", tb
1306
1307    l = []
1308    if not doseq:
1309        # preserve old behavior
1310        for k, v in query:
1311            k = quote_plus(str(k))
1312            v = quote_plus(str(v))
1313            l.append(k + '=' + v)
1314    else:
1315        for k, v in query:
1316            k = quote_plus(str(k))
1317            if isinstance(v, str):
1318                v = quote_plus(v)
1319                l.append(k + '=' + v)
1320            elif _is_unicode(v):
1321                # is there a reasonable way to convert to ASCII?
1322                # encode generates a string, but "replace" or "ignore"
1323                # lose information and "strict" can raise UnicodeError
1324                v = quote_plus(v.encode("ASCII","replace"))
1325                l.append(k + '=' + v)
1326            else:
1327                try:
1328                    # is this a sufficient test for sequence-ness?
1329                    len(v)
1330                except TypeError:
1331                    # not a sequence
1332                    v = quote_plus(str(v))
1333                    l.append(k + '=' + v)
1334                else:
1335                    # loop over the sequence
1336                    for elt in v:
1337                        l.append(k + '=' + quote_plus(str(elt)))
1338    return '&'.join(l)
1339
1340# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy, in any
    capitalisation; this seems to be the standard convention.  Variables
    whose name ends in lowercase '_proxy' take precedence over other
    spellings, and an empty one removes the scheme.  When REQUEST_METHOD
    is set (CGI), an 'http' entry that did not come from such a
    lowercase-suffixed variable is dropped.

    If you need a different way, you can pass a proxies dictionary to
    the [Fancy]URLopener constructor.

    """
    proxies = {}
    # First pass: accept any capitalisation of <scheme>_proxy.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): a CGI server exports the client's
    # "Proxy:" request header as HTTP_PROXY, so that variable cannot be
    # trusted when running as a CGI script.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: names with a lowercase '_proxy' suffix go last so
    # that they override; an empty value removes the scheme.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
1356
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.

    A suffix matches when the host equals it or ends with '.' + suffix,
    compared case-insensitively; a leading '.' on the suffix is ignored.
    The host is tested both with and without its ':port' part.
    Returns 1 to bypass the proxy, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    candidates = (hostonly.lower(), host.lower())
    # check if the host matches any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip().lstrip('.').lower()
        if not name:
            continue
        # Match on a label boundary only, so that 'example.com' does
        # not also cover 'evilexample.com'.
        dotted = '.' + name
        for candidate in candidates:
            if candidate == name or candidate.endswith(dotted):
                return 1
    # otherwise, don't bypass
    return 0
1376
1377
1378if sys.platform == 'darwin':
1379    from _scproxy import _get_proxy_settings, _get_proxies
1380
1381    def proxy_bypass_macosx_sysconf(host):
1382        """
1383        Return True iff this host shouldn't be accessed using a proxy
1384
1385        This function uses the MacOSX framework SystemConfiguration
1386        to fetch the proxy information.
1387        """
1388        import re
1389        import socket
1390        from fnmatch import fnmatch
1391
1392        hostonly, port = splitport(host)
1393
1394        def ip2num(ipAddr):
1395            parts = ipAddr.split('.')
1396            parts = map(int, parts)
1397            if len(parts) != 4:
1398                parts = (parts + [0, 0, 0, 0])[:4]
1399            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1400
1401        proxy_settings = _get_proxy_settings()
1402
1403        # Check for simple host names:
1404        if '.' not in host:
1405            if proxy_settings['exclude_simple']:
1406                return True
1407
1408        hostIP = None
1409
1410        for value in proxy_settings.get('exceptions', ()):
1411            # Items in the list are strings like these: *.local, 169.254/16
1412            if not value: continue
1413
1414            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1415            if m is not None:
1416                if hostIP is None:
1417                    try:
1418                        hostIP = socket.gethostbyname(hostonly)
1419                        hostIP = ip2num(hostIP)
1420                    except socket.error:
1421                        continue
1422
1423                base = ip2num(m.group(1))
1424                mask = m.group(2)
1425                if mask is None:
1426                    mask = 8 * (m.group(1).count('.') + 1)
1427
1428                else:
1429                    mask = int(mask[1:])
1430                mask = 32 - mask
1431
1432                if (hostIP >> mask) == (base >> mask):
1433                    return True
1434
1435            elif fnmatch(host, value):
1436                return True
1437
1438        return False
1439
1440    def getproxies_macosx_sysconf():
1441        """Return a dictionary of scheme -> proxy server URL mappings.
1442
1443        This function uses the MacOSX framework SystemConfiguration
1444        to fetch the proxy information.
1445        """
1446        return _get_proxies()
1447
1448    def proxy_bypass(host):
1449        if getproxies_environment():
1450            return proxy_bypass_environment(host)
1451        else:
1452            return proxy_bypass_macosx_sysconf(host)
1453
1454    def getproxies():
1455        return getproxies_environment() or getproxies_macosx_sysconf()
1456
1457elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.  The 'ProxyEnable' and
        'ProxyServer' values under the current user's Internet Settings
        key are read.  An empty dictionary is returned when _winreg is
        unavailable or proxies are disabled.  When reading or parsing
        the settings fails, the entries collected so far are returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings: "proto=address;proto=address"
                    for p in proxyServer.split(';'):
                        # An entry without '=' raises ValueError here,
                        # which ends the whole scan via the except
                        # clause below.
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            # NOTE(review): not reached when one of the calls above
            # raises, so the key is not closed explicitly on that path.
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # Nothing more to do: proxies holds whatever was collected
            # before the failure (empty if nothing was).
            pass
        return proxies
1503
1504    def getproxies():
1505        """Return a dictionary of scheme -> proxy server URL mappings.
1506
1507        Returns settings gathered from the environment, if specified,
1508        or the registry.
1509
1510        """
1511        return getproxies_environment() or getproxies_registry()
1512
    def proxy_bypass_registry(host):
        """Return 1 if the registry says host should bypass the proxy, else 0.

        Reads 'ProxyEnable' and 'ProxyOverride' from the current user's
        Internet Settings key.  The override value is a ';'-separated
        list of glob-like patterns which are matched against the host
        name, its resolved address and its fully qualified name.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        # From here on `host` is the list of names/addresses to test.
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # Split the registry entry into its individual patterns.
        # NOTE(review): '<local>' is not replaced by anything here; it is
        # special-cased in the loop below.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' covers host names without a dot.
                if '.' not in rawHost:
                    return 1
            # Translate the glob into a regular expression.  The order
            # matters: dots are escaped before '*' and '?' introduce new
            # ones.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # re.match anchors at the start of val only.
                if re.match(test, val, re.I):
                    return 1
        return 0
1564
1565    def proxy_bypass(host):
1566        """Return a dictionary of scheme -> proxy server URL mappings.
1567
1568        Returns settings gathered from the environment, if specified,
1569        or the registry.
1570
1571        """
1572        if getproxies_environment():
1573            return proxy_bypass_environment(host)
1574        else:
1575            return proxy_bypass_registry(host)
1576
1577else:
1578    # By default use environment variables
1579    getproxies = getproxies_environment
1580    proxy_bypass = proxy_bypass_environment
1581
1582# Test and time quote() and unquote()
1583def test1():
1584    s = ''
1585    for i in range(256): s = s + chr(i)
1586    s = s*4
1587    t0 = time.time()
1588    qs = quote(s)
1589    uqs = unquote(qs)
1590    t1 = time.time()
1591    if uqs != s:
1592        print 'Wrong!'
1593    print repr(s)
1594    print repr(qs)
1595    print repr(uqs)
1596    print round(t1 - t0, 3), 'sec'
1597
1598
1599def reporthook(blocknum, blocksize, totalsize):
1600    # Report during remote transfers
1601    print "Block number: %d, Block size: %d, Total size: %d" % (
1602        blocknum, blocksize, totalsize)
1603