1# This code is part of Ansible, but is an independent component.
2# This particular file snippet, and this file snippet only, is BSD licensed.
3# Modules you write using this snippet, which is embedded dynamically by Ansible
4# still belong to the author of the module, and may assign their own license
5# to the complete work.
6#
7# Copyright (c), Michael DeHaan <michael.dehaan@gmail.com>, 2012-2013
8# Copyright (c), Toshio Kuratomi <tkuratomi@ansible.com>, 2015
9#
10# Simplified BSD License (see licenses/simplified_bsd.txt or https://opensource.org/licenses/BSD-2-Clause)
11#
12# The match_hostname function and supporting code is under the terms and
13# conditions of the Python Software Foundation License.  They were taken from
14# the Python3 standard library and adapted for use in Python2.  See comments in the
15# source for which code precisely is under this License.
16#
17# PSF License (see licenses/PSF-license.txt or https://opensource.org/licenses/Python-2.0)
18
19
20'''
21The **urls** utils module offers a replacement for the urllib2 python library.
22
23urllib2 is the python stdlib way to retrieve files from the Internet but it
24lacks some security features (around verifying SSL certificates) that users
25should care about in most situations. Using the functions in this module corrects
26deficiencies in the urllib2 module wherever possible.
27
28There are also third-party libraries (for instance, requests) which can be used
29to replace urllib2 with a more secure library. However, all third party libraries
30require that the library be installed on the managed machine. That is an extra step
31for users making use of a module. If possible, avoid third party libraries by using
32this code instead.
33'''
34
35import atexit
36import base64
37import functools
38import netrc
39import os
40import platform
41import re
42import socket
43import sys
44import tempfile
45import traceback
46
47from contextlib import contextmanager
48
49try:
50    import httplib
51except ImportError:
52    # Python 3
53    import http.client as httplib
54
55import ansible.module_utils.six.moves.http_cookiejar as cookiejar
56import ansible.module_utils.six.moves.urllib.request as urllib_request
57import ansible.module_utils.six.moves.urllib.error as urllib_error
58
59from ansible.module_utils.six import PY3
60
61from ansible.module_utils.basic import get_distribution
62from ansible.module_utils._text import to_bytes, to_native, to_text
63
64try:
65    # python3
66    import urllib.request as urllib_request
67    from urllib.request import AbstractHTTPHandler
68except ImportError:
69    # python2
70    import urllib2 as urllib_request
71    from urllib2 import AbstractHTTPHandler
72
73urllib_request.HTTPRedirectHandler.http_error_308 = urllib_request.HTTPRedirectHandler.http_error_307
74
75try:
76    from ansible.module_utils.six.moves.urllib.parse import urlparse, urlunparse
77    HAS_URLPARSE = True
78except Exception:
79    HAS_URLPARSE = False
80
81try:
82    import ssl
83    HAS_SSL = True
84except Exception:
85    HAS_SSL = False
86
87try:
88    # SNI Handling needs python2.7.9's SSLContext
89    from ssl import create_default_context, SSLContext
90    HAS_SSLCONTEXT = True
91except ImportError:
92    HAS_SSLCONTEXT = False
93
94# SNI Handling for python < 2.7.9 with urllib3 support
95try:
96    # urllib3>=1.15
97    HAS_URLLIB3_SSL_WRAP_SOCKET = False
98    try:
99        from urllib3.contrib.pyopenssl import PyOpenSSLContext
100    except ImportError:
101        from requests.packages.urllib3.contrib.pyopenssl import PyOpenSSLContext
102    HAS_URLLIB3_PYOPENSSLCONTEXT = True
103except ImportError:
104    # urllib3<1.15,>=1.6
105    HAS_URLLIB3_PYOPENSSLCONTEXT = False
106    try:
107        try:
108            from urllib3.contrib.pyopenssl import ssl_wrap_socket
109        except ImportError:
110            from requests.packages.urllib3.contrib.pyopenssl import ssl_wrap_socket
111        HAS_URLLIB3_SSL_WRAP_SOCKET = True
112    except ImportError:
113        pass
114
115# Select a protocol that includes all secure tls protocols
116# Exclude insecure ssl protocols if possible
117
# PROTOCOL is only defined when the ssl module could be imported (HAS_SSL).
if HAS_SSL:
    # If we can't find extra tls methods, ssl.PROTOCOL_TLSv1 is sufficient
    PROTOCOL = ssl.PROTOCOL_TLSv1
# Without ssl.SSLContext, probe the OpenSSL shared library directly to decide
# whether PROTOCOL can be widened to the auto-negotiating constant.
if not HAS_SSLCONTEXT and HAS_SSL:
    try:
        import ctypes
        import ctypes.util
    except ImportError:
        # python 2.4 (likely rhel5 which doesn't have tls1.1 support in its openssl)
        pass
    else:
        # NOTE(review): find_library() can return None when no library is found;
        # what CDLL(None) loads is platform dependent - confirm this is acceptable.
        libssl_name = ctypes.util.find_library('ssl')
        libssl = ctypes.CDLL(libssl_name)
        for method in ('TLSv1_1_method', 'TLSv1_2_method'):
            try:
                # Item access looks the symbol up in the loaded library; the
                # AttributeError handler below covers the symbol being absent.
                libssl[method]
                # Found something - we'll let openssl autonegotiate and hope
                # the server has disabled sslv2 and 3.  best we can do.
                PROTOCOL = ssl.PROTOCOL_SSLv23
                break
            except AttributeError:
                pass
        # The handle was only needed for the symbol probe above
        del libssl
142
143# The following makes it easier for us to script updates of the bundled backports.ssl_match_hostname
144# The bundled backports.ssl_match_hostname should really be moved into its own file for processing
145_BUNDLED_METADATA = {"pypi_name": "backports.ssl_match_hostname", "version": "3.7.0.1"}
146
147LOADED_VERIFY_LOCATIONS = set()
148
149HAS_MATCH_HOSTNAME = True
150try:
151    from ssl import match_hostname, CertificateError
152except ImportError:
153    try:
154        from backports.ssl_match_hostname import match_hostname, CertificateError
155    except ImportError:
156        HAS_MATCH_HOSTNAME = False
157
158
159try:
160    import urllib_gssapi
161    HAS_GSSAPI = True
162except ImportError:
163    HAS_GSSAPI = False
164
165if not HAS_MATCH_HOSTNAME:
166    # The following block of code is under the terms and conditions of the
167    # Python Software Foundation License
168
169    """The match_hostname() function from Python 3.4, essential when using SSL."""
170
171    try:
172        # Divergence: Python-3.7+'s _ssl has this exception type but older Pythons do not
173        from _ssl import SSLCertVerificationError
174        CertificateError = SSLCertVerificationError
175    except ImportError:
176        class CertificateError(ValueError):
177            pass
178
179    def _dnsname_match(dn, hostname):
180        """Matching according to RFC 6125, section 6.4.3
181
182        - Hostnames are compared lower case.
183        - For IDNA, both dn and hostname must be encoded as IDN A-label (ACE).
184        - Partial wildcards like 'www*.example.org', multiple wildcards, sole
185          wildcard or wildcards in labels other then the left-most label are not
186          supported and a CertificateError is raised.
187        - A wildcard must match at least one character.
188        """
189        if not dn:
190            return False
191
192        wildcards = dn.count('*')
193        # speed up common case w/o wildcards
194        if not wildcards:
195            return dn.lower() == hostname.lower()
196
197        if wildcards > 1:
198            # Divergence .format() to percent formatting for Python < 2.6
199            raise CertificateError(
200                "too many wildcards in certificate DNS name: %s" % repr(dn))
201
202        dn_leftmost, sep, dn_remainder = dn.partition('.')
203
204        if '*' in dn_remainder:
205            # Only match wildcard in leftmost segment.
206            # Divergence .format() to percent formatting for Python < 2.6
207            raise CertificateError(
208                "wildcard can only be present in the leftmost label: "
209                "%s." % repr(dn))
210
211        if not sep:
212            # no right side
213            # Divergence .format() to percent formatting for Python < 2.6
214            raise CertificateError(
215                "sole wildcard without additional labels are not support: "
216                "%s." % repr(dn))
217
218        if dn_leftmost != '*':
219            # no partial wildcard matching
220            # Divergence .format() to percent formatting for Python < 2.6
221            raise CertificateError(
222                "partial wildcards in leftmost label are not supported: "
223                "%s." % repr(dn))
224
225        hostname_leftmost, sep, hostname_remainder = hostname.partition('.')
226        if not hostname_leftmost or not sep:
227            # wildcard must match at least one char
228            return False
229        return dn_remainder.lower() == hostname_remainder.lower()
230
231    def _inet_paton(ipname):
232        """Try to convert an IP address to packed binary form
233
234        Supports IPv4 addresses on all platforms and IPv6 on platforms with IPv6
235        support.
236        """
237        # inet_aton() also accepts strings like '1'
238        # Divergence: We make sure we have native string type for all python versions
239        try:
240            b_ipname = to_bytes(ipname, errors='strict')
241        except UnicodeError:
242            raise ValueError("%s must be an all-ascii string." % repr(ipname))
243
244        # Set ipname in native string format
245        if sys.version_info < (3,):
246            n_ipname = b_ipname
247        else:
248            n_ipname = ipname
249
250        if n_ipname.count('.') == 3:
251            try:
252                return socket.inet_aton(n_ipname)
253            # Divergence: OSError on late python3.  socket.error earlier.
254            # Null bytes generate ValueError on python3(we want to raise
255            # ValueError anyway), TypeError # earlier
256            except (OSError, socket.error, TypeError):
257                pass
258
259        try:
260            return socket.inet_pton(socket.AF_INET6, n_ipname)
261        # Divergence: OSError on late python3.  socket.error earlier.
262        # Null bytes generate ValueError on python3(we want to raise
263        # ValueError anyway), TypeError # earlier
264        except (OSError, socket.error, TypeError):
265            # Divergence .format() to percent formatting for Python < 2.6
266            raise ValueError("%s is neither an IPv4 nor an IP6 "
267                             "address." % repr(ipname))
268        except AttributeError:
269            # AF_INET6 not available
270            pass
271
272        # Divergence .format() to percent formatting for Python < 2.6
273        raise ValueError("%s is not an IPv4 address." % repr(ipname))
274
275    def _ipaddress_match(ipname, host_ip):
276        """Exact matching of IP addresses.
277
278        RFC 6125 explicitly doesn't define an algorithm for this
279        (section 1.7.2 - "Out of Scope").
280        """
281        # OpenSSL may add a trailing newline to a subjectAltName's IP address
282        ip = _inet_paton(ipname.rstrip())
283        return ip == host_ip
284
    def match_hostname(cert, hostname):
        """Verify that *cert* (in decoded format as returned by
        SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
        rules are followed.

        The function matches IP addresses rather than dNSNames if hostname is a
        valid ipaddress string. IPv4 addresses are supported on all platforms.
        IPv6 addresses are supported on platforms with IPv6 support (AF_INET6
        and inet_pton).

        CertificateError is raised on failure. On success, the function
        returns nothing.  ValueError is raised when *cert* is empty.
        """
        if not cert:
            raise ValueError("empty or no certificate, match_hostname needs a "
                             "SSL socket or SSL context with either "
                             "CERT_OPTIONAL or CERT_REQUIRED")
        try:
            # Divergence: Deal with hostname as bytes
            host_ip = _inet_paton(to_text(hostname, errors='strict'))
        except UnicodeError:
            # Divergence: Deal with hostname as byte strings.
            # IP addresses should be all ascii, so we consider it not
            # an IP address if this fails
            host_ip = None
        except ValueError:
            # Not an IP address (common case)
            host_ip = None
        # Every candidate name seen is collected here; it is used both to decide
        # whether the subject must be consulted and to build the error message.
        dnsnames = []
        san = cert.get('subjectAltName', ())
        for key, value in san:
            if key == 'DNS':
                # DNS entries are only compared when hostname is not an IP address
                if host_ip is None and _dnsname_match(value, hostname):
                    return
                dnsnames.append(value)
            elif key == 'IP Address':
                # IP entries are only compared when hostname parsed as an IP address
                if host_ip is not None and _ipaddress_match(value, host_ip):
                    return
                dnsnames.append(value)
        if not dnsnames:
            # The subject is only checked when there is no dNSName entry
            # in subjectAltName
            for sub in cert.get('subject', ()):
                for key, value in sub:
                    # XXX according to RFC 2818, the most specific Common Name
                    # must be used.
                    if key == 'commonName':
                        if _dnsname_match(value, hostname):
                            return
                        dnsnames.append(value)
        # Nothing matched: the message depends on how many candidates were seen
        if len(dnsnames) > 1:
            raise CertificateError("hostname %r doesn't match either of %s" % (hostname, ', '.join(map(repr, dnsnames))))
        elif len(dnsnames) == 1:
            raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0]))
        else:
            raise CertificateError("no appropriate commonName or subjectAltName fields were found")
341
342    # End of Python Software Foundation Licensed code
343
344    HAS_MATCH_HOSTNAME = True
345
346
347# This is a dummy cacert provided for macOS since you need at least 1
348# ca cert, regardless of validity, for Python on macOS to use the
349# keychain functionality in OpenSSL for validating SSL certificates.
350# See: http://mercurial.selenic.com/wiki/CACertificates#Mac_OS_X_10.6_and_higher
351b_DUMMY_CA_CERT = b"""-----BEGIN CERTIFICATE-----
352MIICvDCCAiWgAwIBAgIJAO8E12S7/qEpMA0GCSqGSIb3DQEBBQUAMEkxCzAJBgNV
353BAYTAlVTMRcwFQYDVQQIEw5Ob3J0aCBDYXJvbGluYTEPMA0GA1UEBxMGRHVyaGFt
354MRAwDgYDVQQKEwdBbnNpYmxlMB4XDTE0MDMxODIyMDAyMloXDTI0MDMxNTIyMDAy
355MlowSTELMAkGA1UEBhMCVVMxFzAVBgNVBAgTDk5vcnRoIENhcm9saW5hMQ8wDQYD
356VQQHEwZEdXJoYW0xEDAOBgNVBAoTB0Fuc2libGUwgZ8wDQYJKoZIhvcNAQEBBQAD
357gY0AMIGJAoGBANtvpPq3IlNlRbCHhZAcP6WCzhc5RbsDqyh1zrkmLi0GwcQ3z/r9
358gaWfQBYhHpobK2Tiq11TfraHeNB3/VfNImjZcGpN8Fl3MWwu7LfVkJy3gNNnxkA1
3594Go0/LmIvRFHhbzgfuo9NFgjPmmab9eqXJceqZIlz2C8xA7EeG7ku0+vAgMBAAGj
360gaswgagwHQYDVR0OBBYEFPnN1nPRqNDXGlCqCvdZchRNi/FaMHkGA1UdIwRyMHCA
361FPnN1nPRqNDXGlCqCvdZchRNi/FaoU2kSzBJMQswCQYDVQQGEwJVUzEXMBUGA1UE
362CBMOTm9ydGggQ2Fyb2xpbmExDzANBgNVBAcTBkR1cmhhbTEQMA4GA1UEChMHQW5z
363aWJsZYIJAO8E12S7/qEpMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEA
364MUB80IR6knq9K/tY+hvPsZer6eFMzO3JGkRFBh2kn6JdMDnhYGX7AXVHGflrwNQH
365qFy+aenWXsC0ZvrikFxbQnX8GVtDADtVznxOi7XzFw7JOxdsVrpXgSN0eh0aMzvV
366zKPZsZ2miVGclicJHzm5q080b1p/sZtuKIEZk6vZqEg=
367-----END CERTIFICATE-----
368"""
369
370#
371# Exceptions
372#
373
374
class ConnectionError(Exception):
    """Base error for a failure to connect to the server

    Derives directly from Exception; within this module the name takes
    precedence over the Python 3 builtin of the same name.
    """
378
379
class ProxyError(ConnectionError):
    """The connection failed because of a proxy"""
383
384
class SSLValidationError(ConnectionError):
    """The connection failed because SSL validation did not succeed"""
388
389
class NoSSLError(SSLValidationError):
    """An HTTPS url was requested but no ssl library is available to verify the certificate"""
393
394
395# Some environments (Google Compute Engine's CoreOS deploys) do not compile
396# against openssl and thus do not have any HTTPS support.
397CustomHTTPSConnection = None
398CustomHTTPSHandler = None
399HTTPSClientAuthHandler = None
400UnixHTTPSConnection = None
401if hasattr(httplib, 'HTTPSConnection') and hasattr(urllib_request, 'HTTPSHandler'):
    class CustomHTTPSConnection(httplib.HTTPSConnection):
        '''HTTPS connection that wraps its socket with the best available TLS backend

        Preference order in ``connect``: an SSL context (stdlib SSLContext or
        urllib3's PyOpenSSLContext), then urllib3's ``ssl_wrap_socket``, then
        the plain ``ssl.wrap_socket`` function.
        '''

        def __init__(self, *args, **kwargs):
            httplib.HTTPSConnection.__init__(self, *args, **kwargs)
            self.context = None
            if HAS_SSLCONTEXT:
                # NOTE(review): presumably self._context was set by the base
                # class constructor above - confirm for all supported Pythons
                self.context = self._context
            elif HAS_URLLIB3_PYOPENSSLCONTEXT:
                self.context = self._context = PyOpenSSLContext(PROTOCOL)
            # A client certificate can only be loaded when a context object exists
            if self.context and self.cert_file:
                self.context.load_cert_chain(self.cert_file, self.key_file)

        def connect(self):
            "Connect to a host on a given (SSL) port."

            # source_address is only passed along when the base class provides it
            if hasattr(self, 'source_address'):
                sock = socket.create_connection((self.host, self.port), self.timeout, self.source_address)
            else:
                sock = socket.create_connection((self.host, self.port), self.timeout)

            server_hostname = self.host
            # Note: self._tunnel_host is not available on py < 2.6 but this code
            # isn't used on py < 2.6 (lack of create_connection)
            if self._tunnel_host:
                # Establish the tunnel on the plain socket first; the TLS
                # session below is then set up for the tunnel target's name
                self.sock = sock
                self._tunnel()
                server_hostname = self._tunnel_host

            if HAS_SSLCONTEXT or HAS_URLLIB3_PYOPENSSLCONTEXT:
                self.sock = self.context.wrap_socket(sock, server_hostname=server_hostname)
            elif HAS_URLLIB3_SSL_WRAP_SOCKET:
                # cert_reqs=ssl.CERT_NONE: no certificate requirement is requested in this call
                self.sock = ssl_wrap_socket(sock, keyfile=self.key_file, cert_reqs=ssl.CERT_NONE, certfile=self.cert_file, ssl_version=PROTOCOL,
                                            server_hostname=server_hostname)
            else:
                # Last resort: no server_hostname is passed in this call
                self.sock = ssl.wrap_socket(sock, keyfile=self.key_file, certfile=self.cert_file, ssl_version=PROTOCOL)
436
437    class CustomHTTPSHandler(urllib_request.HTTPSHandler):
438
439        def https_open(self, req):
440            kwargs = {}
441            if HAS_SSLCONTEXT:
442                kwargs['context'] = self._context
443            return self.do_open(
444                functools.partial(
445                    CustomHTTPSConnection,
446                    **kwargs
447                ),
448                req
449            )
450
451        https_request = AbstractHTTPHandler.do_request_
452
453    class HTTPSClientAuthHandler(urllib_request.HTTPSHandler):
454        '''Handles client authentication via cert/key
455
456        This is a fairly lightweight extension on HTTPSHandler, and can be used
457        in place of HTTPSHandler
458        '''
459
460        def __init__(self, client_cert=None, client_key=None, unix_socket=None, **kwargs):
461            urllib_request.HTTPSHandler.__init__(self, **kwargs)
462            self.client_cert = client_cert
463            self.client_key = client_key
464            self._unix_socket = unix_socket
465
466        def https_open(self, req):
467            return self.do_open(self._build_https_connection, req)
468
469        def _build_https_connection(self, host, **kwargs):
470            kwargs.update({
471                'cert_file': self.client_cert,
472                'key_file': self.client_key,
473            })
474            try:
475                kwargs['context'] = self._context
476            except AttributeError:
477                pass
478            if self._unix_socket:
479                return UnixHTTPSConnection(self._unix_socket)(host, **kwargs)
480            return httplib.HTTPSConnection(host, **kwargs)
481
482    @contextmanager
483    def unix_socket_patch_httpconnection_connect():
484        '''Monkey patch ``httplib.HTTPConnection.connect`` to be ``UnixHTTPConnection.connect``
485        so that when calling ``super(UnixHTTPSConnection, self).connect()`` we get the
486        correct behavior of creating self.sock for the unix socket
487        '''
488        _connect = httplib.HTTPConnection.connect
489        httplib.HTTPConnection.connect = UnixHTTPConnection.connect
490        yield
491        httplib.HTTPConnection.connect = _connect
492
493    class UnixHTTPSConnection(httplib.HTTPSConnection):
494        def __init__(self, unix_socket):
495            self._unix_socket = unix_socket
496
497        def connect(self):
498            # This method exists simply to ensure we monkeypatch
499            # httplib.HTTPConnection.connect to call UnixHTTPConnection.connect
500            with unix_socket_patch_httpconnection_connect():
501                # Disable pylint check for the super() call. It complains about UnixHTTPSConnection
502                # being a NoneType because of the initial definition above, but it won't actually
503                # be a NoneType when this code runs
504                # pylint: disable=bad-super-call
505                super(UnixHTTPSConnection, self).connect()
506
507        def __call__(self, *args, **kwargs):
508            httplib.HTTPSConnection.__init__(self, *args, **kwargs)
509            return self
510
511
class UnixHTTPConnection(httplib.HTTPConnection):
    '''HTTP connection that talks to a unix socket file instead of a TCP port

    The constructor only stores the socket path; calling the instance with the
    regular ``HTTPConnection`` arguments performs the base class
    initialisation and returns the instance.
    '''

    def __init__(self, unix_socket):
        self._unix_socket = unix_socket

    def __call__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        return self

    def connect(self):
        unix_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        # Publish the socket on the connection before attempting to connect
        self.sock = unix_sock
        try:
            unix_sock.connect(self._unix_socket)
        except OSError as err:
            raise OSError('Invalid Socket File (%s): %s' % (self._unix_socket, err))
        # A timeout is applied only when one was explicitly configured
        if self.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
            unix_sock.settimeout(self.timeout)
530
531
class UnixHTTPHandler(urllib_request.HTTPHandler):
    '''urllib handler that sends http requests through a unix socket file'''

    def __init__(self, unix_socket, **kwargs):
        self._unix_socket = unix_socket
        urllib_request.HTTPHandler.__init__(self, **kwargs)

    def http_open(self, req):
        # The connection object is passed to do_open as the connection factory
        unix_connection = UnixHTTPConnection(self._unix_socket)
        return self.do_open(unix_connection, req)
541
542
class ParseResultDottedDict(dict):
    '''
    A dict that acts similarly to the ParseResult named tuple from urllib

    Every key can also be read and written as an attribute.
    '''
    def __init__(self, *args, **kwargs):
        super(ParseResultDottedDict, self).__init__(*args, **kwargs)
        # Use the dict itself as the attribute namespace so keys double as attributes
        self.__dict__ = self

    def as_list(self):
        '''
        Generate a list from this dict, that looks like the ParseResult named tuple

        Missing keys are reported as None.
        '''
        return [self.get(k, None) for k in ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')]


def generic_urlparse(parts):
    '''
    Returns a dictionary of url parts as parsed by urlparse,
    but accounts for the fact that older versions of that
    library do not support named attributes (ie. .netloc)

    ``parts`` is either a urlparse result with named attributes or a plain
    6-item sequence (scheme, netloc, path, params, query, fragment).  The
    returned ParseResultDottedDict holds those six keys plus ``username``,
    ``password``, ``hostname`` and ``port``.

    A ValueError from converting a non-numeric port propagates to the caller.
    '''
    generic_parts = ParseResultDottedDict()
    if hasattr(parts, 'netloc'):
        # urlparse is newer, just read the fields straight
        # from the parts object
        generic_parts['scheme'] = parts.scheme
        generic_parts['netloc'] = parts.netloc
        generic_parts['path'] = parts.path
        generic_parts['params'] = parts.params
        generic_parts['query'] = parts.query
        generic_parts['fragment'] = parts.fragment
        generic_parts['username'] = parts.username
        generic_parts['password'] = parts.password
        hostname = parts.hostname
        if hostname and hostname[0] == '[' and '[' in parts.netloc and ']' in parts.netloc:
            # Py2.6 doesn't parse IPv6 addresses correctly.  Drop any
            # "user:pass@" prefix first so that only the bracketed address
            # is left (same approach as the port fallback below).
            hostname = parts.netloc.split('@')[-1].split(']')[0][1:].lower()
        generic_parts['hostname'] = hostname

        try:
            port = parts.port
        except ValueError:
            # Py2.6 doesn't parse IPv6 addresses correctly
            netloc = parts.netloc.split('@')[-1].split(']')[-1]
            port = None
            if ':' in netloc:
                port_text = netloc.split(':')[1]
                # An empty port ("host:") means no port rather than ''
                if port_text:
                    port = int(port_text)
        generic_parts['port'] = port
    else:
        # we have to use indexes, and then parse out
        # the other parts not supported by indexing
        generic_parts['scheme'] = parts[0]
        generic_parts['netloc'] = parts[1]
        generic_parts['path'] = parts[2]
        generic_parts['params'] = parts[3]
        generic_parts['query'] = parts[4]
        generic_parts['fragment'] = parts[5]
        # get the username, password, etc.
        try:
            match = re.match(r'^((?:\w)+(?::(?:\w)+)?@)?([A-Za-z0-9.-]+)(:\d+)?$', parts[1])
        except TypeError:
            # The netloc is not something a text regex can be applied to
            match = None

        if match:
            auth, hostname, port = match.groups()
            if port:
                # the capture group for the port will include the ':',
                # so remove it and convert the port to an integer
                port = int(port[1:])
            if auth:
                # the capture group above includes the @, so remove it
                # and then split it up based on the first ':' found.
                # The password is optional ("user@host"), in which case it
                # is reported as None.
                username, colon, password = auth[:-1].partition(':')
                if not colon:
                    password = None
            else:
                username = password = None
        else:
            # Unparseable netloc: report it unchanged as the hostname
            username = password = port = None
            hostname = parts[1]

        generic_parts['username'] = username
        generic_parts['password'] = password
        generic_parts['hostname'] = hostname
        generic_parts['port'] = port
    return generic_parts
631
632
class RequestWithMethod(urllib_request.Request):
    '''
    Workaround for using DELETE/PUT/etc with urllib2
    Originally contained in library/net_infrastructure/dnsmadeeasy

    ``method`` is upper-cased and returned from ``get_method``.  When it is
    None or empty the base class decides the method.
    '''

    def __init__(self, url, method, data=None, headers=None, origin_req_host=None, unverifiable=True):
        if headers is None:
            headers = {}
        # get_method() already treats a falsy _method as "let the base class
        # decide", so accept None/'' here instead of failing on .upper()
        self._method = method.upper() if method else None
        urllib_request.Request.__init__(self, url, data, headers, origin_req_host, unverifiable)

    def get_method(self):
        if self._method:
            return self._method
        else:
            return urllib_request.Request.get_method(self)
650
651
def RedirectHandlerFactory(follow_redirects=None, validate_certs=True, ca_path=None):
    """Build and return a RedirectHandler class bound to the given settings.

    The returned class closes over ``follow_redirects`` (and the certificate
    options), so no globals are needed and several ``open_url`` or
    ``fetch_url`` calls within one module cannot interfere with each other.
    """

    class RedirectHandler(urllib_request.HTTPRedirectHandler):
        """RedirectHandler implementation matching the functionality provided
        by httplib2.  How a redirect is treated in urllib2 is decided by the
        ``follow_redirects`` value given to ``RedirectHandlerFactory``.
        """

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            if not HAS_SSLCONTEXT:
                # Without SSLContext the redirect target may need an SSL
                # handler of its own on the installed opener
                ssl_handler = maybe_add_ssl_handler(newurl, validate_certs, ca_path=ca_path)
                if ssl_handler:
                    urllib_request._opener.add_handler(ssl_handler)

            # Preserve urllib2 compatibility
            if follow_redirects == 'urllib2':
                return urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)

            # Handle disabled redirects
            if follow_redirects in ['no', 'none', False]:
                raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp)

            method = req.get_method()

            # Decide whether this response may be followed; a non-redirect
            # status or an unknown follow_redirects value never is
            is_redirect_status = 300 <= code < 400
            if follow_redirects in ['all', 'yes', True]:
                may_follow = is_redirect_status
            elif follow_redirects == 'safe':
                may_follow = is_redirect_status and method in ('GET', 'HEAD')
            else:
                may_follow = False
            if not may_follow:
                raise urllib_error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

            try:
                # Python 2-3.3
                data = req.get_data()
                origin_req_host = req.get_origin_req_host()
            except AttributeError:
                # Python 3.4+
                data = req.data
                origin_req_host = req.origin_req_host

            # Tolerate URIs containing a space
            newurl = newurl.replace(' ', '%20')

            if code in (307, 308):
                # 307/308 keep the payload and the original headers
                headers = req.headers
            else:
                # Every other redirect drops the payload and the headers describing it
                data = None
                payload_headers = ("content-length", "content-type", "transfer-encoding")
                headers = {}
                for name, value in req.headers.items():
                    if name.lower() not in payload_headers:
                        headers[name] = value

                if code in (302, 303) and method != 'HEAD':
                    # 303: http://tools.ietf.org/html/rfc7231#section-6.4.4
                    # 302: do what the browsers do, despite standards...
                    method = 'GET'
                elif code == 301 and method == 'POST':
                    # A POST answered with a 301 is turned into a GET as well
                    method = 'GET'

            return RequestWithMethod(newurl,
                                     method=method,
                                     headers=headers,
                                     data=data,
                                     origin_req_host=origin_req_host,
                                     unverifiable=True,
                                     )

    return RedirectHandler
736
737
def build_ssl_validation_error(hostname, port, paths, exc=None):
    '''Intelligently build out the SSLValidationError based on what support
    you have installed

    ``hostname`` and ``port`` identify the server whose certificate failed
    validation, ``paths`` is the list of CA locations that were checked and
    ``exc`` is an optional underlying exception whose text is appended.

    This function never returns; it always raises SSLValidationError.
    '''

    # Every fragment is formatted on its own.  Formatting the joined message
    # in one go would re-interpret any '%' contained in already interpolated
    # text (the exception message, the interpreter path) as a format
    # specifier and fail before the SSLValidationError could be raised.
    msg = [
        ('Failed to validate the SSL certificate for %s:%s.'
         ' Make sure your managed systems have a valid CA'
         ' certificate installed.') % (hostname, port)
    ]
    if not HAS_SSLCONTEXT:
        msg.append('If the website serving the url uses SNI you need'
                   ' python >= 2.7.9 on your managed machine')
        msg.append(' (the python executable used (%s) is version: %s)' %
                   (sys.executable, ''.join(sys.version.splitlines())))
        if not HAS_URLLIB3_PYOPENSSLCONTEXT and not HAS_URLLIB3_SSL_WRAP_SOCKET:
            msg.append('or you can install the `urllib3`, `pyOpenSSL`,'
                       ' `ndg-httpsclient`, and `pyasn1` python modules')

        msg.append('to perform SNI verification in python >= 2.6.')

    msg.append(('You can use validate_certs=False if you do'
                ' not need to confirm the servers identity but this is'
                ' unsafe and not recommended.'
                ' Paths checked for this platform: %s.') % ", ".join(paths))

    if exc:
        msg.append('The exception msg was: %s.' % to_native(exc))

    raise SSLValidationError(' '.join(msg))
768
769
def atexit_remove_file(filename):
    '''Best-effort removal of ``filename``; failures to delete are ignored.'''
    if not os.path.exists(filename):
        return

    try:
        os.unlink(filename)
    except Exception:
        # a leftover file is harmless, so never fail because of it
        pass
777
778
class SSLValidationHandler(urllib_request.BaseHandler):
    '''
    A custom handler class for SSL validation.

    Before a request is sent, ``http_request`` opens a separate connection to
    ``hostname:port`` (through an HTTP CONNECT proxy when ``https_proxy`` is
    set and not bypassed by ``no_proxy``), performs a TLS handshake against the
    collected CA certificates and closes the connection again.

    Based on:
    http://stackoverflow.com/questions/1087227/validate-ssl-certificates-with-python
    http://techknack.net/python-urllib2-handlers/
    '''
    CONNECT_COMMAND = "CONNECT %s:%s HTTP/1.0\r\n"

    def __init__(self, hostname, port, ca_path=None):
        '''
        :arg hostname: Host whose certificate is validated
        :arg port: Port to connect to on ``hostname``
        :kwarg ca_path: (optional) Path of a CA certificate file to use
            instead of scanning the platform locations
        '''
        self.hostname = hostname
        self.port = port
        self.ca_path = ca_path

    def get_ca_certs(self):
        '''
        Collect the CA certificates to validate against.

        :returns: A tuple ``(cafile, cadata, paths_checked)``. ``cafile`` is
            ``self.ca_path`` when it is set, otherwise the path of a temporary
            file holding the concatenated certificates when no SSLContext
            support is available, otherwise ``None``. ``cadata`` is a
            bytearray of DER data gathered when SSLContext support is
            available. ``paths_checked`` lists the locations considered.
        '''
        # tries to find a valid CA cert in one of the
        # standard locations for the current distribution

        cadata = bytearray()
        paths_checked = []

        if self.ca_path:
            paths_checked = [self.ca_path]
            # The file is opened in both modes so that a missing or unreadable
            # ca_path is reported here rather than during the handshake.
            with open(to_bytes(self.ca_path, errors='surrogate_or_strict'), 'rb') as f:
                if HAS_SSLCONTEXT:
                    cadata.extend(
                        ssl.PEM_cert_to_DER_cert(
                            to_native(f.read(), errors='surrogate_or_strict')
                        )
                    )
            # Callers pass the first element on as a CA *file name*, so return
            # the path itself rather than the contents of the file.
            return self.ca_path, cadata, paths_checked

        if not HAS_SSLCONTEXT:
            paths_checked.append('/etc/ssl/certs')

        system = to_text(platform.system(), errors='surrogate_or_strict')
        # build a list of paths to check for .crt/.pem files
        # based on the platform type
        if system == u'Linux':
            paths_checked.append('/etc/pki/ca-trust/extracted/pem')
            paths_checked.append('/etc/pki/tls/certs')
            paths_checked.append('/usr/share/ca-certificates/cacert.org')
        elif system == u'FreeBSD':
            paths_checked.append('/usr/local/share/certs')
        elif system == u'OpenBSD':
            paths_checked.append('/etc/ssl')
        elif system == u'NetBSD':
            # must go into paths_checked, otherwise the directory is never scanned
            paths_checked.append('/etc/openssl/certs')
        elif system == u'SunOS':
            paths_checked.append('/opt/local/etc/openssl/certs')

        # fall back to a user-deployed cert in a standard
        # location if the OS platform one is not available
        paths_checked.append('/usr/local/etc/ansible')

        tmp_fd = None
        tmp_path = None
        if not HAS_SSLCONTEXT:
            tmp_fd, tmp_path = tempfile.mkstemp()
            atexit.register(atexit_remove_file, tmp_path)

        try:
            # Write the dummy ca cert if we are running on macOS
            if system == u'Darwin':
                if HAS_SSLCONTEXT:
                    cadata.extend(
                        ssl.PEM_cert_to_DER_cert(
                            to_native(b_DUMMY_CA_CERT, errors='surrogate_or_strict')
                        )
                    )
                else:
                    os.write(tmp_fd, b_DUMMY_CA_CERT)
                # Default Homebrew path for OpenSSL certs
                paths_checked.append('/usr/local/etc/openssl')

            # for all of the paths, find any  .crt or .pem files
            # and compile them into single temp file for use
            # in the ssl check to speed up the test
            for path in paths_checked:
                if not os.path.isdir(path):
                    continue
                for f in os.listdir(path):
                    full_path = os.path.join(path, f)
                    if not (os.path.isfile(full_path) and os.path.splitext(f)[1] in ('.crt', '.pem')):
                        continue
                    try:
                        if full_path not in LOADED_VERIFY_LOCATIONS:
                            with open(full_path, 'rb') as cert_file:
                                b_cert = cert_file.read()
                            if HAS_SSLCONTEXT:
                                try:
                                    cadata.extend(
                                        ssl.PEM_cert_to_DER_cert(
                                            to_native(b_cert, errors='surrogate_or_strict')
                                        )
                                    )
                                except Exception:
                                    # not convertible, skip this file
                                    continue
                            else:
                                os.write(tmp_fd, b_cert)
                                os.write(tmp_fd, b'\n')
                    except (OSError, IOError):
                        pass
        finally:
            # mkstemp() hands back an open descriptor; the file is only
            # referenced by path from here on, so release the descriptor.
            if tmp_fd is not None:
                os.close(tmp_fd)

        if HAS_SSLCONTEXT:
            default_verify_paths = ssl.get_default_verify_paths()
            # capath can be None; keep None out of the list of paths
            if default_verify_paths.capath is not None:
                paths_checked[:0] = [default_verify_paths.capath]

        return (tmp_path, cadata, paths_checked)

    def validate_proxy_response(self, response, valid_codes=None):
        '''
        make sure we get back a valid code from the proxy

        :arg response: Bytes received from the proxy, starting with the status line
        :kwarg valid_codes: (optional) Accepted status codes, defaults to ``[200]``
        :raises ProxyError: if the status line cannot be parsed or the status
            code is not one of ``valid_codes``
        '''
        valid_codes = [200] if valid_codes is None else valid_codes

        try:
            resp_code = int(re.match(br'(HTTP/\d\.\d) (\d\d\d) (.*)', response).group(2))
            accepted = resp_code in valid_codes
        except Exception:
            # anything that does not look like a status line is a failure
            accepted = False

        if not accepted:
            raise ProxyError('Connection to proxy failed')

    def detect_no_proxy(self, url):
        '''
        Detect if the 'no_proxy' environment variable is set and honor those locations.

        :arg url: URL that is about to be requested
        :returns: ``False`` when the URL's network location ends with one of
            the ``no_proxy`` entries (the proxy must not be used), ``True``
            otherwise
        '''
        env_no_proxy = os.environ.get('no_proxy')
        if env_no_proxy:
            netloc = urlparse(url).netloc
            host_only = netloc.split(':')[0]

            for host in env_no_proxy.split(','):
                # Tolerate "a.com, b.com" and stray commas: an empty entry
                # would otherwise match every host via endswith('').
                host = host.strip()
                if not host:
                    continue
                if netloc.endswith(host) or host_only.endswith(host):
                    # Our requested URL matches something in no_proxy, so don't
                    # use the proxy for this
                    return False
        return True

    def make_context(self, cafile, cadata):
        '''
        Create an SSL context loaded with the given CA material.

        :arg cafile: Path of a CA file; ignored when ``self.ca_path`` is set
        :arg cadata: DER encoded CA data; ignored when ``self.ca_path`` is set
        :returns: the created context
        :raises NotImplementedError: if neither SSLContext nor the urllib3
            PyOpenSSLContext is available
        '''
        cafile = self.ca_path or cafile
        if self.ca_path:
            cadata = None
        else:
            # an empty bytearray must be passed on as None
            cadata = cadata or None

        if HAS_SSLCONTEXT:
            context = create_default_context(cafile=cafile)
        elif HAS_URLLIB3_PYOPENSSLCONTEXT:
            context = PyOpenSSLContext(PROTOCOL)
        else:
            raise NotImplementedError('Host libraries are too old to support creating an sslcontext')

        if cafile or cadata:
            context.load_verify_locations(cafile=cafile, cadata=cadata)
        return context

    def _wrap_socket(self, sock, context, ca_certs):
        '''Wrap ``sock`` for TLS using the context when there is one, else the
        urllib3 helper, else ``ssl.wrap_socket`` plus a hostname check.'''
        if context:
            return context.wrap_socket(sock, server_hostname=self.hostname)
        if HAS_URLLIB3_SSL_WRAP_SOCKET:
            return ssl_wrap_socket(sock, ca_certs=ca_certs, cert_reqs=ssl.CERT_REQUIRED, ssl_version=PROTOCOL, server_hostname=self.hostname)

        wrapped = ssl.wrap_socket(sock, ca_certs=ca_certs, cert_reqs=ssl.CERT_REQUIRED, ssl_version=PROTOCOL)
        try:
            match_hostname(wrapped.getpeercert(), self.hostname)
        except Exception:
            # the caller never sees this socket, so release it here
            wrapped.close()
            raise
        return wrapped

    def http_request(self, req):
        '''
        Validate the certificate of ``self.hostname`` before ``req`` is sent.

        :arg req: The request being processed; it is returned unchanged
        :raises SSLValidationError: if the TLS handshake or hostname check fails
        :raises ProxyError: if ``https_proxy`` cannot be parsed, uses an
            unsupported scheme, or the proxy does not answer the CONNECT
            request properly
        :raises ConnectionError: on any other socket error
        '''
        tmp_ca_cert_path, cadata, paths_checked = self.get_ca_certs()

        # Detect if 'no_proxy' environment variable is set and if our URL is included
        use_proxy = self.detect_no_proxy(req.get_full_url())
        https_proxy = os.environ.get('https_proxy')

        context = None
        try:
            context = self.make_context(tmp_ca_cert_path, cadata)
        except NotImplementedError:
            # We'll make do with no context below
            pass

        s = None
        ssl_s = None
        try:
            if use_proxy and https_proxy:
                proxy_parts = generic_urlparse(urlparse(https_proxy))
                port = proxy_parts.get('port') or 443
                proxy_hostname = proxy_parts.get('hostname', None)
                if proxy_hostname is None or proxy_parts.get('scheme') == '':
                    raise ProxyError("Failed to parse https_proxy environment variable."
                                     " Please make sure you export https proxy as 'https_proxy=<SCHEME>://<IP_ADDRESS>:<PORT>'")

                s = socket.create_connection((proxy_hostname, port))
                if proxy_parts.get('scheme') == 'http':
                    s.sendall(to_bytes(self.CONNECT_COMMAND % (self.hostname, self.port), errors='surrogate_or_strict'))
                    if proxy_parts.get('username'):
                        credentials = "%s:%s" % (proxy_parts.get('username', ''), proxy_parts.get('password', ''))
                        s.sendall(b'Proxy-Authorization: Basic %s\r\n' % base64.b64encode(to_bytes(credentials, errors='surrogate_or_strict')).strip())
                    s.sendall(b'\r\n')
                    connect_result = b""
                    while connect_result.find(b"\r\n\r\n") <= 0:
                        chunk = s.recv(4096)
                        if not chunk:
                            # The proxy closed the connection before the end of
                            # the headers; without this check the loop would
                            # never terminate.
                            raise ProxyError('Connection to proxy failed')
                        connect_result += chunk
                        # 128 kilobytes of headers should be enough for everyone.
                        if len(connect_result) > 131072:
                            raise ProxyError('Proxy sent too verbose headers. Only 128KiB allowed.')
                    self.validate_proxy_response(connect_result)
                    ssl_s = self._wrap_socket(s, context, tmp_ca_cert_path)
                else:
                    raise ProxyError('Unsupported proxy scheme: %s. Currently ansible only supports HTTP proxies.' % proxy_parts.get('scheme'))
            else:
                s = socket.create_connection((self.hostname, self.port))
                ssl_s = self._wrap_socket(s, context, tmp_ca_cert_path)
        except (ssl.SSLError, CertificateError) as e:
            build_ssl_validation_error(self.hostname, self.port, paths_checked, e)
        except socket.error as e:
            raise ConnectionError('Failed to connect to %s at port %s: %s' % (self.hostname, self.port, to_native(e)))
        finally:
            # Release the connection on every exit path, closing the wrapped
            # socket as well as the raw one.
            for sock in (ssl_s, s):
                if sock is not None:
                    try:
                        sock.close()
                    except socket.error:
                        pass

        return req

    https_request = http_request
1003
1004
def maybe_add_ssl_handler(url, validate_certs, ca_path=None):
    '''Return an SSLValidationHandler for ``url`` when it is an https URL and
    ``validate_certs`` is true, otherwise return None.

    :raises NoSSLError: if validation is wanted but SSL support is missing
    '''
    parts = generic_urlparse(urlparse(url))
    wants_validation = parts.scheme == 'https' and validate_certs
    if not wants_validation:
        return None

    if not HAS_SSL:
        raise NoSSLError('SSL validation is not available in your version of python. You can use validate_certs=False,'
                         ' however this is unsafe and not recommended')

    # the port falls back to the https default when the URL has none
    target_port = parts.port or 443
    return SSLValidationHandler(parts.hostname, target_port, ca_path=ca_path)
1015
1016
def rfc2822_date_string(timetuple, zone='-0000'):
    """Format a timetuple as an RFC 2822 date string, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    ``zone`` is appended verbatim and defaults to ``-0000``.

    Copied from email.utils.formatdate and modified for separate use
    """
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    # index 6 is the weekday (Monday == 0), index 1 the 1-based month
    weekday = day_names[timetuple[6]]
    day = timetuple[2]
    month = month_names[timetuple[1] - 1]
    year = timetuple[0]
    hour = timetuple[3]
    minute = timetuple[4]
    second = timetuple[5]

    fields = (weekday, day, month, year, hour, minute, second, zone)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % fields
1032
1033
class Request:
    def __init__(self, headers=None, use_proxy=True, force=False, timeout=10, validate_certs=True,
                 url_username=None, url_password=None, http_agent=None, force_basic_auth=False,
                 follow_redirects='urllib2', client_cert=None, client_key=None, cookies=None, unix_socket=None,
                 ca_path=None):
        """This class works somewhat similarly to the ``Session`` class from requests
        by defining a cookiejar that can be used across requests as well as cascaded defaults that
        can apply to repeated requests

        The values given here are stored on the instance and used by ``open``
        for every argument that is passed to it as ``None``.

        For documentation of params, see ``Request.open``

        :raises ValueError: if ``headers`` is given and is not a dict

        >>> from ansible.module_utils.urls import Request
        >>> r = Request()
        >>> r.open('GET', 'http://httpbin.org/cookies/set?k1=v1').read()
        '{\n  "cookies": {\n    "k1": "v1"\n  }\n}\n'
        >>> r = Request(url_username='user', url_password='passwd')
        >>> r.open('GET', 'http://httpbin.org/basic-auth/user/passwd').read()
        '{\n  "authenticated": true, \n  "user": "user"\n}\n'
        >>> r = Request(headers=dict(foo='bar'))
        >>> r.open('GET', 'http://httpbin.org/get', headers=dict(baz='qux')).read()

        """

        self.headers = headers or {}
        if not isinstance(self.headers, dict):
            raise ValueError("headers must be a dict: %r" % self.headers)
        self.use_proxy = use_proxy
        self.force = force
        self.timeout = timeout
        self.validate_certs = validate_certs
        self.url_username = url_username
        self.url_password = url_password
        self.http_agent = http_agent
        self.force_basic_auth = force_basic_auth
        self.follow_redirects = follow_redirects
        self.client_cert = client_cert
        self.client_key = client_key
        self.unix_socket = unix_socket
        self.ca_path = ca_path
        # anything that is not already a CookieJar is replaced by a fresh, empty jar
        if isinstance(cookies, cookiejar.CookieJar):
            self.cookies = cookies
        else:
            self.cookies = cookiejar.CookieJar()

    def _fallback(self, value, fallback):
        """Return ``value`` unless it is ``None``, in which case return ``fallback``."""
        if value is None:
            return fallback
        return value

    def open(self, method, url, data=None, headers=None, use_proxy=None,
             force=None, last_mod_time=None, timeout=None, validate_certs=None,
             url_username=None, url_password=None, http_agent=None,
             force_basic_auth=None, follow_redirects=None,
             client_cert=None, client_key=None, cookies=None, use_gssapi=False,
             unix_socket=None, ca_path=None, unredirected_headers=None):
        """
        Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3)

        Does not require the module environment

        Returns :class:`HTTPResponse` object.

        Arguments passed as ``None`` fall back to the value that was given to
        the constructor. ``headers`` are merged on top of the instance headers.

        :arg method: method for the request
        :arg url: URL to request

        :kwarg data: (optional) bytes, or file-like object to send
            in the body of the request
        :kwarg headers: (optional) Dictionary of HTTP Headers to send with the
            request
        :kwarg use_proxy: (optional) Boolean of whether or not to use proxy
        :kwarg force: (optional) Boolean of whether or not to set `cache-control: no-cache` header
        :kwarg last_mod_time: (optional) Datetime object to use when setting If-Modified-Since header
        :kwarg timeout: (optional) How long to wait for the server to send
            data before giving up, as a float
        :kwarg validate_certs: (optional) Boolean that controls whether we verify
            the server's TLS certificate
        :kwarg url_username: (optional) String of the user to use when authenticating
        :kwarg url_password: (optional) String of the password to use when authenticating
        :kwarg http_agent: (optional) String of the User-Agent to use in the request
        :kwarg force_basic_auth: (optional) Boolean determining if auth header should be sent in the initial request
        :kwarg follow_redirects: (optional) String of urllib2, all/yes, safe, none to determine how redirects are
            followed, see RedirectHandlerFactory for more information
        :kwarg client_cert: (optional) PEM formatted certificate chain file to be used for SSL client authentication.
            This file can also include the key as well, and if the key is included, client_key is not required
        :kwarg client_key: (optional) PEM formatted file that contains your private key to be used for SSL client
            authentication. If client_cert contains both the certificate and key, this option is not required
        :kwarg cookies: (optional) CookieJar object to send with the
            request
        :kwarg use_gssapi: (optional) Use GSSAPI handler of requests.
        :kwarg unix_socket: (optional) String of file system path to unix socket file to use when establishing
            connection to the provided url
        :kwarg ca_path: (optional) String of file system path to CA cert bundle to use
        :kwarg unredirected_headers: (optional) A list of headers to not attach on a redirected request
        :raises ValueError: if ``headers`` is given and is not a dict
        :returns: HTTPResponse. Added in Ansible 2.9
        """

        method = method.upper()

        if headers is None:
            headers = {}
        elif not isinstance(headers, dict):
            raise ValueError("headers must be a dict")
        # per-call headers win over the instance-level defaults
        headers = dict(self.headers, **headers)

        use_proxy = self._fallback(use_proxy, self.use_proxy)
        force = self._fallback(force, self.force)
        timeout = self._fallback(timeout, self.timeout)
        validate_certs = self._fallback(validate_certs, self.validate_certs)
        url_username = self._fallback(url_username, self.url_username)
        url_password = self._fallback(url_password, self.url_password)
        http_agent = self._fallback(http_agent, self.http_agent)
        force_basic_auth = self._fallback(force_basic_auth, self.force_basic_auth)
        follow_redirects = self._fallback(follow_redirects, self.follow_redirects)
        client_cert = self._fallback(client_cert, self.client_cert)
        client_key = self._fallback(client_key, self.client_key)
        cookies = self._fallback(cookies, self.cookies)
        unix_socket = self._fallback(unix_socket, self.unix_socket)
        ca_path = self._fallback(ca_path, self.ca_path)

        # handlers are collected in this list and passed to build_opener() below
        handlers = []

        if unix_socket:
            handlers.append(UnixHTTPHandler(unix_socket))

        # The validation handler is only registered as a handler when there is
        # no SSLContext support; otherwise it is used further down to build
        # the context handed to the https handler.
        ssl_handler = maybe_add_ssl_handler(url, validate_certs, ca_path=ca_path)
        if ssl_handler and not HAS_SSLCONTEXT:
            handlers.append(ssl_handler)
        if HAS_GSSAPI and use_gssapi:
            handlers.append(urllib_gssapi.HTTPSPNEGOAuthHandler())

        parsed = generic_urlparse(urlparse(url))
        if parsed.scheme != 'ftp':
            username = url_username

            if username:
                password = url_password
                netloc = parsed.netloc
            elif '@' in parsed.netloc:
                # credentials embedded in the URL are only used when no
                # explicit username was supplied
                credentials, netloc = parsed.netloc.split('@', 1)
                if ':' in credentials:
                    username, password = credentials.split(':', 1)
                else:
                    username = credentials
                    password = ''

                parsed_list = parsed.as_list()
                parsed_list[1] = netloc

                # reconstruct url without credentials
                url = urlunparse(parsed_list)

            if username and not force_basic_auth:
                passman = urllib_request.HTTPPasswordMgrWithDefaultRealm()

                # this creates a password manager
                passman.add_password(None, netloc, username, password)

                # because we have put None at the start it will always
                # use this username/password combination for  urls
                # for which `theurl` is a super-url
                authhandler = urllib_request.HTTPBasicAuthHandler(passman)
                digest_authhandler = urllib_request.HTTPDigestAuthHandler(passman)

                # create the AuthHandler
                handlers.append(authhandler)
                handlers.append(digest_authhandler)

            elif username and force_basic_auth:
                # send the credentials with the first request
                headers["Authorization"] = basic_auth_header(username, password)

            else:
                # no credentials at all: look for a netrc entry for this host
                # (file named by the NETRC environment variable, if set)
                try:
                    rc = netrc.netrc(os.environ.get('NETRC'))
                    login = rc.authenticators(parsed.hostname)
                except IOError:
                    login = None

                if login:
                    username, _, password = login
                    if username and password:
                        headers["Authorization"] = basic_auth_header(username, password)

        if not use_proxy:
            # a ProxyHandler with an empty mapping
            proxyhandler = urllib_request.ProxyHandler({})
            handlers.append(proxyhandler)

        context = None
        if HAS_SSLCONTEXT and not validate_certs:
            # In 2.7.9, the default context validates certificates
            context = SSLContext(ssl.PROTOCOL_SSLv23)
            if ssl.OP_NO_SSLv2:
                context.options |= ssl.OP_NO_SSLv2
            context.options |= ssl.OP_NO_SSLv3
            context.verify_mode = ssl.CERT_NONE
            context.check_hostname = False
            handlers.append(HTTPSClientAuthHandler(client_cert=client_cert,
                                                   client_key=client_key,
                                                   context=context,
                                                   unix_socket=unix_socket))
        elif client_cert or unix_socket:
            handlers.append(HTTPSClientAuthHandler(client_cert=client_cert,
                                                   client_key=client_key,
                                                   unix_socket=unix_socket))

        if ssl_handler and HAS_SSLCONTEXT and validate_certs:
            # build a validating context from the CA certificates the handler finds
            tmp_ca_path, cadata, paths_checked = ssl_handler.get_ca_certs()
            try:
                context = ssl_handler.make_context(tmp_ca_path, cadata)
            except NotImplementedError:
                pass

        # pre-2.6 versions of python cannot use the custom https
        # handler, since the socket class is lacking create_connection.
        # Some python builds lack HTTPS support.
        if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
            kwargs = {}
            if HAS_SSLCONTEXT:
                kwargs['context'] = context
            handlers.append(CustomHTTPSHandler(**kwargs))

        handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs, ca_path=ca_path))

        # add some nicer cookie handling
        if cookies is not None:
            handlers.append(urllib_request.HTTPCookieProcessor(cookies))

        # NOTE: install_opener() replaces the process-wide default opener
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

        data = to_bytes(data, nonstring='passthru')
        request = RequestWithMethod(url, method, data)

        # add the custom agent header, to help prevent issues
        # with sites that block the default urllib agent string
        if http_agent:
            request.add_header('User-agent', http_agent)

        # Cache control
        # Either we directly force a cache refresh
        if force:
            request.add_header('cache-control', 'no-cache')
        # or we do it if the original is more recent than our copy
        elif last_mod_time:
            tstamp = rfc2822_date_string(last_mod_time.timetuple(), 'GMT')
            request.add_header('If-Modified-Since', tstamp)

        # user defined headers now, which may override things we've set above
        unredirected_headers = unredirected_headers or []
        for header in headers:
            if header in unredirected_headers:
                request.add_unredirected_header(header, headers[header])
            else:
                request.add_header(header, headers[header])

        # positional arguments for urlopen(): the request, no extra data, then the timeout
        urlopen_args = [request, None]
        if sys.version_info >= (2, 6, 0):
            # urlopen in python prior to 2.6.0 did not
            # have a timeout parameter
            urlopen_args.append(timeout)

        r = urllib_request.urlopen(*urlopen_args)
        return r

    def get(self, url, **kwargs):
        r"""Sends a GET request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('GET', url, **kwargs)

    def options(self, url, **kwargs):
        r"""Sends an OPTIONS request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('OPTIONS', url, **kwargs)

    def head(self, url, **kwargs):
        r"""Sends a HEAD request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('HEAD', url, **kwargs)

    def post(self, url, data=None, **kwargs):
        r"""Sends a POST request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request.
        :kwarg data: (optional) bytes, or file-like object to send in the body of the request.
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('POST', url, data=data, **kwargs)

    def put(self, url, data=None, **kwargs):
        r"""Sends a PUT request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request.
        :kwarg data: (optional) bytes, or file-like object to send in the body of the request.
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('PUT', url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        r"""Sends a PATCH request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request.
        :kwarg data: (optional) bytes, or file-like object to send in the body of the request.
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('PATCH', url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        r"""Sends a DELETE request. Returns :class:`HTTPResponse` object.

        :arg url: URL to request
        :kwarg \*\*kwargs: Optional arguments that ``open`` takes.
        :returns: HTTPResponse
        """

        return self.open('DELETE', url, **kwargs)
1369
1370
def open_url(url, data=None, headers=None, method=None, use_proxy=True,
             force=False, last_mod_time=None, timeout=10, validate_certs=True,
             url_username=None, url_password=None, http_agent=None,
             force_basic_auth=False, follow_redirects='urllib2',
             client_cert=None, client_key=None, cookies=None,
             use_gssapi=False, unix_socket=None, ca_path=None,
             unredirected_headers=None):
    '''
    Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3)

    Does not require the module environment

    When no method is given, POST is used if there is data to send and GET
    otherwise. Everything else is handed to ``Request.open`` unchanged.
    '''
    if not method:
        if data:
            method = 'POST'
        else:
            method = 'GET'

    open_options = dict(
        data=data,
        headers=headers,
        use_proxy=use_proxy,
        force=force,
        last_mod_time=last_mod_time,
        timeout=timeout,
        validate_certs=validate_certs,
        url_username=url_username,
        url_password=url_password,
        http_agent=http_agent,
        force_basic_auth=force_basic_auth,
        follow_redirects=follow_redirects,
        client_cert=client_cert,
        client_key=client_key,
        cookies=cookies,
        use_gssapi=use_gssapi,
        unix_socket=unix_socket,
        ca_path=ca_path,
        unredirected_headers=unredirected_headers,
    )
    return Request().open(method, url, **open_options)
1391
1392
1393#
1394# Module-related functions
1395#
1396
1397
def basic_auth_header(username, password):
    """Build the value of an Authorization header for basic auth.

    The ``username:password`` pair is converted to bytes, base64 encoded and
    prefixed with ``Basic``; the result is a byte string.
    """
    pair = "%s:%s" % (username, password)
    encoded_pair = base64.b64encode(to_bytes(pair, errors='surrogate_or_strict'))
    return b"Basic %s" % encoded_pair
1403
1404
def url_argument_spec():
    '''
    Return a fresh argument spec dict with the options shared by modules
    that request content via urllib/urllib2
    '''
    spec = {
        'url': {'type': 'str'},
        'force': {
            'type': 'bool',
            'default': False,
            'aliases': ['thirsty'],
            'deprecated_aliases': [{'name': 'thirsty', 'version': '2.13'}],
        },
        'http_agent': {'type': 'str', 'default': 'ansible-httpget'},
        'use_proxy': {'type': 'bool', 'default': True},
        'validate_certs': {'type': 'bool', 'default': True},
        'url_username': {'type': 'str'},
        'url_password': {'type': 'str', 'no_log': True},
        'force_basic_auth': {'type': 'bool', 'default': False},
        'client_cert': {'type': 'path'},
        'client_key': {'type': 'path'},
    }
    return spec
1422
1423
def _read_http_error_body(error):
    """Return the body carried by an ``HTTPError``, or ``''`` if it has none.

    The error response is closed once its body has been read.
    """
    # An HTTPError created without a file object cannot be read. On Python 3
    # the resulting failure is not an AttributeError, so check explicitly
    # instead of relying on the exception handler below.
    if getattr(error, 'fp', None) is None:
        return ''

    try:
        body = error.read()
    except AttributeError:
        return ''

    # The body has been consumed; release the underlying connection
    error.close()
    return body


def fetch_url(module, url, data=None, headers=None, method=None,
              use_proxy=True, force=False, last_mod_time=None, timeout=10,
              use_gssapi=False, unix_socket=None, ca_path=None, cookies=None):
    """Sends a request via HTTP(S) or FTP (needs the module as parameter)

    The following settings are read from ``module.params``: ``validate_certs``,
    ``url_username``, ``url_password``, ``http_agent``, ``force_basic_auth``,
    ``follow_redirects``, ``client_cert`` and ``client_key``.

    :arg module: The AnsibleModule (used to get username, password etc., see above).
    :arg url:             The url to use.

    :kwarg data:          The data to be sent (in case of POST/PUT).
    :kwarg headers:       A dict with the request headers.
    :kwarg method:        "POST", "PUT", etc.
    :kwarg boolean use_proxy:     Default: True
    :kwarg boolean force: If True: Do not get a cached copy (Default: False)
    :kwarg last_mod_time: Default: None
    :kwarg int timeout:   Default: 10
    :kwarg boolean use_gssapi:   Default: False
    :kwarg unix_socket: (optional) String of file system path to unix socket file to use when establishing
        connection to the provided url
    :kwarg ca_path: (optional) String of file system path to CA cert bundle to use
    :kwarg cookies: (optional) A ``CookieJar`` instance to use for the request. Any other value
        is replaced by a new ``LWPCookieJar``.

    :returns: A tuple of (**response**, **info**). Use ``response.read()`` to read the data.
        **response** is ``None`` when the request failed. The **info** dict contains the
        'url', the 'status' (``-1`` when no HTTP status is available), a 'msg', and on success the
        lower-cased response headers plus 'cookies' and 'cookies_string'. When a HTTPError
        (status >= 400) occurred then ``info['body']`` contains the error response data.
        An unexpected error adds the traceback as ``info['exception']``.

    Missing SSL support, connection errors and invalid values are not returned; they are
    reported through ``module.fail_json``.

    Example::

        data={...}
        resp, info = fetch_url(module,
                               "http://example.com",
                               data=module.jsonify(data),
                               headers={'Content-type': 'application/json'},
                               method="POST")
        status_code = info["status"]
        if resp is None:
            # no response object on failure; HTTP errors provide the body in info
            body = info.get('body')
        else:
            body = resp.read()
    """

    if not HAS_URLPARSE:
        module.fail_json(msg='urlparse is not installed')

    # Get validate_certs from the module params
    validate_certs = module.params.get('validate_certs', True)

    username = module.params.get('url_username', '')
    password = module.params.get('url_password', '')
    http_agent = module.params.get('http_agent', 'ansible-httpget')
    force_basic_auth = module.params.get('force_basic_auth', '')

    follow_redirects = module.params.get('follow_redirects', 'urllib2')

    client_cert = module.params.get('client_cert')
    client_key = module.params.get('client_key')

    if not isinstance(cookies, cookiejar.CookieJar):
        cookies = cookiejar.LWPCookieJar()

    r = None
    info = dict(url=url, status=-1)

    # ensure we use proper tempdir; it is swapped directly before the try
    # block so that the finally clause always restores the previous value
    old_tempdir = tempfile.tempdir
    tempfile.tempdir = module.tmpdir
    try:
        r = open_url(url, data=data, headers=headers, method=method,
                     use_proxy=use_proxy, force=force, last_mod_time=last_mod_time, timeout=timeout,
                     validate_certs=validate_certs, url_username=username,
                     url_password=password, http_agent=http_agent, force_basic_auth=force_basic_auth,
                     follow_redirects=follow_redirects, client_cert=client_cert,
                     client_key=client_key, cookies=cookies, use_gssapi=use_gssapi,
                     unix_socket=unix_socket, ca_path=ca_path)
        # Lowercase keys, to conform to py2 behavior, so that py3 and py2 are predictable
        info.update(dict((k.lower(), v) for k, v in r.info().items()))

        # Don't be lossy, append header values for duplicate headers
        # In Py2 there is nothing that needs done, py2 does this for us
        if PY3:
            temp_headers = {}
            for name, value in r.headers.items():
                # The same as above, lower case keys to match py2 behavior, and create more consistent results
                name = name.lower()
                if name in temp_headers:
                    temp_headers[name] = ', '.join((temp_headers[name], value))
                else:
                    temp_headers[name] = value
            info.update(temp_headers)

        # parse the cookies into a nice dictionary
        cookie_list = []
        cookie_dict = dict()
        # Python sorts cookies in order of most specific (ie. longest) path first. See ``CookieJar._cookie_attrs``
        # Cookies with the same path are reversed from response order.
        # This code makes no assumptions about that, and accepts the order given by python
        for cookie in cookies:
            cookie_dict[cookie.name] = cookie.value
            cookie_list.append((cookie.name, cookie.value))
        info['cookies_string'] = '; '.join('%s=%s' % c for c in cookie_list)

        info['cookies'] = cookie_dict
        # finally update the result with a message about the fetch
        info.update(dict(msg="OK (%s bytes)" % r.headers.get('Content-Length', 'unknown'), url=r.geturl(), status=r.code))
    except NoSSLError as e:
        distribution = get_distribution()
        if distribution is not None and distribution.lower() == 'redhat':
            module.fail_json(msg='%s. You can also install python-ssl from EPEL' % to_native(e), **info)
        else:
            module.fail_json(msg='%s' % to_native(e), **info)
    except (ConnectionError, ValueError) as e:
        module.fail_json(msg=to_native(e), **info)
    except urllib_error.HTTPError as e:
        body = _read_http_error_body(e)

        # Try to add exception info to the output but don't fail if we can't
        try:
            # Lowercase keys, to conform to py2 behavior, so that py3 and py2 are predictable
            info.update(dict((k.lower(), v) for k, v in e.info().items()))
        except Exception:
            pass

        info.update({'msg': to_native(e), 'body': body, 'status': e.code})

    except urllib_error.URLError as e:
        code = int(getattr(e, 'code', -1))
        info.update(dict(msg="Request failed: %s" % to_native(e), status=code))
    except socket.error as e:
        info.update(dict(msg="Connection failure: %s" % to_native(e), status=-1))
    except httplib.BadStatusLine as e:
        info.update(dict(msg="Connection failure: connection was closed before a valid response was received: %s" % to_native(e.line), status=-1))
    except Exception as e:
        info.update(dict(msg="An unknown error occurred: %s" % to_native(e), status=-1),
                    exception=traceback.format_exc())
    finally:
        tempfile.tempdir = old_tempdir

    return r, info
1562
1563
def fetch_file(module, url, data=None, headers=None, method=None,
               use_proxy=True, force=False, last_mod_time=None, timeout=10):
    '''Download and save a file via HTTP(S) or FTP (needs the module as parameter).
    This is basically a wrapper around fetch_url().

    The file is written to a temporary file inside ``module.tmpdir`` whose name
    is derived from the last segment of the url. The temporary file is registered
    with ``module.add_cleanup_file()``. Any failure is reported through
    ``module.fail_json()``.

    :arg module: The AnsibleModule (used to get username, password etc., see fetch_url()).
    :arg url:             The url to use.

    :kwarg data:          The data to be sent (in case of POST/PUT).
    :kwarg headers:       A dict with the request headers.
    :kwarg method:        "POST", "PUT", etc.
    :kwarg boolean use_proxy:     Default: True
    :kwarg boolean force: If True: Do not get a cached copy (Default: False)
    :kwarg last_mod_time: Default: None
    :kwarg int timeout:   Default: 10

    :returns: A string, the path to the downloaded file.
    '''
    # download file
    bufsize = 65536
    # Index with -1 so that a url without any '/' yields the whole string instead
    # of raising IndexError; the failed request is then reported via fail_json below.
    file_name, file_ext = os.path.splitext(str(url.rsplit('/', 1)[-1]))
    fetch_temp_file = tempfile.NamedTemporaryFile(dir=module.tmpdir, prefix=file_name, suffix=file_ext, delete=False)
    module.add_cleanup_file(fetch_temp_file.name)
    try:
        rsp, info = fetch_url(module, url, data, headers, method, use_proxy, force, last_mod_time, timeout)
        if not rsp:
            module.fail_json(msg="Failure downloading %s, %s" % (url, info['msg']))
        try:
            chunk = rsp.read(bufsize)
            while chunk:
                fetch_temp_file.write(chunk)
                chunk = rsp.read(bufsize)
        finally:
            # Release the connection whether or not the copy succeeded
            rsp.close()
        # Closed inside the try so that errors while flushing are reported via fail_json
        fetch_temp_file.close()
    except Exception as e:
        module.fail_json(msg="Failure downloading %s, %s" % (url, to_native(e)))
    finally:
        # Make sure the file handle is released on every failure path;
        # closing an already closed file is a no-op.
        fetch_temp_file.close()
    return fetch_temp_file.name
1599