1#!/usr/bin/python 2# -*- coding: utf-8 -*- 3 4# Copyright: (c) 2012, Jan-Piet Mens <jpmens () gmail.com> 5# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) 6 7from __future__ import absolute_import, division, print_function 8__metaclass__ = type 9 10 11DOCUMENTATION = r''' 12--- 13module: get_url 14short_description: Downloads files from HTTP, HTTPS, or FTP to node 15description: 16 - Downloads files from HTTP, HTTPS, or FTP to the remote server. The remote 17 server I(must) have direct access to the remote resource. 18 - By default, if an environment variable C(<protocol>_proxy) is set on 19 the target host, requests will be sent through that proxy. This 20 behaviour can be overridden by setting a variable for this task 21 (see `setting the environment 22 <https://docs.ansible.com/playbooks_environment.html>`_), 23 or by using the use_proxy option. 24 - HTTP redirects can redirect from HTTP to HTTPS so you should be sure that 25 your proxy environment for both protocols is correct. 26 - From Ansible 2.4 when run with C(--check), it will do a HEAD request to validate the URL but 27 will not download the entire file or verify it against hashes. 28 - For Windows targets, use the M(ansible.windows.win_get_url) module instead. 29version_added: '0.6' 30options: 31 url: 32 description: 33 - HTTP, HTTPS, or FTP URL in the form (http|https|ftp)://[user[:pass]]@host.domain[:port]/path 34 type: str 35 required: true 36 dest: 37 description: 38 - Absolute path of where to download the file to. 39 - If C(dest) is a directory, either the server provided filename or, if 40 none provided, the base name of the URL on the remote server will be 41 used. If a directory, C(force) has no effect. 42 - If C(dest) is a directory, the file will always be downloaded 43 (regardless of the C(force) option), but replaced only if the contents changed.. 44 type: path 45 required: true 46 tmp_dest: 47 description: 48 - Absolute path of where temporary file is downloaded to. 49 - When run on Ansible 2.5 or greater, path defaults to ansible's remote_tmp setting 50 - When run on Ansible prior to 2.5, it defaults to C(TMPDIR), C(TEMP) or C(TMP) env variables or a platform specific value. 51 - U(https://docs.python.org/2/library/tempfile.html#tempfile.tempdir) 52 type: path 53 version_added: '2.1' 54 force: 55 description: 56 - If C(yes) and C(dest) is not a directory, will download the file every 57 time and replace the file if the contents change. If C(no), the file 58 will only be downloaded if the destination does not exist. Generally 59 should be C(yes) only for small local files. 60 - Prior to 0.6, this module behaved as if C(yes) was the default. 61 - Alias C(thirsty) has been deprecated and will be removed in 2.13. 62 type: bool 63 default: no 64 aliases: [ thirsty ] 65 version_added: '0.7' 66 backup: 67 description: 68 - Create a backup file including the timestamp information so you can get 69 the original file back if you somehow clobbered it incorrectly. 70 type: bool 71 default: no 72 version_added: '2.1' 73 sha256sum: 74 description: 75 - If a SHA-256 checksum is passed to this parameter, the digest of the 76 destination file will be calculated after it is downloaded to ensure 77 its integrity and verify that the transfer completed successfully. 78 This option is deprecated and will be removed in version 2.14. Use 79 option C(checksum) instead. 80 default: '' 81 version_added: "1.3" 82 checksum: 83 description: 84 - 'If a checksum is passed to this parameter, the digest of the 85 destination file will be calculated after it is downloaded to ensure 86 its integrity and verify that the transfer completed successfully. 87 Format: <algorithm>:<checksum|url>, e.g. checksum="sha256:D98291AC[...]B6DC7B97", 88 checksum="sha256:http://example.com/path/sha256sum.txt"' 89 - If you worry about portability, only the sha1 algorithm is available 90 on all platforms and python versions. 91 - The third party hashlib library can be installed for access to additional algorithms. 92 - Additionally, if a checksum is passed to this parameter, and the file exist under 93 the C(dest) location, the I(destination_checksum) would be calculated, and if 94 checksum equals I(destination_checksum), the file download would be skipped 95 (unless C(force) is true). If the checksum does not equal I(destination_checksum), 96 the destination file is deleted. 97 type: str 98 default: '' 99 version_added: "2.0" 100 use_proxy: 101 description: 102 - if C(no), it will not use a proxy, even if one is defined in 103 an environment variable on the target hosts. 104 type: bool 105 default: yes 106 validate_certs: 107 description: 108 - If C(no), SSL certificates will not be validated. 109 - This should only be used on personally controlled sites using self-signed certificates. 110 type: bool 111 default: yes 112 timeout: 113 description: 114 - Timeout in seconds for URL request. 115 type: int 116 default: 10 117 version_added: '1.8' 118 headers: 119 description: 120 - Add custom HTTP headers to a request in hash/dict format. 121 - The hash/dict format was added in Ansible 2.6. 122 - Previous versions used a C("key:value,key:value") string format. 123 - The C("key:value,key:value") string format is deprecated and has been removed in version 2.10. 124 type: dict 125 version_added: '2.0' 126 url_username: 127 description: 128 - The username for use in HTTP basic authentication. 129 - This parameter can be used without C(url_password) for sites that allow empty passwords. 130 - Since version 2.8 you can also use the C(username) alias for this option. 131 type: str 132 aliases: ['username'] 133 version_added: '1.6' 134 url_password: 135 description: 136 - The password for use in HTTP basic authentication. 137 - If the C(url_username) parameter is not specified, the C(url_password) parameter will not be used. 138 - Since version 2.8 you can also use the 'password' alias for this option. 139 type: str 140 aliases: ['password'] 141 version_added: '1.6' 142 force_basic_auth: 143 description: 144 - Force the sending of the Basic authentication header upon initial request. 145 - httplib2, the library used by the uri module only sends authentication information when a webservice 146 responds to an initial request with a 401 status. Since some basic auth services do not properly 147 send a 401, logins will fail. 148 type: bool 149 default: no 150 version_added: '2.0' 151 client_cert: 152 description: 153 - PEM formatted certificate chain file to be used for SSL client authentication. 154 - This file can also include the key as well, and if the key is included, C(client_key) is not required. 155 type: path 156 version_added: '2.4' 157 client_key: 158 description: 159 - PEM formatted file that contains your private key to be used for SSL client authentication. 160 - If C(client_cert) contains both the certificate and key, this option is not required. 161 type: path 162 version_added: '2.4' 163 http_agent: 164 description: 165 - Header to identify as, generally appears in web server logs. 166 type: str 167 default: ansible-httpget 168# informational: requirements for nodes 169extends_documentation_fragment: 170 - files 171notes: 172 - For Windows targets, use the M(ansible.windows.win_get_url) module instead. 173seealso: 174- module: ansible.builtin.uri 175- module: ansible.windows.win_get_url 176author: 177- Jan-Piet Mens (@jpmens) 178''' 179 180EXAMPLES = r''' 181- name: Download foo.conf 182 get_url: 183 url: http://example.com/path/file.conf 184 dest: /etc/foo.conf 185 mode: '0440' 186 187- name: Download file and force basic auth 188 get_url: 189 url: http://example.com/path/file.conf 190 dest: /etc/foo.conf 191 force_basic_auth: yes 192 193- name: Download file with custom HTTP headers 194 get_url: 195 url: http://example.com/path/file.conf 196 dest: /etc/foo.conf 197 headers: 198 key1: one 199 key2: two 200 201- name: Download file with check (sha256) 202 get_url: 203 url: http://example.com/path/file.conf 204 dest: /etc/foo.conf 205 checksum: sha256:b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c 206 207- name: Download file with check (md5) 208 get_url: 209 url: http://example.com/path/file.conf 210 dest: /etc/foo.conf 211 checksum: md5:66dffb5228a211e61d6d7ef4a86f5758 212 213- name: Download file with checksum url (sha256) 214 get_url: 215 url: http://example.com/path/file.conf 216 dest: /etc/foo.conf 217 checksum: sha256:http://example.com/path/sha256sum.txt 218 219- name: Download file from a file path 220 get_url: 221 url: file:///tmp/afile.txt 222 dest: /tmp/afilecopy.txt 223 224- name: < Fetch file that requires authentication. 225 username/password only available since 2.8, in older versions you need to use url_username/url_password 226 get_url: 227 url: http://example.com/path/file.conf 228 dest: /etc/foo.conf 229 username: bar 230 password: '{{ mysecret }}' 231''' 232 233RETURN = r''' 234backup_file: 235 description: name of backup file created after download 236 returned: changed and if backup=yes 237 type: str 238 sample: /path/to/file.txt.2015-02-12@22:09~ 239checksum_dest: 240 description: sha1 checksum of the file after copy 241 returned: success 242 type: str 243 sample: 6e642bb8dd5c2e027bf21dd923337cbb4214f827 244checksum_src: 245 description: sha1 checksum of the file 246 returned: success 247 type: str 248 sample: 6e642bb8dd5c2e027bf21dd923337cbb4214f827 249dest: 250 description: destination file/path 251 returned: success 252 type: str 253 sample: /path/to/file.txt 254elapsed: 255 description: The number of seconds that elapsed while performing the download 256 returned: always 257 type: int 258 sample: 23 259gid: 260 description: group id of the file 261 returned: success 262 type: int 263 sample: 100 264group: 265 description: group of the file 266 returned: success 267 type: str 268 sample: "httpd" 269md5sum: 270 description: md5 checksum of the file after download 271 returned: when supported 272 type: str 273 sample: "2a5aeecc61dc98c4d780b14b330e3282" 274mode: 275 description: permissions of the target 276 returned: success 277 type: str 278 sample: "0644" 279msg: 280 description: the HTTP message from the request 281 returned: always 282 type: str 283 sample: OK (unknown bytes) 284owner: 285 description: owner of the file 286 returned: success 287 type: str 288 sample: httpd 289secontext: 290 description: the SELinux security context of the file 291 returned: success 292 type: str 293 sample: unconfined_u:object_r:user_tmp_t:s0 294size: 295 description: size of the target 296 returned: success 297 type: int 298 sample: 1220 299src: 300 description: source file used after download 301 returned: always 302 type: str 303 sample: /tmp/tmpAdFLdV 304state: 305 description: state of the target 306 returned: success 307 type: str 308 sample: file 309status_code: 310 description: the HTTP status code from the request 311 returned: always 312 type: int 313 sample: 200 314uid: 315 description: owner id of the file, after execution 316 returned: success 317 type: int 318 sample: 100 319url: 320 description: the actual URL used for the request 321 returned: always 322 type: str 323 sample: https://www.ansible.com/ 324''' 325 326import datetime 327import os 328import re 329import shutil 330import tempfile 331import traceback 332 333from ansible.module_utils.basic import AnsibleModule 334from ansible.module_utils.six.moves.urllib.parse import urlsplit 335from ansible.module_utils._text import to_native 336from ansible.module_utils.urls import fetch_url, url_argument_spec 337 338# ============================================================== 339# url handling 340 341 342def url_filename(url): 343 fn = os.path.basename(urlsplit(url)[2]) 344 if fn == '': 345 return 'index.html' 346 return fn 347 348 349def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, headers=None, tmp_dest=''): 350 """ 351 Download data from the url and store in a temporary file. 352 353 Return (tempfile, info about the request) 354 """ 355 if module.check_mode: 356 method = 'HEAD' 357 else: 358 method = 'GET' 359 360 start = datetime.datetime.utcnow() 361 rsp, info = fetch_url(module, url, use_proxy=use_proxy, force=force, last_mod_time=last_mod_time, timeout=timeout, headers=headers, method=method) 362 elapsed = (datetime.datetime.utcnow() - start).seconds 363 364 if info['status'] == 304: 365 module.exit_json(url=url, dest=dest, changed=False, msg=info.get('msg', ''), status_code=info['status'], elapsed=elapsed) 366 367 # Exceptions in fetch_url may result in a status -1, the ensures a proper error to the user in all cases 368 if info['status'] == -1: 369 module.fail_json(msg=info['msg'], url=url, dest=dest, elapsed=elapsed) 370 371 if info['status'] != 200 and not url.startswith('file:/') and not (url.startswith('ftp:/') and info.get('msg', '').startswith('OK')): 372 module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], url=url, dest=dest, elapsed=elapsed) 373 374 # create a temporary file and copy content to do checksum-based replacement 375 if tmp_dest: 376 # tmp_dest should be an existing dir 377 tmp_dest_is_dir = os.path.isdir(tmp_dest) 378 if not tmp_dest_is_dir: 379 if os.path.exists(tmp_dest): 380 module.fail_json(msg="%s is a file but should be a directory." % tmp_dest, elapsed=elapsed) 381 else: 382 module.fail_json(msg="%s directory does not exist." % tmp_dest, elapsed=elapsed) 383 else: 384 tmp_dest = module.tmpdir 385 386 fd, tempname = tempfile.mkstemp(dir=tmp_dest) 387 388 f = os.fdopen(fd, 'wb') 389 try: 390 shutil.copyfileobj(rsp, f) 391 except Exception as e: 392 os.remove(tempname) 393 module.fail_json(msg="failed to create temporary content file: %s" % to_native(e), elapsed=elapsed, exception=traceback.format_exc()) 394 f.close() 395 rsp.close() 396 return tempname, info 397 398 399def extract_filename_from_headers(headers): 400 """ 401 Extracts a filename from the given dict of HTTP headers. 402 403 Looks for the content-disposition header and applies a regex. 404 Returns the filename if successful, else None.""" 405 cont_disp_regex = 'attachment; ?filename="?([^"]+)' 406 res = None 407 408 if 'content-disposition' in headers: 409 cont_disp = headers['content-disposition'] 410 match = re.match(cont_disp_regex, cont_disp) 411 if match: 412 res = match.group(1) 413 # Try preventing any funny business. 414 res = os.path.basename(res) 415 416 return res 417 418 419def is_url(checksum): 420 """ 421 Returns True if checksum value has supported URL scheme, else False.""" 422 supported_schemes = ('http', 'https', 'ftp', 'file') 423 424 return urlsplit(checksum).scheme in supported_schemes 425 426 427# ============================================================== 428# main 429 430def main(): 431 argument_spec = url_argument_spec() 432 433 # setup aliases 434 argument_spec['url_username']['aliases'] = ['username'] 435 argument_spec['url_password']['aliases'] = ['password'] 436 437 argument_spec.update( 438 url=dict(type='str', required=True), 439 dest=dict(type='path', required=True), 440 backup=dict(type='bool', default=False), 441 sha256sum=dict(type='str', default=''), 442 checksum=dict(type='str', default=''), 443 timeout=dict(type='int', default=10), 444 headers=dict(type='dict'), 445 tmp_dest=dict(type='path'), 446 ) 447 448 module = AnsibleModule( 449 # not checking because of daisy chain to file module 450 argument_spec=argument_spec, 451 add_file_common_args=True, 452 supports_check_mode=True, 453 mutually_exclusive=[['checksum', 'sha256sum']], 454 ) 455 456 if module.params.get('thirsty'): 457 module.deprecate('The alias "thirsty" has been deprecated and will be removed, use "force" instead', 458 version='2.13', collection_name='ansible.builtin') 459 460 if module.params.get('sha256sum'): 461 module.deprecate('The parameter "sha256sum" has been deprecated and will be removed, use "checksum" instead', 462 version='2.14', collection_name='ansible.builtin') 463 464 url = module.params['url'] 465 dest = module.params['dest'] 466 backup = module.params['backup'] 467 force = module.params['force'] 468 sha256sum = module.params['sha256sum'] 469 checksum = module.params['checksum'] 470 use_proxy = module.params['use_proxy'] 471 timeout = module.params['timeout'] 472 headers = module.params['headers'] 473 tmp_dest = module.params['tmp_dest'] 474 475 result = dict( 476 changed=False, 477 checksum_dest=None, 478 checksum_src=None, 479 dest=dest, 480 elapsed=0, 481 url=url, 482 ) 483 484 dest_is_dir = os.path.isdir(dest) 485 last_mod_time = None 486 487 # workaround for usage of deprecated sha256sum parameter 488 if sha256sum: 489 checksum = 'sha256:%s' % (sha256sum) 490 491 # checksum specified, parse for algorithm and checksum 492 if checksum: 493 try: 494 algorithm, checksum = checksum.split(':', 1) 495 except ValueError: 496 module.fail_json(msg="The checksum parameter has to be in format <algorithm>:<checksum>", **result) 497 498 if is_url(checksum): 499 checksum_url = checksum 500 # download checksum file to checksum_tmpsrc 501 checksum_tmpsrc, checksum_info = url_get(module, checksum_url, dest, use_proxy, last_mod_time, force, timeout, headers, tmp_dest) 502 with open(checksum_tmpsrc) as f: 503 lines = [line.rstrip('\n') for line in f] 504 os.remove(checksum_tmpsrc) 505 checksum_map = [] 506 for line in lines: 507 # Split by one whitespace to keep the leading type char ' ' (whitespace) for text and '*' for binary 508 parts = line.split(" ", 1) 509 if len(parts) == 2: 510 # Remove the leading type char, we expect 511 if parts[1].startswith((" ", "*",)): 512 parts[1] = parts[1][1:] 513 514 # Append checksum and path without potential leading './' 515 checksum_map.append((parts[0], parts[1].lstrip("./"))) 516 517 filename = url_filename(url) 518 519 # Look through each line in the checksum file for a hash corresponding to 520 # the filename in the url, returning the first hash that is found. 521 for cksum in (s for (s, f) in checksum_map if f == filename): 522 checksum = cksum 523 break 524 else: 525 checksum = None 526 527 if checksum is None: 528 module.fail_json(msg="Unable to find a checksum for file '%s' in '%s'" % (filename, checksum_url)) 529 # Remove any non-alphanumeric characters, including the infamous 530 # Unicode zero-width space 531 checksum = re.sub(r'\W+', '', checksum).lower() 532 # Ensure the checksum portion is a hexdigest 533 try: 534 int(checksum, 16) 535 except ValueError: 536 module.fail_json(msg='The checksum format is invalid', **result) 537 538 if not dest_is_dir and os.path.exists(dest): 539 checksum_mismatch = False 540 541 # If the download is not forced and there is a checksum, allow 542 # checksum match to skip the download. 543 if not force and checksum != '': 544 destination_checksum = module.digest_from_file(dest, algorithm) 545 546 if checksum != destination_checksum: 547 checksum_mismatch = True 548 549 # Not forcing redownload, unless checksum does not match 550 if not force and checksum and not checksum_mismatch: 551 # Not forcing redownload, unless checksum does not match 552 # allow file attribute changes 553 file_args = module.load_file_common_arguments(module.params, path=dest) 554 result['changed'] = module.set_fs_attributes_if_different(file_args, False) 555 if result['changed']: 556 module.exit_json(msg="file already exists but file attributes changed", **result) 557 module.exit_json(msg="file already exists", **result) 558 559 # If the file already exists, prepare the last modified time for the 560 # request. 561 mtime = os.path.getmtime(dest) 562 last_mod_time = datetime.datetime.utcfromtimestamp(mtime) 563 564 # If the checksum does not match we have to force the download 565 # because last_mod_time may be newer than on remote 566 if checksum_mismatch: 567 force = True 568 569 # download to tmpsrc 570 start = datetime.datetime.utcnow() 571 tmpsrc, info = url_get(module, url, dest, use_proxy, last_mod_time, force, timeout, headers, tmp_dest) 572 result['elapsed'] = (datetime.datetime.utcnow() - start).seconds 573 result['src'] = tmpsrc 574 575 # Now the request has completed, we can finally generate the final 576 # destination file name from the info dict. 577 578 if dest_is_dir: 579 filename = extract_filename_from_headers(info) 580 if not filename: 581 # Fall back to extracting the filename from the URL. 582 # Pluck the URL from the info, since a redirect could have changed 583 # it. 584 filename = url_filename(info['url']) 585 dest = os.path.join(dest, filename) 586 result['dest'] = dest 587 588 # raise an error if there is no tmpsrc file 589 if not os.path.exists(tmpsrc): 590 os.remove(tmpsrc) 591 module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], **result) 592 if not os.access(tmpsrc, os.R_OK): 593 os.remove(tmpsrc) 594 module.fail_json(msg="Source %s is not readable" % (tmpsrc), **result) 595 result['checksum_src'] = module.sha1(tmpsrc) 596 597 # check if there is no dest file 598 if os.path.exists(dest): 599 # raise an error if copy has no permission on dest 600 if not os.access(dest, os.W_OK): 601 os.remove(tmpsrc) 602 module.fail_json(msg="Destination %s is not writable" % (dest), **result) 603 if not os.access(dest, os.R_OK): 604 os.remove(tmpsrc) 605 module.fail_json(msg="Destination %s is not readable" % (dest), **result) 606 result['checksum_dest'] = module.sha1(dest) 607 else: 608 if not os.path.exists(os.path.dirname(dest)): 609 os.remove(tmpsrc) 610 module.fail_json(msg="Destination %s does not exist" % (os.path.dirname(dest)), **result) 611 if not os.access(os.path.dirname(dest), os.W_OK): 612 os.remove(tmpsrc) 613 module.fail_json(msg="Destination %s is not writable" % (os.path.dirname(dest)), **result) 614 615 if module.check_mode: 616 if os.path.exists(tmpsrc): 617 os.remove(tmpsrc) 618 result['changed'] = ('checksum_dest' not in result or 619 result['checksum_src'] != result['checksum_dest']) 620 module.exit_json(msg=info.get('msg', ''), **result) 621 622 backup_file = None 623 if result['checksum_src'] != result['checksum_dest']: 624 try: 625 if backup: 626 if os.path.exists(dest): 627 backup_file = module.backup_local(dest) 628 module.atomic_move(tmpsrc, dest, unsafe_writes=module.params['unsafe_writes']) 629 except Exception as e: 630 if os.path.exists(tmpsrc): 631 os.remove(tmpsrc) 632 module.fail_json(msg="failed to copy %s to %s: %s" % (tmpsrc, dest, to_native(e)), 633 exception=traceback.format_exc(), **result) 634 result['changed'] = True 635 else: 636 result['changed'] = False 637 if os.path.exists(tmpsrc): 638 os.remove(tmpsrc) 639 640 if checksum != '': 641 destination_checksum = module.digest_from_file(dest, algorithm) 642 643 if checksum != destination_checksum: 644 os.remove(dest) 645 module.fail_json(msg="The checksum for %s did not match %s; it was %s." % (dest, checksum, destination_checksum), **result) 646 647 # allow file attribute changes 648 file_args = module.load_file_common_arguments(module.params, path=dest) 649 result['changed'] = module.set_fs_attributes_if_different(file_args, result['changed']) 650 651 # Backwards compat only. We'll return None on FIPS enabled systems 652 try: 653 result['md5sum'] = module.md5(dest) 654 except ValueError: 655 result['md5sum'] = None 656 657 if backup_file: 658 result['backup_file'] = backup_file 659 660 # Mission complete 661 module.exit_json(msg=info.get('msg', ''), status_code=info.get('status', ''), **result) 662 663 664if __name__ == '__main__': 665 main() 666