# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
The main Pooch class and a factory function for it.
"""
import os
import time
import contextlib
from pathlib import Path
import shutil
import ftplib

import requests
import requests.exceptions

from .hashes import hash_matches, file_hash
from .utils import (
    check_version,
    parse_url,
    get_logger,
    make_local_storage,
    cache_location,
    temporary_file,
    os_cache,
    unique_file_name,
)
from .downloaders import choose_downloader


def retrieve(url, known_hash, fname=None, path=None, processor=None, downloader=None):
    """
    Download and cache a single file locally.

    Uses HTTP or FTP by default, depending on the protocol in the given *url*.
    Other download methods can be controlled through the *downloader* argument
    (see below).

    The file will be downloaded to a temporary location first and its hash will
    be compared to the given *known_hash*. This is done to ensure that the
    download happened correctly and securely. If the hash doesn't match, the
    file will be deleted and an exception will be raised.

    If the file already exists locally, its hash will be compared to
    *known_hash*. If they are not the same, this is interpreted as the file
    needing to be updated and it will be downloaded again.

    You can bypass these checks by passing ``known_hash=None``. If this is
    done, the SHA256 hash of the downloaded file will be logged to the screen.
    It is highly recommended that you copy and paste this hash as *known_hash*
    so that future downloads are guaranteed to be the exact same file. This is
    crucial for reproducible computations.

    If the file exists in the given *path* with the given *fname* and the hash
    matches, it will not be downloaded and the absolute path to the file will
    be returned.

    .. note::

        This function is meant for downloading single files. If you need to
        manage the download and caching of several files, with versioning, use
        :func:`pooch.create` and :class:`pooch.Pooch` instead.

    Parameters
    ----------
    url : str
        The URL to the file that is to be downloaded. Ideally, the URL should
        end in a file name.
    known_hash : str
        A known hash (checksum) of the file. Will be used to verify the
        download or check if an existing file needs to be updated. By default,
        will assume it's a SHA256 hash. To specify a different hashing method,
        prepend the hash with ``algorithm:``, for example
        ``md5:pw9co2iun29juoh`` or ``sha1:092odwhi2ujdp2du2od2odh2wod2``. If
        None, will NOT check the hash of the downloaded file or check if an
        existing file needs to be updated.
    fname : str or None
        The name that will be used to save the file. Should NOT include the
        full path, just the file name (it will be appended to *path*). If
        None, will create a unique file name using a combination of the last
        part of the URL (assuming it's the file name) and the MD5 hash of the
        URL. For example, ``81whdo2d2e928yd1wi22-data-file.csv``. This ensures
        that files from different URLs never overwrite each other, even if they
        have the same name.
    path : str or PathLike or None
        The location of the cache folder on disk. This is where the file will
        be saved. If None, will save to a ``pooch`` folder in the default cache
        location for your operating system (see :func:`pooch.os_cache`).
    processor : None or callable
        If not None, then a function (or callable object) that will be called
        before returning the full path and after the file has been downloaded
        (if required). See :ref:`processors` for details.
    downloader : None or callable
        If not None, then a function (or callable object) that will be called
        to download a given URL to a provided local file name. See
        :ref:`downloaders` for details.

    Returns
    -------
    full_path : str
        The absolute path (including the file name) of the file in the local
        storage.

    Examples
    --------

    Download one of the data files from the Pooch repository on GitHub:

    >>> import os
    >>> from pooch import __version__, check_version, retrieve
    >>> # Make a URL for the version of pooch we have installed
    >>> url = "https://github.com/fatiando/pooch/raw/{}/data/tiny-data.txt"
    >>> url = url.format(check_version(__version__))
    >>> # Download the file and save it locally. Will check the MD5 checksum of
    >>> # the downloaded file against the given value to make sure it's the
    >>> # right file. You can use other hashes by specifying different
    >>> # algorithm names (sha256, sha1, etc).
    >>> fname = retrieve(
    ...     url, known_hash="md5:70e2afd3fd7e336ae478b1e740a5f08e",
    ... )
    >>> with open(fname) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> # Running again won't trigger a download and only return the path to
    >>> # the existing file.
    >>> fname2 = retrieve(
    ...     url, known_hash="md5:70e2afd3fd7e336ae478b1e740a5f08e",
    ... )
    >>> print(fname2 == fname)
    True
    >>> os.remove(fname)

    Files that are compressed with gzip, xz/lzma, or bzip2 can be automatically
    decompressed by using the :class:`pooch.Decompress` processor:

    >>> from pooch import Decompress
    >>> # URLs to a gzip compressed version of the data file.
    >>> url = ("https://github.com/fatiando/pooch/raw/{}/"
    ...        + "pooch/tests/data/tiny-data.txt.gz")
    >>> url = url.format(check_version(__version__))
    >>> # By default, you would have to decompress the file yourself
    >>> fname = retrieve(
    ...     url,
    ...     known_hash="md5:8812ba10b6c7778014fdae81b03f9def",
    ... )
    >>> print(os.path.splitext(fname)[1])
    .gz
    >>> # Use the processor to decompress after download automatically and
    >>> # return the path to the decompressed file instead.
    >>> fname2 = retrieve(
    ...     url,
    ...     known_hash="md5:8812ba10b6c7778014fdae81b03f9def",
    ...     processor=Decompress(),
    ... )
    >>> print(fname2 == fname)
    False
    >>> with open(fname2) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> os.remove(fname)
    >>> os.remove(fname2)

    When downloading archives (zip or tar), it can be useful to unpack them
    after download to avoid having to do that yourself. Use the processors
    :class:`pooch.Unzip` or :class:`pooch.Untar` to do this automatically:

    >>> from pooch import Unzip
    >>> # URLs to a zip archive with a single data file.
    >>> url = ("https://github.com/fatiando/pooch/raw/{}/"
    ...        + "pooch/tests/data/tiny-data.zip")
    >>> url = url.format(check_version(__version__))
    >>> # By default, you would get the path to the archive
    >>> fname = retrieve(
    ...     url,
    ...     known_hash="md5:e9592cb46cf3514a1079051f8a148148",
    ... )
    >>> print(os.path.splitext(fname)[1])
    .zip
    >>> os.remove(fname)
    >>> # Using the processor, the archive will be unzipped and a list with the
    >>> # path to every file will be returned instead of a single path.
    >>> fnames = retrieve(
    ...     url,
    ...     known_hash="md5:e9592cb46cf3514a1079051f8a148148",
    ...     processor=Unzip(),
    ... )
    >>> # There was only a single file in our archive.
    >>> print(len(fnames))
    1
    >>> with open(fnames[0]) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> for f in fnames:
    ...     os.remove(f)

    """
    if path is None:
        path = os_cache("pooch")
    if fname is None:
        fname = unique_file_name(url)
    # Create the local data directory if it doesn't already exist and make the
    # path absolute.
    path = cache_location(path, env=None, version=None)
    make_local_storage(path)

    full_path = path.resolve() / fname
    action, verb = download_action(full_path, known_hash)

    if action in ("download", "update"):
        get_logger().info(
            "%s data from '%s' to file '%s'.",
            verb,
            url,
            str(full_path),
        )

        if downloader is None:
            downloader = choose_downloader(url)

        stream_download(url, full_path, known_hash, downloader, pooch=None)

        # Without a known hash there is nothing to verify against, so report
        # the hash of what was just downloaded to let the user pin it.
        if known_hash is None:
            get_logger().info(
                "SHA256 hash of downloaded file: %s\n"
                "Use this value as the 'known_hash' argument of 'pooch.retrieve'"
                " to ensure that the file hasn't changed if it is downloaded again"
                " in the future.",
                file_hash(str(full_path)),
            )

    if processor is not None:
        return processor(str(full_path), action, None)

    return str(full_path)


def create(
    path,
    base_url,
    version=None,
    version_dev="master",
    env=None,
    registry=None,
    urls=None,
    retry_if_failed=0,
):
    """
    Create a :class:`~pooch.Pooch` with sensible defaults to fetch data files.

    If a version string is given, the Pooch will be versioned, meaning that the
    local storage folder and the base URL depend on the project version. This
    is necessary if your users have multiple versions of your library installed
    (using virtual environments) and you updated the data files between
    versions. Otherwise, every time a user switches environments would trigger
    a re-download of the data. The version string will be appended to the local
    storage path (for example, ``~/.mypooch/cache/v0.1``) and inserted into the
    base URL (for example,
    ``https://github.com/fatiando/pooch/raw/v0.1/data``). If the version string
    contains ``+XX.XXXXX``, it will be interpreted as a development version.

    Does **not** create the local data storage folder. The folder will only be
    created the first time a download is attempted with
    :meth:`pooch.Pooch.fetch`. This makes it safe to use this function at the
    module level (so it's executed on ``import`` and the resulting
    :class:`~pooch.Pooch` is a global variable).

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. If this is a list or tuple,
        we'll join the parts with the appropriate separator. The *version* will
        be appended to the end of this path. Use :func:`pooch.os_cache` for a
        sensible default.
    base_url : str
        Base URL for the remote data source. All requests will be made relative
        to this URL. The string should have a ``{version}`` formatting mark in
        it. We will call ``.format(version=version)`` on this string. If the
        URL is a directory path, it must end in a ``'/'`` because we will not
        include it.
    version : str or None
        The version string for your project. Should be PEP440 compatible. If
        None is given, will not attempt to format *base_url* and no subfolder
        will be appended to *path*.
    version_dev : str
        The name used for the development version of a project. If your data is
        hosted on Github (and *base_url* is a Github raw link), then
        ``"master"`` is a good choice (default). Ignored if *version* is None.
    env : str or None
        An environment variable that can be used to overwrite *path*. This
        allows users to control where they want the data to be stored. We'll
        append *version* to the end of this value as well.
    registry : dict or None
        A record of the files that are managed by this Pooch. Keys should be
        the file names and the values should be their hashes. Only files
        in the registry can be fetched from the local storage. Files in
        subdirectories of *path* **must use Unix-style separators** (``'/'``)
        even on Windows.
    urls : dict or None
        Custom URLs for downloading individual files in the registry. A
        dictionary with the file names as keys and the custom URLs as values.
        Not all files in *registry* need an entry in *urls*. If a file has an
        entry in *urls*, the *base_url* will be ignored when downloading it in
        favor of ``urls[fname]``.
    retry_if_failed : int
        Retry a file download the specified number of times if it fails because
        of a bad connection or a hash mismatch. By default, downloads are only
        attempted once (``retry_if_failed=0``). Initially, will wait for 1s
        between retries and then increase the wait time by 1s with each retry
        until a maximum of 10s.

    Returns
    -------
    pooch : :class:`~pooch.Pooch`
        The :class:`~pooch.Pooch` initialized with the given arguments.

    Examples
    --------

    Create a :class:`~pooch.Pooch` for a release (v0.1):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              registry={"data.txt": "9081wo2eb2gc0u..."})
    >>> print(pup.path.parts)  # The path is a pathlib.Path
    ('myproject', 'v0.1')
    >>> # The local folder is only created when a dataset is first downloaded
    >>> print(pup.path.exists())
    False
    >>> print(pup.base_url)
    http://some.link.com/v0.1/
    >>> print(pup.registry)
    {'data.txt': '9081wo2eb2gc0u...'}
    >>> print(pup.registry_files)
    ['data.txt']

    If this is a development version (12 commits ahead of v0.1), then the
    ``version_dev`` will be used (defaults to ``"master"``):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1+12.do9iwd")
    >>> print(pup.path.parts)
    ('myproject', 'master')
    >>> print(pup.base_url)
    http://some.link.com/master/

    Versioning is optional (but highly encouraged):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/",
    ...              registry={"data.txt": "9081wo2eb2gc0u..."})
    >>> print(pup.path.parts)  # The path is a pathlib.Path
    ('myproject',)
    >>> print(pup.base_url)
    http://some.link.com/

    To place the storage folder at a subdirectory, pass in a list and we'll
    join the path for you using the appropriate separator for your operating
    system:

    >>> pup = create(path=["myproject", "cache", "data"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1")
    >>> print(pup.path.parts)
    ('myproject', 'cache', 'data', 'v0.1')

    The user can overwrite the storage path by setting an environment variable:

    >>> # The variable is not set so we'll use *path*
    >>> pup = create(path=["myproject", "not_from_env"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              env="MYPROJECT_DATA_DIR")
    >>> print(pup.path.parts)
    ('myproject', 'not_from_env', 'v0.1')
    >>> # Set the environment variable and try again
    >>> import os
    >>> os.environ["MYPROJECT_DATA_DIR"] = os.path.join("myproject", "env")
    >>> pup = create(path=["myproject", "not_env"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              env="MYPROJECT_DATA_DIR")
    >>> print(pup.path.parts)
    ('myproject', 'env', 'v0.1')

    """
    if version is not None:
        version = check_version(version, fallback=version_dev)
        base_url = base_url.format(version=version)
    # Don't create the cache folder here! This function is usually called in
    # the module context (at import time), so touching the file system is not
    # recommended. It could cause crashes when multiple processes/threads try
    # to import at the same time (which would try to create the folder several
    # times at once).
    path = cache_location(path, env, version)
    pup = Pooch(
        path=path,
        base_url=base_url,
        registry=registry,
        urls=urls,
        retry_if_failed=retry_if_failed,
    )
    return pup


class Pooch:
    """
    Manager for a local data storage that can fetch from a remote source.

    Avoid creating ``Pooch`` instances directly. Use :func:`pooch.create`
    instead.

    Parameters
    ----------
    path : str
        The path to the local data storage folder. The path must exist in the
        file system.
    base_url : str
        Base URL for the remote data source. All requests will be made relative
        to this URL.
    registry : dict or None
        A record of the files that are managed by this good boy. Keys should be
        the file names and the values should be their hashes. Only files
        in the registry can be fetched from the local storage. Files in
        subdirectories of *path* **must use Unix-style separators** (``'/'``)
        even on Windows.
    urls : dict or None
        Custom URLs for downloading individual files in the registry. A
        dictionary with the file names as keys and the custom URLs as values.
        Not all files in *registry* need an entry in *urls*. If a file has an
        entry in *urls*, the *base_url* will be ignored when downloading it in
        favor of ``urls[fname]``.
    retry_if_failed : int
        Retry a file download the specified number of times if it fails because
        of a bad connection or a hash mismatch. By default, downloads are only
        attempted once (``retry_if_failed=0``). Initially, will wait for 1s
        between retries and then increase the wait time by 1s with each retry
        until a maximum of 10s.

    """

    def __init__(self, path, base_url, registry=None, urls=None, retry_if_failed=0):
        self.path = path
        self.base_url = base_url
        if registry is None:
            registry = {}
        # The registry is stored by reference (not copied), so later changes
        # made by the caller to the same dict are visible here.
        self.registry = registry
        if urls is None:
            urls = {}
        # Unlike the registry, the custom URLs are copied.
        self.urls = dict(urls)
        self.retry_if_failed = retry_if_failed

    @property
    def abspath(self):
        "Absolute path to the local storage"
        return Path(os.path.abspath(os.path.expanduser(str(self.path))))

    @property
    def registry_files(self):
        "List of file names on the registry"
        return list(self.registry)

    def fetch(self, fname, processor=None, downloader=None):
        """
        Get the absolute path to a file in the local storage.

        If it's not in the local storage, it will be downloaded. If the hash of
        the file in local storage doesn't match the one in the registry, will
        download a new copy of the file. This is considered a sign that the
        file was updated in the remote storage. If the hash of the downloaded
        file still doesn't match the one in the registry, will raise an
        exception to warn of possible file corruption.

        Post-processing actions sometimes need to be taken on downloaded files
        (unzipping, conversion to a more efficient format, etc). If these
        actions are time or memory consuming, it would be best to do this only
        once right after the file is downloaded. Use the *processor* argument
        to specify a function that is executed after the download to perform
        these actions. See :ref:`processors` for details.

        Custom file downloaders can be provided through the *downloader*
        argument. By default, Pooch will determine the download protocol from
        the URL in the registry. If the server for a given file requires
        authentication (username and password), use a downloader that supports
        these features. Downloaders can also be used to print custom messages
        (like a progress bar), etc. See :ref:`downloaders` for details.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.
        processor : None or callable
            If not None, then a function (or callable object) that will be
            called before returning the full path and after the file has been
            downloaded. See :ref:`processors` for details.
        downloader : None or callable
            If not None, then a function (or callable object) that will be
            called to download a given URL to a provided local file name. See
            :ref:`downloaders` for details.

        Returns
        -------
        full_path : str
            The absolute path (including the file name) of the file in the
            local storage.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)

        # Create the local data directory if it doesn't already exist
        make_local_storage(str(self.abspath))

        url = self.get_url(fname)
        full_path = self.abspath / fname
        known_hash = self.registry[fname]
        action, verb = download_action(full_path, known_hash)

        if action in ("download", "update"):
            get_logger().info(
                "%s file '%s' from '%s' to '%s'.",
                verb,
                fname,
                url,
                str(self.abspath),
            )

            if downloader is None:
                downloader = choose_downloader(url)

            stream_download(
                url,
                full_path,
                known_hash,
                downloader,
                pooch=self,
                retry_if_failed=self.retry_if_failed,
            )

        if processor is not None:
            return processor(str(full_path), action, self)

        return str(full_path)

    def _assert_file_in_registry(self, fname):
        """
        Check if a file is in the registry and raise :class:`ValueError` if
        it's not.
        """
        if fname not in self.registry:
            raise ValueError(f"File '{fname}' is not in the registry.")

    def get_url(self, fname):
        """
        Get the full URL to download a file in the registry.

        A custom URL from *urls* takes precedence; otherwise *fname* is
        appended to *base_url*.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.

        Returns
        -------
        url : str
            The URL from which the file will be downloaded.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)
        return self.urls.get(fname, "".join([self.base_url, fname]))

    def load_registry(self, fname):
        """
        Load entries from a file and add them to the registry.

        Use this if you are managing many files.

        Each line of the file should have file name and its hash separated by
        a space. Hash can specify checksum algorithm using "alg:hash" format.
        In case no algorithm is provided, SHA256 is used by default.
        Only one file per line is allowed. Custom download URLs for individual
        files can be specified as a third element on the line. Line comments
        can be added and must be prepended with ``#``. Blank lines are
        ignored.

        Parameters
        ----------
        fname : str | fileobj
            Path (or open file object) to the registry file. Files given as
            a path are read as UTF-8, as are lines that an open file object
            yields as bytes.

        Raises
        ------
        OSError
            If a non-comment line doesn't have exactly 2 or 3 elements.

        """
        with contextlib.ExitStack() as stack:
            if hasattr(fname, "read"):
                # It's a file object (the caller remains responsible for
                # closing it)
                fin = fname
            else:
                # It's a file path. Read as UTF-8 explicitly so the result
                # doesn't depend on the platform's default encoding and
                # matches how bytes lines are decoded below.
                fin = stack.enter_context(open(fname, encoding="utf-8"))

            for linenum, line in enumerate(fin):
                if isinstance(line, bytes):
                    line = line.decode("utf-8")

                line = line.strip()
                # skip line comments
                if line.startswith("#"):
                    continue

                elements = line.split()
                # Zero elements means a blank line, which is allowed
                if len(elements) not in (0, 2, 3):
                    raise OSError(
                        f"Invalid entry in Pooch registry file '{fname}': "
                        f"expected 2 or 3 elements in line {linenum + 1} but got "
                        f"{len(elements)}. Offending entry: '{line}'"
                    )
                if elements:
                    file_name = elements[0]
                    file_checksum = elements[1]
                    if len(elements) == 3:
                        file_url = elements[2]
                        self.urls[file_name] = file_url
                    self.registry[file_name] = file_checksum.lower()

    def is_available(self, fname):
        """
        Check availability of a remote file without downloading it.

        Use this method when working with large files to check if they are
        available for download.

        For FTP URLs, the file is looked up in the listing of its remote
        directory. For any other protocol, an HTTP ``HEAD`` request (following
        redirects) is made and the file is considered available only if the
        response status code is 200.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.

        Returns
        -------
        status : bool
            True if the file is available for download. False otherwise.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)
        source = self.get_url(fname)
        parsed_url = parse_url(source)
        if parsed_url["protocol"] == "ftp":
            directory, file_name = os.path.split(parsed_url["path"])
            ftp = ftplib.FTP()
            # Connect inside the try block so that the socket is also closed
            # if the connection is opened but the server greeting fails.
            try:
                ftp.connect(host=parsed_url["netloc"])
                ftp.login()
                available = file_name in ftp.nlst(directory)
            finally:
                ftp.close()
        else:
            response = requests.head(source, allow_redirects=True)
            available = response.status_code == 200
        return available


def download_action(path, known_hash):
    """
    Determine the action that is needed to get the file on disk.

    Parameters
    ----------
    path : PathLike
        The path to the file on disk.
    known_hash : str
        A known hash (checksum) of the file. Will be used to verify the
        download or check if an existing file needs to be updated. By default,
        will assume it's a SHA256 hash. To specify a different hashing method,
        prepend the hash with ``algorithm:``, for example
        ``md5:pw9co2iun29juoh`` or ``sha1:092odwhi2ujdp2du2od2odh2wod2``.

    Returns
    -------
    action, verb : str
        The action that must be taken and the English verb (present
        participle form of *action*, for example ``'Downloading'``) used in
        the log:

        * ``'download'``: File does not exist locally and must be downloaded.
        * ``'update'``: File exists locally but needs to be updated.
        * ``'fetch'``: File exists locally and only its path needs to be
          returned.

    """
    if not path.exists():
        action = "download"
        verb = "Downloading"
    elif not hash_matches(str(path), known_hash):
        action = "update"
        verb = "Updating"
    else:
        action = "fetch"
        verb = "Fetching"
    return action, verb


def stream_download(url, fname, known_hash, downloader, pooch=None, retry_if_failed=0):
    """
    Stream the file and check that its hash matches the known one.

    The file is first downloaded to a temporary file name in the cache folder.
    It will be moved to the desired file name only if the hash matches the
    known hash. Otherwise, the temporary file is deleted.

    If the download fails for either a bad connection or a hash mismatch, we
    will retry the download the specified number of times in case the failure
    was due to a network error.

    Parameters
    ----------
    url : str
        The URL of the file to download.
    fname : PathLike
        The full path (a :class:`pathlib.Path`) where the file will be saved.
    known_hash : str or None
        The known hash of the file, passed to the hash check of the
        downloaded temporary file.
    downloader : callable
        Called as ``downloader(url, tmp, pooch)`` to download *url* to the
        temporary file *tmp*.
    pooch : :class:`~pooch.Pooch` or None
        The Pooch instance requesting the download (passed on to the
        *downloader*).
    retry_if_failed : int
        Number of extra download attempts after a failure. Negative values
        are treated as 0 so that the download is always attempted at least
        once.

    """
    # Ensure the parent directory exists in case the file is in a subdirectory.
    # Otherwise, move will cause an error. Using exist_ok avoids a crash if
    # another process/thread creates the folder between a check and the call.
    os.makedirs(str(fname.parent), exist_ok=True)
    # Always try at least once, even if given a negative number of retries.
    download_attempts = 1 + max(0, retry_if_failed)
    max_wait = 10
    for i in range(download_attempts):
        try:
            # Stream the file to a temporary so that we can safely check its
            # hash before overwriting the original.
            with temporary_file(path=str(fname.parent)) as tmp:
                downloader(url, tmp, pooch)
                hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
                shutil.move(tmp, str(fname))
            break
        except (ValueError, requests.exceptions.RequestException):
            # Out of attempts: let the original error reach the caller.
            if i == download_attempts - 1:
                raise
            retries_left = download_attempts - (i + 1)
            get_logger().info(
                "Failed to download '%s'. "
                "Will attempt the download again %d more time%s.",
                str(fname.name),
                retries_left,
                "s" if retries_left > 1 else "",
            )
            # Wait 1s after the first failure and 1s longer after each
            # subsequent one, capped at max_wait seconds.
            time.sleep(min(i + 1, max_wait))