1# Copyright (c) 2018 The Pooch Developers.
2# Distributed under the terms of the BSD 3-Clause License.
3# SPDX-License-Identifier: BSD-3-Clause
4#
5# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
6#
7"""
8The main Pooch class and a factory function for it.
9"""
10import os
11import time
12import contextlib
13from pathlib import Path
14import shutil
15import ftplib
16
17import requests
18import requests.exceptions
19
20from .hashes import hash_matches, file_hash
21from .utils import (
22    check_version,
23    parse_url,
24    get_logger,
25    make_local_storage,
26    cache_location,
27    temporary_file,
28    os_cache,
29    unique_file_name,
30)
31from .downloaders import choose_downloader
32
33
def retrieve(url, known_hash, fname=None, path=None, processor=None, downloader=None):
    """
    Fetch a single file from *url* and keep a cached copy of it on disk.

    The download method is picked from the protocol in *url* (HTTP or FTP by
    default). Any other method can be plugged in through the *downloader*
    argument (see below).

    A download is first written to a temporary location and its hash is
    compared to *known_hash*, which guards against corrupted or tampered
    downloads. When the hashes differ, the temporary file is deleted and an
    exception is raised.

    When the file is already present locally, its hash is compared to
    *known_hash* as well. A mismatch is taken to mean that the file is out of
    date and it is downloaded again. When the file is present in *path* under
    *fname* and the hash matches, nothing is downloaded and the absolute path
    to the existing file is returned.

    Passing ``known_hash=None`` bypasses these checks. In that case, the
    SHA256 hash of the downloaded file is logged. It is highly recommended
    that you copy this hash and pass it as *known_hash* from then on so that
    future downloads are guaranteed to be the exact same file, which is
    crucial for reproducible computations.

    .. note::

        This function is meant for downloading single files. If you need to
        manage the download and caching of several files, with versioning, use
        :func:`pooch.create` and :class:`pooch.Pooch` instead.

    Parameters
    ----------
    url : str
        The URL of the file to download. Ideally, it should end in a file
        name.
    known_hash : str or None
        A known hash (checksum) of the file, used to verify the download and
        to decide if an existing file needs to be updated. Assumed to be a
        SHA256 hash unless prefixed with ``algorithm:``, for example
        ``md5:pw9co2iun29juoh`` or ``sha1:092odwhi2ujdp2du2od2odh2wod2``. If
        None, will NOT check the hash of the downloaded file or check if an
        existing file needs to be updated.
    fname : str or None
        The name under which the file is saved. Should be just the file name,
        NOT the full path (it is appended to *path*). If None, a unique file
        name is created from the last part of the URL (assumed to be the file
        name) and the MD5 hash of the URL. For example,
        ``81whdo2d2e928yd1wi22-data-file.csv``. This way, files from different
        URLs never overwrite each other, even if they have the same name.
    path : str or PathLike or None
        The location of the cache folder on disk, where the file is saved. If
        None, a ``pooch`` folder in the default cache location of your
        operating system is used (see :func:`pooch.os_cache`).
    processor : None or callable
        If not None, a function (or callable object) that is called after the
        file has been downloaded (if required) and before returning. Its
        return value is returned instead of the file path. See
        :ref:`processors` for details.
    downloader : None or callable
        If not None, a function (or callable object) that is called to
        download a given URL to a provided local file name. See
        :ref:`downloaders` for details.

    Returns
    -------
    full_path : str
        The absolute path (including the file name) of the file in the local
        storage.

    Examples
    --------

    Download one of the data files from the Pooch repository on GitHub:

    >>> import os
    >>> from pooch import __version__, check_version, retrieve
    >>> # Make a URL for the version of pooch we have installed
    >>> url = "https://github.com/fatiando/pooch/raw/{}/data/tiny-data.txt"
    >>> url = url.format(check_version(__version__))
    >>> # Download the file and save it locally. Will check the MD5 checksum of
    >>> # the downloaded file against the given value to make sure it's the
    >>> # right file. You can use other hashes by specifying different
    >>> # algorithm names (sha256, sha1, etc).
    >>> fname = retrieve(
    ...     url, known_hash="md5:70e2afd3fd7e336ae478b1e740a5f08e",
    ... )
    >>> with open(fname) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> # Running again won't trigger a download and only return the path to
    >>> # the existing file.
    >>> fname2 = retrieve(
    ...     url, known_hash="md5:70e2afd3fd7e336ae478b1e740a5f08e",
    ... )
    >>> print(fname2 == fname)
    True
    >>> os.remove(fname)

    Files that are compressed with gzip, xz/lzma, or bzip2 can be automatically
    decompressed by using the :class:`pooch.Decompress` processor:

    >>> from pooch import Decompress
    >>> # URLs to a gzip compressed version of the data file.
    >>> url = ("https://github.com/fatiando/pooch/raw/{}/"
    ...        + "pooch/tests/data/tiny-data.txt.gz")
    >>> url = url.format(check_version(__version__))
    >>> # By default, you would have to decompress the file yourself
    >>> fname = retrieve(
    ...     url,
    ...     known_hash="md5:8812ba10b6c7778014fdae81b03f9def",
    ... )
    >>> print(os.path.splitext(fname)[1])
    .gz
    >>> # Use the processor to decompress after download automatically and
    >>> # return the path to the decompressed file instead.
    >>> fname2 = retrieve(
    ...     url,
    ...     known_hash="md5:8812ba10b6c7778014fdae81b03f9def",
    ...     processor=Decompress(),
    ... )
    >>> print(fname2 == fname)
    False
    >>> with open(fname2) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> os.remove(fname)
    >>> os.remove(fname2)

    When downloading archives (zip or tar), it can be useful to unpack them
    after download to avoid having to do that yourself. Use the processors
    :class:`pooch.Unzip` or :class:`pooch.Untar` to do this automatically:

    >>> from pooch import Unzip
    >>> # URLs to a zip archive with a single data file.
    >>> url = ("https://github.com/fatiando/pooch/raw/{}/"
    ...        + "pooch/tests/data/tiny-data.zip")
    >>> url = url.format(check_version(__version__))
    >>> # By default, you would get the path to the archive
    >>> fname = retrieve(
    ...     url,
    ...     known_hash="md5:e9592cb46cf3514a1079051f8a148148",
    ... )
    >>> print(os.path.splitext(fname)[1])
    .zip
    >>> os.remove(fname)
    >>> # Using the processor, the archive will be unzipped and a list with the
    >>> # path to every file will be returned instead of a single path.
    >>> fnames = retrieve(
    ...     url,
    ...     known_hash="md5:e9592cb46cf3514a1079051f8a148148",
    ...     processor=Unzip(),
    ... )
    >>> # There was only a single file in our archive.
    >>> print(len(fnames))
    1
    >>> with open(fnames[0]) as f:
    ...     print(f.read().strip())
    # A tiny data file for test purposes only
    1  2  3  4  5  6
    >>> for f in fnames:
    ...     os.remove(f)


    """
    # Fill in the defaults for the arguments that were left out.
    if fname is None:
        fname = unique_file_name(url)
    if path is None:
        path = os_cache("pooch")

    # Normalize the cache folder (no environment variable or version involved
    # here) and make sure it exists before anything is written to it.
    cache_dir = cache_location(path, env=None, version=None)
    make_local_storage(cache_dir)

    target = cache_dir.resolve() / fname
    target_name = str(target)
    action, verb = download_action(target, known_hash)

    if action in ("download", "update"):
        get_logger().info(
            "%s data from '%s' to file '%s'.", verb, url, target_name
        )

        # Fall back to a downloader that matches the URL protocol.
        if downloader is None:
            downloader = choose_downloader(url)

        stream_download(url, target, known_hash, downloader, pooch=None)

        # Without a reference hash there was nothing to verify against, so
        # report the hash of what we got to let the user pin it next time.
        if known_hash is None:
            get_logger().info(
                "SHA256 hash of downloaded file: %s\n"
                "Use this value as the 'known_hash' argument of 'pooch.retrieve'"
                " to ensure that the file hasn't changed if it is downloaded again"
                " in the future.",
                file_hash(target_name),
            )

    # The processor (if any) decides what is handed back to the caller. There
    # is no Pooch instance in this context, hence the None.
    return target_name if processor is None else processor(target_name, action, None)
242
243
def create(
    path,
    base_url,
    version=None,
    version_dev="master",
    env=None,
    registry=None,
    urls=None,
    retry_if_failed=0,
):
    """
    Create a :class:`~pooch.Pooch` with sensible defaults to fetch data files.

    Passing a version string makes the Pooch versioned: both the local storage
    folder and the base URL will depend on the project version. You need this
    if your users can have several versions of your library installed (using
    virtual environments) and the data files changed between versions.
    Without it, switching environments would trigger a re-download of the
    data every time. The version string is appended to the local storage path
    (for example, ``~/.mypooch/cache/v0.1``) and inserted into the base URL
    (for example, ``https://github.com/fatiando/pooch/raw/v0.1/data``). A
    version string containing ``+XX.XXXXX`` is interpreted as a development
    version.

    The local data storage folder is **not** created by this function. It
    will only be created the first time a download is attempted with
    :meth:`pooch.Pooch.fetch`. Because of this, it is safe to call this
    function at the module level (so it's executed on ``import`` and the
    resulting :class:`~pooch.Pooch` is a global variable).

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. A list or tuple will have
        its parts joined with the appropriate separator. The *version* is
        appended to the end of this path. Use :func:`pooch.os_cache` for a
        sensible default.
    base_url : str
        Base URL for the remote data source. All requests will be made relative
        to this URL. The string should have a ``{version}`` formatting mark in
        it. We will call ``.format(version=version)`` on this string. If the
        URL is a directory path, it must end in a ``'/'`` because we will not
        include it.
    version : str or None
        The version string for your project. Should be PEP440 compatible. If
        None, *base_url* is not formatted and no subfolder is appended to
        *path*.
    version_dev : str
        The name used for the development version of a project. If your data is
        hosted on Github (and *base_url* is a Github raw link), then
        ``"master"`` is a good choice (default). Ignored if *version* is None.
    env : str or None
        The name of an environment variable that can be used to overwrite
        *path*, letting users control where the data is stored. The *version*
        is appended to the end of its value as well.
    registry : dict or None
        A record of the files that are managed by this Pooch. Keys should be
        the file names and the values should be their hashes. Only files
        in the registry can be fetched from the local storage. Files in
        subdirectories of *path* **must use Unix-style separators** (``'/'``)
        even on Windows.
    urls : dict or None
        Custom URLs for downloading individual files in the registry. A
        dictionary with the file names as keys and the custom URLs as values.
        Not all files in *registry* need an entry in *urls*. If a file has an
        entry in *urls*, the *base_url* will be ignored when downloading it in
        favor of ``urls[fname]``.
    retry_if_failed : int
        Retry a file download the specified number of times if it fails because
        of a bad connection or a hash mismatch. By default, downloads are only
        attempted once (``retry_if_failed=0``). Initially, will wait for 1s
        between retries and then increase the wait time by 1s with each retry
        until a maximum of 10s.

    Returns
    -------
    pooch : :class:`~pooch.Pooch`
        The :class:`~pooch.Pooch` initialized with the given arguments.

    Examples
    --------

    Create a :class:`~pooch.Pooch` for a release (v0.1):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              registry={"data.txt": "9081wo2eb2gc0u..."})
    >>> print(pup.path.parts)  # The path is a pathlib.Path
    ('myproject', 'v0.1')
    >>> # The local folder is only created when a dataset is first downloaded
    >>> print(pup.path.exists())
    False
    >>> print(pup.base_url)
    http://some.link.com/v0.1/
    >>> print(pup.registry)
    {'data.txt': '9081wo2eb2gc0u...'}
    >>> print(pup.registry_files)
    ['data.txt']

    If this is a development version (12 commits ahead of v0.1), then the
    ``version_dev`` will be used (defaults to ``"master"``):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1+12.do9iwd")
    >>> print(pup.path.parts)
    ('myproject', 'master')
    >>> print(pup.base_url)
    http://some.link.com/master/

    Versioning is optional (but highly encouraged):

    >>> pup = create(path="myproject",
    ...              base_url="http://some.link.com/",
    ...              registry={"data.txt": "9081wo2eb2gc0u..."})
    >>> print(pup.path.parts)  # The path is a pathlib.Path
    ('myproject',)
    >>> print(pup.base_url)
    http://some.link.com/

    To place the storage folder at a subdirectory, pass in a list and we'll
    join the path for you using the appropriate separator for your operating
    system:

    >>> pup = create(path=["myproject", "cache", "data"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1")
    >>> print(pup.path.parts)
    ('myproject', 'cache', 'data', 'v0.1')

    The user can overwrite the storage path by setting an environment variable:

    >>> # The variable is not set so we'll use *path*
    >>> pup = create(path=["myproject", "not_from_env"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              env="MYPROJECT_DATA_DIR")
    >>> print(pup.path.parts)
    ('myproject', 'not_from_env', 'v0.1')
    >>> # Set the environment variable and try again
    >>> import os
    >>> os.environ["MYPROJECT_DATA_DIR"] = os.path.join("myproject", "env")
    >>> pup = create(path=["myproject", "not_env"],
    ...              base_url="http://some.link.com/{version}/",
    ...              version="v0.1",
    ...              env="MYPROJECT_DATA_DIR")
    >>> print(pup.path.parts)
    ('myproject', 'env', 'v0.1')

    """
    versioned = version is not None
    if versioned:
        # Resolve the version (falling back to the development name when
        # needed) and stamp it into the URL.
        version = check_version(version, fallback=version_dev)
        base_url = base_url.format(version=version)
    # Only compute the storage location, never create it here. This function
    # usually runs in the module context (at import time), where touching the
    # file system is not recommended: several processes/threads importing at
    # once would all try to create the folder at the same time, which could
    # cause crashes.
    return Pooch(
        path=cache_location(path, env, version),
        base_url=base_url,
        registry=registry,
        urls=urls,
        retry_if_failed=retry_if_failed,
    )
412
413
class Pooch:
    """
    Manager for a local data storage that can fetch from a remote source.

    Avoid creating ``Pooch`` instances directly. Use :func:`pooch.create`
    instead.

    Parameters
    ----------
    path : str or PathLike
        The path to the local data storage folder. It doesn't have to exist
        when the instance is created: :meth:`~pooch.Pooch.fetch` creates it
        before downloading anything.
    base_url : str
        Base URL for the remote data source. All requests will be made relative
        to this URL.
    registry : dict or None
        A record of the files that are managed by this good boy. Keys should be
        the file names and the values should be their hashes. Only files
        in the registry can be fetched from the local storage. Files in
        subdirectories of *path* **must use Unix-style separators** (``'/'``)
        even on Windows. The dictionary is stored as given (not copied), so
        :meth:`~pooch.Pooch.load_registry` adds entries to the very object
        that was passed in.
    urls : dict or None
        Custom URLs for downloading individual files in the registry. A
        dictionary with the file names as keys and the custom URLs as values.
        Not all files in *registry* need an entry in *urls*. If a file has an
        entry in *urls*, the *base_url* will be ignored when downloading it in
        favor of ``urls[fname]``. A copy of this dictionary is stored.
    retry_if_failed : int
        Retry a file download the specified number of times if it fails because
        of a bad connection or a hash mismatch. By default, downloads are only
        attempted once (``retry_if_failed=0``). Initially, will wait for 1s
        between retries and then increase the wait time by 1s with each retry
        until a maximum of 10s.

    """

    def __init__(self, path, base_url, registry=None, urls=None, retry_if_failed=0):
        self.path = path
        self.base_url = base_url
        if registry is None:
            registry = {}
        # Kept by reference on purpose: callers may rely on seeing the entries
        # added by load_registry in the dictionary they passed in.
        self.registry = registry
        if urls is None:
            urls = {}
        self.urls = dict(urls)
        self.retry_if_failed = retry_if_failed

    @property
    def abspath(self):
        "Absolute path to the local storage"
        return Path(os.path.abspath(os.path.expanduser(str(self.path))))

    @property
    def registry_files(self):
        "List of file names on the registry"
        return list(self.registry)

    def fetch(self, fname, processor=None, downloader=None):
        """
        Get the absolute path to a file in the local storage.

        If it's not in the local storage, it will be downloaded. If the hash of
        the file in local storage doesn't match the one in the registry, will
        download a new copy of the file. This is considered a sign that the
        file was updated in the remote storage. If the hash of the downloaded
        file still doesn't match the one in the registry, will raise an
        exception to warn of possible file corruption.

        Post-processing actions sometimes need to be taken on downloaded files
        (unzipping, conversion to a more efficient format, etc). If these
        actions are time or memory consuming, it would be best to do this only
        once right after the file is downloaded. Use the *processor* argument
        to specify a function that is executed after the download to perform
        these actions. See :ref:`processors` for details.

        Custom file downloaders can be provided through the *downloader*
        argument. By default, Pooch will determine the download protocol from
        the URL in the registry. If the server for a given file requires
        authentication (username and password), use a downloader that supports
        these features. Downloaders can also be used to print custom messages
        (like a progress bar), etc. See :ref:`downloaders` for details.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.
        processor : None or callable
            If not None, then a function (or callable object) that will be
            called before returning the full path and after the file has been
            downloaded. It is called as ``processor(full_path, action, pooch)``
            and its return value is returned instead of the path. See
            :ref:`processors` for details.
        downloader : None or callable
            If not None, then a function (or callable object) that will be
            called to download a given URL to a provided local file name. See
            :ref:`downloaders` for details.

        Returns
        -------
        full_path : str
            The absolute path (including the file name) of the file in the
            local storage.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)

        # Create the local data directory if it doesn't already exist
        make_local_storage(str(self.abspath))

        url = self.get_url(fname)
        full_path = self.abspath / fname
        known_hash = self.registry[fname]
        action, verb = download_action(full_path, known_hash)

        if action in ("download", "update"):
            get_logger().info(
                "%s file '%s' from '%s' to '%s'.",
                verb,
                fname,
                url,
                str(self.abspath),
            )

            if downloader is None:
                downloader = choose_downloader(url)

            stream_download(
                url,
                full_path,
                known_hash,
                downloader,
                pooch=self,
                retry_if_failed=self.retry_if_failed,
            )

        if processor is not None:
            return processor(str(full_path), action, self)

        return str(full_path)

    def _assert_file_in_registry(self, fname):
        """
        Check if a file is in the registry and raise :class:`ValueError` if
        it's not.
        """
        if fname not in self.registry:
            raise ValueError(f"File '{fname}' is not in the registry.")

    def get_url(self, fname):
        """
        Get the full URL to download a file in the registry.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.

        Returns
        -------
        url : str
            The custom URL registered for *fname* in ``urls`` or, if there is
            none, *fname* appended to the *base_url*.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)
        return self.urls.get(fname, "".join([self.base_url, fname]))

    def load_registry(self, fname):
        """
        Load entries from a file and add them to the registry.

        Use this if you are managing many files.

        Each line of the file should have file name and its hash separated by
        a space. Hash can specify checksum algorithm using "alg:hash" format.
        In case no algorithm is provided, SHA256 is used by default.
        Only one file per line is allowed. Custom download URLs for individual
        files can be specified as a third element on the line. Line comments
        can be added and must be prepended with ``#``. Blank lines are
        ignored.

        Registry files given as a path are read as UTF-8, the same encoding
        used to decode lines coming from file objects opened in binary mode.

        Parameters
        ----------
        fname : str | fileobj
            Path (or open file object) to the registry file.

        Raises
        ------
        OSError
            If a line has a number of elements other than 2 or 3.

        """
        with contextlib.ExitStack() as stack:
            if hasattr(fname, "read"):
                # It's a file object. We didn't open it so we don't close it.
                fin = fname
            else:
                # It's a file path. Read it as UTF-8 explicitly instead of
                # relying on the platform default encoding so that file names
                # with non-ASCII characters load the same way everywhere (and
                # the same way as byte streams, which are decoded below).
                fin = stack.enter_context(open(fname, encoding="utf-8"))

            for linenum, line in enumerate(fin, start=1):
                if isinstance(line, bytes):
                    line = line.decode("utf-8")

                line = line.strip()
                # skip line comments
                if line.startswith("#"):
                    continue

                elements = line.split()
                # Zero elements is a blank line, which is allowed and skipped.
                if len(elements) not in (0, 2, 3):
                    raise OSError(
                        f"Invalid entry in Pooch registry file '{fname}': "
                        f"expected 2 or 3 elements in line {linenum} but got "
                        f"{len(elements)}. Offending entry: '{line}'"
                    )
                if elements:
                    file_name = elements[0]
                    file_checksum = elements[1]
                    if len(elements) == 3:
                        file_url = elements[2]
                        self.urls[file_name] = file_url
                    # Checksums are stored in lower case.
                    self.registry[file_name] = file_checksum.lower()

    def is_available(self, fname):
        """
        Check availability of a remote file without downloading it.

        Use this method when working with large files to check if they are
        available for download.

        For FTP URLs, logs in to the server (without credentials) and looks
        for the file name in the listing of its directory. For every other
        protocol, sends a HEAD request (following redirects) and considers
        the file available only if the response status code is 200.

        Parameters
        ----------
        fname : str
            The file name (relative to the *base_url* of the remote data
            storage) to fetch from the local storage.

        Returns
        -------
        status : bool
            True if the file is available for download. False otherwise.

        Raises
        ------
        ValueError
            If *fname* is not in the registry.

        """
        self._assert_file_in_registry(fname)
        source = self.get_url(fname)
        parsed_url = parse_url(source)
        if parsed_url["protocol"] == "ftp":
            directory, file_name = os.path.split(parsed_url["path"])
            ftp = ftplib.FTP()
            ftp.connect(host=parsed_url["netloc"])
            try:
                ftp.login()
                available = file_name in ftp.nlst(directory)
            finally:
                # Always release the connection, even if the listing fails.
                ftp.close()
        else:
            response = requests.head(source, allow_redirects=True)
            available = bool(response.status_code == 200)
        return available
661
662
def download_action(path, known_hash):
    """
    Work out what has to be done to have the file available on disk.

    Parameters
    ----------
    path : PathLike
        The path to the file on disk.
    known_hash : str
        A known hash (checksum) of the file, compared against the hash of the
        file at *path* when it exists. Assumed to be a SHA256 hash unless
        prefixed with ``algorithm:``, for example ``md5:pw9co2iun29juoh`` or
        ``sha1:092odwhi2ujdp2du2od2odh2wod2``.

    Returns
    -------
    action, verb : str
        The action that must be taken and the matching English verb (present
        participle, capitalized) to be used in log messages:

        * ``'download'``: File does not exist locally and must be downloaded.
        * ``'update'``: File exists locally but its hash doesn't match
          *known_hash*, so it needs to be updated.
        * ``'fetch'``: File exists locally and is up to date. Only its path
          needs to be reported.

    """
    # Nothing on disk yet: no need to compute any hash.
    if not path.exists():
        return "download", "Downloading"
    # The file is there, so the hash decides if it can be used as is.
    if hash_matches(str(path), known_hash):
        return "fetch", "Fetching"
    return "update", "Updating"
699
700
def stream_download(url, fname, known_hash, downloader, pooch=None, retry_if_failed=0):
    """
    Stream the file and check that its hash matches the known one.

    The file is first downloaded to a temporary file name in the cache folder.
    It will be moved to the desired file name only if the hash matches the
    known hash. Otherwise, the temporary file is deleted.

    If the download fails for either a bad connection or a hash mismatch, we
    will retry the download the specified number of times in case the failure
    was due to a network error. The wait between attempts starts at 1s and
    grows by 1s with each retry, up to a maximum of 10s.

    Parameters
    ----------
    url : str
        The URL of the file to download.
    fname : PathLike
        The full path where the file should end up. Must be a
        :class:`pathlib.Path`-like object (``.parent`` and ``.name`` are
        used). Missing parent folders are created.
    known_hash : str or None
        The hash that the downloaded file is checked against.
    downloader : callable
        Called as ``downloader(url, output_file, pooch)`` to perform the
        download to the temporary file.
    pooch : :class:`~pooch.Pooch` or None
        The instance requesting the download. Only passed on to the
        *downloader*.
    retry_if_failed : int
        How many extra attempts are made after a failed download. Values
        below zero are treated as zero so that one attempt is always made.

    Raises
    ------
    ValueError or requests.exceptions.RequestException
        Re-raised from the last attempt once all attempts have failed.

    """
    # Ensure the parent directory exists in case the file is in a subdirectory.
    # Otherwise, move will cause an error. Using exist_ok (instead of checking
    # for existence first) keeps this safe when another process or thread
    # creates the same folder between the check and the creation.
    os.makedirs(str(fname.parent), exist_ok=True)
    # Always try at least once: a negative retry count would otherwise skip
    # the loop below and return silently without downloading anything.
    download_attempts = 1 + max(0, retry_if_failed)
    max_wait = 10
    for i in range(download_attempts):
        try:
            # Stream the file to a temporary so that we can safely check its
            # hash before overwriting the original.
            with temporary_file(path=str(fname.parent)) as tmp:
                downloader(url, tmp, pooch)
                # NOTE(review): strict=True presumably makes hash_matches
                # raise ValueError on a mismatch (which is what the handler
                # below retries on) - confirm in the hashes module.
                hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
                shutil.move(tmp, str(fname))
            break
        except (ValueError, requests.exceptions.RequestException):
            # Out of attempts: let the caller see the original error.
            if i == download_attempts - 1:
                raise
            retries_left = download_attempts - (i + 1)
            get_logger().info(
                "Failed to download '%s'. "
                "Will attempt the download again %d more time%s.",
                str(fname.name),
                retries_left,
                "s" if retries_left > 1 else "",
            )
            # Back off a little longer after each failure, capped at max_wait.
            time.sleep(min(i + 1, max_wait))
740