1# -*- coding: utf-8 -*-
2# Based on code from the vispy project
3# Distributed under the (new) BSD License. See LICENSE.txt for more info.
5"""Data downloading and reading functions
8from math import log
9import os
10from os import path as op
11import sys
12import shutil
13import time
15from . import appdata_dir, resource_dirs
16from . import StdoutProgressIndicator, urlopen
class InternetNotAllowedError(IOError):
    """Raised when a resource would have to be downloaded, but access to
    the internet has been disallowed.

    Plugins that need resources can simply call get_remote_file(); they
    should catch this error and silently ignore it.
    """
class NeedDownloadError(IOError):
    """Raised when a requested remote file has no local copy and must be
    downloaded explicitly by the user (i.e. automatic download is off).
    """
def _use_local_copy(filename, nfname, found_dir, given_directory):
    """Return the path to hand out for a resource that was found locally.

    If the caller asked for a specific directory and the resource was found
    somewhere else, the file is copied into the requested directory (which
    is created when missing) and the path of that copy is returned.
    Otherwise ``filename`` is returned unchanged.
    """
    if not given_directory or given_directory == found_dir:
        return filename
    target = op.join(given_directory, nfname)
    target_dir = op.dirname(target)
    # Make sure the output directory exists
    if not op.isdir(target_dir):
        os.makedirs(op.abspath(target_dir))
    shutil.copy(filename, target)
    return target


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """Get the filename for the local version of a file from the web

    The resource is first looked up locally (in ``directory`` or the appdata
    directory, then in the directories given by ``resource_dirs()``). Only
    if no usable local copy is found, the file is downloaded.

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date (the timestamp of a file on disk,
        if present, is checked).
    auto : bool
        Whether to auto-download the file if it is not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the environment variable
        ``IMAGEIO_NO_INTERNET`` is set to "1", "true" or "yes".
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native

    # Get dirs to look for the resource; the given dir has preference.
    # A fresh list is built so the list returned by resource_dirs() is
    # never mutated.
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = [directory] + list(resource_dirs())

    # Try to find the resource locally
    for candidate_dir in dirs:
        filename = op.join(candidate_dir, nfname)
        if not op.isfile(filename):
            continue
        if not force_download:  # we're done
            return _use_local_copy(
                filename, nfname, candidate_dir, given_directory
            )
        if isinstance(force_download, str):
            ntime = time.strptime(force_download, "%Y-%m-%d")
            # NOTE(review): getctime is the metadata-change time on Unix and
            # the creation time on Windows, while the docstring historically
            # spoke of the "modified date" -- confirm whether getmtime was
            # intended before changing this.
            ftime = time.gmtime(op.getctime(filename))
            if ftime >= ntime:
                return _use_local_copy(
                    filename, nfname, candidate_dir, given_directory
                )
            print("File older than %s, updating..." % force_download)
            break
        # force_download is True (or another truthy non-str): ignore the
        # local copy and keep going; the file is downloaded below.

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError()

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    if not op.isdir(op.dirname(filename)):
        os.makedirs(op.abspath(op.dirname(filename)))

    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On CI, we retry a few times before the final attempt below
        for _ in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
    _fetch_file(url, filename)  # pragma: no cover
    return filename  # pragma: no cover
def _fetch_file(url, file_name, print_destination=True):
    """Download the file at ``url`` and store it as ``file_name``

    The data is first written to ``file_name + ".part"`` and moved to its
    final location once the download completed. Up to four attempts are
    made; the error of each failed attempt is printed.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If True, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If none of the attempts succeeded.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    for tries in range(4):
        try:
            remote_file = urlopen(url, timeout=5.0)
            try:
                # Checking file size and displaying it alongside the
                # download url
                file_size = int(remote_file.headers["Content-Length"].strip())
                size_str = _sizeof_fmt(file_size)
                print(
                    "Try %i. Download from %s (%s)" % (tries + 1, url, size_str)
                )
                # Downloading data (can be extended to resume if need be).
                # The with-block guarantees the temp file is closed prior
                # to the move, also when the download fails half-way.
                with open(temp_file_name, "wb") as local_file:
                    _chunk_read(remote_file, local_file, initial_size=0)
            finally:
                # Release the connection on success as well as on failure,
                # so that retries do not accumulate open responses.
                remote_file.close()
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Deliberately broad: any failure just triggers the next try.
            print("Error while fetching file: %s." % str(e))
    else:
        # All attempts failed (the loop never hit ``break``)
        raise IOError(
            "Unable to download %r. Perhaps there is a no internet "
            "connection? If there is, please report this problem."
            % os.path.basename(file_name)
        )
def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show advancement

    Can also be used when resuming downloads over http.

    Parameters
    ----------
    response: urllib.response.addinfourl
        Response to the download request. Its ``Content-Length`` header is
        used to size the progress indicator and its ``read()`` method
        supplies the data.
    local_file: file
        Hard disk file where data should be written.
    chunk_size: integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size: int, optional
        If resuming, indicate the initial size of the file.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    # Content-Length only gives the amount left to download when resuming,
    # not the size of the entire file, so add what is already there.
    total_size = int(response.headers["Content-Length"].strip())
    total_size += initial_size

    progress = StdoutProgressIndicator("Downloading")
    progress.start("", "bytes", total_size)

    while True:
        chunk = response.read(chunk_size)
        if not chunk:  # an empty read means the response is exhausted
            break
        _chunk_write(chunk, local_file, progress)
    progress.finish("Done")
def _chunk_write(chunk, local_file, progress):
    """Store one downloaded chunk and advance the progress indicator.

    ``chunk`` is written to ``local_file`` and ``progress`` is increased
    by the length of the chunk.
    """
    local_file.write(chunk)
    progress.increase_progress(len(chunk))
    # A zero-length sleep yields to other threads, e.g. those that handle
    # stdout pipes.
    time.sleep(0)
def _sizeof_fmt(num):
    """Turn number of bytes into human-readable str

    Parameters
    ----------
    num : int | float
        The number of bytes.

    Returns
    -------
    text : str
        The size expressed in the largest fitting unit out of bytes, kB,
        MB, GB, TB and PB (powers of 1024), e.g. ``"1.0 MB"``. Values that
        are not greater than 1 give ``"0 bytes"`` (for zero) or
        ``"1 byte"``.
    """
    if not num > 1:
        return "0 bytes" if num == 0 else "1 byte"

    units = ("bytes", "kB", "MB", "GB", "TB", "PB")
    decimals = (0, 0, 1, 2, 2, 2)

    # Pick the unit by comparing against exact integer powers of 1024.
    # Deriving the exponent from a floating point logarithm can land just
    # below an integer at an exact power and select the wrong unit.
    exponent = 0
    while exponent < len(units) - 1 and num >= 1024 ** (exponent + 1):
        exponent += 1

    quotient = float(num) / 1024 ** exponent
    return "{0:.{1}f} {2}".format(quotient, decimals[exponent], units[exponent])