1# -*- coding: utf-8 -*-
2# Based on code from the vispy project
3# Distributed under the (new) BSD License. See LICENSE.txt for more info.
4
5"""Data downloading and reading functions
6"""
7
8from math import log
9import os
10from os import path as op
11import sys
12import shutil
13import time
14
15from . import appdata_dir, resource_dirs
16from . import StdoutProgressIndicator, urlopen
17
18
class InternetNotAllowedError(IOError):
    """Signal that a download was required but internet use is disallowed.

    ``get_remote_file()`` raises this when the ``IMAGEIO_NO_INTERNET``
    environment variable is set and the requested resource is not
    available locally. Plugins that merely want an optional resource
    should catch this error and carry on silently.
    """
25
26
class NeedDownloadError(IOError):
    """Signal that a remote file must be downloaded explicitly by the user.

    Raised when a requested remote file is not available locally and
    automatic downloading has been turned off.
    """
31
32
def _copy_to_given_directory(filename, found_dir, given_directory, nfname):
    """Return a path to ``filename`` inside the user-requested directory.

    If no directory was explicitly requested, or the file was found in that
    very directory, ``filename`` is returned untouched. Otherwise the file is
    copied to ``given_directory/nfname`` (creating intermediate directories
    when needed) and the path of the copy is returned.
    """
    if not given_directory or given_directory == found_dir:
        return filename
    filename2 = op.join(given_directory, nfname)
    # Make sure the output directory exists
    target_dir = op.dirname(filename2)
    if not op.isdir(target_dir):
        os.makedirs(op.abspath(target_dir))
    shutil.copy(filename, filename2)
    return filename2


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """ Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date (modified date of a file on disk,
        if present, is checked).
    auto : bool
        Whether to auto-download the file if it's not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the ``IMAGEIO_NO_INTERNET`` environment
        variable is set to ``1``, ``true`` or ``yes`` (case-insensitive).
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    IOError
        If the download itself fails.
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native
    # Get dirs to look for the resource
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = resource_dirs()
    dirs.insert(0, directory)  # Given dir has preference
    # Try to find the resource locally
    for search_dir in dirs:
        filename = op.join(search_dir, nfname)
        if not op.isfile(filename):
            continue
        if not force_download:  # we're done
            return _copy_to_given_directory(
                filename, search_dir, given_directory, nfname
            )
        if isinstance(force_download, str):
            ntime = time.strptime(force_download, "%Y-%m-%d")
            # Use the modification time, as documented: ctime is the
            # metadata-change time on Unix and the creation time on Windows,
            # neither of which tells when the content was last written.
            ftime = time.gmtime(op.getmtime(filename))
            if ftime >= ntime:
                return _copy_to_given_directory(
                    filename, search_dir, given_directory, nfname
                )
            print("File older than %s, updating..." % force_download)
            break
        # force_download is True: keep scanning; we end up downloading below

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError(
            "The resource %r is not available locally and automatic "
            "download is disabled (auto=False)." % fname
        )

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    if not op.isdir(op.dirname(filename)):
        os.makedirs(op.abspath(op.dirname(filename)))
    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On CI, we retry a few times before the final attempt below
        for _ in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
    # Single attempt (or last CI attempt); errors propagate to the caller
    _fetch_file(url, filename)  # pragma: no cover
    return filename  # pragma: no cover
130
131
def _fetch_file(url, file_name, print_destination=True):
    """Download a file from a url and store it on disk

    The data is first written to ``file_name + ".part"`` and moved to
    ``file_name`` once the download has completed. Up to four attempts
    are made; any exception during an attempt is printed and triggers the
    next attempt.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If true, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If all attempts failed. The message lists the errors that were
        encountered during the individual attempts.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    initial_size = 0
    errors = []
    for tries in range(4):
        remote_file = None
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be).
            # The with-block guarantees the temp file is closed prior to
            # the move, also when the download raises half-way.
            with open(temp_file_name, "wb") as local_file:
                _chunk_read(remote_file, local_file, initial_size=initial_size)
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Deliberately broad: any failure simply leads to a retry
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            # Release the connection, whether the attempt succeeded or not
            if remote_file is not None:
                remote_file.close()
    else:
        raise IOError(
            "Unable to download %r. Perhaps there is a no internet "
            "connection? If there is, please report this problem."
            " (Errors encountered: %s)"
            % (
                os.path.basename(file_name),
                "; ".join(str(e) for e in errors),
            )
        )
189
190
def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show advancement

    Can also be used when resuming downloads over http.

    Parameters
    ----------
    response: urllib.response.addinfourl
        Response to the download request in order to get file size.
        Must provide a ``Content-Length`` header and a ``read()`` method.
    local_file: file
        Hard disk file where data should be written.
    chunk_size: integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size: int, optional
        If resuming, indicate the initial size of the file.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    # Content-Length is only the amount left to download when resuming, not
    # the size of the entire file, so add what is already on disk
    total_size = int(response.headers["Content-Length"].strip()) + initial_size

    progress = StdoutProgressIndicator("Downloading")
    progress.start("", "bytes", total_size)

    while True:
        chunk = response.read(chunk_size)
        if not chunk:  # an empty read marks the end of the stream
            break
        _chunk_write(chunk, local_file, progress)
    progress.finish("Done")
226
227
def _chunk_write(chunk, local_file, progress):
    """Store one downloaded chunk and advance the progress indicator

    Parameters
    ----------
    chunk: bytes
        The data to write.
    local_file: file
        Open file object that receives the data.
    progress: object
        Progress indicator; its ``increase_progress()`` method is called
        with the length of the chunk.
    """
    n_received = len(chunk)
    local_file.write(chunk)
    progress.increase_progress(n_received)
    # Yield to other threads, e.g. those that handle stdout pipes
    time.sleep(0)
233
234
def _sizeof_fmt(num):
    """Turn number of bytes into human-readable str

    Parameters
    ----------
    num: int | float
        The size in bytes.

    Returns
    -------
    size: str
        The size expressed in the largest fitting unit out of bytes, kB,
        MB, GB, TB and PB (using factors of 1024), e.g. ``"1.0 MB"``.
        Values that are neither zero nor larger than one yield ``"1 byte"``.
    """
    units = ["bytes", "kB", "MB", "GB", "TB", "PB"]
    decimals = [0, 0, 1, 2, 2, 2]
    if num > 1:
        # Pick the unit by repeated division rather than via a float
        # logarithm: log(num, 1024) can land a hair below or above an
        # integer near exact powers of 1024 and so select the wrong unit.
        # Dividing a float by 1024 (a power of two) is exact.
        quotient = float(num)
        exponent = 0
        while quotient >= 1024 and exponent < len(units) - 1:
            quotient /= 1024
            exponent += 1
        format_string = "{0:.%sf} {1}" % decimals[exponent]
        return format_string.format(quotient, units[exponent])
    return "0 bytes" if num == 0 else "1 byte"
248