# -*- coding: utf-8 -*-
# Based on code from the vispy project
# Distributed under the (new) BSD License. See LICENSE.txt for more info.

"""Data downloading and reading functions
"""

from math import log  # noqa: F401 (no longer used here; kept for importers)
import os
from os import path as op
import sys
import shutil
import time

from . import appdata_dir, resource_dirs
from . import StdoutProgressIndicator, urlopen


class InternetNotAllowedError(IOError):
    """ Plugins that need resources can just use get_remote_file(), but
    should catch this error and silently ignore it.

    Raised by get_remote_file() when a download would be required but the
    IMAGEIO_NO_INTERNET environment variable is set.
    """


class NeedDownloadError(IOError):
    """ Is raised when a remote file is requested that is not locally
    available, but which needs to be explicitly downloaded by the user.
    """


def _copy_to_given_directory(filename, nfname, given_directory, found_directory):
    """Return the path to hand back for a resource found at ``filename``.

    If the caller asked for a specific ``given_directory`` and the resource
    was found somewhere else (``found_directory``), the file is copied to
    ``given_directory/nfname`` (creating directories as needed) and the path
    of that copy is returned. Otherwise ``filename`` is returned unchanged.
    """
    if not given_directory or given_directory == found_directory:
        return filename
    target = op.join(given_directory, nfname)
    target_dir = op.dirname(target)
    # Make sure the output directory exists
    if not op.isdir(target_dir):
        os.makedirs(op.abspath(target_dir))
    shutil.copy(filename, target)
    return target


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """ Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date (the timestamp of a file on disk,
        if present, is checked).
    auto : bool
        Whether to auto-download the file if it's not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the environment variable
        IMAGEIO_NO_INTERNET is set to "1", "true" or "yes".
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    IOError
        If the download itself fails (see ``_fetch_file``).
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native
    # Get dirs to look for the resource
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    # Given dir has preference. Build a new list rather than inserting into
    # whatever list object resource_dirs() handed back.
    search_dirs = [directory] + list(resource_dirs())
    # Try to find the resource locally
    for search_dir in search_dirs:
        filename = op.join(search_dir, nfname)
        if not op.isfile(filename):
            continue
        if not force_download:  # we're done
            return _copy_to_given_directory(
                filename, nfname, given_directory, search_dir
            )
        if isinstance(force_download, str):
            ntime = time.strptime(force_download, "%Y-%m-%d")
            # NOTE: this compares against the file's ctime, which is the
            # creation time on Windows but the time of the last metadata
            # change on Unix -- not strictly the modification time.
            ftime = time.gmtime(op.getctime(filename))
            if ftime >= ntime:
                return _copy_to_given_directory(
                    filename, nfname, given_directory, search_dir
                )
            print("File older than %s, updating..." % force_download)
            break

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError(
            "Resource %r is not available locally and needs to be "
            "downloaded explicitly." % fname
        )

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    if not op.isdir(op.dirname(filename)):
        os.makedirs(op.abspath(op.dirname(filename)))
    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On Travis, we retry a few times ...
        for i in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
        else:
            # Last attempt: let a failure propagate to the caller
            _fetch_file(url, filename)
            return filename
    else:  # pragma: no cover
        _fetch_file(url, filename)
        return filename


def _fetch_file(url, file_name, print_destination=True):
    """Download the file at ``url`` and store it as ``file_name``

    The data is first written to ``file_name + ".part"`` and moved into
    place once the download completed. Up to four attempts are made.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If true, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If all download attempts failed.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    initial_size = 0
    errors = []
    for tries in range(4):
        remote_file = None
        local_file = None
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be)
            local_file = open(temp_file_name, "wb")
            _chunk_read(remote_file, local_file, initial_size=initial_size)
            # temp file must be closed prior to the move
            local_file.close()
            shutil.move(temp_file_name, file_name)
            if print_destination:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Any failure (network, missing header, disk) just costs a try
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            # Release both the local file and the network connection,
            # whether this attempt succeeded or not.
            if local_file is not None:
                local_file.close()
            if remote_file is not None:
                remote_file.close()
    else:
        # Every attempt failed, so ``errors`` holds one entry per try;
        # surface the most recent one to make the failure diagnosable.
        raise IOError(
            "Unable to download %r. Perhaps there is no internet "
            "connection? If there is, please report this problem. "
            "(Last error: %s)" % (os.path.basename(file_name), errors[-1])
        )


def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show advancement

    Can also be used when resuming downloads over http.

    Parameters
    ----------
    response: urllib.response.addinfourl
        Response to the download request in order to get file size.
    local_file: file
        Hard disk file where data should be written.
    chunk_size: integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size: int, optional
        If resuming, indicate the initial size of the file.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    # Content-Length only covers the amount left to download when resuming,
    # not the size of the entire file, hence the addition of initial_size.
    total_size = int(response.headers["Content-Length"].strip())
    total_size += initial_size

    progress = StdoutProgressIndicator("Downloading")
    progress.start("", "bytes", total_size)

    while True:
        chunk = response.read(chunk_size)
        if not chunk:  # empty read signals end of stream
            break
        _chunk_write(chunk, local_file, progress)
    progress.finish("Done")


def _chunk_write(chunk, local_file, progress):
    """Write a chunk to file and update the progress bar"""
    local_file.write(chunk)
    progress.increase_progress(len(chunk))
    time.sleep(0)  # Give other threads a chance, e.g. those that handle stdout pipes


def _sizeof_fmt(num):
    """Turn number of bytes into human-readable str

    Values are expressed in powers of 1024, using the largest unit (up to
    PB) for which the quotient is at least 1, e.g. ``1536`` -> ``"2 kB"``
    and ``5 * 1024 ** 2`` -> ``"5.0 MB"``. Anything that is not greater
    than 1 is reported as ``"0 bytes"`` (for 0) or ``"1 byte"``.
    """
    units = ["bytes", "kB", "MB", "GB", "TB", "PB"]
    decimals = [0, 0, 1, 2, 2, 2]
    if num > 1:
        # Find the unit by repeated division instead of a floating point
        # logarithm, so exact powers of 1024 cannot be misclassified by
        # rounding. Division by 1024 is exact for binary floats.
        quotient = float(num)
        exponent = 0
        while quotient >= 1024 and exponent < len(units) - 1:
            quotient /= 1024
            exponent += 1
        format_string = "{0:.%sf} {1}" % decimals[exponent]
        return format_string.format(quotient, units[exponent])
    return "0 bytes" if num == 0 else "1 byte"