1#!/usr/bin/python 2# Copyright (c) 2012 The Native Client Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""A library to assist automatically downloading files. 7 8This library is used by scripts that download tarballs, zipfiles, etc. as part 9of the build process. 10""" 11 12from __future__ import print_function 13 14import hashlib 15import os.path 16import re 17import sys 18 19try: 20 import urllib2 as urllib 21except ImportError: # For Py3 compatibility 22 import urllib.request as urllib 23 24import pynacl.http_download 25 26SOURCE_STAMP = 'SOURCE_URL' 27HASH_STAMP = 'SOURCE_SHA1' 28 29class HashError(Exception): 30 def __init__(self, download_url, expected_hash, actual_hash): 31 self.download_url = download_url 32 self.expected_hash = expected_hash 33 self.actual_hash = actual_hash 34 35 def __str__(self): 36 return 'Got hash "%s" but expected hash "%s" for "%s"' % ( 37 self.actual_hash, self.expected_hash, self.download_url) 38 39def EnsureFileCanBeWritten(filename): 40 directory = os.path.dirname(filename) 41 if not os.path.exists(directory): 42 os.makedirs(directory) 43 44 45def WriteData(filename, data): 46 EnsureFileCanBeWritten(filename) 47 f = open(filename, 'wb') 48 f.write(data) 49 f.close() 50 51 52def WriteDataFromStream(filename, stream, chunk_size, verbose=True): 53 EnsureFileCanBeWritten(filename) 54 dst = open(filename, 'wb') 55 try: 56 while True: 57 data = stream.read(chunk_size) 58 if len(data) == 0: 59 break 60 dst.write(data) 61 if verbose: 62 # Indicate that we're still writing. 63 sys.stdout.write('.') 64 sys.stdout.flush() 65 finally: 66 if verbose: 67 sys.stdout.write('\n') 68 dst.close() 69 70 71def DoesStampMatch(stampfile, expected, index): 72 try: 73 f = open(stampfile, 'r') 74 stamp = f.read() 75 f.close() 76 if stamp.split('\n')[index] == expected: 77 return 'already up-to-date.' 78 elif stamp.startswith('manual'): 79 return 'manual override.' 80 return False 81 except IOError: 82 return False 83 84 85def WriteStamp(stampfile, data): 86 EnsureFileCanBeWritten(stampfile) 87 f = open(stampfile, 'w') 88 f.write(data) 89 f.close() 90 91 92def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0): 93 stampfile = os.path.join(path, stamp_name) 94 95 stampmatch = DoesStampMatch(stampfile, stamp_contents, index) 96 97 # If toolchain was downloaded and/or created manually then keep it untouched 98 if stampmatch == 'manual override.': 99 return stampmatch 100 101 # Check if the stampfile is older than the minimum last mod time 102 if min_time: 103 try: 104 stamp_time = os.stat(stampfile).st_mtime 105 if stamp_time <= min_time: 106 return False 107 except OSError: 108 return False 109 110 return stampmatch 111 112 113def WriteSourceStamp(path, url): 114 stampfile = os.path.join(path, SOURCE_STAMP) 115 WriteStamp(stampfile, url) 116 117 118def WriteHashStamp(path, hash_val): 119 hash_stampfile = os.path.join(path, HASH_STAMP) 120 WriteStamp(hash_stampfile, hash_val) 121 122 123def _HashFileHandle(fh): 124 """sha1 of a file like object. 125 126 Arguments: 127 fh: file handle like object to hash. 128 Returns: 129 sha1 as a string. 130 """ 131 hasher = hashlib.sha1() 132 try: 133 while True: 134 data = fh.read(4096) 135 if not data: 136 break 137 hasher.update(data) 138 finally: 139 fh.close() 140 return hasher.hexdigest() 141 142 143def HashFile(filename): 144 """sha1 a file on disk. 145 146 Arguments: 147 filename: filename to hash. 148 Returns: 149 sha1 as a string. 150 """ 151 fh = open(filename, 'rb') 152 return _HashFileHandle(fh) 153 154 155def HashUrlByDownloading(url): 156 """sha1 the data at an url. 157 158 Arguments: 159 url: url to download from. 160 Returns: 161 sha1 of the data at the url. 162 """ 163 try: 164 fh = urllib.urlopen(url) 165 except: 166 sys.stderr.write('Failed fetching URL: %s\n' % url) 167 raise 168 return _HashFileHandle(fh) 169 170 171# Attempts to get the SHA1 hash of a file given a URL by looking for 172# an adjacent file with a ".sha1hash" suffix. This saves having to 173# download a large tarball just to get its hash. Otherwise, we fall 174# back to downloading the main file. 175def HashUrl(url): 176 hash_url = '%s.sha1hash' % url 177 try: 178 fh = urllib.urlopen(hash_url) 179 data = fh.read(100) 180 fh.close() 181 except urllib.HTTPError as exn: 182 if exn.code == 404: 183 return HashUrlByDownloading(url) 184 raise 185 else: 186 if not re.match('[0-9a-f]{40}\n?$', data): 187 raise AssertionError('Bad SHA1 hash file: %r' % data) 188 return data.strip() 189 190 191def SyncURL(url, filename=None, stamp_dir=None, min_time=None, 192 hash_val=None, keep=False, verbose=False, stamp_index=0): 193 """Synchronize a destination file with a URL 194 195 if the URL does not match the URL stamp, then we must re-download it. 196 197 Arugments: 198 url: the url which will to compare against and download 199 filename: the file to create on download 200 path: the download path 201 stamp_dir: the filename containing the URL stamp to check against 202 hash_val: if set, the expected hash which must be matched 203 verbose: prints out status as it runs 204 stamp_index: index within the stamp file to check. 205 Returns: 206 True if the file is replaced 207 False if the file is not replaced 208 Exception: 209 HashError: if the hash does not match 210 """ 211 212 assert url and filename 213 214 # If we are not keeping the tarball, or we already have it, we can 215 # skip downloading it for this reason. If we are keeping it, 216 # it must exist. 217 if keep: 218 tarball_ok = os.path.isfile(filename) 219 else: 220 tarball_ok = True 221 222 # If we don't need the tarball and the stamp_file matches the url, then 223 # we must be up to date. If the URL differs but the recorded hash matches 224 # the one we'll insist the tarball has, then that's good enough too. 225 # TODO(mcgrathr): Download the .sha1sum file first to compare with 226 # the cached hash, in case --file-hash options weren't used. 227 if tarball_ok and stamp_dir is not None: 228 if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time): 229 if verbose: 230 print('%s is already up to date.' % filename) 231 return False 232 if (hash_val is not None and 233 StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)): 234 if verbose: 235 print('%s is identical to the up to date file.' % filename) 236 return False 237 238 if (os.path.isfile(filename) 239 and hash_val is not None 240 and hash_val == HashFile(filename)): 241 return True 242 243 if verbose: 244 print('Updating %s\n\tfrom %s.' % (filename, url)) 245 EnsureFileCanBeWritten(filename) 246 pynacl.http_download.HttpDownload(url, filename) 247 248 if hash_val: 249 tar_hash = HashFile(filename) 250 if hash_val != tar_hash: 251 raise HashError(actual_hash=tar_hash, expected_hash=hash_val, 252 download_url=url) 253 254 return True 255