1#!/usr/bin/python
2# Copyright (c) 2012 The Native Client Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A library to assist automatically downloading files.
7
8This library is used by scripts that download tarballs, zipfiles, etc. as part
9of the build process.
10"""
11
12from __future__ import print_function
13
14import hashlib
15import os.path
16import re
17import sys
18
19try:
20  import urllib2 as urllib
21except ImportError:  # For Py3 compatibility
22  import urllib.request as urllib
23
24import pynacl.http_download
25
26SOURCE_STAMP = 'SOURCE_URL'
27HASH_STAMP = 'SOURCE_SHA1'
28
class HashError(Exception):
  """Raised when downloaded data does not have the expected hash.

  Attributes:
    download_url: url the data was fetched from.
    expected_hash: hash the caller required.
    actual_hash: hash computed from the data that was actually received.
  """

  def __init__(self, download_url, expected_hash, actual_hash):
    # Forward the fields to Exception so that `args` is populated.  This keeps
    # repr() informative and lets copy/pickle rebuild the exception, which
    # re-invokes the constructor with `args`.
    super(HashError, self).__init__(download_url, expected_hash, actual_hash)
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
        self.actual_hash, self.expected_hash, self.download_url)
38
def EnsureFileCanBeWritten(filename):
  """Create the parent directory of a file if it does not exist yet.

  Arguments:
    filename: path of the file that is about to be written.
  """
  directory = os.path.dirname(filename)
  # A bare filename has an empty dirname: the file lives in the current
  # directory, so there is nothing to create (os.makedirs('') would raise).
  if not directory or os.path.exists(directory):
    return
  try:
    os.makedirs(directory)
  except OSError:
    # Something else may have created the directory between the check above
    # and makedirs; only propagate the error if it is still missing.
    if not os.path.isdir(directory):
      raise
43
44
def WriteData(filename, data):
  """Write binary data to a file, creating its directory if needed.

  Arguments:
    filename: path of the file to (over)write.
    data: the bytes to store in the file.
  """
  EnsureFileCanBeWritten(filename)
  # The context manager closes the handle even when write() raises.
  with open(filename, 'wb') as f:
    f.write(data)
50
51
def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy everything readable from a stream into a file.

  Arguments:
    filename: path of the file to (over)write.
    stream: object with a read(size) method; copying stops at the first
        zero-length read.
    chunk_size: number of bytes requested from the stream per read.
    verbose: if true, print one '.' to stdout per chunk written and a final
        newline when done.
  """
  EnsureFileCanBeWritten(filename)
  out = open(filename, 'wb')
  try:
    chunk = stream.read(chunk_size)
    while len(chunk) != 0:
      out.write(chunk)
      if verbose:
        # Progress marker: show that data is still arriving.
        sys.stdout.write('.')
        sys.stdout.flush()
      chunk = stream.read(chunk_size)
  finally:
    # Terminate the progress line before releasing the file, even on error.
    if verbose:
      sys.stdout.write('\n')
    out.close()
69
70
def DoesStampMatch(stampfile, expected, index):
  """Check one line of a stamp file against an expected value.

  Arguments:
    stampfile: path of the stamp file to read.
    expected: the value the selected line must equal.
    index: which newline-separated line of the stamp to compare.
  Returns:
    'already up-to-date.' if line `index` of the stamp equals `expected`,
    'manual override.' if it does not but the stamp starts with 'manual',
    False otherwise, including when the stamp file cannot be read or has
    no line `index`.
  """
  try:
    # The context manager closes the handle even when read() raises.
    with open(stampfile, 'r') as f:
      stamp = f.read()
  except IOError:
    return False

  try:
    matches = stamp.split('\n')[index] == expected
  except IndexError:
    # The stamp has fewer lines than requested: treat it as a mismatch
    # instead of crashing the caller.
    matches = False

  if matches:
    return 'already up-to-date.'
  if stamp.startswith('manual'):
    return 'manual override.'
  return False
83
84
def WriteStamp(stampfile, data):
  """Write text to a stamp file, creating its directory if needed.

  Arguments:
    stampfile: path of the stamp file to (over)write.
    data: the text to store in the stamp file.
  """
  EnsureFileCanBeWritten(stampfile)
  # The context manager closes the handle even when write() raises.
  with open(stampfile, 'w') as f:
    f.write(data)
90
91
def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Decide whether a stamp file still vouches for the expected contents.

  Arguments:
    path: directory holding the stamp file.
    stamp_name: name of the stamp file inside `path`.
    stamp_contents: the value the stamp is expected to hold.
    min_time: if set, the stamp's modification time must be strictly newer
        than this for the stamp to count.
    index: which line of the stamp file to compare.
  Returns:
    The result of DoesStampMatch, or False when the stamp is not newer than
    `min_time` or cannot be stat'ed.  A 'manual override.' result is
    returned regardless of `min_time`.
  """
  stamp_path = os.path.join(path, stamp_name)
  result = DoesStampMatch(stamp_path, stamp_contents, index)

  # A manually downloaded/created toolchain is left untouched, whatever its
  # age.
  if result == 'manual override.':
    return result

  # No age requirement: the content comparison alone decides.
  if not min_time:
    return result

  try:
    modified = os.stat(stamp_path).st_mtime
  except OSError:
    return False

  # The stamp must be newer than the minimum last-modification time.
  if modified <= min_time:
    return False
  return result
111
112
def WriteSourceStamp(path, url):
  """Record a source url in the SOURCE_STAMP file inside `path`.

  Arguments:
    path: directory in which the stamp file is written.
    url: the url to store in the stamp.
  """
  WriteStamp(os.path.join(path, SOURCE_STAMP), url)
116
117
def WriteHashStamp(path, hash_val):
  """Record a hash in the HASH_STAMP file inside `path`.

  Arguments:
    path: directory in which the stamp file is written.
    hash_val: the hash string to store in the stamp.
  """
  WriteStamp(os.path.join(path, HASH_STAMP), hash_val)
121
122
def _HashFileHandle(fh):
  """Compute the sha1 of everything readable from a file-like object.

  The handle is read in 4096-byte chunks until it is exhausted and is always
  closed before returning, even if reading fails.

  Arguments:
    fh: file handle like object to hash.
  Returns:
    sha1 hex digest as a string.
  """
  digest = hashlib.sha1()
  try:
    chunk = fh.read(4096)
    while chunk:
      digest.update(chunk)
      chunk = fh.read(4096)
  finally:
    fh.close()
  return digest.hexdigest()
141
142
def HashFile(filename):
  """Compute the sha1 of a file on disk.

  Arguments:
    filename: path of the file to hash; it is opened in binary mode.
  Returns:
    sha1 as a string.
  """
  # _HashFileHandle takes ownership of the handle and closes it.
  return _HashFileHandle(open(filename, 'rb'))
153
154
def HashUrlByDownloading(url):
  """Compute the sha1 of the data at a url by fetching all of it.

  Arguments:
    url: url to download from.
  Returns:
    sha1 of the data at the url.
  """
  try:
    response = urllib.urlopen(url)
  except:
    # Report which url failed, then let the original error propagate
    # unchanged (nothing is swallowed here).
    sys.stderr.write('Failed fetching URL: %s\n' % url)
    raise
  else:
    return _HashFileHandle(response)
169
170
def HashUrl(url):
  """Get the SHA1 hash of the file at a url.

  First looks for an adjacent file with a ".sha1hash" suffix.  This saves
  having to download a large tarball just to get its hash.  If that file
  does not exist (HTTP 404), falls back to downloading the main file.

  Arguments:
    url: url of the file whose hash is wanted.
  Returns:
    sha1 as a string.
  Raises:
    AssertionError: if the ".sha1hash" file does not hold a SHA1 hex digest.
    urllib.HTTPError: for HTTP errors other than 404 on the ".sha1hash" file.
  """
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib.urlopen(hash_url)
    try:
      data = fh.read(100)
    finally:
      # Release the connection even when the read fails.
      fh.close()
  except urllib.HTTPError as exn:
    if exn.code == 404:
      return HashUrlByDownloading(url)
    raise

  # On Python 3 the response yields bytes; decode so that the str pattern
  # below applies and the returned hash compares equal to str hashes.
  # Undecodable bytes become replacement characters and fail the check.
  if not isinstance(data, str):
    data = data.decode('ascii', 'replace')
  if not re.match('[0-9a-f]{40}\n?$', data):
    raise AssertionError('Bad SHA1 hash file: %r' % data)
  return data.strip()
189
190
def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL.

  If neither the URL stamp nor the hash stamp shows the destination to be
  current, the file is downloaded again (unless a file with the expected
  hash is already on disk).

  Arguments:
    url: the url to compare against the stamp and to download from.
    filename: the file to create on download.
    stamp_dir: the directory containing the stamp files to check against;
        if None, no stamps are consulted.
    min_time: if set, stamps that are not newer than this modification time
        are not considered current.
    hash_val: if set, the expected hash which must be matched.
    keep: if true, the stamps are only trusted when `filename` already
        exists on disk.
    verbose: prints out status as it runs.
    stamp_index: index within the hash stamp file to check.
  Returns:
    False if the stamps show that the file is already up to date.
    True if the file was downloaded, or if the file already on disk has the
    expected hash.
  Exception:
    HashError: if the hash of the downloaded file does not match.
  """

  assert url and filename

  # The stamps can only be trusted when the tarball itself is not needed, or
  # when it is needed (keep) and is actually present on disk.
  stamps_usable = (not keep) or os.path.isfile(filename)

  # If the URL stamp matches, we must be up to date.  If the URL differs but
  # the recorded hash matches the one we'll insist the tarball has, then
  # that's good enough too.
  # TODO(mcgrathr): Download the .sha1sum file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if stamps_usable and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        print('%s is already up to date.' % filename)
      return False
    hash_is_current = (
        hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index))
    if hash_is_current:
      if verbose:
        print('%s is identical to the up to date file.' % filename)
      return False

  # A file with the right hash is already on disk: skip the download.
  if os.path.isfile(filename) and hash_val is not None:
    if HashFile(filename) == hash_val:
      return True

  if verbose:
    print('Updating %s\n\tfrom %s.' % (filename, url))
  EnsureFileCanBeWritten(filename)
  pynacl.http_download.HttpDownload(url, filename)

  # Without an expected hash there is nothing left to verify.
  if not hash_val:
    return True

  downloaded_hash = HashFile(filename)
  if downloaded_hash != hash_val:
    raise HashError(download_url=url, expected_hash=hash_val,
                    actual_hash=downloaded_hash)
  return True
255