1# Copyright (c) 2015, Google Inc.
2#
3# Permission to use, copy, modify, and/or distribute this software for any
4# purpose with or without fee is hereby granted, provided that the above
5# copyright notice and this permission notice appear in all copies.
6#
7# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15"""Extracts archives."""
16
17
18import hashlib
19import optparse
20import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
  """
  CheckedJoin normalizes path and returns it joined onto output.

  As a sanity check, a ValueError carrying the normalized path is raised when
  that normalized path is absolute or begins with a '.' character. The latter
  rejects '.', anything that climbs out via '..', and also any name whose
  first character is a dot. This is not a defence suitable for untrusted
  input.
  """
  normalized = os.path.normpath(path)
  if os.path.isabs(normalized):
    raise ValueError(normalized)
  if normalized[:1] == '.':
    raise ValueError(normalized)
  return os.path.join(output, normalized)
38
39
class FileEntry(object):
  """
  FileEntry records one regular file from an archive: its path within the
  archive, its permission mode (may be None), and a file-like object holding
  its contents.
  """

  def __init__(self, path, mode, fileobj):
    self.path, self.mode, self.fileobj = path, mode, fileobj
45
46
class SymlinkEntry(object):
  """
  SymlinkEntry records one symbolic link from an archive: its path within the
  archive, its permission mode (may be None), and the target the link points
  to.
  """

  def __init__(self, path, mode, target):
    self.path, self.mode, self.target = path, mode, target
52
53
def IterateZip(path):
  """
  IterateZip is a generator over the zip file at path. For every member whose
  name does not end in '/', it yields a FileEntry carrying the member's name,
  a mode of None, and an open file object for the member's contents. The zip
  file is kept open while the generator is suspended.
  """
  with zipfile.ZipFile(path, 'r') as archive:
    # Names ending in '/' are directory placeholders; only files are yielded.
    members = (m for m in archive.infolist() if not m.filename.endswith('/'))
    for member in members:
      yield FileEntry(member.filename, None, archive.open(member))
64
65
def IterateTar(path, compression):
  """
  IterateTar is a generator over the compressed tar file at path, opened with
  the given compression suffix (e.g. 'gz' or 'bz2'). Directories are skipped,
  symlinks are yielded as SymlinkEntry objects with a mode of None, and
  regular files are yielded as FileEntry objects with the archived mode. Any
  other kind of member raises ValueError.
  """
  with tarfile.open(path, 'r:' + compression) as archive:
    for member in archive:
      if member.isdir():
        continue
      if member.issym():
        yield SymlinkEntry(member.name, None, member.linkname)
        continue
      if not member.isfile():
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
      yield FileEntry(member.name, member.mode, archive.extractfile(member))
81
82
def main(args):
  """
  main extracts the archive named on the command line into an output
  directory.

  args is the argument list without the program name: optional flags followed
  by ARCHIVE and OUTPUT. Unless --no-prefix is given, every entry must live
  under one common top-level directory, which is stripped on extraction.

  The SHA-256 of the archive is stored in a stamp file inside OUTPUT. If the
  stamp already matches, nothing is done. Otherwise any existing OUTPUT is
  deleted and the archive is extracted from scratch.

  Returns the process exit status: 1 if the arguments are malformed, 0 on
  success, when the archive does not exist, or when OUTPUT is already
  up-to-date.

  Raises ValueError if the archive has an unsupported extension, an entry
  path is unsafe or malformed, or the entries do not share one prefix.
  Raises TypeError if an entry is neither a FileEntry nor a SymlinkEntry.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  # Hash the raw bytes of the archive. It must be opened in binary mode: text
  # mode may translate or truncate the data on some platforms, giving a digest
  # that does not describe the actual file.
  sha256 = hashlib.sha256()
  with open(archive, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha256.update(chunk)
  digest = sha256.hexdigest()

  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive, 'gz')
  elif archive.endswith('.tar.bz2'):
    entries = IterateTar(archive, 'bz2')
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        new_prefix, separator, rest = entry.path.partition('/')
        if not separator:
          # The entry sits at the top level, so there is no prefix directory
          # to strip. Report the offending path explicitly.
          raise ValueError(entry.path)

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      if not os.path.isdir(os.path.dirname(fixed_path)):
        os.makedirs(os.path.dirname(fixed_path))
      if isinstance(entry, FileEntry):
        with open(fixed_path, 'wb') as out:
          shutil.copyfileobj(entry.fileobj, out)
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if need be.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    # Closing the generator releases the underlying archive file.
    entries.close()

  # Record the digest only after a complete extraction, so that a failed run
  # is retried next time.
  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
178
179
# Script entry point: run main on the command-line arguments (minus the
# program name) and use its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
182