1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from __future__ import print_function
6
7import difflib
8import hashlib
9import itertools
10import json
11import os
12import sys
13import zipfile
14
15from util import build_utils
16
17sys.path.insert(1, os.path.join(build_utils.DIR_SOURCE_ROOT, 'build'))
18import print_python_deps
19
20# When set and a difference is detected, a diff of what changed is printed.
21PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))
22
23# An escape hatch that causes all targets to be rebuilt.
24_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))
25
26
def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Calls CallAndRecordIfStale() and then emits a depfile when requested.

  If |options| carries a truthy .depfile attribute, a depfile is written after
  |on_stale_md5| runs (and even when nothing was stale, so that bots that
  build with & without a patch stay correct if the depfile location moves).

  By default only python dependencies end up in the depfile. Inputs that GN
  deps do not capture should be passed via depfile_deps. Paths already covered
  by GN deps must not be written to the depfile, because GN args can alter GN
  deps and depfiles lag behind such changes (http://crbug.com/589311).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')

  # Work on copies so the caller's sequences are never mutated.
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  # Every .py file involved in producing the output is an implicit input.
  input_paths.extend(print_python_deps.ComputePythonDependencies())

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write the depfile even when inputs have not changed to ensure build
  # correctness on bots that build with & without patch, and the patch
  # changes the depfile location.
  depfile = getattr(options, 'depfile', None)
  if depfile:
    build_utils.WriteDepfile(depfile, output_paths[0], depfile_deps)
71
72
def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  # Per-entry tracking is only needed when someone will query the diff.
  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  too_new = []
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    record_mtime = os.path.getmtime(record_path)
    # Outputs newer than the change information must have been modified outside
    # of the build, and should be considered stale.
    too_new = [x for x in output_paths if os.path.getmtime(x) > record_mtime]
    if not too_new:
      with open(record_path, 'r') as jsonfile:
        try:
          old_metadata = _Metadata.FromFile(jsonfile)
        except (ValueError, KeyError):
          # json.load raises JSONDecodeError (a ValueError) for non-JSON
          # stamps; a stamp missing expected keys raises KeyError. Either way
          # the stamp predates (or corrupts) the current format, so treat it
          # as absent and rebuild.
          pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs, too_new)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  # Only construct/pass the Changes object when the callback asked for it.
  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)
159
160
class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs,
               too_new):
    # old_metadata may be None when no (valid) stamp file existed.
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs
    self.too_new = too_new

  def _GetOldTag(self, path, subpath=None):
    # Returns None when there is no previous metadata at all.
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() is True whenever old_metadata is None, so the
    # second operand only evaluates with a non-None old_metadata.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    elif self.too_new:
      return 'Outputs newer than stamp file:\n  ' + '\n  '.join(self.too_new)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'
295
296
297class _Metadata(object):
298  """Data model for tracking change metadata.
299
300  Args:
301    track_entries: Enables per-file change tracking. Slower, but required for
302        Changes functionality.
303  """
304  # Schema:
305  # {
306  #   "files-md5": "VALUE",
307  #   "strings-md5": "VALUE",
308  #   "input-files": [
309  #     {
310  #       "path": "path.jar",
311  #       "tag": "{MD5 of entries}",
312  #       "entries": [
313  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
314  #       ]
315  #     }, {
316  #       "path": "path.txt",
317  #       "tag": "{MD5}",
318  #     }
319  #   ],
320  #   "input-strings": ["a", "b", ...],
321  # }
322  def __init__(self, track_entries=False):
323    self._track_entries = track_entries
324    self._files_md5 = None
325    self._strings_md5 = None
326    self._files = []
327    self._strings = []
328    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
329    self._file_map = None
330
331  @classmethod
332  def FromFile(cls, fileobj):
333    """Returns a _Metadata initialized from a file object."""
334    ret = cls()
335    obj = json.load(fileobj)
336    ret._files_md5 = obj['files-md5']
337    ret._strings_md5 = obj['strings-md5']
338    ret._files = obj.get('input-files', [])
339    ret._strings = obj.get('input-strings', [])
340    return ret
341
342  def ToFile(self, fileobj):
343    """Serializes metadata to the given file object."""
344    obj = {
345        'files-md5': self.FilesMd5(),
346        'strings-md5': self.StringsMd5(),
347    }
348    if self._track_entries:
349      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
350      obj['input-strings'] = self._strings
351
352    json.dump(obj, fileobj, indent=2)
353
354  def _AssertNotQueried(self):
355    assert self._files_md5 is None
356    assert self._strings_md5 is None
357    assert self._file_map is None
358
359  def AddStrings(self, values):
360    self._AssertNotQueried()
361    self._strings.extend(str(v) for v in values)
362
363  def AddFile(self, path, tag):
364    """Adds metadata for a non-zip file.
365
366    Args:
367      path: Path to the file.
368      tag: A short string representative of the file contents.
369    """
370    self._AssertNotQueried()
371    self._files.append({
372        'path': path,
373        'tag': tag,
374    })
375
376  def AddZipFile(self, path, entries):
377    """Adds metadata for a zip file.
378
379    Args:
380      path: Path to the file.
381      entries: List of (subpath, tag) tuples for entries within the zip.
382    """
383    self._AssertNotQueried()
384    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
385                                            (e[1] for e in entries)))
386    self._files.append({
387        'path': path,
388        'tag': tag,
389        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
390    })
391
392  def GetStrings(self):
393    """Returns the list of input strings."""
394    return self._strings
395
396  def FilesMd5(self):
397    """Lazily computes and returns the aggregate md5 of input files."""
398    if self._files_md5 is None:
399      # Omit paths from md5 since temporary files have random names.
400      self._files_md5 = _ComputeInlineMd5(
401          self.GetTag(p) for p in sorted(self.IterPaths()))
402    return self._files_md5
403
404  def StringsMd5(self):
405    """Lazily computes and returns the aggregate md5 of input strings."""
406    if self._strings_md5 is None:
407      self._strings_md5 = _ComputeInlineMd5(self._strings)
408    return self._strings_md5
409
410  def _GetEntry(self, path, subpath=None):
411    """Returns the JSON entry for the given path / subpath."""
412    if self._file_map is None:
413      self._file_map = {}
414      for entry in self._files:
415        self._file_map[(entry['path'], None)] = entry
416        for subentry in entry.get('entries', ()):
417          self._file_map[(entry['path'], subentry['path'])] = subentry
418    return self._file_map.get((path, subpath))
419
420  def GetTag(self, path, subpath=None):
421    """Returns the tag for the given path / subpath."""
422    ret = self._GetEntry(path, subpath)
423    return ret and ret['tag']
424
425  def IterPaths(self):
426    """Returns a generator for all top-level paths."""
427    return (e['path'] for e in self._files)
428
429  def IterSubpaths(self, path):
430    """Returns a generator for all subpaths in the given zip.
431
432    If the given path is not a zip file or doesn't exist, returns an empty
433    iterable.
434    """
435    outer_entry = self._GetEntry(path)
436    if not outer_entry:
437      return ()
438    subentries = outer_entry.get('entries', [])
439    return (entry['path'] for entry in subentries)
440
441
442def _ComputeTagForPath(path):
443  stat = os.stat(path)
444  if stat.st_size > 1 * 1024 * 1024:
445    # Fallback to mtime for large files so that md5_check does not take too long
446    # to run.
447    return stat.st_mtime
448  md5 = hashlib.md5()
449  with open(path, 'rb') as f:
450    md5.update(f.read())
451  return md5.hexdigest()
452
453
454def _ComputeInlineMd5(iterable):
455  """Computes the md5 of the concatenated parameters."""
456  md5 = hashlib.md5()
457  for item in iterable:
458    md5.update(str(item).encode('ascii'))
459  return md5.hexdigest()
460
461
462def _ExtractZipEntries(path):
463  """Returns a list of (path, CRC32) of all files within |path|."""
464  entries = []
465  with zipfile.ZipFile(path) as zip_file:
466    for zip_info in zip_file.infolist():
467      # Skip directories and empty files.
468      if zip_info.CRC:
469        entries.append(
470            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
471  return entries
472