# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helpers that skip a build action when its inputs have not changed.

Input md5s (and input strings) are written to a '.stamp' record file;
subsequent runs compare freshly computed md5s against the record to decide
whether the wrapped callback must run again.
"""

from __future__ import print_function

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils

# Make build/print_python_deps.py importable.
sys.path.insert(1, os.path.join(build_utils.DIR_SOURCE_ROOT, 'build'))
import print_python_deps

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the
  |options| argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be
  listed in depfile_deps. It's important to write paths to the depfile that
  are already captured by GN deps since GN args can cause GN deps to change,
  and such changes are not immediately reflected in depfiles
  (http://crbug.com/589311).

  Args:
    on_stale_md5: Callback invoked when the target is stale. Forwarded to
        CallAndRecordIfStale().
    options: Options object; when it has a truthy |depfile| attribute, a
        depfile is written to that path after the staleness check.
    record_path, input_paths, input_strings, output_paths, force,
        pass_changes, track_subpaths_allowlist: Forwarded to
        CallAndRecordIfStale(); see its docstring.
    depfile_deps: Extra paths to list in the depfile, in addition to the
        automatically computed python dependencies.

  Raises:
    Exception: If output_paths is empty (a depfile needs an output to key on).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  # Copy the lists so the appends below do not mutate caller arguments.
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  # The .py files this script (transitively) imports are themselves inputs.
  input_paths += print_python_deps.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write depfile even when inputs have not changed to ensure build correctness
  # on bots that build with & without patch, and the patch changes the depfile
  # location.
  if hasattr(options, 'depfile') and options.depfile:
    build_utils.WriteDepfile(options.depfile, output_paths[0], depfile_deps)


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
  PRINT_MD5_DIFFS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  # Need at least one output to derive a default record_path from.
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  # Per-entry tracking is slower, so enable it only when the extra detail is
  # actually consumed (by |function| or by the explanation printout).
  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  too_new = []
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    record_mtime = os.path.getmtime(record_path)
    # Outputs newer than the change information must have been modified outside
    # of the build, and should be considered stale.
    too_new = [x for x in output_paths if os.path.getmtime(x) > record_mtime]
    if not too_new:
      with open(record_path, 'r') as jsonfile:
        try:
          old_metadata = _Metadata.FromFile(jsonfile)
        except:  # pylint: disable=bare-except
          pass  # Not yet using new file format.

  changes = Changes(old_metadata, new_metadata, force, missing_outputs, too_new)
  if not changes.HasChanges():
    # Everything is up-to-date; skip calling |function|.
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  # Write the record only after |function| returns: if it raises, the old
  # record remains and the target is re-run next time.
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs,
               too_new):
    # old_metadata is None on the first run or when the record was unreadable.
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs
    self.too_new = too_new

  def _GetOldTag(self, path, subpath=None):
    # Returns None when there is no old metadata (first run).
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() is True whenever |old_metadata| is missing, so the
    # FilesMd5() comparison is only reached when |old_metadata| exists.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths();

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path);

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      # old_tag is None means "added", which is reported separately.
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    # Checks are ordered from cheapest/most-decisive to most detailed.
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    elif self.too_new:
      return 'Outputs newer than stamp file:\n  ' + '\n  '.join(self.too_new)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'


class _Metadata(object):
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
        Changes functionality.
  """
  # Serialized schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    # Aggregate md5s, computed lazily on first query.
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Maps (path, subpath) -> entry dict. Built on first _GetEntry() call.
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    instance = cls()
    data = json.load(fileobj)
    instance._files_md5 = data['files-md5']
    instance._strings_md5 = data['strings-md5']
    instance._files = data.get('input-files', [])
    instance._strings = data.get('input-strings', [])
    return instance

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    data = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    # Per-entry details are only persisted when tracking was requested.
    if self._track_entries:
      data['input-files'] = sorted(self._files, key=lambda e: e['path'])
      data['input-strings'] = self._strings

    json.dump(data, fileobj, indent=2)

  def _AssertNotQueried(self):
    # Adding inputs after an md5 has been computed would leave the cached
    # aggregate values stale, so mutation is allowed only before any query.
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Records input strings (coerced to str and stored verbatim)."""
    self._AssertNotQueried()
    self._strings.extend(str(value) for value in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    subpaths = [subpath for subpath, _ in entries]
    tags = [tag for _, tag in entries]
    # The zip's own tag hashes every entry name followed by every entry tag.
    self._files.append({
        'path': path,
        'tag': _ComputeInlineMd5(itertools.chain(subpaths, tags)),
        'entries': [{'path': subpath, 'tag': tag}
                    for subpath, tag in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      tags = (self.GetTag(path) for path in sorted(self.IterPaths()))
      self._files_md5 = _ComputeInlineMd5(tags)
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath, or None."""
    if self._file_map is None:
      # Index top-level files under (path, None) and zip members under
      # (path, subpath).
      file_map = {}
      for outer in self._files:
        file_map[(outer['path'], None)] = outer
        for inner in outer.get('entries', ()):
          file_map[(outer['path'], inner['path'])] = inner
      self._file_map = file_map
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath, or None if unknown."""
    entry = self._GetEntry(path, subpath)
    if not entry:
      return entry
    return entry['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (record['path'] for record in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    record = self._GetEntry(path)
    if not record:
      return ()
    return (subentry['path'] for subentry in record.get('entries', []))


def _ComputeTagForPath(path):
  """Returns a change tag for |path|: an md5, or the mtime for large files."""
  stat_result = os.stat(path)
  if stat_result.st_size > 1 * 1024 * 1024:
    # Fallback to mtime for large files so that md5_check does not take too long
    # to run.
    return stat_result.st_mtime
  with open(path, 'rb') as f:
    contents = f.read()
  return hashlib.md5(contents).hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  digest = hashlib.md5()
  for value in iterable:
    digest.update(str(value).encode('ascii'))
  return digest.hexdigest()


def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|."""
  with zipfile.ZipFile(path) as zip_file:
    # Entries with a CRC of 0 are directories or empty files; skip them.
    return [(info.filename, info.CRC + info.compress_type)
            for info in zip_file.infolist() if info.CRC]