# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the |options|
  argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be listed
  in depfile_deps. It's important to write paths to the depfile that are already
  captured by GN deps since GN args can cause GN deps to change, and such
  changes are not immediately reflected in depfiles (http://crbug.com/589311).

  Args:
    on_stale_md5: Callable forwarded to CallAndRecordIfStale() as |function|.
    options: Object whose optional |depfile| attribute names the depfile to
        write. No other attribute is read here.
    record_path: See CallAndRecordIfStale().
    input_paths: See CallAndRecordIfStale(). The python dependencies reported
        by build_utils.ComputePythonDependencies() are appended to a copy.
    input_strings: See CallAndRecordIfStale().
    output_paths: See CallAndRecordIfStale(). Must be non-empty; the first
        entry is the output the depfile is written for.
    force: See CallAndRecordIfStale().
    pass_changes: See CallAndRecordIfStale().
    track_subpaths_allowlist: See CallAndRecordIfStale().
    depfile_deps: Passed through to build_utils.WriteDepfile().

  Raises:
    Exception: If |output_paths| is empty or None.
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  # Copy the lists so that the caller's arguments are never mutated.
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  input_paths += build_utils.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write depfile even when inputs have not changed to ensure build correctness
  # on bots that build with & without patch, and the patch changes the depfile
  # location.
  if hasattr(options, 'depfile') and options.depfile:
    build_utils.WriteDepfile(
        options.depfile, output_paths[0], depfile_deps, add_pydeps=False)


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  After |function| returns, the new metadata is written to |record_path|.

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  # Per-entry tracking is only needed when someone will inspect the changes.
  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  # When forcing, every output is reported as missing.
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except Exception:  # pylint: disable=broad-except
        # Not yet using new file format. Treat an unreadable stamp like a
        # missing one, but let KeyboardInterrupt / SystemExit propagate.
        pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  # Only record the new state once |function| has returned without raising.
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    """Stores the state being compared.

    Args:
      old_metadata: _Metadata from the previous run, or None if unavailable.
      new_metadata: _Metadata describing the current inputs.
      force: Whether a rebuild was forced.
      missing_outputs: List of output paths considered missing.
    """
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    """Returns the previous tag for path / subpath, or None if there is none."""
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() is True whenever old_metadata is missing, so the
    # right-hand side is only evaluated when old_metadata exists.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed.

    Also returns True when a rebuild is forced or there is no old metadata.
    """
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      # Keep only the lines that ndiff marks as differing.
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n ' + '\n '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend(' -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend(' -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend(' -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n ' + '\n '.join(lines)
    return 'I have no idea what changed (there is a bug).'


class _Metadata(object):
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
        Changes functionality.
  """
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    ret = cls()
    obj = json.load(fileobj)
    # The two digests are mandatory; the per-entry lists are only present when
    # the stamp was written with entry tracking enabled (see ToFile()).
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj.get('input-files', [])
    ret._strings = obj.get('input-strings', [])
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    if self._track_entries:
      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
      obj['input-strings'] = self._strings

    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    """Asserts that no digest or lookup map has been computed yet."""
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Appends str() of each value to the recorded input strings."""
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    # The zip's own tag is derived from all subpaths followed by all tags.
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{'path': e[0], 'tag': e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)


def _ComputeTagForPath(path):
  """Returns a tag representing the contents of the file at |path|.

  Files larger than 1 MiB are tagged with their mtime; all other files are
  tagged with the md5 hex digest of their contents.
  """
  stat = os.stat(path)
  if stat.st_size > 1 * 1024 * 1024:
    # Fallback to mtime for large files so that md5_check does not take too long
    # to run.
    return stat.st_mtime
  md5 = hashlib.md5()
  with open(path, 'rb') as f:
    md5.update(f.read())
  return md5.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters.

  Each item is converted with str() before being hashed.
  """
  md5 = hashlib.md5()
  for item in iterable:
    data = str(item)
    # hashlib only accepts bytes. Under Python 3 str() yields text, which must
    # be encoded first; under Python 2 str() already yields bytes.
    if not isinstance(data, bytes):
      data = data.encode('utf-8')
    md5.update(data)
  return md5.hexdigest()


def _ExtractZipEntries(path):
  """Returns a list of (path, tag) of all files within |path|.

  The tag is the sum of the entry's CRC32 and its compression type.
  """
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files.
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries