1# $Id: BuildCache.pm,v 1.47 2012/03/04 13:56:35 pfeiffer Exp $ 2# 3# Possible improvements: 4# 5# o We need to handle .o files properly, doing a string substitution on the 6# path. Maybe .a files too? I don't know. Do we really? This would 7# eliminate the considerable space savings possible with links. The user 8# should get to decide if he wants this. 9# o .lo files and .la files should never be exported to a cache. 10# o Why do we write out a separate build_info file with a recalculated 11# signature. When is it expected to differ, and how can we generalize this, 12# so that we can link the existing build_info file to and from the cache? 13# 14 15=head1 NAME 16 17Mpp::BuildCache -- subroutines for handling the makepp build cache 18 19=head1 SYNOPSIS 20 21 $bc = new Mpp::BuildCache("/path/to/build_cache", $create_flags_hash); 22 $bc->cache_file($file_info_of_file_to_archive, $file_key); 23 $bc->cleanup(); # Clean out files that haven't been used for a while. 24 $bc_entry = $bc->lookup_file($file_key); 25 26 $build_info = $bc_entry->build_info; 27 $bc_entry->copy_from_cache($output_finfo); 28 29=head1 The Mpp::BuildCache package 30 31The Mpp::BuildCache is a cache system that makepp uses to store the results 32of compilation so that they can be used later. If a file with the same 33input signature is needed, it can be fetched again immediately instead 34of rebuilt. This can cut down compilation time significantly in a 35number of cases. For example: 36 37=over 4 38 39=item * 40 41Suppose you compile all files in your program for optimization. Then 42you find a bug and you recompile for debug. Then you fix the bug and 43you want to recompile for optimization. Most of the source files 44haven't changed, but you just wiped out all the F<.o> files when you 45turned off optimization, so without a build cache you'd have to 46recompile everything. With the build cache, an extra copy of the file 47was made and stored in the cache, so it can be fetched again, instead of 48recompiling. 49 50=item * 51 52Suppose you have checked out several copies of your sources into several 53different directory trees, and have made small modifications to each 54tree. Now most of the files are the same across the directory trees, so 55when you compile another directory tree, it can fetch most of the 56compiled files from the build cache created when you built the first 57directory tree. 58 59=item * 60 61Suppose you have 5 developers all working on approximately the same set 62of sources. Once again, most of their files will be identical. If one 63person compiles a file, the remaining developers can fetch the file from 64the build cache rather than compiling it for themselves. 65 66=back 67 68=head2 Cache format 69 70The cache is actually a directory hierarchy where the filename of each 71file is the build cache key. For example, if the build cache key of a 72file is C<0123456789abcdef>, the actual file name might be 73F<01/234/56789abcdef_xyz.o>. On some file systems, performance suffers 74if there are too many files per directory, so Mpp::BuildCache can 75automatically break them up into directories as shown. 76 77It remembers the key that it was given, which is presumably some sort of hash 78of all the inputs that went into building the file. Mpp::BuildCache does remember 79the build info structure for the file. This is intended to help in the very 80rare case where there is a collision in the key, and several files have the 81same key. Mpp::BuildCache cannot store multiple files with the same key, but by 82storing the build information it is at least possible to determine that the 83given file is the wrong file. 84 85=head2 Use of Mpp::File 86 87We do not use the Mpp::File class to store information about the files in the 88build cache. The reason is that we don't want to waste the memory storing all 89the results. Typically things are looked up once in the build cache and never 90examined again, so it's a waste of memory to build up the Mpp::File structures 91for them. For this reason, for any files in the build cache directories, we 92do the stat and other operations directly instead of calling the Mpp::File 93subroutines. 94 95We do use the Mpp::File subroutines for files stored elsewhere, however. 96 97=cut 98 99package Mpp::BuildCache; 100use strict; 101use Mpp::File; 102use Mpp::FileOpt; 103use Mpp::Cmds (); 104use Sys::Hostname; 105use POSIX qw(:errno_h S_ISREG); 106 107BEGIN { 108 eval { $_ = ESTALE }; # Not defined on Win ActiveState. 109 if( $@ ) { 110 no warnings; 111 require Mpp::Text; 112 *ESTALE = sub() { -1 }; 113 } 114} 115 116 117our $global; # Build cache specified on command line or with global keyword. 118our $options_file = 'build_cache_options.pl'; 119 120=head2 new Mpp::BuildCache("/path/to/cache"); 121 122Opens an existing build cache. 123 124=cut 125 126sub new { 127 my( $class, $build_cache_dir, $self ) = @_; 128 129 $self ||= do "$build_cache_dir/$options_file"; 130 # Load the creation options. 131 ref $self or 132 die "Build cache $build_cache_dir does not have a valid format\n $build_cache_dir/$options_file is missing or corrupted\n"; 133 134 $build_cache_dir = file_info $build_cache_dir; 135 136 @$self{qw(DEV ACCESS_PERMISSIONS)} = 137 @{stat_array $build_cache_dir}[Mpp::File::STAT_DEV, Mpp::File::STAT_MODE]; 138 $self->{ACCESS_PERMISSIONS} &= 0777; 139 # Use the current directory protections as the 140 # proper mask. 141 $self->{MKDIR_OPT} = sprintf '-pm%o', $self->{ACCESS_PERMISSIONS}; 142 143 $self->{DIRNAME} = absolute_filename $build_cache_dir; 144 145 bless $self, $class; 146} 147 148=head2 cache_file 149 150 $build_cache->cache_file($file_info, $file_key, $build_info); 151 152Copies or links the file into the build cache with the given file key. Also 153the build information is stored alongside the file so that when it is 154retrieved we can verify that in fact it is exactly what we want. 155 156Returns a true value if the operation succeeded, false if any part failed. If 157anything failed in updating the build cache, the cache is cleaned up and left 158in a consistent state. 159 160=cut 161 162# A string that cannot possibly prefix a build cache key: 163our $incoming_subdir = 'incoming.dir'; 164 165# From "man 2 creat" on Linux 2.4.21: 166# O_EXCL is broken on NFS file systems, programs which rely on it for 167# performing lock-ing tasks will contain a race condition. The solution for 168# performing atomic file locking using a lockfile is to create a unique file 169# on the same fs (e.g., incorporating hostname and pid), use link(2) to make 170# a link to the lockfile. If link() returns 0, the lock is successful. 171# Otherwise, use stat(2) on the unique file to check if its link count has 172# increased to 2, in which case the lock is also successful. 173# 174# $! will be set appropriately if it returns false; it may be altered even 175# if it returns true. 176sub link_over_nfs { 177 # $old has to be a file that nobody else might be touching. 178 my ($old, $new) = @_; 179 link($old, $new) || ((stat $old)[3] || 0) > 1; 180} 181 182my $unique_suffix; 183 184# Because there is a race in aging between the time that the age of a file is 185# sampled and when it is deleted, it is possible for a brand new file to get 186# aged out if it replaces a file that is old enough to get aged. 187# TBD: If this happens, we could probably recover seamlessly by retrying 188# exactly once, but it's not clear whether it's worthwhile to uglify the code 189# in order to do that. The code is going to be hard enough to maintain as it 190# is, because it's very hard to test the race conditions. 191my $target_aged = 'temporary copy of target file was deleted, possibly by aging (OK)'; 192my $build_info_aged = 'temporary copy of build info was deleted, possibly by aging (OK)'; 193 194sub cache_file { 195 my( $self, $input_finfo, $cache_fname, $reason ) = @_; # Name the arguments. 196 # 4th arg atime, only for mppbcc, accessed below. 197 $reason or die; 198 199 my $input_filename = absolute_filename_nolink $input_finfo; 200 my $orig_prot = (lstat_array $input_finfo)->[Mpp::File::STAT_MODE]; 201 return 1 # Succeed without doing anything 202 unless S_ISREG $orig_prot; # if not a regular file? 203 204 # TBD: Perhaps we ought to succeed without doing anything if the entry 205 # is already in the cache. This reduces the likelihood of thrashing, but 206 # perhaps strange things could happen if multiple targets of a rule weren't 207 # actually built together. Either way, you run the risk of leaving behind 208 # a build info file without an MD5_SUM, which makes --md5-check-bc unhappy. 209 210 if( $cache_fname !~ /^\// ) { # Not called from Mpp::BuildCacheControl? 211 substr $cache_fname, $_, 0, '/' for reverse @{$self->{SUBDIR_CHARS}}; 212 $cache_fname = $self->{DIRNAME} . '/' . $cache_fname; 213 # Get the name of the file to create. 214 } 215 216# Build info is currently stored in a file whose name is the same as the main 217# file, but with ".makepp" before the last directory and .mk as a suffix. 218# E.g., if the filename is 01/234/5679abcdef, then the build info is 219# stored in 01/234/.makepp/56789abcdef.mk. 220 221 my $build_info_fname = $cache_fname; 222 $build_info_fname =~ s@/([^/]+)$@/$Mpp::File::build_info_subdir@; 223 -d $build_info_fname or 224 eval { Mpp::Cmds::c_mkdir $self->{MKDIR_OPT}, $build_info_fname } or do { 225 $$reason = ($! == ENOENT || $! == ESTALE) ? "$@ -- possibly due to aging (OK)" : $@; 226 return undef; 227 }; 228 # Make sure .makepp directory and parents exists. 229 230 $build_info_fname .= "/$1.mk"; 231 232# Before writing to the final location, we write to a temp location, so that 233# the writes are atomic. If we're linking, then we don't need to create a 234# copy of it, because it gets linked in anyway, but we always create a temp 235# file for the build info. The temp paths are currently incoming.dir/$host.$pid 236# and incoming.dir/$host.$pid.mk. 237 238 # This is a string that it unique over all currently active processes that 239 # might be able to write to the build cache, and it can't end in '.mk'. 240 $unique_suffix ||= hostname . '_' . $$; 241 my $temp_cache_fname = "$self->{DIRNAME}/$incoming_subdir/$unique_suffix"; 242 my $temp_build_info_fname = $temp_cache_fname . '.mk'; 243 244 my $build_info = $input_finfo->{BUILD_INFO}; # Get the build info hash. 245 $build_info ||= Mpp::File::load_build_info_file($input_finfo); 246 # Load it from disk if we didn't have it. 247 $build_info or die "internal error: file in build cache (" . absolute_filename( $input_finfo ) . 248 ") is missing build info\n"; 249 250 local $build_info->{SIGNATURE}; 251# 252# Calculate the protections we want to be on the file. 253# We make the world and group protections be the user protection anded 254# with the build cache directory protections. 255# 256 257 my $file_prot = (0111 * int $orig_prot % 01000 / 0100) & $self->{ACCESS_PERMISSIONS}; 258 # Make the group & other protections the same 259 # as the user protections. 260 # Remove protections not granted by the 261 # build cache directory. 262 263# 264# If the build cache is not on the same file system as the file, then 265# copy the file. If it is on the same file system, then make a hard link, 266# since that is faster and uses almost no disk space. 267# 268 my $dev = (stat_array $input_finfo->{'..'})->[Mpp::File::STAT_DEV]; 269 my( $size, $mtime ) = 270 @{Mpp::File::lstat_array $input_finfo}[Mpp::File::STAT_SIZE, Mpp::File::STAT_MTIME]; 271 # If it's on the same filesystem, then link; otherwise, copy. 272 my $target_src; 273 my @files_to_unlink; 274 my $result = eval { 275 my $linking; 276 my $target_prot = $file_prot; 277 if( $dev == $self->{DEV} && !$Mpp::force_bc_copy ) { 278 $linking = 1; 279 $target_src = $input_filename; 280 $target_prot &= ~0222; # Make it read only, so that no one can 281 # accidentally corrupt the build cache copy. 282 Mpp::File::set_build_info_string( $input_finfo, 'LINKED_TO_CACHE', 1 ); 283 # Remember that it's linked to the build 284 # cache, so we need to delete it before 285 # allowing it to be changed. 286 if($Mpp::md5check_bc) { 287 # Make sure that $build_info->{MD5_SUM} is set. 288 require Mpp::Signature::md5; 289 Mpp::Signature::md5::signature($Mpp::Signature::md5::md5, $input_finfo); 290 } 291 } else { # Hard link not possible on different dev 292 my $md5; 293 if($Mpp::md5check_bc && !$build_info->{MD5_SUM}) { 294 require Digest::MD5; 295 $md5 = Digest::MD5->new; 296 } 297 $target_src = $temp_cache_fname; 298 push @files_to_unlink, $temp_cache_fname; 299 # Need to unlink first, in case there are other links to it and/or 300 # the current permissions don't allow writing. 301 unlink $temp_cache_fname or $! == ENOENT or do { 302 $$reason = "unlink $temp_cache_fname: $!"; 303 return undef; 304 }; 305 if (!(($size) = copy_check_md5($input_filename, $temp_cache_fname, $md5))) { 306 $$reason = ($! == ESTALE) ? $target_aged : "write $temp_cache_fname: $!"; 307 return undef; 308 } 309 utime $_[4] || time, $mtime, $temp_cache_fname or # Try to copy over mtime. 310 # NOTE: We can't get the mtime of $temp_cache_fname from the stat that 311 # we do on the destination filehandle at the end of the copy, because 312 # that mtime could be based on the local clock instead of the clock of 313 # the machine on which the file is stored. 314 $mtime = (stat $temp_cache_fname)[9] or do { 315 $$reason = ($! == ENOENT || $! == ESTALE) ? $target_aged : "stat $temp_cache_fname: $!"; 316 return undef; 317 }; 318 $build_info->{MD5_SUM} = $md5->b64digest if $md5; 319 } 320 $build_info->{SIGNATURE} = $mtime . ',' . $size; 321 # Be sure we store a signature. 322 323 push @files_to_unlink, $temp_build_info_fname; 324 unlink $temp_build_info_fname or $! == ENOENT or do { 325 $$reason = "unlink $temp_build_info_fname: $!"; 326 return undef; 327 }; 328 Mpp::File::write_build_info_file($temp_build_info_fname, $build_info) or do { 329 $$reason = ($! == ESTALE) ? $build_info_aged : "write $temp_build_info_fname: $!"; 330 return undef; 331 }; 332 chmod $file_prot, $temp_build_info_fname or do { 333 $$reason = ($! == ENOENT || $! == ESTALE) ? $build_info_aged : "chmod $temp_build_info_fname: $!"; 334 return undef; 335 }; 336 337 # We can leave garbage in the incoming directory on an interrupt, but we 338 # need to make sure that we don't corrupt to the cache entries if we can 339 # possibly help it. 340 my @files_to_unlink; 341 $Mpp::critical_sections++; 342 my $result = eval { 343 # NOTE: We try to make the build info file live longer than the target 344 # file, because we don't like to fail to import just because the build 345 # info file isn't there yet. However, this isn't guaranteed over NFS. 346 for($cache_fname, $build_info_fname) { 347 unlink $_ or $! == ENOENT or $! == ESTALE or do { 348 $$reason = "unlink $_: $!"; 349 return undef; 350 }; 351 } 352 353 link_over_nfs($temp_build_info_fname, $build_info_fname) or do { 354 if($! == EEXIST) { 355 $$reason = 'build info file was already there, possibly created by another party (OK)' 356 } elsif($! == ENOENT || $! == ESTALE) { 357 # NOTE: This might instead mean that the parent directory of 358 # $build_info_fname was aged, so the message is a bit misleading. 359 $$reason = $build_info_aged; 360 } else { 361 $$reason = "link $temp_build_info_fname to $build_info_fname: $!"; 362 } 363 return undef; 364 }; 365 push @files_to_unlink, $build_info_fname; 366 367 chmod $target_prot, $target_src or do { 368 $$reason = (!$linking && ($! == ENOENT || $! == ESTALE)) ? $target_aged : "chmod $target_src: $!"; 369 return undef; 370 }; 371 link_over_nfs($target_src, $cache_fname) or do { 372 if($! == EEXIST) { 373 $$reason = "target file was already there, possibly created by another party after our build info was immediately aged (OK)" 374 } elsif($! == ENOENT || $! == ESTALE) { 375 # NOTE: This might instead mean that the parent directory of 376 # $cache_fname was aged, so the message is a bit misleading. 377 $$reason = $target_aged; 378 } else { 379 $$reason = "link $target_src to $cache_fname: $!"; 380 } 381 return undef; 382 }; 383 #push @files_to_unlink, $cache_fname; # Currently redundant 384 385 @files_to_unlink = (); # Commit to leave the entry in the cache 386 Mpp::log $linking ? 'BC_LINK' : 'BC_EXPORT' => $input_finfo, $cache_fname 387 if $Mpp::log_level; 388 1 389 }; 390 my $error = $@; 391 eval { unlink @files_to_unlink }; # Ignore failure here 392 $Mpp::critical_sections--; 393 Mpp::propagate_pending_signals(); 394 die $error if $error; 395 $result 396 }; 397 my $error = $@; 398 eval { unlink @files_to_unlink }; # Ignore failure here 399 die $error if $error; 400 $result 401} 402 403=head2 lookup_file 404 405 $bc_entry = $bc->lookup_file($file_key); 406 407Lookup a file by its cache key. Returns undef if the file does not exist in 408the cache. Returns a Mpp::BuildCache::Entry structure if it does exist. You can 409query the Mpp::BuildCache::Entry structure to see what the build info is, or to 410copy the file into the current directory. 411 412=cut 413 414sub lookup_file { 415 my( $self, $cache_fname ) = @_; 416 417 substr $cache_fname, $_, 0, '/' for reverse @{$self->{SUBDIR_CHARS}}; 418 $cache_fname = $self->{DIRNAME} . '/' . $cache_fname; 419 # Get the file name we're looking for. 420 421 return if exists $self->{SYMLINK} && !-e $cache_fname; # Stale link? 422 423 my $dev = (lstat $cache_fname)[0]; # 0 == real STAT_DEV. Does the file exist? 424 425 defined $dev and # Quit if file does not exist. 426 bless { FILENAME => $cache_fname, DEV => $dev }, 'Mpp::BuildCache::Entry'; 427} 428 429=head2 copy_check_md5 430 431 my $md5; 432 my $result = copy_check_md5("in", "out", \$md5, $setmode); 433 434Assuming that the input file is atomically generated and removed, 435copy_check_md5 will either copy the file as-is or return undef with $! set, 436even if the input file is unlinked and/or re-created concurrently, 437even over NFS. 438Mode bits are copied as well if $mode is true. 439Copy_check_md5 will instead die if it detects that the input file is not 440being written atomically, or if it detects something that it can't explain. 441 442If a Digest object is provided as a third argument, then the file's content 443is added to it. It may be modified even if the copy fails. 444See L<Digest(3pm)>. 445 446A successful copy will return a 2-element array consisting of the size and 447modification time of the input file. 448 449If the return value is an empty array, then $! is set as follows: 450 451=over 2 452 453=item ENOENT 454 455The input file was removed while it was being read. 456 457=item ESTALE 458 459The output file was removed while it was being written, 460or the directory containing the input file was removed. 461 462=item Others 463 464Many other errors are possible, such as EACCES, EINTR, EIO, EISDIR, ENFILE 465EMFILE, EFBIG, ENOSPC, EROFS, EPIPE, ENAMETOOLONG, ENOSTR. 466In most cases, these are non-transient conditions that require manual 467intervention, and should therefore cause the program to terminate. 468 469=back 470 471=cut 472 473our $Too_Big = 1024 * 1024 * 2; 474 475sub copy_check_md5 { 476 my ($in, $out, $md5, $setmode) = @_; 477 478 open(my $fin, '<', $in) or return; 479 480 # NOTE: This works only because we stat the filehandle instead of the 481 # file. The file could have been unlinked and re-created since we opened 482 # it for read. 483 my ($ino, $mode, $size, $mtime) = do { no warnings; (stat $fin)[1,2,7,9] }; 484 defined($size) or return; 485 486 open(my $fout, '>', $out) or return; 487 488 # Stolen from File::Copy: 489 my $bufsize = $size; 490 $bufsize = 1024 if ($bufsize < 512); 491 $bufsize = $Too_Big if ($bufsize > $Too_Big); 492 my $buf; 493 for (;;) { 494 my ($r, $w, $t); 495 defined($r = sysread($fin, $buf, $bufsize)) or return; 496 last unless $r; 497 $md5->add($buf) if $md5; 498 for ($w = 0; $w < $r; $w += $t) { 499 $t = syswrite($fout, $buf, $r - $w, $w) or return; 500 } 501 } 502 503 504 my $size3; 505 { 506 local $SIG{__WARN__} = sub { 507 local $_ = $_[0]; 508 warn $_ unless /unopened/; # Ignore "stat() on unopened filehandle" 509 }; 510 $size3 = (stat $fout)[7]; 511 } 512 close($fout) or return; 513 514 # Now, if the file is still there, report if it changed. This is how 515 # we'll know if somebody isn't following the rules. 516 my ($ino2, $size2, $mtime2) = do { no warnings; (stat $fin)[1,7,9] }; 517 die "$in changed during copying (created non-atomically)" 518 if $ino2 && ($ino2 != $ino || $size2 != $size || $mtime2 != $mtime); 519 520 close($fin); 521 522 # I don't know of any way that this could happen, but we'll check here 523 # just so we know for sure that it didn't happen. 524 die "Copying to $out: size $size3 doesn't match source size $size" 525 unless defined($size3) && $size3 == $size; 526 527 chmod($mode & 0777, $out) or die "chmod $out: $!" if $setmode; 528 529 ($size, $mtime) 530} 531 532############################################################################### 533# 534# Subroutines in the Mpp::BuildCache::Entry package: 535# 536package Mpp::BuildCache::Entry; 537 538=head1 The Mpp::BuildCache::Entry package 539 540A Mpp::BuildCache::Entry is an object returned by BuildCache::lookup_file. You can 541do the following with the object: 542 543=head2 absolute_filename 544 545 $bc_entry->absolute_filename 546 547Returns the name of the file in the build cache. 548 549=cut 550 551sub absolute_filename { $_[0]->{FILENAME} } 552*name = \&absolute_filename; 553 554=head2 copy_from_cache 555 556 $bc_entry->copy_from_cache($output_finfo, $rule, \$reason); 557 558Replaces the file in $output_finfo with the file from the cache, and updates 559all the Mpp::File data structures to reflect this change. 560The build info signature is checked against the target file in the cache, 561and if $Mpp::md5check_bc is set, then the MD5 checksum is also verified. 562 563Returns true if the file was successfully restored from the cache, false if 564not. (I B<think> the only reason it wouldn't be successfully restored is that 565someone deleted the file from cache between the time it was returned from 566lookup_file and the time copy_from_cache is invoked.) 567If it returns false, then $reason is set to a string that explains why. 568If $reason ends with '(OK)', then the failure could have been due to legitimate 569concurrent access of the build cache. 570If it fails, then the output target is unlinked. 571 572=cut 573 574sub fix_ok { 575# If we detect that a target and its build info don't go together, 576# then we are empowered to nuke them even in --nopopulate_bc mode. We do this 577# only if the target is at least 10 minutes old, because otherwise someone 578# might always nuke files just as they get created. It's still possible 579# (although unlikely) for a file to be removed immediately after it replaces 580# a file that had been in the cache for a long time, but that's OK. 581 my ($self) = @_; 582 # Re-stat, because this is the last chance we have to notice an update. 583 my $mtime = (stat $self->{FILENAME})[9]; # 9 == real STAT_MTIME 584 $mtime && time - $mtime > 600 585} 586 587sub copy_from_cache { 588 my ($self, $output_finfo, $rule, $reason) = @_; 589 $reason || die; 590 591 Mpp::File::unlink( $output_finfo ); # Get rid of anything that's there currently. 592 my $output_fname = Mpp::File::absolute_filename_nolink $output_finfo; 593 my $link_to_build_cache = 0; 594 595# 596# Read in the build info: 597# 598 my $cache_fname = $self->{FILENAME}; 599 my $build_info_fname = $cache_fname; 600 $build_info_fname =~ s@/([^/]+)$@/$Mpp::File::build_info_subdir/$1.mk@; 601 open my( $fh ), $build_info_fname or do { 602 if($! == POSIX::ENOENT || $! == Mpp::BuildCache::ESTALE) { 603 $$reason = 'the build info file is missing (OK)'; 604 unlink $cache_fname if fix_ok($self); 605 } else { 606 $$reason = "read $build_info_fname: $!"; 607 } 608 return undef; 609 }; 610 my $line; 611 my $build_info=Mpp::File::grok_build_info_file($fh); 612 close $fh; 613 614 $build_info or do { 615 $$reason='currupt build info file, possibly deleted while reading (OK)'; 616 unlink $cache_fname, $build_info_fname if fix_ok($self); 617 return undef; 618 }; # Something's wrong with this file. 619 620 # If the target directory doesn't already exist, then we assume that the 621 # rule would have created it. 622 Mpp::Cmds::c_mkdir '-p', Mpp::File::absolute_filename_nolink $output_finfo->{'..'}; 623 624# It's a real file. If it's on the same file system, make it an extra hard 625# link since that's faster and takes up almost no disk space. Otherwise, copy the 626# file. 627# We have to be very careful not to import a target without its build info 628# file, even if an interrupt arrives, because then it will look like a source 629# file, and then --rm-stale might not work. 630# 631 $Mpp::critical_sections++; 632 my $result = eval { 633 my $md5; 634 require Digest::MD5; 635 $md5 = Digest::MD5->new if $Mpp::md5check_bc; 636 my ($size, $mtime); 637 # TBD: Maybe we shouldn't fall back to copying if link fails. There 638 # should be a warning at least. 639 if( $self->{DEV} == ((Mpp::File::stat_array $output_finfo->{'..'})->[Mpp::File::STAT_DEV] || 0) 640 && !$Mpp::force_bc_copy && 641 # Same file system? 642 link($self->{FILENAME}, $output_fname)) { 643 # Re-stat in case it changed since we looked it up. 644 ($size, $mtime) = (stat $output_fname)[7, 9]; 645 unless( defined $size ) { 646 $$reason = "cached file $self->{FILENAME} became a stale link after we looked it up (OK)"; 647 unlink $output_fname; 648 unlink $cache_fname, $build_info_fname if fix_ok($self); 649 return undef; 650 } 651 if($md5 && open(my $fh, '<', $self->{FILENAME})) { 652 $md5->addfile($fh); 653 } 654 $link_to_build_cache = 1; # Remember that we did the link. 655 } elsif( !( ($size, $mtime) = copy_check_md5( $self->{FILENAME}, $output_fname, $md5, 1) )) { 656 # Link failed for some reason: 657 # NOTE: Several versions of the Linux NFS client can return EIO instead 658 # of ESTALE or ENOENT on a read after the file has been unlinked. If 659 # this is a real hardware error, then we hope that it also shows up on 660 # some other operation where it can't happen legitimately. 661 $$reason = ($!==POSIX::ENOENT || $!==Mpp::BuildCache::ESTALE || $!==POSIX::EIO) ? 'file was just deleted (OK)' : "copy $self->{FILENAME} to $output_fname: $!"; 662 return undef; 663 } 664 my $signature = $mtime . ',' . $size; 665 # Form the expected signature. 666 my $build_info_sig = $build_info->{SIGNATURE} || ''; 667 if ($signature ne $build_info_sig) { 668 # File was corrupted in the build cache. 669 # Get rid of it, and don't import it. 670 $$reason = "cached build info file $build_info_sig mismatches cached target file $signature (OK)"; 671 unlink $cache_fname, $build_info_fname if fix_ok($self); 672 return undef; 673 } 674 if($md5) { 675 # Digest key and format needs to match Mpp::Signature::md5 676 my $md5sum = $build_info->{MD5_SUM} or do { 677 $$reason = 'no stored MD5 in cached build info file (OK)'; 678 return undef; 679 }; 680 my $target_md5 = $md5->b64digest; 681 if($target_md5 ne $md5sum) { 682 $$reason = "cached target file $target_md5 mismatches build info MD5_SUM $md5sum (OK)"; 683 unlink $cache_fname, $build_info_fname if fix_ok($self); 684 return undef; 685 } 686 } 687 688# 689# Now restore the build info: 690# 691 Mpp::File::may_have_changed( $output_finfo ); 692 $output_finfo->{BUILD_INFO} = $build_info; 693 Mpp::File::set_build_info_string( $output_finfo, 'LINKED_TO_CACHE', $link_to_build_cache); 694 # Remember if it's a link to something in 695 # the build cache. 696 697 # Need to match build info signature to file signature, or else build info 698 # will be ignored. This has the drawback that targets that don't use an MD5 699 # signature for this file as a dependency will think it has changed. 700 Mpp::File::set_build_info_string( $output_finfo, 'SIGNATURE', Mpp::File::signature( $output_finfo )); 701 702 # Update the DEP_SIGS that aren't MD5-based, so that the target will still 703 # look up-to-date the next time we run makepp. 704 $rule->build_check_method->update_dep_sigs($output_finfo, $rule); 705 706 Mpp::File::mark_build_info_for_update( $output_finfo ); 707 &Mpp::File::update_build_infos; # Write out the build cache right now. 708 1 # No error. 709 }; 710 my $error = $@; 711 $result or eval { Mpp::File::unlink( $output_finfo ) }; # Clean up on error 712 $Mpp::critical_sections--; 713 Mpp::propagate_pending_signals(); 714 die $error if $error; 715 716 # TBD: Some filesystems don't update atime on file access, for performance 717 # and/or power reasons. If the build cache is on such a filesystem, then 718 # files will get aged based on their creation time, which is bad because 719 # frequently used files will be aged just as quickly as files that are never 720 # used. To fix that, I propose that the build_cache_options.pl file define 721 # a new 'UTIME_ON_IMPORT' parameter, and if that is set, then we should 722 # use utime(2) here to update the atime and set mtime to the same value 723 # that we previously sampled. (Gary Holt has tried this on such a 724 # filesystem, and he reports that it works.) This is likely to fail because 725 # of permissions, in which case we can copy the target to a unique 726 # filename that we own, then rename that file back to the target, and 727 # finally call utime to update atime and reset the old mtime. This isn't 728 # implemented because nobody needs it yet (and therefore nobody would be 729 # testing it). When this is implemented, the check in copy_check_md5 for 730 # constant mtime needs to be downgraded from a die to a failure, because 731 # the utime operation introduces legitimate races. 732 733 $result 734} 735 736*copy_check_md5 = \&Mpp::BuildCache::copy_check_md5; 737 7381; 739