1# $Id: BuildCache.pm,v 1.47 2012/03/04 13:56:35 pfeiffer Exp $
2#
3# Possible improvements:
4#
5# o We need to handle .o files properly, doing a string substitution on the
6#   path.  Maybe .a files too?	I don't know.  Do we really?  This would
7#   eliminate the considerable space savings possible with links.  The user
8#   should get to decide if he wants this.
9# o .lo files and .la files should never be exported to a cache.
10# o Why do we write out a separate build_info file with a recalculated
11#   signature.  When is it expected to differ, and how can we generalize this,
12#   so that we can link the existing build_info file to and from the cache?
13#
14
15=head1 NAME
16
17Mpp::BuildCache -- subroutines for handling the makepp build cache
18
19=head1 SYNOPSIS
20
21    $bc = new Mpp::BuildCache("/path/to/build_cache", $create_flags_hash);
22    $bc->cache_file($file_info_of_file_to_archive, $file_key);
23    $bc->cleanup();	 # Clean out files that haven't been used for a while.
24    $bc_entry = $bc->lookup_file($file_key);
25
26    $build_info = $bc_entry->build_info;
27    $bc_entry->copy_from_cache($output_finfo);
28
29=head1 The Mpp::BuildCache package
30
31The Mpp::BuildCache is a cache system that makepp uses to store the results
32of compilation so that they can be used later.	If a file with the same
33input signature is needed, it can be fetched again immediately instead
34of rebuilt.  This can cut down compilation time significantly in a
35number of cases.  For example:
36
37=over 4
38
39=item *
40
41Suppose you compile all files in your program for optimization.	 Then
42you find a bug and you recompile for debug.  Then you fix the bug and
43you want to recompile for optimization.	 Most of the source files
44haven't changed, but you just wiped out all the F<.o> files when you
45turned off optimization, so without a build cache you'd have to
46recompile everything.  With the build cache, an extra copy of the file
47was made and stored in the cache, so it can be fetched again, instead of
48recompiling.
49
50=item *
51
52Suppose you have checked out several copies of your sources into several
53different directory trees, and have made small modifications to each
54tree.  Now most of the files are the same across the directory trees, so
55when you compile another directory tree, it can fetch most of the
56compiled files from the build cache created when you built the first
57directory tree.
58
59=item *
60
61Suppose you have 5 developers all working on approximately the same set
62of sources.  Once again, most of their files will be identical.	 If one
63person compiles a file, the remaining developers can fetch the file from
64the build cache rather than compiling it for themselves.
65
66=back
67
68=head2 Cache format
69
70The cache is actually a directory hierarchy where the filename of each
71file is the build cache key.  For example, if the build cache key of a
72file is C<0123456789abcdef>, the actual file name might be
73F<01/234/56789abcdef_xyz.o>.  On some file systems, performance suffers
74if there are too many files per directory, so Mpp::BuildCache can
75automatically break them up into directories as shown.
76
77It remembers the key that it was given, which is presumably some sort of hash
78of all the inputs that went into building the file.  Mpp::BuildCache does remember
79the build info structure for the file.  This is intended to help in the very
80rare case where there is a collision in the key, and several files have the
81same key.  Mpp::BuildCache cannot store multiple files with the same key, but by
82storing the build information it is at least possible to determine that the
83given file is the wrong file.
84
85=head2 Use of Mpp::File
86
87We do not use the Mpp::File class to store information about the files in the
88build cache.  The reason is that we don't want to waste the memory storing all
89the results.  Typically things are looked up once in the build cache and never
90examined again, so it's a waste of memory to build up the Mpp::File structures
91for them.  For this reason, for any files in the build cache directories, we
92do the stat and other operations directly instead of calling the Mpp::File
93subroutines.
94
95We do use the Mpp::File subroutines for files stored elsewhere, however.
96
97=cut
98
99package Mpp::BuildCache;
100use strict;
101use Mpp::File;
102use Mpp::FileOpt;
103use Mpp::Cmds ();
104use Sys::Hostname;
105use POSIX qw(:errno_h S_ISREG);
106
107BEGIN {
108  eval { $_ = ESTALE };		# Not defined on Win ActiveState.
109  if( $@ ) {
110    no warnings;
111    require Mpp::Text;
112    *ESTALE = sub() { -1 };
113  }
114}
115
116
117our $global;			# Build cache specified on command line or with global keyword.
118our $options_file = 'build_cache_options.pl';
119
120=head2 new Mpp::BuildCache("/path/to/cache");
121
122Opens an existing build cache.
123
124=cut
125
126sub new {
127  my( $class, $build_cache_dir, $self ) = @_;
128
129  $self ||= do "$build_cache_dir/$options_file";
130				# Load the creation options.
131  ref $self or
132    die "Build cache $build_cache_dir does not have a valid format\n  $build_cache_dir/$options_file is missing or corrupted\n";
133
134  $build_cache_dir = file_info $build_cache_dir;
135
136  @$self{qw(DEV ACCESS_PERMISSIONS)} =
137    @{stat_array $build_cache_dir}[Mpp::File::STAT_DEV, Mpp::File::STAT_MODE];
138  $self->{ACCESS_PERMISSIONS} &= 0777;
139				# Use the current directory protections as the
140				# proper mask.
141  $self->{MKDIR_OPT} = sprintf '-pm%o', $self->{ACCESS_PERMISSIONS};
142
143  $self->{DIRNAME} = absolute_filename $build_cache_dir;
144
145  bless $self, $class;
146}
147
148=head2 cache_file
149
150   $build_cache->cache_file($file_info, $file_key, $build_info);
151
152Copies or links the file into the build cache with the given file key.	Also
153the build information is stored alongside the file so that when it is
154retrieved we can verify that in fact it is exactly what we want.
155
156Returns a true value if the operation succeeded, false if any part failed.  If
157anything failed in updating the build cache, the cache is cleaned up and left
158in a consistent state.
159
160=cut
161
162# A string that cannot possibly prefix a build cache key:
163our $incoming_subdir = 'incoming.dir';
164
165# From "man 2 creat" on Linux 2.4.21:
166# O_EXCL is broken on NFS file systems, programs which rely on it for
167# performing lock-ing tasks will contain a race condition.  The solution for
168# performing atomic file locking using a lockfile is to create a unique file
169# on the same fs (e.g., incorporating hostname and pid), use link(2) to make
170# a link to the lockfile.  If link() returns 0, the lock is successful.
171# Otherwise, use stat(2) on the unique file to check if its link count has
172# increased to 2, in which case the lock is also successful.
173#
174# $! will be set appropriately if it returns false; it may be altered even
175# if it returns true.
176sub link_over_nfs {
177  # $old has to be a file that nobody else might be touching.
178  my ($old, $new) = @_;
179  link($old, $new) || ((stat $old)[3] || 0) > 1;
180}
181
182my $unique_suffix;
183
184# Because there is a race in aging between the time that the age of a file is
185# sampled and when it is deleted, it is possible for a brand new file to get
186# aged out if it replaces a file that is old enough to get aged.
187# TBD: If this happens, we could probably recover seamlessly by retrying
188# exactly once, but it's not clear whether it's worthwhile to uglify the code
189# in order to do that.  The code is going to be hard enough to maintain as it
190# is, because it's very hard to test the race conditions.
191my $target_aged = 'temporary copy of target file was deleted, possibly by aging (OK)';
192my $build_info_aged = 'temporary copy of build info was deleted, possibly by aging (OK)';
193
194sub cache_file {
195  my( $self, $input_finfo, $cache_fname, $reason ) = @_; # Name the arguments.
196				# 4th arg atime, only for mppbcc, accessed below.
197  $reason or die;
198
199  my $input_filename = absolute_filename_nolink $input_finfo;
200  my $orig_prot = (lstat_array $input_finfo)->[Mpp::File::STAT_MODE];
201  return 1			# Succeed without doing anything
202    unless S_ISREG $orig_prot;	# if not a regular file?
203
204  # TBD: Perhaps we ought to succeed without doing anything if the entry
205  # is already in the cache.  This reduces the likelihood of thrashing, but
206  # perhaps strange things could happen if multiple targets of a rule weren't
207  # actually built together.  Either way, you run the risk of leaving behind
208  # a build info file without an MD5_SUM, which makes --md5-check-bc unhappy.
209
210  if( $cache_fname !~ /^\// ) {	# Not called from Mpp::BuildCacheControl?
211    substr $cache_fname, $_, 0, '/' for reverse @{$self->{SUBDIR_CHARS}};
212    $cache_fname = $self->{DIRNAME} . '/' . $cache_fname;
213				# Get the name of the file to create.
214  }
215
216# Build info is currently stored in a file whose name is the same as the main
217# file, but with ".makepp" before the last directory and .mk as a suffix.
218# E.g., if the filename is 01/234/5679abcdef, then the build info is
219# stored in 01/234/.makepp/56789abcdef.mk.
220
221  my $build_info_fname = $cache_fname;
222  $build_info_fname =~ s@/([^/]+)$@/$Mpp::File::build_info_subdir@;
223  -d $build_info_fname or
224    eval { Mpp::Cmds::c_mkdir $self->{MKDIR_OPT}, $build_info_fname } or do {
225      $$reason = ($! == ENOENT || $! == ESTALE) ? "$@ -- possibly due to aging (OK)" : $@;
226      return undef;
227    };
228				# Make sure .makepp directory and parents exists.
229
230  $build_info_fname .= "/$1.mk";
231
232# Before writing to the final location, we write to a temp location, so that
233# the writes are atomic.  If we're linking, then we don't need to create a
234# copy of it, because it gets linked in anyway, but we always create a temp
235# file for the build info.  The temp paths are currently incoming.dir/$host.$pid
236# and incoming.dir/$host.$pid.mk.
237
238  # This is a string that it unique over all currently active processes that
239  # might be able to write to the build cache, and it can't end in '.mk'.
240  $unique_suffix ||= hostname . '_' . $$;
241  my $temp_cache_fname = "$self->{DIRNAME}/$incoming_subdir/$unique_suffix";
242  my $temp_build_info_fname = $temp_cache_fname . '.mk';
243
244  my $build_info = $input_finfo->{BUILD_INFO}; # Get the build info hash.
245  $build_info ||= Mpp::File::load_build_info_file($input_finfo);
246				# Load it from disk if we didn't have it.
247  $build_info or die "internal error: file in build cache (" . absolute_filename( $input_finfo ) .
248    ") is missing build info\n";
249
250  local $build_info->{SIGNATURE};
251#
252# Calculate the protections we want to be on the file.
253# We make the world and group protections be the user protection anded
254# with the build cache directory protections.
255#
256
257  my $file_prot = (0111 * int $orig_prot % 01000 / 0100) & $self->{ACCESS_PERMISSIONS};
258				# Make the group & other protections the same
259				# as the user protections.
260				# Remove protections not granted by the
261				# build cache directory.
262
263#
264# If the build cache is not on the same file system as the file, then
265# copy the file.  If it is on the same file system, then make a hard link,
266# since that is faster and uses almost no disk space.
267#
268  my $dev = (stat_array $input_finfo->{'..'})->[Mpp::File::STAT_DEV];
269  my( $size, $mtime ) =
270    @{Mpp::File::lstat_array $input_finfo}[Mpp::File::STAT_SIZE, Mpp::File::STAT_MTIME];
271  # If it's on the same filesystem, then link; otherwise, copy.
272  my $target_src;
273  my @files_to_unlink;
274  my $result = eval {
275    my $linking;
276    my $target_prot = $file_prot;
277    if( $dev == $self->{DEV} && !$Mpp::force_bc_copy ) {
278      $linking = 1;
279      $target_src = $input_filename;
280      $target_prot &= ~0222;	# Make it read only, so that no one can
281				# accidentally corrupt the build cache copy.
282      Mpp::File::set_build_info_string( $input_finfo, 'LINKED_TO_CACHE', 1 );
283				# Remember that it's linked to the build
284				# cache, so we need to delete it before
285				# allowing it to be changed.
286      if($Mpp::md5check_bc) {
287	# Make sure that $build_info->{MD5_SUM} is set.
288	require Mpp::Signature::md5;
289	Mpp::Signature::md5::signature($Mpp::Signature::md5::md5, $input_finfo);
290      }
291    } else {			# Hard link not possible on different dev
292      my $md5;
293      if($Mpp::md5check_bc && !$build_info->{MD5_SUM}) {
294	require Digest::MD5;
295	$md5 = Digest::MD5->new;
296      }
297      $target_src = $temp_cache_fname;
298      push @files_to_unlink, $temp_cache_fname;
299      # Need to unlink first, in case there are other links to it and/or
300      # the current permissions don't allow writing.
301      unlink $temp_cache_fname or $! == ENOENT or do {
302	$$reason = "unlink $temp_cache_fname: $!";
303	return undef;
304      };
305      if (!(($size) = copy_check_md5($input_filename, $temp_cache_fname, $md5))) {
306	$$reason = ($! == ESTALE) ? $target_aged : "write $temp_cache_fname: $!";
307	return undef;
308      }
309      utime $_[4] || time, $mtime, $temp_cache_fname or # Try to copy over mtime.
310	# NOTE: We can't get the mtime of $temp_cache_fname from the stat that
311	# we do on the destination filehandle at the end of the copy, because
312	# that mtime could be based on the local clock instead of the clock of
313	# the machine on which the file is stored.
314	$mtime = (stat $temp_cache_fname)[9] or do {
315	  $$reason = ($! == ENOENT || $! == ESTALE) ? $target_aged : "stat $temp_cache_fname: $!";
316	  return undef;
317	};
318      $build_info->{MD5_SUM} = $md5->b64digest if $md5;
319    }
320    $build_info->{SIGNATURE} = $mtime . ',' . $size;
321				  # Be sure we store a signature.
322
323    push @files_to_unlink, $temp_build_info_fname;
324    unlink $temp_build_info_fname or $! == ENOENT or do {
325      $$reason = "unlink $temp_build_info_fname: $!";
326      return undef;
327    };
328    Mpp::File::write_build_info_file($temp_build_info_fname, $build_info) or do {
329      $$reason = ($! == ESTALE) ? $build_info_aged : "write $temp_build_info_fname: $!";
330      return undef;
331    };
332    chmod $file_prot, $temp_build_info_fname or do {
333      $$reason = ($! == ENOENT || $! == ESTALE) ? $build_info_aged : "chmod $temp_build_info_fname: $!";
334      return undef;
335    };
336
337    # We can leave garbage in the incoming directory on an interrupt, but we
338    # need to make sure that we don't corrupt to the cache entries if we can
339    # possibly help it.
340    my @files_to_unlink;
341    $Mpp::critical_sections++;
342    my $result = eval {
343      # NOTE: We try to make the build info file live longer than the target
344      # file, because we don't like to fail to import just because the build
345      # info file isn't there yet.  However, this isn't guaranteed over NFS.
346      for($cache_fname, $build_info_fname) {
347	unlink $_ or $! == ENOENT or $! == ESTALE or do {
348	  $$reason = "unlink $_: $!";
349	  return undef;
350	};
351      }
352
353      link_over_nfs($temp_build_info_fname, $build_info_fname) or do {
354	if($! == EEXIST) {
355	  $$reason = 'build info file was already there, possibly created by another party (OK)'
356	} elsif($! == ENOENT || $! == ESTALE) {
357	  # NOTE: This might instead mean that the parent directory of
358	  # $build_info_fname was aged, so the message is a bit misleading.
359	  $$reason = $build_info_aged;
360	} else {
361	  $$reason = "link $temp_build_info_fname to $build_info_fname: $!";
362	}
363	return undef;
364      };
365      push @files_to_unlink, $build_info_fname;
366
367      chmod $target_prot, $target_src or do {
368	$$reason = (!$linking && ($! == ENOENT || $! == ESTALE)) ? $target_aged : "chmod $target_src: $!";
369	return undef;
370      };
371      link_over_nfs($target_src, $cache_fname) or do {
372	if($! == EEXIST) {
373	  $$reason = "target file was already there, possibly created by another party after our build info was immediately aged (OK)"
374	} elsif($! == ENOENT || $! == ESTALE) {
375	  # NOTE: This might instead mean that the parent directory of
376	  # $cache_fname was aged, so the message is a bit misleading.
377	  $$reason = $target_aged;
378	} else {
379	  $$reason = "link $target_src to $cache_fname: $!";
380	}
381	return undef;
382      };
383      #push @files_to_unlink, $cache_fname; # Currently redundant
384
385      @files_to_unlink = ();	# Commit to leave the entry in the cache
386      Mpp::log $linking ? 'BC_LINK' : 'BC_EXPORT' => $input_finfo, $cache_fname
387	if $Mpp::log_level;
388      1
389    };
390    my $error = $@;
391    eval { unlink @files_to_unlink }; # Ignore failure here
392    $Mpp::critical_sections--;
393    Mpp::propagate_pending_signals();
394    die $error if $error;
395    $result
396  };
397  my $error = $@;
398  eval { unlink @files_to_unlink }; # Ignore failure here
399  die $error if $error;
400  $result
401}
402
403=head2 lookup_file
404
405  $bc_entry = $bc->lookup_file($file_key);
406
407Lookup a file by its cache key.	 Returns undef if the file does not exist in
408the cache.  Returns a Mpp::BuildCache::Entry structure if it does exist.  You can
409query the Mpp::BuildCache::Entry structure to see what the build info is, or to
410copy the file into the current directory.
411
412=cut
413
414sub lookup_file {
415  my( $self, $cache_fname ) = @_;
416
417  substr $cache_fname, $_, 0, '/' for reverse @{$self->{SUBDIR_CHARS}};
418  $cache_fname = $self->{DIRNAME} . '/' . $cache_fname;
419				# Get the file name we're looking for.
420
421  return if exists $self->{SYMLINK} && !-e $cache_fname; # Stale link?
422
423  my $dev = (lstat $cache_fname)[0]; # 0 == real STAT_DEV.  Does the file exist?
424
425  defined $dev and		# Quit if file does not exist.
426    bless { FILENAME => $cache_fname, DEV => $dev }, 'Mpp::BuildCache::Entry';
427}
428
429=head2 copy_check_md5
430
431    my $md5;
432    my $result = copy_check_md5("in", "out", \$md5, $setmode);
433
434Assuming that the input file is atomically generated and removed,
435copy_check_md5 will either copy the file as-is or return undef with $! set,
436even if the input file is unlinked and/or re-created concurrently,
437even over NFS.
438Mode bits are copied as well if $mode is true.
439Copy_check_md5 will instead die if it detects that the input file is not
440being written atomically, or if it detects something that it can't explain.
441
442If a Digest object is provided as a third argument, then the file's content
443is added to it.  It may be modified even if the copy fails.
444See L<Digest(3pm)>.
445
446A successful copy will return a 2-element array consisting of the size and
447modification time of the input file.
448
449If the return value is an empty array, then $! is set as follows:
450
451=over 2
452
453=item ENOENT
454
455The input file was removed while it was being read.
456
457=item ESTALE
458
459The output file was removed while it was being written,
460or the directory containing the input file was removed.
461
462=item Others
463
464Many other errors are possible, such as EACCES, EINTR, EIO, EISDIR, ENFILE
465EMFILE, EFBIG, ENOSPC, EROFS, EPIPE, ENAMETOOLONG, ENOSTR.
466In most cases, these are non-transient conditions that require manual
467intervention, and should therefore cause the program to terminate.
468
469=back
470
471=cut
472
473our $Too_Big = 1024 * 1024 * 2;
474
475sub copy_check_md5 {
476  my ($in, $out, $md5, $setmode) = @_;
477
478  open(my $fin, '<', $in) or return;
479
480  # NOTE: This works only because we stat the filehandle instead of the
481  # file.  The file could have been unlinked and re-created since we opened
482  # it for read.
483  my ($ino, $mode, $size, $mtime) = do { no warnings; (stat $fin)[1,2,7,9] };
484  defined($size) or return;
485
486  open(my $fout, '>', $out) or return;
487
488  # Stolen from File::Copy:
489  my $bufsize = $size;
490  $bufsize = 1024 if ($bufsize < 512);
491  $bufsize = $Too_Big if ($bufsize > $Too_Big);
492  my $buf;
493  for (;;) {
494    my ($r, $w, $t);
495    defined($r = sysread($fin, $buf, $bufsize)) or return;
496    last unless $r;
497    $md5->add($buf) if $md5;
498    for ($w = 0; $w < $r; $w += $t) {
499      $t = syswrite($fout, $buf, $r - $w, $w) or return;
500    }
501  }
502
503
504  my $size3;
505  {
506    local $SIG{__WARN__} = sub {
507      local $_ = $_[0];
508      warn $_ unless /unopened/;	# Ignore "stat() on unopened filehandle"
509    };
510    $size3 = (stat $fout)[7];
511  }
512  close($fout) or return;
513
514  # Now, if the file is still there, report if it changed.  This is how
515  # we'll know if somebody isn't following the rules.
516  my ($ino2, $size2, $mtime2) = do { no warnings; (stat $fin)[1,7,9] };
517  die "$in changed during copying (created non-atomically)"
518    if $ino2 && ($ino2 != $ino || $size2 != $size || $mtime2 != $mtime);
519
520  close($fin);
521
522  # I don't know of any way that this could happen, but we'll check here
523  # just so we know for sure that it didn't happen.
524  die "Copying to $out: size $size3 doesn't match source size $size"
525    unless defined($size3) && $size3 == $size;
526
527  chmod($mode & 0777, $out) or die "chmod $out: $!" if $setmode;
528
529  ($size, $mtime)
530}
531
532###############################################################################
533#
534# Subroutines in the Mpp::BuildCache::Entry package:
535#
536package Mpp::BuildCache::Entry;
537
538=head1 The Mpp::BuildCache::Entry package
539
540A Mpp::BuildCache::Entry is an object returned by BuildCache::lookup_file.  You can
541do the following with the object:
542
543=head2 absolute_filename
544
545   $bc_entry->absolute_filename
546
547Returns the name of the file in the build cache.
548
549=cut
550
551sub absolute_filename { $_[0]->{FILENAME} }
552*name = \&absolute_filename;
553
554=head2 copy_from_cache
555
556  $bc_entry->copy_from_cache($output_finfo, $rule, \$reason);
557
558Replaces the file in $output_finfo with the file from the cache, and updates
559all the Mpp::File data structures to reflect this change.
560The build info signature is checked against the target file in the cache,
561and if $Mpp::md5check_bc is set, then the MD5 checksum is also verified.
562
563Returns true if the file was successfully restored from the cache, false if
564not.  (I B<think> the only reason it wouldn't be successfully restored is that
565someone deleted the file from cache between the time it was returned from
566lookup_file and the time copy_from_cache is invoked.)
567If it returns false, then $reason is set to a string that explains why.
568If $reason ends with '(OK)', then the failure could have been due to legitimate
569concurrent access of the build cache.
570If it fails, then the output target is unlinked.
571
572=cut
573
574sub fix_ok {
575# If we detect that a target and its build info don't go together,
576# then we are empowered to nuke them even in --nopopulate_bc mode. We do this
577# only if the target is at least 10 minutes old, because otherwise someone
578# might always nuke files just as they get created.  It's still possible
579# (although unlikely) for a file to be removed immediately after it replaces
580# a file that had been in the cache for a long time, but that's OK.
581  my ($self) = @_;
582  # Re-stat, because this is the last chance we have to notice an update.
583  my $mtime = (stat $self->{FILENAME})[9]; # 9 == real STAT_MTIME
584  $mtime && time - $mtime > 600
585}
586
587sub copy_from_cache {
588  my ($self, $output_finfo, $rule, $reason) = @_;
589  $reason || die;
590
591  Mpp::File::unlink( $output_finfo );	    # Get rid of anything that's there currently.
592  my $output_fname = Mpp::File::absolute_filename_nolink $output_finfo;
593  my $link_to_build_cache = 0;
594
595#
596# Read in the build info:
597#
598  my $cache_fname = $self->{FILENAME};
599  my $build_info_fname = $cache_fname;
600  $build_info_fname =~ s@/([^/]+)$@/$Mpp::File::build_info_subdir/$1.mk@;
601  open my( $fh ), $build_info_fname or do {
602    if($! == POSIX::ENOENT || $! == Mpp::BuildCache::ESTALE) {
603      $$reason = 'the build info file is missing (OK)';
604      unlink $cache_fname if fix_ok($self);
605    } else {
606      $$reason = "read $build_info_fname: $!";
607    }
608    return undef;
609  };
610  my $line;
611  my $build_info=Mpp::File::grok_build_info_file($fh);
612  close $fh;
613
614  $build_info or do {
615    $$reason='currupt build info file, possibly deleted while reading (OK)';
616    unlink $cache_fname, $build_info_fname if fix_ok($self);
617    return undef;
618  }; # Something's wrong with this file.
619
620  # If the target directory doesn't already exist, then we assume that the
621  # rule would have created it.
622  Mpp::Cmds::c_mkdir '-p', Mpp::File::absolute_filename_nolink $output_finfo->{'..'};
623
624# It's a real file.  If it's on the same file system, make it an extra hard
625# link since that's faster and takes up almost no disk space.  Otherwise, copy the
626# file.
627# We have to be very careful not to import a target without its build info
628# file, even if an interrupt arrives, because then it will look like a source
629# file, and then --rm-stale might not work.
630#
631  $Mpp::critical_sections++;
632  my $result = eval {
633    my $md5;
634    require Digest::MD5;
635    $md5 = Digest::MD5->new if $Mpp::md5check_bc;
636    my ($size, $mtime);
637    # TBD: Maybe we shouldn't fall back to copying if link fails.  There
638    # should be a warning at least.
639    if( $self->{DEV} == ((Mpp::File::stat_array $output_finfo->{'..'})->[Mpp::File::STAT_DEV] || 0)
640	&& !$Mpp::force_bc_copy &&
641				  # Same file system?
642	link($self->{FILENAME}, $output_fname)) {
643      # Re-stat in case it changed since we looked it up.
644      ($size, $mtime) = (stat $output_fname)[7, 9];
645      unless( defined $size ) {
646	$$reason = "cached file $self->{FILENAME} became a stale link after we looked it up (OK)";
647	unlink $output_fname;
648	unlink $cache_fname, $build_info_fname if fix_ok($self);
649	return undef;
650      }
651      if($md5 && open(my $fh, '<', $self->{FILENAME})) {
652	$md5->addfile($fh);
653      }
654      $link_to_build_cache = 1;	# Remember that we did the link.
655    } elsif( !( ($size, $mtime) = copy_check_md5( $self->{FILENAME}, $output_fname, $md5, 1) )) {
656				  # Link failed for some reason:
657      # NOTE: Several versions of the Linux NFS client can return EIO instead
658      # of ESTALE or ENOENT on a read after the file has been unlinked.  If
659      # this is a real hardware error, then we hope that it also shows up on
660      # some other operation where it can't happen legitimately.
661      $$reason = ($!==POSIX::ENOENT || $!==Mpp::BuildCache::ESTALE || $!==POSIX::EIO) ? 'file was just deleted (OK)' : "copy $self->{FILENAME} to $output_fname: $!";
662      return undef;
663    }
664    my $signature = $mtime . ',' . $size;
665                                    # Form the expected signature.
666    my $build_info_sig = $build_info->{SIGNATURE} || '';
667    if ($signature ne $build_info_sig) {
668                                    # File was corrupted in the build cache.
669                                    # Get rid of it, and don't import it.
670      $$reason = "cached build info file $build_info_sig mismatches cached target file $signature (OK)";
671      unlink $cache_fname, $build_info_fname if fix_ok($self);
672      return undef;
673    }
674    if($md5) {
675      # Digest key and format needs to match Mpp::Signature::md5
676      my $md5sum = $build_info->{MD5_SUM} or do {
677        $$reason = 'no stored MD5 in cached build info file (OK)';
678        return undef;
679      };
680      my $target_md5 = $md5->b64digest;
681      if($target_md5 ne $md5sum) {
682	$$reason = "cached target file $target_md5 mismatches build info MD5_SUM $md5sum (OK)";
683        unlink $cache_fname, $build_info_fname if fix_ok($self);
684	return undef;
685      }
686    }
687
688#
689# Now restore the build info:
690#
691    Mpp::File::may_have_changed( $output_finfo );
692    $output_finfo->{BUILD_INFO} = $build_info;
693    Mpp::File::set_build_info_string( $output_finfo, 'LINKED_TO_CACHE', $link_to_build_cache);
694				  # Remember if it's a link to something in
695				  # the build cache.
696
697    # Need to match build info signature to file signature, or else build info
698    # will be ignored.  This has the drawback that targets that don't use an MD5
699    # signature for this file as a dependency will think it has changed.
700    Mpp::File::set_build_info_string( $output_finfo, 'SIGNATURE', Mpp::File::signature( $output_finfo ));
701
702    # Update the DEP_SIGS that aren't MD5-based, so that the target will still
703    # look up-to-date the next time we run makepp.
704    $rule->build_check_method->update_dep_sigs($output_finfo, $rule);
705
706    Mpp::File::mark_build_info_for_update( $output_finfo );
707    &Mpp::File::update_build_infos; # Write out the build cache right now.
708    1				# No error.
709  };
710  my $error = $@;
711  $result or eval { Mpp::File::unlink( $output_finfo ) }; # Clean up on error
712  $Mpp::critical_sections--;
713  Mpp::propagate_pending_signals();
714  die $error if $error;
715
716  # TBD: Some filesystems don't update atime on file access, for performance
717  # and/or power reasons.  If the build cache is on such a filesystem, then
718  # files will get aged based on their creation time, which is bad because
719  # frequently used files will be aged just as quickly as files that are never
720  # used.  To fix that, I propose that the build_cache_options.pl file define
721  # a new 'UTIME_ON_IMPORT' parameter, and if that is set, then we should
722  # use utime(2) here to update the atime and set mtime to the same value
723  # that we previously sampled.  (Gary Holt has tried this on such a
724  # filesystem, and he reports that it works.)  This is likely to fail because
725  # of permissions, in which case we can copy the target to a unique
726  # filename that we own, then rename that file back to the target, and
727  # finally call utime to update atime and reset the old mtime.  This isn't
728  # implemented because nobody needs it yet (and therefore nobody would be
729  # testing it).  When this is implemented, the check in copy_check_md5 for
730  # constant mtime needs to be downgraded from a die to a failure, because
731  # the utime operation introduces legitimate races.
732
733  $result
734}
735
736*copy_check_md5 = \&Mpp::BuildCache::copy_check_md5;
737
7381;
739