# Copyright (c) 2008-2013 Zmanda, Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
# Sunnyvale, CA 94085, USA, or: http://www.zmanda.com

package Amanda::DB::Catalog;

=head1 NAME

Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?

=head1 SYNOPSIS

  use Amanda::DB::Catalog;

  # get all write timestamps on record
  my @timestamps = Amanda::DB::Catalog::get_write_timestamps();

  # loop over those timestamps, printing dump info for each one
  for my $timestamp (@timestamps) {
      my @parts = Amanda::DB::Catalog::get_parts(
          write_timestamp => $timestamp,
          status => 'OK'
      );
      print "$timestamp:\n";
      for my $part (@parts) {
          my $dump = $part->{dump};
          print " ", $dump->{hostname}, ":", $dump->{diskname},
                " level ", $dump->{level}, "\n";
      }
  }

=head1 MODEL

The Amanda catalog is modeled as a set of dumps comprised of parts. A dump is
a complete bytestream received from an application, and is uniquely identified
by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
and C<write_timestamp>. A dump may be partial, or even a complete failure.

A part corresponds to a single file on a volume, containing a portion of the
data for a dump. A part, then, is completely specified by a volume label and a
file number (C<filenum>). Each part has, among other things, a part number
(C<partnum>) which gives its relative position within the dump. The bytestream
for a dump is recovered by concatenating all of the successful (C<status> = OK)
parts matching the dump.

Files in the holding disk are considered part of the catalog, and are
represented as single-part dumps (holding-disk chunking is ignored, as it is
distinct from split parts).

=head2 DUMPS

The dump table contains one row per dump. It has the following columns:

=over

=item dump_timestamp

(string) -- timestamp of the run in which the dump was created

=item write_timestamp

(string) -- timestamp of the run in which the part was written to this volume,
or C<"00000000000000"> for dumps in the holding disk.

=item hostname

(string) -- dump hostname

=item diskname

(string) -- dump diskname

=item level

(integer) -- dump level

=item status

(string) -- The status of the dump - "OK", "PARTIAL", or "FAIL". If a disk
failed to dump at all, then it is not part of the catalog and thus will not
have an associated dump row. Note that a dump for which the taper logged a
result but for which no part was found is currently reported with the status
"FAILED".

=item message

(string) -- reason for PARTIAL or FAIL status

=item nparts

(integer) -- number of successful parts in this dump

=item bytes

(integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.

=item kb

(integer) -- size (in kb) of the dump on disk

=item orig_kb

(integer) -- size (in kb) of the complete dump (before compression or encryption); undef
if not available

=item sec

(integer) -- time (in seconds) spent writing this dump

=item parts

(arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
always C<undef>). When multiple partial parts are available, the choice of the
partial that is included in this array is undefined.

=back

A dump is represented as a hashref with these keys.

The C<write_timestamp> gives the time of the amanda run in which the part was
written to this volume. The C<write_timestamp> may differ from the
C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
initial dump.

=head2 PARTS

The parts table contains one row per part, and has the following columns:

=over

=item label

(string) -- volume label (not present for holding files)

=item filenum

(integer) -- file on that volume (not present for holding files)

=item holding_file

(string) -- fully-qualified pathname of the holding file (not present for
on-media dumps)

=item dump

(object ref) -- a reference to the dump containing this part

=item status

(string) -- The status of the part - "OK", "PARTIAL", or "FAILED".

=item partnum

(integer) -- part number of a split part (1-based)

=item kb

(integer) -- size (in kb) of this part

=item sec

(integer) -- time (in seconds) spent writing this part

=back

A part is represented as a hashref with these keys. The C<label> and
C<filenum> serve as a primary key.

Note that parts' C<dump> and dumps' C<parts> create a reference loop. This is
broken by making the C<dump> reference weak in C<get_dumps>, and the C<parts>
array's contents weak references in C<get_parts>.

=head2 NOTES

All timestamps used in this module are full-length, in the format
C<YYYYMMDDHHMMSS>. If the underlying data contains only datestamps, they are
zero-extended into timestamps: C<YYYYMMDD000000>. A C<dump_timestamp> always
corresponds to the initiation of the I<original> dump run, while
C<write_timestamp> gives the time the file was written to the volume. When
parts are migrated from volume to volume (e.g., by I<amvault>), the
C<dump_timestamp> does not change.

In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
serves as a unique identifier for a dump bytestream, but because the bytestream
may appear several times in the catalog (due to vaulting) the additional
C<write_timestamp> is required to identify a particular on-storage instance of
a dump. Note that the part sizes may differ between instances, so it is not
valid to concatenate parts from different dump instances.

=head1 INTERFACES

=head2 SUMMARY DATA

The following functions provide summary data based on the contents of the
catalog.

=over

=item get_write_timestamps()

Get a list of all write timestamps, sorted in chronological order.

=item get_latest_write_timestamp()

Return the most recent write timestamp.

=item get_latest_write_timestamp(type => 'amvault')

=item get_latest_write_timestamp(types => [ 'amvault', .. ])

Return the timestamp of the most recent dump of the given type or types. The
available types are given below for C<get_run_type>.

=item get_labels_written_at_timestamp($ts)

Return a list of labels for volumes written at the given timestamp.

=item get_run_type($ts)

Return the type of run made at the given timestamp. The result is one of
C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.

=back

=head2 PARTS

=over

=item get_parts(%parameters)

This function returns a sequence of parts. Values in C<%parameters> restrict
the set of parts that are returned. The hash can have any of the following
keys:

=over

=item write_timestamp

restrict to parts written at this timestamp

=item write_timestamps

(arrayref) restrict to parts written at any of these timestamps (note that
holding-disk files have no C<write_timestamp>, so this option and the previous
will omit them)

=item dump_timestamp

restrict to parts with exactly this timestamp

=item dump_timestamps

(arrayref) restrict to parts with any of these timestamps

=item dump_timestamp_match

restrict to parts with timestamps matching this expression

=item holding

if true, only return dumps on holding disk. If false, omit dumps on holding
disk.

=item hostname

restrict to parts with exactly this hostname

=item hostnames

(arrayref) restrict to parts with any of these hostnames

=item hostname_match

restrict to parts with hostnames matching this expression

=item diskname

restrict to parts with exactly this diskname

=item disknames

(arrayref) restrict to parts with any of these disknames

=item diskname_match

restrict to parts with disknames matching this expression

=item label

restrict to parts with exactly this label

=item labels

(arrayref) restrict to parts with any of these labels

=item level

restrict to parts with exactly this level

=item levels

(arrayref) restrict to parts with any of these levels

=item status

restrict to parts with this status

=item labelstr

restrict to parts on volume matching the labelstr.

=item dumpspecs

(arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs

=back

Match expressions are described in the amanda(8) manual page.

=item sort_parts([ $key1, $key2, .. ], @parts)

Given a list of parts, this function sorts that list by the requested keys.
The following keys are available:

=over

=item hostname

=item diskname

=item write_timestamp

=item dump_timestamp

=item level

=item filenum

=item label

Note that this sorts labels I<lexically>, not necessarily in the order they were used!

=item partnum

=item nparts

=back

Keys are processed from left to right: if two dumps have the same value for
C<$key1>, then C<$key2> is examined, and so on. Key names may be prefixed by a
dash (C<->) to reverse the order.

Note that some of these keys are dump keys; the function will automatically
access those values via the C<dump> attribute.

=back

=head2 DUMPS

=over

=item get_dumps(%parameters)

This function returns a sequence of dumps. Values in C<%parameters> restrict
the set of dumps that are returned. The same keys as are used for C<get_parts>
are available here, with the exception of C<label> and C<labels>. In this
case, the C<status> parameter applies to the dump status, not the status of its
constituent parts.

=item sort_dumps([ $key1, $key2 ], @dumps)

Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
The same keys are available, with the exception of C<label>, C<filenum>, and
C<partnum>.

=back

=head2 ADDING DATA

=over

=item add_part($part)

Add the given part to the database. In terms of logfiles, this will either
create a new logfile (if the part's C<write_timestamp> has not been seen
before) or append to an existing logfile. Note that a new logfile will require
a corresponding new entry in the tapelist.

Note that no locking is performed: multiple simultaneous calls to this function
can result in a corrupted or incorrect logfile.

TODO: add_dump

=back

=cut

use Amanda::Logfile qw( :constants );
use Amanda::Tapelist;
use Amanda::Config qw( :init :getconf config_dir_relative );
use Amanda::Util qw( quote_string weaken_ref match_disk match_host match_datestamp match_level match_labelstr_expr);
use File::Glob qw( :glob );
use warnings;
use strict;

# tapelist cache
my $tapelist = undef;

# the logfile names this module understands: log.<stamp>, optionally followed
# by a numeric sequence suffix or ".amflush"
my $LOGFILE_RE = qr/^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;

# utility function: extend an 8-character datestamp to a full 14-character
# timestamp; anything else (including undef) is returned unchanged
sub zeropad {
    my ($timestamp) = @_;
    return $timestamp unless defined $timestamp;
    return $timestamp . "000000" if length($timestamp) == 8;
    return $timestamp;
}

# utility function: return the zero-padded write timestamp encoded in a
# logfile name, or undef if the name does not look like a logfile
sub _logfile_write_timestamp {
    my ($logfile) = @_;
    return unless defined $logfile and $logfile =~ $LOGFILE_RE;
    return zeropad($1);
}

# Return the (zero-padded) write timestamps of all logfiles reported by
# find_log, sorted as strings.
sub get_write_timestamps {
    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    my @timestamps;
    for my $logfile (Amanda::Logfile::find_log()) {
        my $timestamp = _logfile_write_timestamp($logfile);
        push @timestamps, $timestamp if defined $timestamp;
    }

    # sort into an array first: the value of a bare "return sort ..." is
    # undefined when the caller is in scalar context
    my @sorted = sort @timestamps;
    return @sorted;
}

# Return the most recent write timestamp, optionally restricted to runs of
# the given type(s) (see get_run_type); undef if there is no such run.
sub get_latest_write_timestamp {
    my %params = @_;

    # Build a private list of acceptable run types.  The caller's 'types'
    # arrayref is copied, never modified.
    my $types;
    if (exists $params{'types'} or $params{'type'}) {
        $types = [ @{ $params{'types'} || [] } ];
        push @$types, $params{'type'} if $params{'type'};
    }

    my @timestamps = get_write_timestamps();

    # if we're not looking for a particular type, then this is easy
    if (!defined $types) {
        return @timestamps ? $timestamps[-1] : undef;
    }

    # otherwise we need to search backward until we find a logfile of
    # the right type
    for my $ts (reverse @timestamps) {
        my $run_type = get_run_type($ts);
        return $ts if grep { $_ eq $run_type } @$types;
    }

    # (explicit undef is kept so that list-context callers see the same
    # one-element result as before)
    return undef;
}

# Return the type of run ('amflush', 'amvault', 'amdump' or 'unknown') made
# at the given write timestamp, judged by the first START line found in the
# matching logfiles.
sub get_run_type {
    my ($write_timestamp) = @_;

    # find all of the logfiles with that name
    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
    my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
    if ($write_timestamp =~ /000000$/) {
        # the logfile may be named after the bare datestamp instead
        my $write_datestamp = substr($write_timestamp, 0, 8);
        push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
    }

    for my $lf (@matches) {
        # unreadable logfiles are skipped on purpose (best effort)
        open(my $fh, "<", $lf) or next;
        # read into a lexical so the caller's $_ is left alone
        while (my $line = <$fh>) {
            # amflush and amvault put their own names in
            return $1 if ($line =~ /^START (amflush|amvault)/);
            # but for amdump we see planner
            return 'amdump' if ($line =~ /^START planner/);
        }
        close($fh);
    }

    return "unknown";
}


# this generic function implements the loop of scanning logfiles to find
# the requested data; get_parts and get_dumps then adjust the results to
# match what the user expects.
#
# Arguments: "parts" or "dumps", followed by the %parameters documented for
# get_parts.  Returns two arrayrefs: the dumps and the parts.
sub get_parts_and_dumps {
    my $get_what = shift; # "parts" or "dumps"
    my %params = @_;
    my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));

    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    # %params is only a shallow copy, so the "plural" values are still the
    # caller's arrayrefs; copy them before anything is appended below
    for my $plural (qw(write_timestamps dump_timestamps hostnames disknames levels labels)) {
        $params{$plural} = [ @{ $params{$plural} } ]
            if ref($params{$plural}) eq 'ARRAY';
    }

    # pre-process params by appending all of the "singular" parameters to the "plurals"
    push @{$params{'write_timestamps'}}, zeropad($params{'write_timestamp'})
        if exists($params{'write_timestamp'});
    push @{$params{'dump_timestamps'}}, zeropad($params{'dump_timestamp'})
        if exists($params{'dump_timestamp'});
    push @{$params{'hostnames'}}, $params{'hostname'}
        if exists($params{'hostname'});
    push @{$params{'disknames'}}, $params{'diskname'}
        if exists($params{'diskname'});
    push @{$params{'levels'}}, $params{'level'}
        if exists($params{'level'});
    if ($get_what eq 'parts') {
        push @{$params{'labels'}}, $params{'label'}
            if exists($params{'label'});
    } else {
        delete $params{'labels'};
    }

    # specifying write_timestamps implies we won't check holding files
    if ($params{'write_timestamps'}) {
        if (defined $params{'holding'} and $params{'holding'}) {
            return [], []; # well, that's easy..
        }
        $params{'holding'} = 0;
    }
    # specifying labelstr implies we won't check holding files
    if ($params{'labelstr'}) {
        if (defined $params{'holding'} and $params{'holding'}) {
            return [], []; # well, that's easy..
        }
        $params{'holding'} = 0;
    }

    # Since we're working from logfiles, we have to pick the logfiles we'll use first.
    # Then we can use search_logfile.
    my @logfiles;
    if ($params{'holding'}) {
        @logfiles = ( 'holding', );
    } elsif (exists($params{'write_timestamps'})) {
        # if we have specific write_timestamps, the job is pretty easy.
        my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
        for my $logfile (Amanda::Logfile::find_log()) {
            my $timestamp = _logfile_write_timestamp($logfile);
            next unless defined $timestamp;
            next unless exists($timestamps_hash{$timestamp});
            push @logfiles, $logfile;
        }
    } elsif (exists($params{'dump_timestamps'})) {
        # otherwise, we need only look in logfiles at or after the earliest dump timestamp
        my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
        my $earliest_timestamp = $sorted_timestamps[0];
        for my $logfile (Amanda::Logfile::find_log()) {
            my $timestamp = _logfile_write_timestamp($logfile);
            next unless defined $timestamp;
            # (an empty dump_timestamps list restricts nothing)
            next if (defined $earliest_timestamp
                and $timestamp lt $earliest_timestamp);
            push @logfiles, $logfile;
        }
    } else {
        # oh well -- it looks like we'll have to read all existing logfiles.
        @logfiles = Amanda::Logfile::find_log();
    }

    # Set up some hash tables for speedy lookups of various attributes
    my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
    %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
        if (exists($params{'dump_timestamps'}));
    %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
        if (exists($params{'hostnames'}));
    %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
        if (exists($params{'disknames'}));
    %levels_hash = map { ($_, undef) } @{$params{'levels'}}
        if (exists($params{'levels'}));
    %labels_hash = map { ($_, undef) } @{$params{'labels'}}
        if (exists($params{'labels'}));

    my %dumps;
    my @parts;

    # *also* scan holding if the holding param wasn't specified
    if (!exists $params{'holding'}) {
        push @logfiles, 'holding';
    }

    # now loop over those logfiles and use search_logfile to load the dumpfiles
    # from them, then process each entry from the logfile
    for my $logfile (@logfiles) {
        my (@find_results, $write_timestamp);

        # get the raw contents from search_logfile, or use the holding disk
        # if $logfile is the pseudo-logfile 'holding'
        if ($logfile ne 'holding') {
            @find_results = Amanda::Logfile::search_logfile(undef, undef,
                                                "$logfile_dir/$logfile", 1);
            # the write_timestamp comes from the logfile name
            $write_timestamp = _logfile_write_timestamp($logfile);
        } else {
            @find_results = Amanda::Logfile::search_holding_disk();
            $write_timestamp = '00000000000000';
        }

        # filter against *_match with dumps_match
        @find_results = Amanda::Logfile::dumps_match([@find_results],
            exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
            exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
            exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
            undef,
            0);

        # loop over each entry in the logfile.
        for my $find_result (@find_results) {

            # filter out the non-dump error messages that find.c produces
            next unless (defined $find_result->{'label'});

            # bail out on this result early, if possible
            next if (%dump_timestamps_hash
                and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
            next if (%hostnames_hash
                and !exists($hostnames_hash{$find_result->{'hostname'}}));
            next if (%disknames_hash
                and !exists($disknames_hash{$find_result->{'diskname'}}));
            next if (%levels_hash
                and !exists($levels_hash{$find_result->{'level'}}));
            next if (%labels_hash
                and !exists($labels_hash{$find_result->{'label'}}));
            next if (defined $params{'labelstr'}
                and !match_labelstr_expr($params{'labelstr'},$find_result->{'label'}));
            if ($get_what eq 'parts') {
                next if (exists($params{'status'})
                    and defined $find_result->{'status'}
                    and $find_result->{'status'} ne $params{'status'});
            }

            # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
            # tendency to produce duplicate results
            next if ($params{'dumpspecs'}
                and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
                                                    $params{'dumpspecs'}, 0));

            my $dump_timestamp = zeropad($find_result->{'timestamp'});

            my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
                               $write_timestamp, $find_result->{'level'}, $dump_timestamp);
            my $dump = $dumps{$dumpkey};
            if (!defined $dump) {
                $dump = $dumps{$dumpkey} = {
                    dump_timestamp => $dump_timestamp,
                    write_timestamp => $write_timestamp,
                    hostname => $find_result->{'hostname'},
                    diskname => $find_result->{'diskname'},
                    level => $find_result->{'level'}+0,
                    orig_kb => $find_result->{'orig_kb'},
                    status => $find_result->{'dump_status'},
                    message => $find_result->{'message'},
                    # the rest of these params are unknown until we see a taper
                    # DONE, PARTIAL, or FAIL line, although we count nparts
                    # manually instead of relying on the logfile
                    nparts => 0, # $find_result->{'totalparts'}
                    bytes => -1, # $find_result->{'bytes'}
                    kb => -1, # $find_result->{'kb'}
                    sec => -1, # $find_result->{'sec'}
                };
            }

            # start setting up a part hash for this result
            my %part;
            if ($logfile ne 'holding') {
                # on-media dump
                %part = (
                    label => $find_result->{'label'},
                    filenum => $find_result->{'filenum'},
                    dump => $dump,
                    status => $find_result->{'status'} || 'FAILED',
                    sec => $find_result->{'sec'},
                    kb => $find_result->{'kb'},
                    orig_kb => $find_result->{'orig_kb'},
                    partnum => $find_result->{'partnum'},
                );
            } else {
                # holding disk
                %part = (
                    holding_file => $find_result->{'label'},
                    dump => $dump,
                    status => $find_result->{'status'} || 'FAILED',
                    sec => 0.0,
                    kb => $find_result->{'kb'},
                    orig_kb => $find_result->{'orig_kb'},
                    partnum => 1,
                );
                # and fix up the dump, too
                $dump->{'status'} = $find_result->{'status'} || 'FAILED';
                $dump->{'bytes'} = $find_result->{'bytes'};
                $dump->{'kb'} = $find_result->{'kb'};
                $dump->{'sec'} = $find_result->{'sec'};
            }

            # weaken the dump ref if we're returning dumps
            weaken_ref($part{'dump'})
                if ($get_what eq 'dumps');

            # count the number of successful parts in the dump
            $dump->{'nparts'}++ if $part{'status'} eq 'OK';

            # and add a ref to the array of parts; if we're getting
            # parts, then this is a weak ref
            $dump->{'parts'}[$part{'partnum'}] = \%part;
            weaken_ref($dump->{'parts'}[$part{'partnum'}])
                if ($get_what eq 'parts');

            push @parts, \%part;
        }

        # if these dumps were on the holding disk, then we're done
        next if $logfile eq 'holding';

        # re-read the logfile to extract dump-level info that's not captured by
        # search_logfile
        my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
        die "logfile '$logfile' not found" unless $logh;
        while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
            next unless $prog == $P_TAPER;
            my $status;
            if ($type == $L_DONE) {
                $status = 'OK';
            } elsif ($type == $L_PARTIAL) {
                $status = 'PARTIAL';
            } elsif ($type == $L_FAIL) {
                $status = 'FAIL';
            } elsif ($type == $L_SUCCESS) {
                $status = "OK";
            } else {
                next;
            }

            # now extract the appropriate info; luckily these log lines have the same
            # format, more or less
            my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
            ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
            ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
            ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
            if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
                ($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
                if (substr($str1, 0,1) ne '[') {
                    $str = $str1;
                } else { # nparts is not in all PARTIAL lines
                    $nparts = 0;
                }

            } else {
                $nparts = 0;
            }
            ($level, $str) = Amanda::Util::skip_quoted_string($str);
            if ($status ne 'FAIL') {
                my $s = $str;
                my $b_unit;
                ($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
                    or die("'$s'");
                if ($b_unit eq 'bytes') {
                    # the size was logged in bytes; derive kb from it
                    $bytes = $kb;
                    $kb /= 1024;
                } else {
                    $bytes = 0;
                }
                $secs = 0.1 if ($secs <= 0);
            }
            if ($status ne 'OK') {
                $message = $str;
            } else {
                $message = '';
            }

            $hostname = Amanda::Util::unquote_string($hostname);
            $diskname = Amanda::Util::unquote_string($diskname);
            $message = Amanda::Util::unquote_string($message) if $message;

            # filter against dump criteria
            next if ($params{'dump_timestamp_match'}
                and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
            next if (%dump_timestamps_hash
                and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));

            next if ($params{'hostname_match'}
                and !match_host($params{'hostname_match'}, $hostname));
            next if (%hostnames_hash
                and !exists($hostnames_hash{$hostname}));

            next if ($params{'diskname_match'}
                and !match_disk($params{'diskname_match'}, $diskname));
            next if (%disknames_hash
                and !exists($disknames_hash{$diskname}));

            next if (%levels_hash
                and !exists($levels_hash{$level}));
            # get_dumps filters on status

            if ($params{'dumpspecs'}) {
                my $ok = 0;
                for my $ds (@{$params{'dumpspecs'}}) {
                    # (the "". are for SWIG's benefit - SWIGged functions don't like
                    # strings generated by SWIG.  Long story.)
                    next if (defined $ds->{'host'}
                            and !match_host("".$ds->{'host'}, $hostname));
                    next if (defined $ds->{'disk'}
                            and !match_disk("".$ds->{'disk'}, $diskname));
                    next if (defined $ds->{'datestamp'}
                            and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
                    next if (defined $ds->{'level'}
                            and !match_level("".$ds->{'level'}, $level));
                    next if (defined $ds->{'write_timestamp'}
                             and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
                    $ok = 1;
                    last;
                }
                next unless $ok;
            }

            my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
                               $level, zeropad($dump_timestamp));
            my $dump = $dumps{$dumpkey};
            if (!defined $dump) {
                # this will happen when a dump has no parts - a FAILed dump.
                $dump = $dumps{$dumpkey} = {
                    dump_timestamp => zeropad($dump_timestamp),
                    write_timestamp => $write_timestamp,
                    hostname => $hostname,
                    diskname => $diskname,
                    level => $level+0,
                    orig_kb => undef,
                    status => "FAILED",
                    # message set below
                    nparts => $nparts, # hopefully 0?
                    # kb set below
                    # sec set below
                };
            }

            $dump->{'message'} = $message;
            if ($status eq 'FAIL') {
                $dump->{'bytes'} = 0;
                $dump->{'kb'} = 0;
                $dump->{'sec'} = 0.0;
            } else {
                $dump->{'bytes'} = $bytes+0;
                $dump->{'kb'} = $kb+0;
                $dump->{'sec'} = $secs+0.0;
            }
        }
        Amanda::Logfile::close_logfile($logh);
    }

    return [ values %dumps], \@parts;
}

# Return the list of parts matching %parameters (see the POD above).
sub get_parts {
    my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
    return @$parts;
}

# Return the list of dumps matching %parameters (see the POD above); here
# the 'status' parameter is applied to the dump status.
sub get_dumps {
    my %params = @_;
    my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
    my @dumps = @$dumps;

    if (exists $params{'status'}) {
        # a dump whose status is unknown (undef) never matches
        @dumps = grep {
            defined $_->{'status'} and $_->{'status'} eq $params{'status'}
        } @dumps;
    }

    return @dumps;
}

# Sort parts by the given keys (see the POD above).  Each key is parsed once
# into a comparison rule before sorting starts.
sub sort_parts {
    my ($keys, @parts) = @_;

    my @rules;
    for my $key (@$keys) {
        my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
        push @rules, {
            key     => $k,
            sign    => ($rev eq '-') ? -1 : 1,
            # part components compared numerically: partnum, filenum
            # dump components compared numerically: nparts, level
            numeric => ($k =~ /^(partnum|filenum|nparts|level)$/) ? 1 : 0,
            # these keys live in the part's dump, not in the part itself
            in_dump => ($k =~ /^(nparts|level|hostname|diskname|write_timestamp|dump_timestamp)$/) ? 1 : 0,
        };
    }

    return sort {
        for my $rule (@rules) {
            my $k = $rule->{'key'};
            my ($left, $right) = $rule->{'in_dump'}
                ? ($a->{'dump'}->{$k}, $b->{'dump'}->{$k})
                : ($a->{$k}, $b->{$k});
            my $res = $rule->{'numeric'} ? ($left <=> $right) : ($left cmp $right);
            return $rule->{'sign'} * $res if $res;
        }
        return 0;
    } @parts;
}

# Sort dumps by the given keys (see the POD above).  Each key is parsed once
# into a comparison rule before sorting starts.
sub sort_dumps {
    my ($keys, @dumps) = @_;

    my @rules;
    for my $key (@$keys) {
        my ($rev, $k) = ($key =~ /^(-?)(.*)$/);
        push @rules, {
            key     => $k,
            sign    => ($rev eq '-') ? -1 : 1,
            # everything else (hostname, diskname, write_timestamp,
            # dump_timestamp) is compared alphabetically
            numeric => ($k =~ /^(nparts|level|filenum)$/) ? 1 : 0,
        };
    }

    return sort {
        for my $rule (@rules) {
            my $k = $rule->{'key'};
            my $res = $rule->{'numeric'}
                ? ($a->{$k} <=> $b->{$k})
                : ($a->{$k} cmp $b->{$k});
            return $rule->{'sign'} * $res if $res;
        }
        return 0;
    } @dumps;
}

# caches for add_part() to avoid repeatedly looking up the log
# filename for a particular write_timestamp.
my $add_part_last_label = undef;
my $add_part_last_write_timestamp = undef;
my $add_part_last_logfile = undef;

# Append a PART line (and, for the last part of a dump, a DONE line) for the
# given part to the logfile for its write_timestamp and label, creating the
# logfile and a tapelist entry if necessary.  Dies if the part has no
# write_timestamp or if the logfile cannot be written.
sub add_part {
    my ($dump) = @_;
    my $logfh;
    my $logfile;
    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
    my ($last_filenum, $last_secs, $last_kbs) = (-1, 0, 0);

    # first order of business is to find out whether we need to make a new
    # dumpfile for this.
    die "dump has no 'write_timestamp'" unless defined $dump->{'write_timestamp'};
    my $write_timestamp = zeropad($dump->{'write_timestamp'});

    # consult our one-element cache for this label and write_timestamp
    if (!defined $add_part_last_label
        or $add_part_last_label ne $dump->{'label'}
        or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {

        # Invalidate the whole cache entry, not just the logfile: otherwise
        # a failed lookup leaves the old label/timestamp paired with an
        # undefined logfile, and a later call for that old label would take
        # a false cache hit and create a duplicate logfile.
        $add_part_last_label = undef;
        $add_part_last_write_timestamp = undef;
        $add_part_last_logfile = undef;

        # find_log assumes that the tapelist has been loaded, so load it now
        _load_tapelist();

        LOGFILE:
        for my $lf (Amanda::Logfile::find_log()) {
            my $log_timestamp = _logfile_write_timestamp($lf);
            next unless defined $log_timestamp;
            next unless ($log_timestamp eq $write_timestamp);

            # write timestamp matches; now check the label
            for my $find_result (Amanda::Logfile::search_logfile(undef, undef,
                                                        "$logdir/$lf", 1)) {
                next unless (defined $find_result->{'label'});
                next unless ($find_result->{'label'} eq $dump->{'label'});

                $add_part_last_label = $dump->{'label'};
                $add_part_last_write_timestamp = $dump->{'write_timestamp'};
                $add_part_last_logfile = $lf;
                last LOGFILE;
            }
        }
    }
    $logfile = $add_part_last_logfile;

    # truncate the write_timestamp if we're not using timestamps
    if (!getconf($CNF_USETIMESTAMPS)) {
        $write_timestamp = substr($write_timestamp, 0, 8);
    }

    # get the information on the last dump and part in this logfile, or create
    # a new logfile if none exists, then open the logfile for writing.
    if (defined $logfile) {
        # NOTE: this depends on an implementation detail of search_logfile: it
        # returns the results in the reverse order of appearance in the logfile.
        # Since we're concerned with the last elements of this logfile that we
        # will be appending to shortly, we simply reverse this list.  As this
        # package is rewritten to parse logfiles on its own (or access a relational
        # database), this implementation detail will no longer be relevant.
        my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
                                                    "$logdir/$logfile", 1);
        for my $find_result (@find_results) {
            # filter out the non-dump error messages that find.c produces
            next unless (defined $find_result->{'label'});

            $last_filenum = $find_result->{'filenum'};

            # if this is part number 1, reset our secs and kbs counters on the
            # assumption that this is the beginning of a new dump
            if ($find_result->{'partnum'} == 1) {
                $last_secs = $last_kbs = 0;
            }
            $last_secs += $find_result->{'sec'};
            $last_kbs += $find_result->{'kb'};
        }

        open($logfh, ">>", "$logdir/$logfile")
            or die("Could not append to '$logdir/$logfile': $!");
    } else {
        # pick an unused log filename
        my $i = 0;
        while (1) {
            $logfile = "log.$write_timestamp.$i";
            last unless -f "$logdir/$logfile";
            $i++;
        }

        open($logfh, ">", "$logdir/$logfile")
            or die("Could not write '$logdir/$logfile': $!");

        print $logfh
            "INFO taper This logfile was generated by Amanda::DB::Catalog\n";

        print $logfh
            "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";

        if (!defined $tapelist) {
            _load_tapelist();
        } else {
            # reload the tapelist immediately, in case it's been modified
            $tapelist->reload();
        }

        # see if we need to add an entry to the tapelist for this dump
        if (!grep { $_->{'label'} eq $dump->{'label'}
                    and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
                } @{$tapelist->{tles}}) {
            $tapelist->reload(1);
            $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
            $tapelist->write();
        }
    }

    if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
        warn "Discontinuity in filenums in $logfile: " .
             "from $last_filenum to $dump->{filenum}";
    }

    my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;

    my $part_line = "PART taper ";
    $part_line .= "$dump->{label} ";
    $part_line .= "$dump->{filenum} ";
    $part_line .= quote_string($dump->{hostname}) . " ";
    $part_line .= quote_string($dump->{diskname}) . " ";
    $part_line .= "$dump->{dump_timestamp} ";
    $part_line .= "$dump->{partnum}/$dump->{nparts} ";
    $part_line .= "$dump->{level} ";
    $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
    print $logfh "$part_line\n";

    # TODO: we don't always know nparts when writing a part, so
    # this is not always an effective way to detect a complete dump.
    # However, it works for purposes of data vaulting.
    if ($dump->{'partnum'} == $dump->{'nparts'}) {
        my $secs = $last_secs + $dump->{'sec'};
        my $kbs = $last_kbs + $dump->{'kb'};
        $kps = $secs? ($kbs + 0.0) / $secs : 0.0;

        my $done_line = "DONE taper ";
        $done_line .= quote_string($dump->{hostname}) ." ";
        $done_line .= quote_string($dump->{diskname}) ." ";
        $done_line .= "$dump->{dump_timestamp} ";
        $done_line .= "$dump->{nparts} ";
        $done_line .= "$dump->{level} ";
        $done_line .= "[sec $secs kb $kbs kps $kps]";
        print $logfh "$done_line\n";
    }

    # buffered write errors only surface when the handle is closed
    close($logfh)
        or die("Could not write '$logdir/$logfile': $!");
}

# Load the tapelist into the module-level cache, unless it is already there.
sub _load_tapelist {
    if (!defined $tapelist) {
        my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
        $tapelist = Amanda::Tapelist->new($tapelist_filename);
    }
}

# Forget everything this module has cached: the tapelist and add_part's
# one-element logfile cache.
sub _clear_cache { # (used by installcheck)
    $tapelist = undef;
    $add_part_last_label = undef;
    $add_part_last_write_timestamp = undef;
    $add_part_last_logfile = undef;
}

1;