1# Copyright (c) 2008-2013 Zmanda, Inc.  All Rights Reserved.
2#
3# This program is free software; you can redistribute it and/or
4# modify it under the terms of the GNU General Public License
5# as published by the Free Software Foundation; either version 2
6# of the License, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11# for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
16#
17# Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
18# Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
19
20package Amanda::DB::Catalog;
21
22=head1 NAME
23
24Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?
25
26=head1 SYNOPSIS
27
28  use Amanda::DB::Catalog;
29
30  # get all dump timestamps on record
31  my @timestamps = Amanda::DB::Catalog::get_timestamps();
32
33  # loop over those timestamps, printing dump info for each one
34  for my $timestamp (@timestamps) {
35      my @dumpfiles = Amanda::DB::Catalog::get_parts(
36	  timestamp => $timestamp,
37	  ok => 1
38      );
      print "$timestamp:\n";
40      for my $dumpfile (@dumpfiles) {
41	  print " ", $dumpfile->{hostname}, ":", $dumpfile->{diskname},
42		" level ", $dumpfile->{level}, "\n";
43      }
44  }
45
46=head1 MODEL
47
48The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
49a complete bytestream received from an application, and is uniquely identified
50by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
51and C<write_timestamp>.  A dump may be partial, or even a complete failure.
52
53A part corresponds to a single file on a volume, containing a portion of the
54data for a dump.  A part, then, is completely specified by a volume label and a
55file number (C<filenum>).  Each part has, among other things, a part number
56(C<partnum>) which gives its relative position within the dump.  The bytestream
57for a dump is recovered by concatenating all of the successful (C<status> = OK)
58parts matching the dump.
59
60Files in the holding disk are considered part of the catalog, and are
61represented as single-part dumps (holding-disk chunking is ignored, as it is
62distinct from split parts).
63
64=head2 DUMPS
65
66The dump table contains one row per dump.  It has the following columns:
67
68=over
69
70=item dump_timestamp
71
72(string) -- timestamp of the run in which the dump was created
73
74=item write_timestamp
75
76(string) -- timestamp of the run in which the part was written to this volume,
77or C<"00000000000000"> for dumps in the holding disk.
78
79=item hostname
80
81(string) -- dump hostname
82
83=item diskname
84
85(string) -- dump diskname
86
87=item level
88
89(integer) -- dump level
90
91=item status
92
93(string) -- The status of the dump - "OK", "PARTIAL", or "FAIL".  If a disk
94failed to dump at all, then it is not part of the catalog and thus will not
95have an associated dump row.
96
97=item message
98
99(string) -- reason for PARTIAL or FAIL status
100
101=item nparts
102
103(integer) -- number of successful parts in this dump
104
105=item bytes
106
107(integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.
108
109=item kb
110
111(integer) -- size (in kb) of the dump on disk
112
113=item orig_kb
114
115(integer) -- size (in kb) of the complete dump (before compression or encryption); undef
116if not available
117
118=item sec
119
(integer) -- time (in seconds) spent writing this dump
121
122=item parts
123
124(arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
125always C<undef>).  When multiple partial parts are available, the choice of the
126partial that is included in this array is undefined.
127
128=back
129
130A dump is represented as a hashref with these keys.
131
132The C<write_timestamp> gives the time of the amanda run in which the part was
133written to this volume.  The C<write_timestamp> may differ from the
134C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
135initial dump.
136
137=head2 PARTS
138
139The parts table contains one row per part, and has the following columns:
140
141=over
142
143=item label
144
145(string) -- volume label (not present for holding files)
146
147=item filenum
148
149(integer) -- file on that volume (not present for holding files)
150
151=item holding_file
152
153(string) -- fully-qualified pathname of the holding file (not present for
154on-media dumps)
155
156=item dump
157
158(object ref) -- a reference to the dump containing this part
159
160=item status
161
162(string) -- The status of the part - "OK", "PARTIAL", or "FAILED".
163
164=item partnum
165
166(integer) -- part number of a split part (1-based)
167
168=item kb
169
170(integer) -- size (in kb) of this part
171
172=item sec
173
174(integer) -- time (in seconds) spent writing this part
175
176=back
177
178A part is represented as a hashref with these keys.  The C<label> and
179C<filenum> serve as a primary key.
180
Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
broken by making the C<parts> array's contents weak references in C<get_parts>,
and the C<dump> reference weak in C<get_dumps>.
184
185=head2 NOTES
186
187All timestamps used in this module are full-length, in the format
188C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
189zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
190corresponds to the initiation of the I<original> dump run, while
191C<write_timestamp> gives the time the file was written to the volume.  When
192parts are migrated from volume to volume (e.g., by I<amvault>), the
193C<dump_timestamp> does not change.
194
195In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
196serves as a unique identifier for a dump bytestream, but because the bytestream
197may appear several times in the catalog (due to vaulting) the additional
198C<write_timestamp> is required to identify a particular on-storage instance of
199a dump.  Note that the part sizes may differ between instances, so it is not
200valid to concatenate parts from different dump instances.
201
202=head1 INTERFACES
203
204=head2 SUMMARY DATA
205
206The following functions provide summary data based on the contents of the
207catalog.
208
209=over
210
211=item get_write_timestamps()
212
213Get a list of all write timestamps, sorted in chronological order.
214
215=item get_latest_write_timestamp()
216
217Return the most recent write timestamp.
218
=item get_latest_write_timestamp(type => 'amvault')

=item get_latest_write_timestamp(types => [ 'amvault', .. ])
221
222Return the timestamp of the most recent dump of the given type or types.  The
223available types are given below for C<get_run_type>.
224
225=item get_labels_written_at_timestamp($ts)
226
227Return a list of labels for volumes written at the given timestamp.
228
229=item get_run_type($ts)
230
231Return the type of run made at the given timestamp.  The result is one of
232C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.
233
234=back
235
236=head2 PARTS
237
238=over
239
240=item get_parts(%parameters)
241
242This function returns a sequence of parts.  Values in C<%parameters> restrict
243the set of parts that are returned.  The hash can have any of the following
244keys:
245
246=over
247
248=item write_timestamp
249
250restrict to parts written at this timestamp
251
252=item write_timestamps
253
254(arrayref) restrict to parts written at any of these timestamps (note that
255holding-disk files have no C<write_timestamp>, so this option and the previous
256will omit them)
257
258=item dump_timestamp
259
260restrict to parts with exactly this timestamp
261
262=item dump_timestamps
263
264(arrayref) restrict to parts with any of these timestamps
265
266=item dump_timestamp_match
267
268restrict to parts with timestamps matching this expression
269
270=item holding
271
272if true, only return dumps on holding disk.  If false, omit dumps on holding
273disk.
274
275=item hostname
276
277restrict to parts with exactly this hostname
278
279=item hostnames
280
281(arrayref) restrict to parts with any of these hostnames
282
283=item hostname_match
284
285restrict to parts with hostnames matching this expression
286
287=item diskname
288
289restrict to parts with exactly this diskname
290
291=item disknames
292
293(arrayref) restrict to parts with any of these disknames
294
295=item diskname_match
296
297restrict to parts with disknames matching this expression
298
299=item label
300
301restrict to parts with exactly this label
302
303=item labels
304
305(arrayref) restrict to parts with any of these labels
306
307=item level
308
309restrict to parts with exactly this level
310
311=item levels
312
313(arrayref) restrict to parts with any of these levels
314
315=item status
316
317restrict to parts with this status
318
319=item labelstr
320
321restrict to parts on volume matching the labelstr.
322
323=item dumpspecs
324
(arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs
326
327=back
328
329Match expressions are described in the amanda(8) manual page.
330
331=item sort_parts([ $key1, $key2, .. ], @parts)
332
333Given a list of parts, this function sorts that list by the requested keys.
334The following keys are available:
335
336=over
337
338=item hostname
339
340=item diskname
341
342=item write_timestamp
343
344=item dump_timestamp
345
346=item level
347
348=item filenum
349
350=item label
351
352Note that this sorts labels I<lexically>, not necessarily in the order they were used!
353
354=item partnum
355
356=item nparts
357
358=back
359
360Keys are processed from left to right: if two dumps have the same value for
361C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
362dash (C<->) to reverse the order.
363
364Note that some of these keys are dump keys; the function will automatically
365access those values via the C<dump> attribute.
366
367=back
368
369=head2 DUMPS
370
371=over
372
373=item get_dumps(%parameters)
374
375This function returns a sequence of dumps.  Values in C<%parameters> restrict
376the set of dumps that are returned.  The same keys as are used for C<get_parts>
377are available here, with the exception of C<label> and C<labels>.  In this
378case, the C<status> parameter applies to the dump status, not the status of its
379constituent parts.
380
381=item sort_dumps([ $key1, $key2 ], @dumps)
382
383Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
384The same keys are available, with the exception of C<label>, C<filenum>, and
385C<partnum>.
386
387=back
388
389=head2 ADDING DATA
390
391=over
392
393=item add_part($part)
394
395Add the given part to the database.  In terms of logfiles, this will either
396create a new logfile (if the part's C<write_timestamp> has not been seen
397before) or append to an existing logfile.  Note that a new logfile will require
398a corresponding new entry in the tapelist.
399
400Note that no locking is performed: multiple simultaneous calls to this function
401can result in a corrupted or incorrect logfile.
402
403TODO: add_dump
404
405=back
406
407=cut
408
409use Amanda::Logfile qw( :constants );
410use Amanda::Tapelist;
411use Amanda::Config qw( :init :getconf config_dir_relative );
412use Amanda::Util qw( quote_string weaken_ref match_disk match_host match_datestamp match_level match_labelstr_expr);
413use File::Glob qw( :glob );
414use warnings;
415use strict;
416
# module-level tapelist cache; it starts out undefined
my $tapelist;
419
420# utility function
# Utility: extend an 8-character datestamp (YYYYMMDD) into a full timestamp
# by appending "000000".  Values of any other length are returned unchanged.
sub zeropad {
    my ($timestamp) = @_;
    return (length($timestamp) == 8) ? $timestamp . "000000" : $timestamp;
}
428
# Return the write timestamps of all logfiles reported by find_log, each
# zero-extended to full length, in sorted order.  Names that do not look
# like "log.<digits>", optionally followed by ".<digits>" or ".amflush",
# are ignored.
sub get_write_timestamps {
    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    my @stamps;
    foreach my $name (Amanda::Logfile::find_log()) {
        if (my ($digits) = $name =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/) {
            push @stamps, zeropad($digits);
        }
    }

    return sort @stamps;
}
442
# Return the most recent write timestamp, or undef when there is none.
#
# Optional named parameters:
#   type  => a single run type to look for
#   types => arrayref of acceptable run types
# When either is given, the newest timestamp whose get_run_type() result is
# one of the requested types is returned (undef if no run matches).
sub get_latest_write_timestamp {
    my %params = @_;

    # fold the singular form into the plural list
    push @{$params{'types'}}, $params{'type'}
        if $params{'type'};

    my @timestamps = get_write_timestamps();
    return undef unless @timestamps;

    # with no type restriction, the newest timestamp is the answer
    return $timestamps[-1] unless exists $params{'types'};

    # otherwise walk from newest to oldest until a run of a wanted type shows up
    for my $ts (reverse @timestamps) {
        my $run_type = get_run_type($ts);
        return $ts if grep { $_ eq $run_type } @{$params{'types'}};
    }

    return undef;
}
472
# Return the type of the run made at $write_timestamp.
#
# Every file in the configured log directory named "log.<write_timestamp>.*"
# is examined; when the timestamp ends in "000000", files named after the
# bare 8-character datestamp are examined as well.  The first START line
# found decides the result:
#   "START amflush" / "START amvault" -> 'amflush' / 'amvault'
#   "START planner"                   -> 'amdump'
# Files that cannot be opened are skipped.  If nothing matches, the result
# is "unknown".
sub get_run_type {
    my ($write_timestamp) = @_;

    # find all of the logfiles with that name
    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
    my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);
    if ($write_timestamp =~ /000000$/) {
        my $write_datestamp = substr($write_timestamp, 0, 8);
        push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
    }

    for my $lf (@matches) {
        open(my $fh, "<", $lf) or next;

        # Read into a lexical: a bare while (<$fh>) assigns to the global $_
        # without localizing it, which would clobber the caller's $_ (and
        # overwrite the current element of a caller's for/map loop).
        while (my $line = <$fh>) {
            # amflush and amvault put their own names in
            return $1 if ($line =~ /^START (amflush|amvault)/);
            # but for amdump we see planner
            return 'amdump' if ($line =~ /^START planner/);
        }
        close($fh);
    }

    return "unknown";
}
496
497
# This generic function implements the loop of scanning logfiles (and, where
# applicable, the holding disk) to find the requested data; get_parts and
# get_dumps then adjust the results to match what the user expects.
#
# Arguments: $get_what, either "parts" or "dumps", followed by the
# %parameters filter hash described in the POD for get_parts.  $get_what
# decides:
#   - whether the 'label'/'labels' filters are honoured (parts only),
#   - whether the 'status' filter is applied to parts here (parts only;
#     get_dumps filters dumps on status itself), and
#   - which side of the part<->dump reference loop is weakened.
#
# Returns a two-element list: an arrayref of dump hashrefs and an arrayref of
# part hashrefs.  Dies if a selected logfile cannot be opened, or if the
# "[sec ...]" section of a taper line cannot be parsed.
sub get_parts_and_dumps {
    my $get_what = shift; # "parts" or "dumps"
    my %params = @_;
    my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));

    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    # pre-process params by appending all of the "singular" parameters to the "plurals"
    push @{$params{'write_timestamps'}}, map { zeropad($_) } $params{'write_timestamp'}
	if exists($params{'write_timestamp'});
    push @{$params{'dump_timestamps'}}, map { zeropad($_) } $params{'dump_timestamp'}
	if exists($params{'dump_timestamp'});
    push @{$params{'hostnames'}}, $params{'hostname'}
	if exists($params{'hostname'});
    push @{$params{'disknames'}}, $params{'diskname'}
	if exists($params{'diskname'});
    push @{$params{'levels'}}, $params{'level'}
	if exists($params{'level'});
    if ($get_what eq 'parts') {
	push @{$params{'labels'}}, $params{'label'}
	    if exists($params{'label'});
    } else {
	# label filtering does not apply when collecting dumps
	delete $params{'labels'};
    }

    # specifying write_timestamps implies we won't check holding files
    if ($params{'write_timestamps'}) {
	if (defined $params{'holding'} and $params{'holding'}) {
	    return [], []; # well, that's easy..
	}
	$params{'holding'} = 0;
    }
    # specifying labelstr implies we won't check holding files
    if ($params{'labelstr'}) {
	if (defined $params{'holding'} and $params{'holding'}) {
	    return [], []; # well, that's easy..
	}
	$params{'holding'} = 0;
    }

    # Since we're working from logfiles, we have to pick the logfiles we'll use first.
    # Then we can use search_logfile.
    my @logfiles;
    if ($params{'holding'}) {
	# the pseudo-logfile name 'holding' stands for the holding disk
	@logfiles = ( 'holding', );
    } elsif (exists($params{'write_timestamps'})) {
	# if we have specific write_timestamps, the job is pretty easy.
	my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
	for my $logfile (Amanda::Logfile::find_log()) {
	    next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
	    next unless (exists($timestamps_hash{zeropad($timestamp)}));
	    push @logfiles, $logfile;
	}
    } elsif (exists($params{'dump_timestamps'})) {
	# otherwise, we need only look in logfiles at or after the earliest dump timestamp
	my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
	my $earliest_timestamp = $sorted_timestamps[0];
	for my $logfile (Amanda::Logfile::find_log()) {
	    next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
	    next unless (zeropad($timestamp) ge $earliest_timestamp);
	    push @logfiles, $logfile;
	}
    } else {
	# oh well -- it looks like we'll have to read all existing logfiles.
	@logfiles = Amanda::Logfile::find_log();
    }

    # Set up some hash tables for speedy lookups of various attributes
    # (only the keys matter; an empty hash means "no restriction")
    my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %labels_hash);
    %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
	if (exists($params{'dump_timestamps'}));
    %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
	if (exists($params{'hostnames'}));
    %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
	if (exists($params{'disknames'}));
    %levels_hash = map { ($_, undef) } @{$params{'levels'}}
	if (exists($params{'levels'}));
    %labels_hash = map { ($_, undef) } @{$params{'labels'}}
	if (exists($params{'labels'}));

    # dumps seen so far, keyed by hostname, diskname, write_timestamp, level
    # and dump_timestamp joined with NUL characters
    my %dumps;
    my @parts;

    # *also* scan holding if the holding param wasn't specified
    if (!exists $params{'holding'}) {
	push @logfiles, 'holding';
    }

    # now loop over those logfiles and use search_logfile to load the dumpfiles
    # from them, then process each entry from the logfile
    for my $logfile (@logfiles) {
	my (@find_results, $write_timestamp);

	# get the raw contents from search_logfile, or use holding if
	# $logfile is 'holding'
	if ($logfile ne 'holding') {
	    @find_results = Amanda::Logfile::search_logfile(undef, undef,
							"$logfile_dir/$logfile", 1);
	    # convert to dumpfile hashes, including the write_timestamp from the logfile name
	    my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;
	    $write_timestamp = zeropad($timestamp);

	} else {
	    @find_results = Amanda::Logfile::search_holding_disk();
	    # holding-disk dumps carry an all-zero write_timestamp
	    $write_timestamp = '00000000000000';
	}

	# filter against *_match with dumps_match
	@find_results = Amanda::Logfile::dumps_match([@find_results],
	    exists($params{'hostname_match'})? $params{'hostname_match'} : undef,
	    exists($params{'diskname_match'})? $params{'diskname_match'} : undef,
	    exists($params{'dump_timestamp_match'})? $params{'dump_timestamp_match'} : undef,
	    undef,
	    0);

	# loop over each entry in the logfile.
	for my $find_result (@find_results) {

	    # filter out the non-dump error messages that find.c produces
	    next unless (defined $find_result->{'label'});

	    # bail out on this result early, if possible
	    next if (%dump_timestamps_hash
		and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
	    next if (%hostnames_hash
		and !exists($hostnames_hash{$find_result->{'hostname'}}));
	    next if (%disknames_hash
		and !exists($disknames_hash{$find_result->{'diskname'}}));
	    next if (%levels_hash
		and !exists($levels_hash{$find_result->{'level'}}));
	    next if (%labels_hash
		and !exists($labels_hash{$find_result->{'label'}}));
	    next if (defined $params{'labelstr'}
		and !match_labelstr_expr($params{'labelstr'},$find_result->{'label'}));
	    # the status filter is applied to parts only; results without a
	    # status are let through
	    if ($get_what eq 'parts') {
		next if (exists($params{'status'})
		    and defined $find_result->{'status'}
		    and $find_result->{'status'} ne $params{'status'});
	    }

	    # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
	    # tendency to produce duplicate results
	    next if ($params{'dumpspecs'}
		and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
						    $params{'dumpspecs'}, 0));

	    my $dump_timestamp = zeropad($find_result->{'timestamp'});

	    # look up the dump this part belongs to, creating it on first sight
	    my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
			             $write_timestamp, $find_result->{'level'}, $dump_timestamp);
	    my $dump = $dumps{$dumpkey};
	    if (!defined $dump) {
		$dump = $dumps{$dumpkey} = {
		    dump_timestamp => $dump_timestamp,
		    write_timestamp => $write_timestamp,
		    hostname => $find_result->{'hostname'},
		    diskname => $find_result->{'diskname'},
		    level => $find_result->{'level'}+0,
		    orig_kb => $find_result->{'orig_kb'},
		    status => $find_result->{'dump_status'},
		    message => $find_result->{'message'},
		    # the rest of these params are unknown until we see a taper
		    # DONE, PARTIAL, or FAIL line, although we count nparts
		    # manually instead of relying on the logfile
		    nparts => 0, # $find_result->{'totalparts'}
		    bytes => -1, # $find_result->{'bytes'}
		    kb => -1,    # $find_result->{'kb'}
		    sec => -1,   # $find_result->{'sec'}
		};
	    }

	    # start setting up a part hash for this result
	    my %part;
	    if ($logfile ne 'holding') {
		# on-media dump
		%part = (
		    label => $find_result->{'label'},
		    filenum => $find_result->{'filenum'},
		    dump => $dump,
		    status => $find_result->{'status'} || 'FAILED',
		    sec => $find_result->{'sec'},
		    kb => $find_result->{'kb'},
		    orig_kb => $find_result->{'orig_kb'},
		    partnum => $find_result->{'partnum'},
		);
	    } else {
		# holding disk: the result's 'label' field holds the holding
		# file name, and the dump is always a single part
		%part = (
		    holding_file => $find_result->{'label'},
		    dump => $dump,
		    status => $find_result->{'status'} || 'FAILED',
		    sec => 0.0,
		    kb => $find_result->{'kb'},
		    orig_kb => $find_result->{'orig_kb'},
		    partnum => 1,
		);
		# and fix up the dump, too
		$dump->{'status'} = $find_result->{'status'} || 'FAILED';
		$dump->{'bytes'} = $find_result->{'bytes'};
		$dump->{'kb'} = $find_result->{'kb'};
		$dump->{'sec'} = $find_result->{'sec'};
	    }

	    # weaken the dump ref if we're returning dumps
	    weaken_ref($part{'dump'})
		if ($get_what eq 'dumps');

	    # count the number of successful parts in the dump
	    $dump->{'nparts'}++ if $part{'status'} eq 'OK';

	    # and add a ref to the array of parts; if we're getting
	    # parts, then this is a weak ref.  A later result with the same
	    # partnum replaces the earlier entry in this array.
	    $dump->{'parts'}[$part{'partnum'}] = \%part;
	    weaken_ref($dump->{'parts'}[$part{'partnum'}])
		if ($get_what eq 'parts');

	    push @parts, \%part;
	}

	# if these dumps were on the holding disk, then we're done
	next if $logfile eq 'holding';

	# re-read the logfile to extract dump-level info that's not captured by
	# search_logfile
	my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
	die "logfile '$logfile' not found" unless $logh;
	while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
	    # only taper DONE/PARTIAL/FAIL/SUCCESS lines are of interest
	    next unless $prog == $P_TAPER;
	    my $status;
	    if ($type == $L_DONE) {
		$status = 'OK';
	    } elsif ($type == $L_PARTIAL) {
		$status = 'PARTIAL';
	    } elsif ($type == $L_FAIL) {
		$status = 'FAIL';
	    } elsif ($type == $L_SUCCESS) {
		$status = "OK";
	    } else {
		next;
	    }

	    # now extract the appropriate info; luckily these log lines have the same
	    # format, more or less
	    my ($hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
	    ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
	    ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
	    ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
	    if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
		($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
		# if the remainder already starts with '[', the field just
		# consumed is kept in $str and parsed again below
		if (substr($str1, 0,1) ne '[') {
		    $str = $str1;
		} else { # nparts is not in all PARTIAL lines
		    $nparts = 0;
		}

	    } else {
		$nparts = 0;
	    }
	    ($level, $str) = Amanda::Util::skip_quoted_string($str);
	    if ($status ne 'FAIL') {
		my $s = $str;
		my $b_unit;
		# the size is logged either as "kb N" or as "bytes N"
		($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
		    or die("'$s'");
		if ($b_unit eq 'bytes') {
		    $bytes = $kb;
		    $kb /= 1024;
		} else {
		    # byte count is not known when only kb was logged
		    $bytes = 0;
		}
		# non-positive durations are replaced by 0.1
		$secs = 0.1 if ($secs <= 0);
	    }
	    if ($status ne 'OK') {
		$message = $str;
	    } else {
		$message = '';
	    }

	    $hostname = Amanda::Util::unquote_string($hostname);
	    $diskname = Amanda::Util::unquote_string($diskname);
	    $message = Amanda::Util::unquote_string($message) if $message;

	    # filter against dump criteria
	    next if ($params{'dump_timestamp_match'}
		and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
	    next if (%dump_timestamps_hash
		and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));

	    next if ($params{'hostname_match'}
		and !match_host($params{'hostname_match'}, $hostname));
	    next if (%hostnames_hash
		and !exists($hostnames_hash{$hostname}));

	    next if ($params{'diskname_match'}
		and !match_disk($params{'diskname_match'}, $diskname));
	    next if (%disknames_hash
		and !exists($disknames_hash{$diskname}));

	    next if (%levels_hash
		and !exists($levels_hash{$level}));
	    # get_dumps filters on status

	    # the line must satisfy every defined field of at least one dumpspec
	    if ($params{'dumpspecs'}) {
		my $ok = 0;
		for my $ds (@{$params{'dumpspecs'}}) {
		    # (the "". are for SWIG's benefit - SWIGged functions don't like
		    # strings generated by SWIG.  Long story.)
		    next if (defined $ds->{'host'}
			    and !match_host("".$ds->{'host'}, $hostname));
		    next if (defined $ds->{'disk'}
			    and !match_disk("".$ds->{'disk'}, $diskname));
		    next if (defined $ds->{'datestamp'}
			    and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
		    next if (defined $ds->{'level'}
			    and !match_level("".$ds->{'level'}, $level));
		    next if (defined $ds->{'write_timestamp'}
			     and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
		    $ok = 1;
		    last;
		}
		next unless $ok;
	    }

	    my $dumpkey = join("\0", $hostname, $diskname, $write_timestamp,
				     $level, zeropad($dump_timestamp));
	    my $dump = $dumps{$dumpkey};
	    if (!defined $dump) {
		# this will happen when a dump has no parts - a FAILed dump.
		$dump = $dumps{$dumpkey} = {
		    dump_timestamp => zeropad($dump_timestamp),
		    write_timestamp => $write_timestamp,
		    hostname => $hostname,
		    diskname => $diskname,
		    level => $level+0,
		    orig_kb => undef,
		    status => "FAILED",
		    # message set below
		    nparts => $nparts, # hopefully 0?
		    # kb set below
		    # sec set below
		};
	    }

	    # record the message and sizes from the taper line; the dump's
	    # 'status' is not modified here
	    $dump->{'message'} = $message;
	    if ($status eq 'FAIL') {
		$dump->{'bytes'} = 0;
		$dump->{'kb'} = 0;
		$dump->{'sec'} = 0.0;
	    } else {
		$dump->{'bytes'} = $bytes+0;
		$dump->{'kb'} = $kb+0;
		$dump->{'sec'} = $secs+0.0;
	    }
	}
	Amanda::Logfile::close_logfile($logh);
    }

    return [ values %dumps], \@parts;
}
861
# Return the list of parts selected by the given filter parameters (see the
# POD for the available keys).  Thin wrapper around get_parts_and_dumps that
# keeps only the second element of its result, the arrayref of parts.
sub get_parts {
    my @found = get_parts_and_dumps("parts", @_);
    return @{ $found[1] };
}
866
# Return the list of dumps selected by the given filter parameters.  When a
# 'status' parameter is present, only dumps whose status equals it are kept;
# that filter is applied here, to the dumps returned by get_parts_and_dumps.
sub get_dumps {
    my %params = @_;
    my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);

    # no status restriction: hand back everything that was found
    return @$dumps unless exists $params{'status'};

    my $wanted = $params{'status'};
    return grep { $_->{'status'} eq $wanted } @$dumps;
}
878
# Sort a list of parts by the requested keys.
#
# Arguments:
#   $keys  - arrayref of key names, most significant first; a leading '-'
#            on a name reverses the order for that key
#   @parts - part hashrefs, as returned by get_parts
#
# Key handling:
#   partnum, filenum                  - part fields, compared numerically
#   nparts, level                     - dump fields (read via the part's
#                                       'dump' hashref), compared numerically
#   hostname, diskname,
#   write_timestamp, dump_timestamp   - dump fields, compared as strings
#   anything else (e.g. label)        - part field, compared as a string
#
# Returns the sorted list of parts.
sub sort_parts {
    my ($keys, @parts) = @_;

    # Decode every key once, before sorting, instead of re-running the
    # regexes on each comparison.  Each spec is
    # [ field name, lives-on-dump flag, numeric flag, sign ].
    my @specs;
    for my $key (@{ $keys || [] }) {
        my ($rev, $k) = ($key =~ /^(-?)(.*)$/);

        my $on_dump = ($k =~ /^(nparts|level|hostname|diskname|write_timestamp|dump_timestamp)$/) ? 1 : 0;
        my $numeric = ($k =~ /^(partnum|filenum|nparts|level)$/) ? 1 : 0;
        my $sign = ($rev eq '-') ? -1 : 1;

        push @specs, [ $k, $on_dump, $numeric, $sign ];
    }

    return sort {
        for my $spec (@specs) {
            my ($k, $on_dump, $numeric, $sign) = @$spec;

            # pick the two values to compare, from the dump or the part itself
            my ($left, $right) = $on_dump
                ? ($a->{'dump'}->{$k}, $b->{'dump'}->{$k})
                : ($a->{$k}, $b->{$k});

            my $res = $numeric ? ($left <=> $right) : ($left cmp $right);

            # the first key that tells the two parts apart decides
            return $sign * $res if $res;
        }
        return 0;
    } @parts;
}
908
# Sort a list of dumps by the requested keys.
#
# Arguments:
#   $keys  - arrayref of key names, most significant first; a leading '-'
#            on a name reverses the order for that key
#   @dumps - dump hashrefs, as returned by get_dumps
#
# The keys nparts, level and filenum are compared numerically; every other
# key (hostname, diskname, write_timestamp, dump_timestamp, ...) is compared
# as a string.
#
# Returns the sorted list of dumps.
sub sort_dumps {
    my ($keys, @dumps) = @_;

    # Decode every key once, before sorting, instead of re-running the
    # regexes on each comparison.  Each spec is
    # [ field name, numeric flag, sign ].
    my @specs;
    for my $key (@{ $keys || [] }) {
        my ($rev, $k) = ($key =~ /^(-?)(.*)$/);

        my $numeric = ($k =~ /^(nparts|level|filenum)$/) ? 1 : 0;
        my $sign = ($rev eq '-') ? -1 : 1;

        push @specs, [ $k, $numeric, $sign ];
    }

    return sort {
        for my $spec (@specs) {
            my ($k, $numeric, $sign) = @$spec;

            my $res = $numeric
                ? ($a->{$k} <=> $b->{$k})
                : ($a->{$k} cmp $b->{$k});

            # the first key that tells the two dumps apart decides
            return $sign * $res if $res;
        }
        return 0;
    } @dumps;
}
932
# One-element cache for add_part(): the label and write_timestamp of the most
# recent lookup, plus the log filename found for them, so that repeated calls
# for the same label and write_timestamp skip the logfile search.  All three
# start out undefined.
my ($add_part_last_label,
    $add_part_last_write_timestamp,
    $add_part_last_logfile);
938
939sub add_part {
940    my ($dump) = @_;
941    my $found;
942    my $logfh;
943    my $logfile;
944    my $find_result;
945    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
946    my ($last_filenum, $last_secs, $last_kbs);
947
948    # first order of business is to find out whether we need to make a new
949    # dumpfile for this.
950    my $write_timestamp = zeropad($dump->{'write_timestamp'});
951    die "dump has no 'write_timestamp'" unless defined $write_timestamp;
952
953    # consult our one-element cache for this label and write_timestamp
954    if (!defined $add_part_last_label
955	or $add_part_last_label ne $dump->{'label'}
956	or $add_part_last_write_timestamp ne $dump->{'write_timestamp'}) {
957
958	# update the cache
959	$add_part_last_logfile = undef;
960	LOGFILE:
961	for my $lf (Amanda::Logfile::find_log()) {
962	    next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
963	    next unless (zeropad($log_timestamp) eq $write_timestamp);
964
965	    # write timestamp matches; now check the label
966	    LOGFILE_DUMP:
967	    for $find_result (Amanda::Logfile::search_logfile(undef, undef,
968					"$logdir/$lf", 1)) {
969		next unless (defined $find_result->{'label'});
970
971		if ($find_result->{'label'} eq $dump->{'label'}) {
972		    $add_part_last_label = $dump->{'label'};
973		    $add_part_last_write_timestamp = $dump->{'write_timestamp'};
974		    $add_part_last_logfile = $lf;
975		    last LOGFILE;
976		}
977	    }
978	}
979    }
980    $logfile = $add_part_last_logfile;
981
982    # truncate the write_timestamp if we're not using timestamps
983    if (!getconf($CNF_USETIMESTAMPS)) {
984	$write_timestamp = substr($write_timestamp, 0, 8);
985    }
986
987    # get the information on the last dump and part in this logfile, or create
988    # a new logfile if none exists, then open the logfile for writing.
989    if (defined $logfile) {
990	$last_filenum = -1;
991
992	# NOTE: this depends on an implementation detail of search_logfile: it
993	# returns the results in the reverse order of appearance in the logfile.
994	# Since we're concerned with the last elements of this logfile that we
995	# will be appending to shortly, we simply reverse this list.  As this
996	# package is rewritten to parse logfiles on its own (or access a relational
997	# database), this implementation detail will no longer be relevant.
998	my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
999						    "$logdir/$logfile", 1);
1000	for $find_result (@find_results) {
1001	    # filter out the non-dump error messages that find.c produces
1002	    next unless (defined $find_result->{'label'});
1003
1004	    $last_filenum = $find_result->{'filenum'};
1005
1006	    # if this is part number 1, reset our secs and kbs counters on the
1007	    # assumption that this is the beginning of a new dump
1008	    if ($find_result->{'partnum'} == 1) {
1009		$last_secs = $last_kbs = 0;
1010	    }
1011	    $last_secs += $find_result->{'sec'};
1012	    $last_kbs += $find_result->{'kb'};
1013	}
1014
1015	open($logfh, ">>", "$logdir/$logfile");
1016    } else {
1017	$last_filenum = -1;
1018	$last_secs = 0;
1019	$last_kbs = 0;
1020
1021	# pick an unused log filename
1022	my $i = 0;
1023	while (1) {
1024	    $logfile = "log.$write_timestamp.$i";
1025	    last unless -f "$logdir/$logfile";
1026	    $i++;
1027	}
1028
1029	open($logfh, ">", "$logdir/$logfile")
1030	    or die("Could not write '$logdir/$logfile': $!");
1031
1032	print $logfh
1033	    "INFO taper This logfile was generated by Amanda::DB::Catalog\n";
1034
1035	print $logfh
1036	    "START taper datestamp $write_timestamp label $dump->{label} tape $i\n";
1037
1038	if (!defined $tapelist) {
1039	    _load_tapelist();
1040	} else {
1041	    # reload the tapelist immediately, in case it's been modified
1042	    $tapelist->reload();
1043	}
1044
1045	# see if we need to add an entry to the tapelist for this dump
1046	if (!grep { $_->{'label'} eq $dump->{'label'}
1047		    and zeropad($_->{'datestamp'}) eq zeropad($dump->{'write_timestamp'})
1048		} @{$tapelist->{tles}}) {
1049	    $tapelist->reload(1);
1050	    $tapelist->add_tapelabel($write_timestamp, $dump->{'label'}, undef, 1);
1051	    $tapelist->write();
1052	}
1053    }
1054
1055    if ($last_filenum >= 0 && $last_filenum+1 != $dump->{'filenum'}) {
1056	warn "Discontinuity in filenums in $logfile: " .
1057	     "from $last_filenum to $dump->{filenum}";
1058    }
1059
1060    my $kps = $dump->{'sec'}? (($dump->{'kb'} + 0.0) / $dump->{'sec'}) : 0.0;
1061
1062    my $part_line = "PART taper ";
1063    $part_line .= "$dump->{label} ";
1064    $part_line .= "$dump->{filenum} ";
1065    $part_line .= quote_string($dump->{hostname}) . " ";
1066    $part_line .= quote_string($dump->{diskname}) . " ";
1067    $part_line .= "$dump->{dump_timestamp} ";
1068    $part_line .= "$dump->{partnum}/$dump->{nparts} ";
1069    $part_line .= "$dump->{level} ";
1070    $part_line .= "[sec $dump->{sec} kb $dump->{kb} kps $kps]";
1071    print $logfh "$part_line\n";
1072
1073    # TODO: we don't always know nparts when writing a part, so
1074    # this is not always an effective way to detect a complete dump.
1075    # However, it works for purposes of data vaulting.
1076    if ($dump->{'partnum'} == $dump->{'nparts'}) {
1077	my $secs = $last_secs + $dump->{'sec'};
1078	my $kbs = $last_kbs + $dump->{'kb'};
1079	$kps = $secs? ($kbs + 0.0) / $secs : 0.0;
1080
1081	my $done_line = "DONE taper ";
1082	$done_line .= quote_string($dump->{hostname}) ." ";
1083	$done_line .= quote_string($dump->{diskname}) ." ";
1084	$done_line .= "$dump->{dump_timestamp} ";
1085	$done_line .= "$dump->{nparts} ";
1086	$done_line .= "$dump->{level} ";
1087	$done_line .= "[sec $secs kb $kbs kps $kps]";
1088	print $logfh "$done_line\n";
1089    }
1090
1091    close($logfh);
1092}
1093
# _load_tapelist()
#
# Create the shared $tapelist object from the configured tapelist file the
# first time it is needed; calls made while $tapelist is already defined
# leave the existing object untouched.
sub _load_tapelist {
    $tapelist = Amanda::Tapelist->new(
	scalar config_dir_relative(getconf($CNF_TAPELIST)))
	if !defined $tapelist;
}
1100
# _clear_cache()
#
# Forget all module-level cached state: add_part()'s one-element logfile
# lookup cache and the loaded tapelist object.  Resetting the add_part()
# cache as well keeps a stale logfile name from surviving a cache reset;
# the only cost is one extra logfile search on the next add_part() call.
sub _clear_cache { # (used by installcheck)
    $add_part_last_label = undef;
    $add_part_last_write_timestamp = undef;
    $add_part_last_logfile = undef;
    $tapelist = undef;
}
1104
11051;
1106