1#!/usr/local/bin/perl
2
3# this software is licensed for use under the Free Software Foundation's GPL v3.0 license, as retrieved
4# from http://www.gnu.org/licenses/gpl-3.0.html on 2014-11-17.  A copy should also be available in this
5# project's Git repository at https://github.com/jimsalterjrs/sanoid/blob/master/LICENSE.
6
7$::VERSION = '2.0.3';
8my $MINIMUM_DEFAULTS_VERSION = 2;
9
10use strict;
11use warnings;
12use Config::IniFiles; # read samba-style conf file
13use Data::Dumper;     # debugging - print contents of hash
14use File::Path;       # for rmtree command in use_prune
15use Getopt::Long qw(:config auto_version auto_help);
16use Pod::Usage;       # pod2usage
17use Time::Local;      # to parse dates in reverse
18use Capture::Tiny ':all';
19
20my %args = ("configdir" => "/usr/local/etc/sanoid");
21GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
22                   "monitor-health", "force-update", "configdir=s",
23                   "monitor-snapshots", "take-snapshots", "prune-snapshots", "force-prune",
24                   "monitor-capacity"
25          ) or pod2usage(2);
26
27# If only config directory (or nothing) has been specified, default to --cron --verbose
28if (keys %args < 2) {
29	$args{'cron'} = 1;
30	$args{'verbose'} = 1;
31}
32
33my $pscmd = '/bin/ps';
34
35my $zfs = '/sbin/zfs';
36my $zpool = '/sbin/zpool';
37
38my $conf_file = "$args{'configdir'}/sanoid.conf";
39my $default_conf_file = "$args{'configdir'}/sanoid.defaults.conf";
40
41# parse config file
42my %config = init($conf_file,$default_conf_file);
43
44# if we call getsnaps(%config,1) it will forcibly update the cache, TTL or no TTL
45my $forcecacheupdate = 0;
46my $cache = '/var/cache/sanoidsnapshots.txt';
47my $cacheTTL = 900; # 15 minutes
48my %snaps = getsnaps( \%config, $cacheTTL, $forcecacheupdate );
49my %pruned;
50my %capacitycache;
51
52my %snapsbytype = getsnapsbytype( \%config, \%snaps );
53
54my %snapsbypath = getsnapsbypath( \%config, \%snaps );
55
56# let's make it a little easier to be consistent passing these hashes in the same order to each sub
57my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath );
58
59if ($args{'debug'})		{ $args{'verbose'}=1; blabber (@params); }
60if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
61if ($args{'monitor-health'}) 	{ monitor_health(@params); }
62if ($args{'monitor-capacity'})  { monitor_capacity(@params); }
63if ($args{'force-update'}) 	{ my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
64
65if ($args{'cron'}) {
66	if ($args{'quiet'}) { $args{'verbose'} = 0; }
67	take_snapshots (@params);
68	prune_snapshots (@params);
69} else {
70	if ($args{'take-snapshots'}) { take_snapshots (@params); }
71	if ($args{'prune-snapshots'}) { prune_snapshots (@params); }
72}
73
74exit 0;
75
76
77####################################################################################
78####################################################################################
79####################################################################################
80
81sub monitor_health {
82	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
83	my %pools;
84	my @messages;
85	my $errlevel=0;
86
87	foreach my $path (keys %{ $snapsbypath}) {
88		my @pool = split ('/',$path);
89		$pools{$pool[0]}=1;
90	}
91
92	foreach my $pool (keys %pools) {
93		my ($exitcode, $msg) = check_zpool($pool,2);
94		if ($exitcode > $errlevel) { $errlevel = $exitcode; }
95		chomp $msg;
96		push (@messages, $msg);
97	}
98
99	my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** ');
100	my $message = $warninglevels[$errlevel] . join (', ',@messages);
101	print "$message\n";
102	exit $errlevel;
103
104}
105
106####################################################################################
107####################################################################################
108####################################################################################
109
110sub monitor_snapshots {
111
112	# nagios plugin format: exit 0,1,2,3 for OK, WARN, CRITICAL, or ERROR.
113
114	# check_snapshot_date - test ZFS fs creation timestamp for recentness
115	# accepts arguments: $filesystem, $warn (in seconds elapsed), $crit (in seconds elapsed)
116
117	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
118	my %datestamp = get_date();
119	my $errorlevel = 0;
120	my $msg;
121	my @msgs;
122	my @paths;
123
124	foreach my $section (keys %config) {
125		if ($section =~ /^template/) { next; }
126		if (! $config{$section}{'monitor'}) { next; }
127		if ($config{$section}{'process_children_only'}) { next; }
128
129		my $path = $config{$section}{'path'};
130		push @paths, $path;
131
132		my @types = ('yearly','monthly','weekly','daily','hourly','frequently');
133		foreach my $type (@types) {
134			if ($config{$section}{$type} == 0) { next; }
135
136			my $smallerperiod = 0;
137			# we need to set the period length in seconds first
138			if ($type eq 'frequently') { $smallerperiod = 1; }
139			elsif ($type eq 'hourly') { $smallerperiod = 60; }
140			elsif ($type eq 'daily') { $smallerperiod = 60*60; }
141			elsif ($type eq 'weekly') { $smallerperiod = 60*60*24; }
142			elsif ($type eq 'monthly') { $smallerperiod = 60*60*24*7; }
143			elsif ($type eq 'yearly') { $smallerperiod = 60*60*24*31; }
144
145			my $typewarn = $type . '_warn';
146			my $typecrit = $type . '_crit';
147			my $warn = convertTimePeriod($config{$section}{$typewarn}, $smallerperiod);
148			my $crit = convertTimePeriod($config{$section}{$typecrit}, $smallerperiod);
149			my $elapsed = -1;
150			if (defined $snapsbytype{$path}{$type}{'newest'}) {
151				$elapsed = $snapsbytype{$path}{$type}{'newest'};
152			}
153			my $dispelapsed = displaytime($elapsed);
154			my $dispwarn = displaytime($warn);
155			my $dispcrit = displaytime($crit);
156			if ( $elapsed > $crit || $elapsed == -1) {
157				if ($crit > 0) {
158					if (! $config{$section}{'monitor_dont_crit'}) { $errorlevel = 2; }
159					if ($elapsed == -1) {
160						push @msgs, "CRIT: $path has no $type snapshots at all!";
161					} else {
162						push @msgs, "CRIT: $path\'s newest $type snapshot is $dispelapsed old (should be < $dispcrit)";
163					}
164				}
165			 } elsif ($elapsed > $warn) {
166				if ($warn > 0) {
167					if (! $config{$section}{'monitor_dont_warn'} && ($errorlevel < 2) ) { $errorlevel = 1; }
168					push @msgs, "WARN: $path\'s newest $type snapshot is $dispelapsed old (should be < $dispwarn)";
169				}
170			} else {
171				# push @msgs .= "OK: $path\'s newest $type snapshot is $dispelapsed old \n";
172			}
173
174		}
175	}
176
177	my @sorted_msgs = sort { lc($a) cmp lc($b) } @msgs;
178	my @sorted_paths = sort { lc($a) cmp lc($b) } @paths;
179	$msg = join (", ", @sorted_msgs);
180	my $paths = join (", ", @sorted_paths);
181
182	if ($msg eq '') { $msg = "OK: all monitored datasets \($paths\) have fresh snapshots"; }
183
184	print "$msg\n";
185	exit $errorlevel;
186}
187
188
189####################################################################################
190####################################################################################
191####################################################################################
192
193sub monitor_capacity {
194	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
195	my %pools;
196	my @messages;
197	my $errlevel=0;
198
199	# build pool list with corresponding capacity limits
200	foreach my $section (keys %config) {
201		my @pool = split ('/',$section);
202
203		if (scalar @pool == 1 || !defined($pools{$pool[0]}) ) {
204			my %capacitylimits;
205
206			if (!check_capacity_limit($config{$section}{'capacity_warn'})) {
207				die "ERROR: invalid zpool capacity warning limit!\n";
208			}
209
210			if ($config{$section}{'capacity_warn'} != 0) {
211				$capacitylimits{'warn'} = $config{$section}{'capacity_warn'};
212			}
213
214			if (!check_capacity_limit($config{$section}{'capacity_crit'})) {
215				die "ERROR: invalid zpool capacity critical limit!\n";
216			}
217
218			if ($config{$section}{'capacity_crit'} != 0) {
219				$capacitylimits{'crit'} = $config{$section}{'capacity_crit'};
220			}
221
222			if (%capacitylimits) {
223				$pools{$pool[0]} = \%capacitylimits;
224			}
225		}
226	}
227
228	foreach my $pool (keys %pools) {
229		my $capacitylimitsref = $pools{$pool};
230
231		my ($exitcode, $msg) = check_zpool_capacity($pool,\%$capacitylimitsref);
232		if ($exitcode > $errlevel) { $errlevel = $exitcode; }
233		chomp $msg;
234		push (@messages, $msg);
235	}
236
237	my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** ');
238	my $message = $warninglevels[$errlevel] . join (', ',@messages);
239	print "$message\n";
240	exit $errlevel;
241}
242
243####################################################################################
244####################################################################################
245####################################################################################
246
247
248sub prune_snapshots {
249
250	if ($args{'verbose'}) { print "INFO: pruning snapshots...\n"; }
251	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
252
253	my %datestamp = get_date();
254	my $forcecacheupdate = 0;
255
256	foreach my $section (keys %config) {
257		if ($section =~ /^template/) { next; }
258		if (! $config{$section}{'autoprune'}) { next; }
259		if ($config{$section}{'process_children_only'}) { next; }
260
261		my $path = $config{$section}{'path'};
262
263		my $period = 0;
264		if (check_prune_defer($config, $section)) {
265			if ($args{'verbose'}) { print "INFO: deferring snapshot pruning ($section)...\n"; }
266			next;
267		}
268
269		foreach my $type (keys %{ $config{$section} }){
270			unless ($type =~ /ly$/) { next; }
271
272			# we need to set the period length in seconds first
273			if ($type eq 'frequently') { $period = 60 * $config{$section}{'frequent_period'}; }
274			elsif ($type eq 'hourly') { $period = 60*60; }
275			elsif ($type eq 'daily') { $period = 60*60*24; }
276			elsif ($type eq 'weekly') { $period = 60*60*24*7; }
277			elsif ($type eq 'monthly') { $period = 60*60*24*31; }
278			elsif ($type eq 'yearly') { $period = 60*60*24*365.25; }
279
280			# avoid pissing off use warnings by not executing this block if no matching snaps exist
281			if (defined $snapsbytype{$path}{$type}{'sorted'}) {
282				my @sorted = split (/\|/,$snapsbytype{$path}{$type}{'sorted'});
283
284				# if we say "daily=30" we really mean "don't keep any dailies more than 30 days old", etc
285				my $maxage = ( time() - $config{$section}{$type} * $period );
286				# but if we say "daily=30" we ALSO mean "don't get rid of ANY dailies unless we have more than 30".
287				my $minsnapsthistype = $config{$section}{$type};
288
289				# how many total snaps of this type do we currently have?
290				my $numsnapsthistype = scalar (@sorted);
291
292				my @prunesnaps;
293				foreach my $snap( @sorted ){
294					# print "snap $path\@$snap has age $snaps{$path}{$snap}{'ctime'}, maxage is $maxage.\n";
295					if ( ($snaps{$path}{$snap}{'ctime'} < $maxage) && ($numsnapsthistype > $minsnapsthistype) ) {
296						my $fullpath = $path . '@' . $snap;
297						push(@prunesnaps,$fullpath);
298						# we just got rid of a snap, so we now have one fewer, duh
299						$numsnapsthistype--;
300					}
301				}
302
303				if ((scalar @prunesnaps) > 0) {
304					# print "found some snaps to prune!\n"
305					if (checklock('sanoid_pruning')) {
306						writelock('sanoid_pruning');
307						foreach my $snap( @prunesnaps ){
308							if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; }
309							if (!$args{'force-prune'} && iszfsbusy($path)) {
310								if ($args{'verbose'}) { print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; }
311							} else {
312								if (! $args{'readonly'}) {
313									if (system($zfs, "destroy", $snap) == 0) {
314										$pruned{$snap} = 1;
315										my $dataset = (split '@', $snap)[0];
316										my $snapname = (split '@', $snap)[1];
317										if ($config{$dataset}{'pruning_script'}) {
318											$ENV{'SANOID_TARGET'} = $dataset;
319											$ENV{'SANOID_SNAPNAME'} = $snapname;
320											if ($args{'verbose'}) { print "executing pruning_script '".$config{$dataset}{'pruning_script'}."' on dataset '$dataset'\n"; }
321											my $ret = runscript('pruning_script',$dataset);
322
323											delete $ENV{'SANOID_TARGET'};
324											delete $ENV{'SANOID_SNAPNAME'};
325										}
326									} else {
327										warn "could not remove $snap : $?";
328									}
329								}
330							}
331						}
332						removelock('sanoid_pruning');
333						removecachedsnapshots(0);
334					} else {
335						if ($args{'verbose'}) { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; }
336					}
337				}
338			}
339		}
340	}
341
342	# if there were any deferred cache updates,
343	# do them now and wait if necessary
344	removecachedsnapshots(1);
345} # end prune_snapshots
346
347
348####################################################################################
349####################################################################################
350####################################################################################
351
352sub take_snapshots {
353
354	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
355
356	my %datestamp = get_date();
357	my $forcecacheupdate = 0;
358
359	my @newsnaps;
360
361	# get utc timestamp of the current day for DST check
362	my $daystartUtc = timelocal(0, 0, 0, $datestamp{'mday'}, ($datestamp{'mon'}-1), $datestamp{'year'});
363	my ($isdst) = (localtime($daystartUtc))[8];
364	my $dstOffset = 0;
365
366	if ($isdst ne $datestamp{'isdst'}) {
367		# current dst is different then at the beginning og the day
368		if ($isdst) {
369			# DST ended in the current day
370			$dstOffset = 60*60;
371		}
372	}
373
374	if ($args{'verbose'}) { print "INFO: taking snapshots...\n"; }
375	foreach my $section (keys %config) {
376		if ($section =~ /^template/) { next; }
377		if (! $config{$section}{'autosnap'}) { next; }
378		if ($config{$section}{'process_children_only'}) { next; }
379
380		my $path = $config{$section}{'path'};
381                my @types = ('yearly','monthly','weekly','daily','hourly','frequently');
382
383                foreach my $type (@types) {
384			if ($config{$section}{$type} > 0) {
385
386				my $newestage; # in seconds
387				if (defined $snapsbytype{$path}{$type}{'newest'}) {
388					$newestage = $snapsbytype{$path}{$type}{'newest'};
389				} else{
390					$newestage = 9999999999999999;
391				}
392
393				# for use with localtime: @preferredtime will be most recent preferred snapshot time in ($sec,$min,$hour,$mon-1,$year) format
394				my @preferredtime;
395				my $lastpreferred;
396
397				# to avoid duplicates with DST
398				my $handleDst = 0;
399
400				if ($type eq 'frequently')     {
401					my $frequentslice = int($datestamp{'min'} / $config{$section}{'frequent_period'});
402
403					push @preferredtime,0; # try to hit 0 seconds
404					push @preferredtime,$frequentslice * $config{$section}{'frequent_period'};
405					push @preferredtime,$datestamp{'hour'};
406					push @preferredtime,$datestamp{'mday'};
407					push @preferredtime,($datestamp{'mon'}-1); # january is month 0
408					push @preferredtime,$datestamp{'year'};
409					$lastpreferred = timelocal(@preferredtime);
410					if ($lastpreferred > time()) { $lastpreferred -= 60 * $config{$section}{'frequent_period'}; } # preferred time is later this frequent period - so look at last frequent period
411				} elsif ($type eq 'hourly')     {
412					push @preferredtime,0; # try to hit 0 seconds
413					push @preferredtime,$config{$section}{'hourly_min'};
414					push @preferredtime,$datestamp{'hour'};
415					push @preferredtime,$datestamp{'mday'};
416					push @preferredtime,($datestamp{'mon'}-1); # january is month 0
417					push @preferredtime,$datestamp{'year'};
418					$lastpreferred = timelocal(@preferredtime);
419
420					if ($dstOffset ne 0) {
421						# timelocal doesn't take DST into account
422						$lastpreferred += $dstOffset;
423						# DST ended, avoid duplicates
424						$handleDst = 1;
425					}
426					if ($lastpreferred > time()) { $lastpreferred -= 60*60; } # preferred time is later this hour - so look at last hour's
427				} elsif ($type eq 'daily')   {
428					push @preferredtime,0; # try to hit 0 seconds
429					push @preferredtime,$config{$section}{'daily_min'};
430					push @preferredtime,$config{$section}{'daily_hour'};
431					push @preferredtime,$datestamp{'mday'};
432					push @preferredtime,($datestamp{'mon'}-1); # january is month 0
433					push @preferredtime,$datestamp{'year'};
434					$lastpreferred = timelocal(@preferredtime);
435
436					# timelocal doesn't take DST into account
437					$lastpreferred += $dstOffset;
438
439					# check if the planned time has different DST flag than the current
440					my ($isdst) = (localtime($lastpreferred))[8];
441					if ($isdst ne $datestamp{'isdst'}) {
442						if (!$isdst) {
443							# correct DST difference
444							$lastpreferred -= 60*60;
445						}
446					}
447
448					if ($lastpreferred > time()) {
449						$lastpreferred -= 60*60*24;
450
451						if ($dstOffset ne 0) {
452							# because we are going back one day
453							# the DST difference has to be accounted
454							# for in reverse now
455							$lastpreferred -= 2*$dstOffset;
456						}
457					} # preferred time is later today - so look at yesterday's
458				} elsif ($type eq 'weekly')   {
459					# calculate offset in seconds for the desired weekday
460					my $offset = 0;
461					if ($config{$section}{'weekly_wday'} < $datestamp{'wday'}) {
462						$offset += 7;
463					}
464					$offset += $config{$section}{'weekly_wday'} - $datestamp{'wday'};
465					$offset *= 60*60*24; # full day
466
467					push @preferredtime,0; # try to hit 0 seconds
468					push @preferredtime,$config{$section}{'weekly_min'};
469					push @preferredtime,$config{$section}{'weekly_hour'};
470					push @preferredtime,$datestamp{'mday'};
471					push @preferredtime,($datestamp{'mon'}-1); # january is month 0
472					push @preferredtime,$datestamp{'year'};
473					$lastpreferred = timelocal(@preferredtime);
474					$lastpreferred += $offset;
475					if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*7; } # preferred time is later this week - so look at last week's
476				} elsif ($type eq 'monthly') {
477					push @preferredtime,0; # try to hit 0 seconds
478					push @preferredtime,$config{$section}{'monthly_min'};
479					push @preferredtime,$config{$section}{'monthly_hour'};
480					push @preferredtime,$config{$section}{'monthly_mday'};
481					push @preferredtime,($datestamp{'mon'}-1); # january is month 0
482					push @preferredtime,$datestamp{'year'};
483					$lastpreferred = timelocal(@preferredtime);
484					if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*31; } # preferred time is later this month - so look at last month's
485				} elsif ($type eq 'yearly')  {
486					push @preferredtime,0; # try to hit 0 seconds
487					push @preferredtime,$config{$section}{'yearly_min'};
488					push @preferredtime,$config{$section}{'yearly_hour'};
489					push @preferredtime,$config{$section}{'yearly_mday'};
490					push @preferredtime,($config{$section}{'yearly_mon'}-1); # january is month 0
491					push @preferredtime,$datestamp{'year'};
492					$lastpreferred = timelocal(@preferredtime);
493					if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*31*365.25; } # preferred time is later this year - so look at last year
494				} else {
495					warn "WARN: unknown interval type $type in config!";
496					next;
497				}
498
499				# reconstruct our human-formatted most recent preferred snapshot time into an epoch time, to compare with the epoch of our most recent snapshot
500				my $maxage = time()-$lastpreferred;
501
502				if ( $newestage > $maxage ) {
503					# update to most current possible datestamp
504					%datestamp = get_date();
505					# print "we should have had a $type snapshot of $path $maxage seconds ago; most recent is $newestage seconds old.\n";
506
507					my $flags = "";
508					# use zfs (atomic) recursion if specified in config
509					if ($config{$section}{'zfs_recursion'}) {
510						$flags .= "r";
511					}
512					if ($handleDst) {
513						$flags .= "d";
514					}
515
516					if ($flags ne "") {
517						push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type\@$flags");
518					} else {
519						push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type");
520					}
521				}
522			}
523		}
524	}
525
526	if ( (scalar(@newsnaps)) > 0) {
527		foreach my $snap ( @newsnaps ) {
528			my $extraMessage = "";
529			my @split = split '@', $snap, -1;
530			my $recursiveFlag = 0;
531			my $dstHandling = 0;
532			if (scalar(@split) == 3) {
533				my $flags = $split[2];
534				if (index($flags, "r") != -1) {
535					$recursiveFlag = 1;
536					$extraMessage = " (zfs recursive)";
537					chop $snap;
538				}
539				if (index($flags, "d") != -1) {
540					$dstHandling = 1;
541					chop $snap;
542				}
543				chop $snap;
544			}
545			my $dataset = $split[0];
546			my $snapname = $split[1];
547			my $presnapshotfailure = 0;
548			my $ret = 0;
549			if ($config{$dataset}{'pre_snapshot_script'}) {
550				$ENV{'SANOID_TARGET'} = $dataset;
551				$ENV{'SANOID_SNAPNAME'} = $snapname;
552				if ($args{'verbose'}) { print "executing pre_snapshot_script '".$config{$dataset}{'pre_snapshot_script'}."' on dataset '$dataset'\n"; }
553
554				if (!$args{'readonly'}) {
555					$ret = runscript('pre_snapshot_script',$dataset);
556				}
557
558				delete $ENV{'SANOID_TARGET'};
559				delete $ENV{'SANOID_SNAPNAME'};
560
561				if ($ret != 0) {
562					# warning was already thrown by runscript function
563					$config{$dataset}{'no_inconsistent_snapshot'} and next;
564					$presnapshotfailure = 1;
565				}
566			}
567			if ($args{'verbose'}) { print "taking snapshot $snap$extraMessage\n"; }
568			if (!$args{'readonly'}) {
569				my $stderr;
570				my $exit;
571				($stderr, $exit) = tee_stderr {
572					if ($recursiveFlag) {
573						system($zfs, "snapshot", "-r", "$snap");
574					} else {
575						system($zfs, "snapshot", "$snap");
576					}
577				};
578
579				$exit == 0 or do {
580					if ($dstHandling) {
581						if ($stderr =~ /already exists/) {
582							$exit = 0;
583							$snap =~ s/_([a-z]+)$/dst_$1/g;
584							if ($args{'verbose'}) { print "taking dst snapshot $snap$extraMessage\n"; }
585							if ($recursiveFlag) {
586								system($zfs, "snapshot", "-r", "$snap") == 0
587									or warn "CRITICAL ERROR: $zfs snapshot -r $snap failed, $?";
588							} else {
589								system($zfs, "snapshot", "$snap") == 0
590									or warn "CRITICAL ERROR: $zfs snapshot $snap failed, $?";
591							}
592						}
593					}
594				};
595
596				$exit == 0 or do {
597					if ($recursiveFlag) {
598						warn "CRITICAL ERROR: $zfs snapshot -r $snap failed, $?";
599					} else {
600						warn "CRITICAL ERROR: $zfs snapshot $snap failed, $?";
601					}
602				};
603			}
604			if ($config{$dataset}{'post_snapshot_script'}) {
605				if (!$presnapshotfailure or $config{$dataset}{'force_post_snapshot_script'}) {
606					$ENV{'SANOID_TARGET'} = $dataset;
607					$ENV{'SANOID_SNAPNAME'} = $snapname;
608					if ($args{'verbose'}) { print "executing post_snapshot_script '".$config{$dataset}{'post_snapshot_script'}."' on dataset '$dataset'\n"; }
609
610					if (!$args{'readonly'}) {
611						runscript('post_snapshot_script',$dataset);
612					}
613
614					delete $ENV{'SANOID_TARGET'};
615					delete $ENV{'SANOID_SNAPNAME'};
616				}
617			}
618		}
619		$forcecacheupdate = 1;
620		%snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate);
621	}
622}
623
624####################################################################################
625####################################################################################
626####################################################################################
627
628sub blabber {
629
630	my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
631
632	$Data::Dumper::Sortkeys = 1;
633	print "****** CONFIGS ******\n";
634	print Dumper(\%config);
635	#print "****** SNAPSHOTS ******\n";
636	#print Dumper(\%snaps);
637	#print "****** SNAPSBYTYPE ******\n";
638	#print Dumper(\%snapsbytype);
639	#print "****** SNAPSBYPATH ******\n";
640	#print Dumper(\%snapsbypath);
641
642	print "\n";
643
644	foreach my $section (keys %config) {
645		my $path = $config{$section}{'path'};
646		print "Filesystem $path has:\n";
647		print "     $snapsbypath{$path}{'numsnaps'} total snapshots ";
648		if ($snapsbypath{$path}{'numsnaps'} == 0) {
649			print "(no current snapshots)"
650		} else {
651			print "(newest: ";
652			my $newest = sprintf("%.1f",$snapsbypath{$path}{'newest'} / 60 / 60);
653			print "$newest hours old)\n";
654
655			foreach my $type (keys %{ $snapsbytype{$path} }){
656				print "          $snapsbytype{$path}{$type}{'numsnaps'} $type\n";
657				print "              desired: $config{$section}{$type}\n";
658				print "              newest: ";
659				my $newest = sprintf("%.1f",($snapsbytype{$path}{$type}{'newest'} / 60 / 60));
660				print "$newest hours old, named $snapsbytype{$path}{$type}{'newestname'}\n";
661			}
662		}
663		print "\n\n";
664	}
665
666} # end blabber
667
668
669####################################################################################
670####################################################################################
671####################################################################################
672
673
674sub getsnapsbytype {
675
676	my ($config, $snaps) = @_;
677	my %snapsbytype;
678
679	# iterate through each module section - each section is a single ZFS path
680	foreach my $section (keys %config) {
681		my $path = $config{$section}{'path'};
682
683		my %rawsnaps;
684		foreach my $name (keys %{ $snaps{$path} }){
685			my $type = $snaps{$path}{$name}{'type'};
686			$rawsnaps{$type}{$name} = $snaps{$path}{$name}{'ctime'}
687		}
688
689		# iterate through snapshots of each type, ordered by creation time of each snapshot within that type
690		foreach my $type (keys %rawsnaps) {
691			$snapsbytype{$path}{$type}{'numsnaps'} = scalar (keys %{ $rawsnaps{$type} });
692			my @sortedsnaps;
693			foreach my $name (
694				sort { $rawsnaps{$type}{$a} <=> $rawsnaps{$type}{$b} } keys %{ $rawsnaps{$type} }
695				) {
696				push @sortedsnaps, $name;
697				$snapsbytype{$path}{$type}{'newest'} = (time-$snaps{$path}{$name}{'ctime'});
698				$snapsbytype{$path}{$type}{'newestname'} = $name;
699			}
700			$snapsbytype{$path}{$type}{'sorted'} = join ('|',@sortedsnaps);
701		}
702	}
703
704	return %snapsbytype;
705
706} # end getsnapsbytype
707
708
709####################################################################################
710####################################################################################
711####################################################################################
712
713
714sub getsnapsbypath {
715
716	my ($config,$snaps) = @_;
717	my %snapsbypath;
718
719	# iterate through each module section - each section is a single ZFS path
720	foreach my $section (keys %config) {
721		my $path = $config{$section}{'path'};
722		$snapsbypath{$path}{'numsnaps'} = scalar (keys %{ $snaps{$path} });
723
724		# iterate through snapshots of each type, ordered by creation time of each snapshot within that type
725		my %rawsnaps;
726		foreach my $snapname ( keys %{ $snaps{$path} } ) {
727			$rawsnaps{$path}{$snapname} = $snaps{$path}{$snapname}{'ctime'};
728		}
729		my @sortedsnaps;
730		foreach my $snapname (
731				sort { $rawsnaps{$path}{$a} <=> $rawsnaps{$path}{$b} } keys %{ $rawsnaps{$path} }
732				) {
733				push @sortedsnaps, $snapname;
734				$snapsbypath{$path}{'newest'} = (time-$snaps{$path}{$snapname}{'ctime'});
735		}
736		my $sortedsnaps = join ('|',@sortedsnaps);
737		$snapsbypath{$path}{'sorted'} = $sortedsnaps;
738	}
739
740	return %snapsbypath;
741
742} # end getsnapsbypath
743
744
745
746
747####################################################################################
748####################################################################################
749####################################################################################
750
751sub getsnaps {
752
753	my ($config, $cacheTTL, $forcecacheupdate) = @_;
754
755	my @rawsnaps;
756
757	my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($cache);
758
759	if ( $forcecacheupdate || ! -f $cache || (time() - $mtime) > $cacheTTL ) {
760		if (checklock('sanoid_cacheupdate')) {
761			writelock('sanoid_cacheupdate');
762			if ($args{'verbose'}) {
763				if ($args{'force-update'}) {
764					print "INFO: cache forcibly expired - updating from zfs list.\n";
765				} else {
766					print "INFO: cache expired - updating from zfs list.\n";
767				}
768			}
769			open FH, "$zfs get -Hrpt snapshot creation |";
770			@rawsnaps = <FH>;
771			close FH;
772
773			open FH, "> $cache" or die 'Could not write to $cache!\n';
774			print FH @rawsnaps;
775			close FH;
776			removelock('sanoid_cacheupdate');
777		} else {
778			if ($args{'verbose'}) { print "INFO: deferring cache update - valid cache update lock held by another sanoid process.\n"; }
779			open FH, "< $cache";
780			@rawsnaps = <FH>;
781			close FH;
782		}
783	} else {
784		# if ($args{'debug'}) { print "DEBUG: cache not expired (" . (time() - $mtime) . " seconds old with TTL of $cacheTTL): pulling snapshot list from cache.\n"; }
785		open FH, "< $cache";
786		@rawsnaps = <FH>;
787		close FH;
788	}
789
790	foreach my $snap (@rawsnaps) {
791		my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\t*creation\t*(\d*)/);
792
793		# avoid pissing off use warnings
794		if (defined $snapname) {
795			my ($snaptype) = ($snapname =~ m/.*_(\w*ly)/);
796			if ($snapname =~ /^autosnap/) {
797				$snaps{$fs}{$snapname}{'ctime'}=$snapdate;
798				$snaps{$fs}{$snapname}{'type'}=$snaptype;
799			}
800		}
801	}
802
803	return %snaps;
804}
805
806####################################################################################
807####################################################################################
808####################################################################################
809
810sub init {
811	my ($conf_file, $default_conf_file) = @_;
812	my %config;
813
814	unless (-e $default_conf_file ) { die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!"; }
815	unless (-e $conf_file ) { die "FATAL: cannot load $conf_file - please create a valid local config file before running sanoid!"; }
816
817	tie my %defaults, 'Config::IniFiles', ( -file => $default_conf_file ) or die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!";
818	tie my %ini, 'Config::IniFiles', ( -file => $conf_file ) or die "FATAL: cannot load $conf_file - please create a valid local config file before running sanoid!";
819
820	# we'll use these later to normalize potentially true and false values on any toggle keys
821	my @toggles = ('autosnap','autoprune','monitor_dont_warn','monitor_dont_crit','monitor','recursive','process_children_only','skip_children','no_inconsistent_snapshot','force_post_snapshot_script');
822	# recursive is defined as toggle but can also have the special value "zfs", it is kept to be backward compatible
823
824	my @istrue=(1,"true","True","TRUE","yes","Yes","YES","on","On","ON");
825	my @isfalse=(0,"false","False","FALSE","no","No","NO","off","Off","OFF");
826
827	# check if default configuration file is up to date
828	my $defaults_version = 1;
829	if (defined $defaults{'version'}{'version'}) {
830		$defaults_version = $defaults{'version'}{'version'};
831		delete $defaults{'version'};
832	}
833
834	if ($defaults_version < $MINIMUM_DEFAULTS_VERSION) {
835		die "FATAL: you're using sanoid.defaults.conf v$defaults_version, this version of sanoid requires a minimum sanoid.defaults.conf v$MINIMUM_DEFAULTS_VERSION";
836	}
837
838	foreach my $section (keys %ini) {
839
840		# first up - die with honor if unknown parameters are set in any modules or templates by the user.
841		foreach my $key (keys %{$ini{$section}}) {
842			if (! defined ($defaults{'template_default'}{$key})) {
843				die "FATAL ERROR: I don't understand the setting $key you've set in \[$section\] in $conf_file.\n";
844			}
845		}
846
847		if ($section =~ /^template_/) { next; } # don't process templates directly
848
849		# only set defaults on sections that haven't already been initialized - this allows us to override values
850		# for sections directly when they've already been defined recursively, without starting them over from scratch.
851		if (! defined ($config{$section}{'initialized'})) {
852			if ($args{'debug'}) { print "DEBUG: initializing \$config\{$section\} with default values from $default_conf_file.\n"; }
853			# set default values from %defaults, which can then be overriden by template
854			# and/or local settings within the module.
855			foreach my $key (keys %{$defaults{'template_default'}}) {
856				if (! ($key =~ /template|recursive|children_only/)) {
857					$config{$section}{$key} = $defaults{'template_default'}{$key};
858				}
859			}
860
861			# override with values from user-defined default template, if any
862
863			foreach my $key (keys %{$ini{'template_default'}}) {
864				if ($key =~ /template|recursive/) {
865					warn "ignored key '$key' from user-defined default template.\n";
866					next;
867				}
868				if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value from user-defined default template.\n"; }
869				$config{$section}{$key} = $ini{'template_default'}{$key};
870			}
871		}
872
873		# override with values from user-defined templates applied to this module,
874		# in the order they were specified (ie use_template = default,production,mytemplate)
875		if (defined $ini{$section}{'use_template'}) {
876			my @templates = split (' *, *',$ini{$section}{'use_template'});
877			foreach my $rawtemplate (@templates) {
878				# strip trailing whitespace
879				$rawtemplate =~ s/\s+$//g;
880
881				my $template = 'template_'.$rawtemplate;
882				foreach my $key (keys %{$ini{$template}}) {
883					if ($key =~ /template|recursive/) {
884						warn "ignored key '$key' from '$rawtemplate' template.\n";
885						next;
886					}
887					if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value from user-defined template $template.\n"; }
888					$config{$section}{$key} = $ini{$template}{$key};
889				}
890			}
891		}
892
893		# override with any locally set values in the module itself
894		foreach my $key (keys %{$ini{$section}} ) {
895			if (! ($key =~ /template|recursive|skip_children/)) {
896				if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value directly set in module.\n"; }
897				$config{$section}{$key} = $ini{$section}{$key};
898			}
899		}
900
901		# make sure that true values are true and false values are false for any toggled values
902		foreach my $toggle(@toggles) {
903			foreach my $true (@istrue) {
904				if (defined $config{$section}{$toggle} && $config{$section}{$toggle} eq $true) { $config{$section}{$toggle} = 1; }
905			}
906			foreach my $false (@isfalse) {
907				if (defined $config{$section}{$toggle} && $config{$section}{$toggle} eq $false) { $config{$section}{$toggle} = 0; }
908			}
909		}
910
911		# section path is the section name, unless section path has been explicitly defined
912		if (defined ($ini{$section}{'path'})) {
913			$config{$section}{'path'} = $ini{$section}{'path'};
914		} else {
915			$config{$section}{'path'} = $section;
916		}
917
918		# how 'bout some recursion? =)
919		if ($config{$section}{'zfs_recursion'} && $config{$section}{'zfs_recursion'} == 1 && $config{$section}{'autosnap'} == 1) {
920			warn "ignored autosnap configuration for '$section' because it's part of a zfs recursion.\n";
921			$config{$section}{'autosnap'} = 0;
922		}
923
924		my $recursive = $ini{$section}{'recursive'} && grep( /^$ini{$section}{'recursive'}$/, @istrue );
925		my $zfsRecursive = $ini{$section}{'recursive'} && $ini{$section}{'recursive'} =~ /zfs/i;
926		my $skipChildren = $ini{$section}{'skip_children'} && grep( /^$ini{$section}{'skip_children'}$/, @istrue );
927		my @datasets;
928		if ($zfsRecursive || $recursive || $skipChildren) {
929			if ($zfsRecursive) {
930				$config{$section}{'zfs_recursion'} = 1;
931			}
932
933			@datasets = getchilddatasets($config{$section}{'path'});
934			DATASETS: foreach my $dataset(@datasets) {
935				chomp $dataset;
936
937				if ($zfsRecursive) {
938					# don't try to take the snapshot ourself, recursive zfs snapshot will take care of that
939					$config{$dataset}{'autosnap'} = 0;
940
941					foreach my $key (keys %{$config{$section}} ) {
942						if (! ($key =~ /template|recursive|children_only|autosnap/)) {
943							if ($args{'debug'}) { print "DEBUG: recursively setting $key from $section to $dataset.\n"; }
944							$config{$dataset}{$key} = $config{$section}{$key};
945						}
946					}
947				} else {
948					if ($skipChildren) {
949						if ($args{'debug'}) { print "DEBUG: ignoring $dataset.\n"; }
950						delete $config{$dataset};
951						next DATASETS;
952					}
953
954					foreach my $key (keys %{$config{$section}} ) {
955						if (! ($key =~ /template|recursive|children_only/)) {
956							if ($args{'debug'}) { print "DEBUG: recursively setting $key from $section to $dataset.\n"; }
957							$config{$dataset}{$key} = $config{$section}{$key};
958						}
959					}
960				}
961
962				$config{$dataset}{'path'} = $dataset;
963				$config{$dataset}{'initialized'} = 1;
964			}
965		}
966
967
968
969	}
970
971	return %config;
972} # end sub init
973
974####################################################################################
975####################################################################################
976####################################################################################
977
978sub get_date {
979	my %datestamp;
980	($datestamp{'sec'},$datestamp{'min'},$datestamp{'hour'},$datestamp{'mday'},$datestamp{'mon'},$datestamp{'year'},$datestamp{'wday'},$datestamp{'yday'},$datestamp{'isdst'}) = localtime(time);
981	$datestamp{'year'} += 1900;
982	$datestamp{'unix_time'} = (((((((($datestamp{'year'} - 1971) * 365) + $datestamp{'yday'}) * 24) + $datestamp{'hour'}) * 60) + $datestamp{'min'}) * 60) + $datestamp{'sec'};
983	$datestamp{'sec'} = sprintf ("%02u", $datestamp{'sec'});
984	$datestamp{'min'} = sprintf ("%02u", $datestamp{'min'});
985	$datestamp{'hour'} = sprintf ("%02u", $datestamp{'hour'});
986	$datestamp{'mday'} = sprintf ("%02u", $datestamp{'mday'});
987	$datestamp{'mon'} = sprintf ("%02u", ($datestamp{'mon'} + 1));
988	$datestamp{'noseconds'} = "$datestamp{'year'}-$datestamp{'mon'}-$datestamp{'mday'}_$datestamp{'hour'}:$datestamp{'min'}";
989	$datestamp{'sortable'} = "$datestamp{'noseconds'}:$datestamp{'sec'}";
990	return %datestamp;
991}
992
993####################################################################################
994####################################################################################
995####################################################################################
996
997
998sub displaytime {
999        # take a time in seconds, return it in human readable form
1000        my ($elapsed) = @_;
1001
1002        my $days = int ($elapsed / 60 / 60 / 24);
1003        $elapsed -= $days * 60 * 60 * 24;
1004        my $hours = int ($elapsed / 60 / 60);
1005        $elapsed -= $hours * 60 * 60;
1006        my $minutes = int ($elapsed / 60);
1007        $elapsed -= $minutes * 60;
1008        my $seconds = int($elapsed);
1009        my $humanreadable;
1010        if ($days) { $humanreadable .= " $days" . 'd'; }
1011        if ($hours || $days) { $humanreadable .= " $hours" . 'h'; }
1012        if ($minutes || $hours || $days) { $humanreadable .= " $minutes" . 'm'; }
1013        $humanreadable .= " $seconds" . 's';
1014        $humanreadable =~ s/^ //;
1015        return $humanreadable;
1016}
1017
1018
1019####################################################################################
1020####################################################################################
1021####################################################################################
1022
1023sub check_zpool() {
1024	# check_zfs Nagios plugin for monitoring Sun ZFS zpools
1025	# Copyright (c) 2007
1026	# original Written by Nathan Butcher
1027	# adapted for use within Sanoid framework by Jim Salter (2014)
1028	#
1029	# Released under the GNU Public License
1030	#
1031	# This program is free software; you can redistribute it and/or modify
1032	# it under the terms of the GNU General Public License as published by
1033	# the Free Software Foundation; either version 2 of the License, or
1034	# (at your option) any later version.
1035	#
1036	# This program is distributed in the hope that it will be useful,
1037	# but WITHOUT ANY WARRANTY; without even the implied warranty of
1038	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1039	# GNU General Public License for more details.
1040	#
1041	# You should have received a copy of the GNU General Public License
1042	# along with this program; if not, write to the Free Software
1043	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1044
1045	# Version: 0.9.2
1046	# Date : 24th July 2007
1047	# This plugin has tested on FreeBSD 7.0-CURRENT and Solaris 10
1048	# With a bit of fondling, it could be expanded to recognize other OSes in
1049	# future (e.g. if FUSE Linux gets off the ground)
1050
1051	# Verbose levels:-
1052	# 1 - Only alert us of zpool health and size stats
1053	# 2 - ...also alert us of failed devices when things go bad
1054	# 3 - ...alert us of the status of all devices regardless of health
1055	#
1056	# Usage:   check_zfs <zpool> <verbose level 1-3>
1057	# Example: check_zfs zeepool 1
1058	#	ZPOOL zeedata : ONLINE {Size:3.97G Used:183K Avail:3.97G Cap:0%}
1059
1060
1061	my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
1062	my $state="UNKNOWN";
1063	my $msg="FAILURE";
1064
1065	my $pool=shift;
1066	my $verbose=shift;
1067
1068	my $size="";
1069	my $used="";
1070	my $avail="";
1071	my $cap="";
1072	my $health="";
1073	my $dmge="";
1074	my $dedup="";
1075
1076	if ($verbose < 1 || $verbose > 3) {
1077		print "Verbose levels range from 1-3\n";
1078		exit $ERRORS{$state};
1079	}
1080
1081	my $statcommand="$zpool list -o name,size,cap,health,free $pool";
1082
1083	if (! open STAT, "$statcommand|") {
1084		print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
1085		exit $ERRORS{$state};
1086	}
1087
1088	# chuck the header line
1089	my $header = <STAT>;
1090
1091	# find and parse the line with values for the pool
1092	while(<STAT>) {
1093		chomp;
1094		if (/^${pool}\s+/) {
1095			my @row = split (/ +/);
1096			my $name;
1097			($name, $size, $cap, $health, $avail) = @row;
1098		}
1099	}
1100
1101	# Tony: Debuging
1102	# print "Size: $size \t Used: $used \t Avai: $avail \t Cap: $cap \t Health: $health\n";
1103
1104	close(STAT);
1105
1106	## check for valid zpool list response from zpool
1107	if (! $health ) {
1108		$state = "CRITICAL";
1109		$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
1110		print $state, " ", $msg;
1111		exit ($ERRORS{$state});
1112	}
1113
1114	## determine health of zpool and subsequent error status
1115	if ($health eq "ONLINE" ) {
1116		$state = "OK";
1117	} else {
1118		if ($health eq "DEGRADED") {
1119			$state = "WARNING";
1120		} else {
1121			$state = "CRITICAL";
1122		}
1123	}
1124
1125	## get more detail on possible device failure
1126	## flag to detect section of zpool status involving our zpool
1127	my $poolfind=0;
1128
1129	$statcommand="$zpool status $pool";
1130	if (! open STAT, "$statcommand|") {
1131		$state = 'CRITICAL';
1132		print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
1133		exit $ERRORS{$state};
1134	}
1135
1136	## go through zfs status output to find zpool fses and devices
1137	while(<STAT>) {
1138		chomp;
1139
1140		if (/^\s${pool}/ && $poolfind==1) {
1141			$poolfind=2;
1142			next;
1143		} elsif ( $poolfind==1 ) {
1144			$poolfind=0;
1145		}
1146
1147		if (/NAME\s+STATE\s+READ\s+WRITE\s+CKSUM/) {
1148			$poolfind=1;
1149		}
1150
1151		if ( /^$/ ) {
1152			$poolfind=0;
1153		}
1154
1155		if ($poolfind == 2) {
1156
1157			## special cases pertaining to full verbose
1158			if (/^\sspares/) {
1159				next unless $verbose == 3;
1160				$dmge=$dmge . "[SPARES]:- ";
1161				next;
1162			}
1163			if (/^\s{5}spare\s/) {
1164				next unless $verbose == 3;
1165				my ($sta) = /spare\s+(\S+)/;
1166				$dmge=$dmge . "[SPARE:${sta}]:- ";
1167				next;
1168			}
1169			if (/^\s{5}replacing\s/) {
1170				next unless $verbose == 3;
1171				my $perc;
1172				my ($sta) = /^\s+\S+\s+(\S+)/;
1173				if (/%/) {
1174					($perc) = /([0-9]+%)/;
1175				} else {
1176					$perc = "working";
1177				}
1178				$dmge=$dmge . "[REPLACING:${sta} (${perc})]:- ";
1179				next;
1180			}
1181
1182			## other cases
1183			my ($dev, $sta, $read, $write, $cksum) = /^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
1184
1185			if (!defined($sta)) {
1186				# cache and logs are special and don't have a status
1187				next;
1188			}
1189
1190			## pool online, not degraded thanks to dead/corrupted disk
1191			if ($state eq "OK" && $sta eq "UNAVAIL") {
1192				$state="WARNING";
1193
1194				## switching to verbose level 2 to explain weirdness
1195				if ($verbose == 1) {
1196					$verbose =2;
1197				}
1198			}
1199
1200			## no display for verbose level 1
1201			next if ($verbose==1);
1202			## don't display working devices for verbose level 2
1203			if ($verbose==2 && ($state eq "OK" || $sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE")) {
1204				# check for io/checksum errors
1205
1206				my @vdeverr = ();
1207				if ($read != 0) { push @vdeverr, "read" };
1208				if ($write != 0) { push @vdeverr, "write" };
1209				if ($cksum != 0) { push @vdeverr, "cksum" };
1210
1211				if (scalar @vdeverr) {
1212					$dmge=$dmge . "(" . $dev . ":" . join(", ", @vdeverr) . " errors) ";
1213					if ($state eq "OK") { $state = "WARNING" };
1214				}
1215
1216				next;
1217			}
1218
1219			## show everything else
1220			if (/^\s{3}(\S+)/) {
1221				$dmge=$dmge . "<" . $dev . ":" . $sta . "> ";
1222			} elsif (/^\s{7}(\S+)/) {
1223				$dmge=$dmge . "(" . $dev . ":" . $sta . ") ";
1224			} else {
1225				$dmge=$dmge . $dev . ":" . $sta . " ";
1226			}
1227		}
1228	}
1229
1230	## calling all goats!
1231
1232	$msg = sprintf "ZPOOL %s : %s {Size:%s Free:%s Cap:%s} %s\n", $pool, $health, $size, $avail, $cap, $dmge;
1233	$msg = "$state $msg";
1234	return ($ERRORS{$state},$msg);
1235} # end check_zpool()
1236
1237sub check_capacity_limit {
1238	my $value = shift;
1239
1240	if (!defined($value) || $value !~ /^\d+\z/) {
1241		return undef;
1242	}
1243
1244	if ($value < 0 || $value > 100) {
1245		return undef;
1246	}
1247
1248	return 1
1249}
1250
1251sub check_zpool_capacity() {
1252	my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
1253	my $state="UNKNOWN";
1254	my $msg="FAILURE";
1255
1256	my $pool=shift;
1257	my $capacitylimitsref=shift;
1258	my %capacitylimits=%$capacitylimitsref;
1259
1260	my $statcommand="$zpool list -H -o cap $pool";
1261
1262	if (! open STAT, "$statcommand|") {
1263		print ("$state '$statcommand' command returns no result!\n");
1264		exit $ERRORS{$state};
1265	}
1266
1267	my $line = <STAT>;
1268	close(STAT);
1269
1270	chomp $line;
1271	my @row = split(/ +/, $line);
1272	my $cap=$row[0];
1273
1274	## check for valid capacity value
1275	if ($cap !~ m/^[0-9]{1,3}%$/ ) {
1276		$state = "CRITICAL";
1277		$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
1278		print $state, " ", $msg;
1279		exit ($ERRORS{$state});
1280	}
1281
1282	$state="OK";
1283
1284	# check capacity
1285	my $capn = $cap;
1286	$capn =~ s/\D//g;
1287
1288	if (defined($capacitylimits{"warn"})) {
1289		if ($capn >= $capacitylimits{"warn"}) {
1290			$state = "WARNING";
1291		}
1292	}
1293
1294	if (defined($capacitylimits{"crit"})) {
1295		if ($capn >= $capacitylimits{"crit"}) {
1296			$state = "CRITICAL";
1297		}
1298	}
1299
1300	$msg = sprintf "ZPOOL %s : %s\n", $pool, $cap;
1301	$msg = "$state $msg";
1302	return ($ERRORS{$state},$msg);
1303} # end check_zpool_capacity()
1304
1305sub check_prune_defer {
1306	my ($config, $section) = @_;
1307
1308	my $limit = $config{$section}{"prune_defer"};
1309
1310	if (!check_capacity_limit($limit)) {
1311		die "ERROR: invalid prune_defer limit!\n";
1312	}
1313
1314	if ($limit eq 0) {
1315		return 0;
1316	}
1317
1318	my @parts = split /\//, $section, 2;
1319	my $pool = $parts[0];
1320
1321	if (exists $capacitycache{$pool}) {
1322	} else {
1323		$capacitycache{$pool} = get_zpool_capacity($pool);
1324	}
1325
1326	if ($limit < $capacitycache{$pool}) {
1327		return 0;
1328	}
1329
1330	return 1;
1331}
1332
1333sub get_zpool_capacity {
1334	my $pool = shift;
1335
1336	my $statcommand="$zpool list -H -o cap $pool";
1337
1338	if (! open STAT, "$statcommand|") {
1339		die "ERROR: '$statcommand' command returns no result!\n";
1340	}
1341
1342	my $line = <STAT>;
1343	close(STAT);
1344
1345	chomp $line;
1346	my @row = split(/ +/, $line);
1347	my $cap=$row[0];
1348
1349	## check for valid capacity value
1350	if ($cap !~ m/^[0-9]{1,3}%$/ ) {
1351		die "ERROR: '$statcommand' command returned invalid capacity value ($cap)!\n";
1352	}
1353
1354	$cap =~ s/\D//g;
1355
1356	return $cap;
1357}
1358
1359######################################################################################################
1360######################################################################################################
1361######################################################################################################
1362######################################################################################################
1363######################################################################################################
1364
1365sub checklock {
1366	# take argument $lockname.
1367	#
1368	# read /var/run/$lockname.lock for a pid on first line and a mutex on second line.
1369	#
1370	# check process list to see if the pid from /var/run/$lockname.lock is still active with
1371	# the original mutex found in /var/run/$lockname.lock.
1372	#
1373	# return:
1374	#    0 if lock is present and valid for another process
1375	#    1 if no lock is present
1376	#    2 if lock is present, but we own the lock
1377	#
1378	# shorthand - any true return indicates we are clear to lock; a false return indicates
1379	#             that somebody else already has the lock and therefore we cannot.
1380	#
1381
1382	my $lockname = shift;
1383	my $lockfile = "/var/run/$lockname.lock";
1384
1385	if (! -e $lockfile) {
1386		# no lockfile
1387		return 1;
1388	}
1389	# make sure lockfile contains something
1390	if ( -z $lockfile) {
1391	        # zero size lockfile, something is wrong
1392	        die "ERROR: something is wrong! $lockfile is empty\n";
1393	}
1394
1395	# lockfile exists. read pid and mutex from it. see if it's our pid.  if not, see if
1396	# there's still a process running with that pid and with the same mutex.
1397
1398	open FH, "< $lockfile" or die "ERROR: unable to open $lockfile";
1399	my @lock = <FH>;
1400	close FH;
1401	# if we didn't get exactly 2 items from the lock file there is a problem
1402	if (scalar(@lock) != 2) {
1403	    die "ERROR: $lockfile is invalid.\n"
1404	}
1405
1406	my $lockmutex = pop(@lock);
1407	my $lockpid = pop(@lock);
1408
1409	chomp $lockmutex;
1410	chomp $lockpid;
1411
1412	if ($lockpid == $$) {
1413		# we own the lockfile. no need to check any further.
1414		return 2;
1415	}
1416	open PL, "$pscmd -p $lockpid -o args= |";
1417	my @processlist = <PL>;
1418	close PL;
1419
1420	my $checkmutex = pop(@processlist);
1421	chomp $checkmutex;
1422
1423	if ($checkmutex eq $lockmutex) {
1424		# lock exists, is valid, is not owned by us - return false
1425		return 0;
1426	} else {
1427		# lock is present but not valid - remove and return true
1428		unlink $lockfile;
1429		return 1;
1430	}
1431}
1432
1433sub removelock {
1434	# take argument $lockname.
1435	#
1436	# make sure /var/run/$lockname.lock actually belongs to me (contains my pid and mutex)
1437	# and remove it if it does, die if it doesn't.
1438
1439	my $lockname = shift;
1440	my $lockfile = "/var/run/$lockname.lock";
1441
1442	if (checklock($lockname) == 2) {
1443		unlink $lockfile;
1444		return;
1445	} elsif (checklock($lockname) == 1) {
1446		die "ERROR: No valid lockfile found - Did a rogue process or user update or delete it?\n";
1447	} else {
1448		die "ERROR: A valid lockfile exists but does not belong to me! I refuse to remove it.\n";
1449	}
1450}
1451
1452sub writelock {
1453	# take argument $lockname.
1454	#
1455	# write a lockfile to /var/run/$lockname.lock with first line
1456	# being my pid and second line being my mutex.
1457
1458	my $lockname = shift;
1459	my $lockfile = "/var/run/$lockname.lock";
1460
1461	# die honorably rather than overwriting a valid, existing lock
1462	if (! checklock($lockname)) {
1463		die "ERROR: Valid lock already exists - I refuse to overwrite it. Committing seppuku now.\n";
1464	}
1465
1466	my $pid = $$;
1467
1468	open PL, "$pscmd -p $$ -o args= |";
1469	my @processlist = <PL>;
1470	close PL;
1471
1472	my $mutex = pop(@processlist);
1473	chomp $mutex;
1474
1475	open FH, "> $lockfile";
1476	print FH "$pid\n";
1477	print FH "$mutex\n";
1478	close FH;
1479}
1480
1481sub iszfsbusy {
1482	# check to see if ZFS filesystem passed in as argument currently has a zfs send or zfs receive process referencing it.
1483	# return true if busy (currently being sent or received), return false if not.
1484
1485        my $fs = shift;
1486        # if (args{'debug'}) { print "DEBUG: checking to see if $fs on is already in zfs receive using $pscmd -Ao args= ...\n"; }
1487
1488        open PL, "$pscmd -Ao args= |";
1489        my @processes = <PL>;
1490        close PL;
1491
1492        foreach my $process (@processes) {
1493		# if ($args{'debug'}) { print "DEBUG: checking process $process...\n"; }
1494                if ($process =~ /zfs *(send|receive|recv).*$fs/) {
1495                        # there's already a zfs send/receive process for our target filesystem - return true
1496                        # if ($args{'debug'}) { print "DEBUG: process $process matches target $fs!\n"; }
1497                        return 1;
1498                }
1499        }
1500
1501        # no zfs receive processes for our target filesystem found - return false
1502        return 0;
1503}
1504
1505#######################################################################################################################3
1506#######################################################################################################################3
1507#######################################################################################################################3
1508
1509sub getchilddatasets {
1510	# for later, if we make sanoid itself support sudo use
1511	my $fs = shift;
1512	my $mysudocmd = '';
1513
1514	my $getchildrencmd = "$mysudocmd $zfs list -o name -t filesystem,volume -Hr $fs |";
1515	 if ($args{'debug'}) { print "DEBUG: getting list of child datasets on $fs using $getchildrencmd...\n"; }
1516	open FH, $getchildrencmd;
1517	my @children = <FH>;
1518	close FH;
1519
1520	# parent dataset is the first element
1521	shift @children;
1522
1523	return @children;
1524}
1525
1526#######################################################################################################################3
1527#######################################################################################################################3
1528#######################################################################################################################3
1529
1530sub removecachedsnapshots {
1531	my $wait = shift;
1532
1533	if (not %pruned) {
1534		return;
1535	}
1536
1537	my $unlocked = checklock('sanoid_cacheupdate');
1538
1539	if ($wait != 1 && not $unlocked) {
1540		if ($args{'verbose'}) { print "INFO: deferring cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; }
1541		return;
1542	}
1543
1544	# wait until we can get a lock to do our cache changes
1545	while (not $unlocked) {
1546		if ($args{'verbose'}) { print "INFO: waiting for cache update lock held by another sanoid process.\n"; }
1547		sleep(10);
1548		$unlocked = checklock('sanoid_cacheupdate');
1549	}
1550
1551	writelock('sanoid_cacheupdate');
1552
1553	if ($args{'verbose'}) {
1554		print "INFO: removing destroyed snapshots from cache.\n";
1555	}
1556	open FH, "< $cache";
1557	my @rawsnaps = <FH>;
1558	close FH;
1559
1560	open FH, "> $cache" or die 'Could not write to $cache!\n';
1561	foreach my $snapline ( @rawsnaps ) {
1562		my @columns = split("\t", $snapline);
1563		my $snap = $columns[0];
1564		print FH $snapline unless ( exists($pruned{$snap}) );
1565	}
1566	close FH;
1567
1568	removelock('sanoid_cacheupdate');
1569	%snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate);
1570
1571	# clear hash
1572	undef %pruned;
1573}
1574
1575#######################################################################################################################3
1576#######################################################################################################################3
1577#######################################################################################################################3
1578
1579sub runscript {
1580	my $key=shift;
1581	my $dataset=shift;
1582
1583	my $timeout=$config{$dataset}{'script_timeout'};
1584
1585	my $ret;
1586	eval {
1587		if ($timeout gt 0) {
1588			local $SIG{ALRM} = sub { die "alarm\n" };
1589			alarm $timeout;
1590		}
1591		$ret = system($config{$dataset}{$key});
1592		alarm 0;
1593	};
1594	if ($@) {
1595		if ($@ eq "alarm\n") {
1596			warn "WARN: $key didn't finish in the allowed time!";
1597		} else {
1598			warn "CRITICAL ERROR: $@";
1599		}
1600		return -1;
1601	} else {
1602		if ($ret != 0) {
1603			warn "WARN: $key failed, $?";
1604		}
1605	}
1606
1607	return $ret;
1608}
1609
1610#######################################################################################################################3
1611#######################################################################################################################3
1612#######################################################################################################################3
1613
1614sub convertTimePeriod {
1615	my $value=shift;
1616	my $period=shift;
1617
1618	if ($value =~ /^\d+[yY]$/) {
1619		$period = 60*60*24*31*365;
1620		chop $value;
1621	} elsif ($value =~ /^\d+[wW]$/) {
1622		$period = 60*60*24*7;
1623		chop $value;
1624	} elsif ($value =~ /^\d+[dD]$/) {
1625		$period = 60*60*24;
1626		chop $value;
1627	} elsif ($value =~ /^\d+[hH]$/) {
1628		$period = 60*60;
1629		chop $value;
1630	} elsif ($value =~ /^\d+[mM]$/) {
1631		$period = 60;
1632		chop $value;
1633	} elsif ($value =~ /^\d+[sS]$/) {
1634		$period = 1;
1635		chop $value;
1636	} elsif ($value =~ /^\d+$/) {
1637		# no unit, provided fallback period is used
1638	} else {
1639		# invalid value, return smallest valid value as fallback
1640		# (will trigger a warning message for monitoring for sure)
1641		return 1;
1642	}
1643
1644	return $value * $period;
1645}
1646
1647__END__
1648
1649=head1 NAME
1650
1651sanoid - ZFS snapshot management and replication tool
1652
1653=head1 SYNOPSIS
1654
1655sanoid [options]
1656
1657Assumes --cron --verbose if no other arguments (other than configdir) are specified
1658
1659Options:
1660
1661  --configdir=DIR       Specify a directory to find config file sanoid.conf
1662
1663  --cron                Creates snapshots and purges expired snapshots
1664  --verbose             Prints out additional information during a sanoid run
1665  --readonly            Simulates creation/deletion of snapshots
1666  --quiet               Suppresses non-error output
1667  --force-update        Clears out sanoid's zfs snapshot cache
1668
1669  --monitor-health      Reports on zpool "health", in a Nagios compatible format
1670  --monitor-capacity    Reports on zpool capacity, in a Nagios compatible format
1671  --monitor-snapshots   Reports on snapshot "health", in a Nagios compatible format
1672  --take-snapshots      Creates snapshots as specified in sanoid.conf
1673  --prune-snapshots     Purges expired snapshots as specified in sanoid.conf
1674  --force-prune         Purges expired snapshots even if a send/recv is in progress
1675
1676  --help                Prints this helptext
1677  --version             Prints the version number
1678  --debug               Prints out a lot of additional information during a sanoid run
1679