1#!/usr/local/bin/perl 2 3# this software is licensed for use under the Free Software Foundation's GPL v3.0 license, as retrieved 4# from http://www.gnu.org/licenses/gpl-3.0.html on 2014-11-17. A copy should also be available in this 5# project's Git repository at https://github.com/jimsalterjrs/sanoid/blob/master/LICENSE. 6 7$::VERSION = '2.0.3'; 8my $MINIMUM_DEFAULTS_VERSION = 2; 9 10use strict; 11use warnings; 12use Config::IniFiles; # read samba-style conf file 13use Data::Dumper; # debugging - print contents of hash 14use File::Path; # for rmtree command in use_prune 15use Getopt::Long qw(:config auto_version auto_help); 16use Pod::Usage; # pod2usage 17use Time::Local; # to parse dates in reverse 18use Capture::Tiny ':all'; 19 20my %args = ("configdir" => "/usr/local/etc/sanoid"); 21GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet", 22 "monitor-health", "force-update", "configdir=s", 23 "monitor-snapshots", "take-snapshots", "prune-snapshots", "force-prune", 24 "monitor-capacity" 25 ) or pod2usage(2); 26 27# If only config directory (or nothing) has been specified, default to --cron --verbose 28if (keys %args < 2) { 29 $args{'cron'} = 1; 30 $args{'verbose'} = 1; 31} 32 33my $pscmd = '/bin/ps'; 34 35my $zfs = '/sbin/zfs'; 36my $zpool = '/sbin/zpool'; 37 38my $conf_file = "$args{'configdir'}/sanoid.conf"; 39my $default_conf_file = "$args{'configdir'}/sanoid.defaults.conf"; 40 41# parse config file 42my %config = init($conf_file,$default_conf_file); 43 44# if we call getsnaps(%config,1) it will forcibly update the cache, TTL or no TTL 45my $forcecacheupdate = 0; 46my $cache = '/var/cache/sanoidsnapshots.txt'; 47my $cacheTTL = 900; # 15 minutes 48my %snaps = getsnaps( \%config, $cacheTTL, $forcecacheupdate ); 49my %pruned; 50my %capacitycache; 51 52my %snapsbytype = getsnapsbytype( \%config, \%snaps ); 53 54my %snapsbypath = getsnapsbypath( \%config, \%snaps ); 55 56# let's make it a little easier to be consistent passing these hashes in the same order to each sub 57my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath ); 58 59if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); } 60if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); } 61if ($args{'monitor-health'}) { monitor_health(@params); } 62if ($args{'monitor-capacity'}) { monitor_capacity(@params); } 63if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); } 64 65if ($args{'cron'}) { 66 if ($args{'quiet'}) { $args{'verbose'} = 0; } 67 take_snapshots (@params); 68 prune_snapshots (@params); 69} else { 70 if ($args{'take-snapshots'}) { take_snapshots (@params); } 71 if ($args{'prune-snapshots'}) { prune_snapshots (@params); } 72} 73 74exit 0; 75 76 77#################################################################################### 78#################################################################################### 79#################################################################################### 80 81sub monitor_health { 82 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 83 my %pools; 84 my @messages; 85 my $errlevel=0; 86 87 foreach my $path (keys %{ $snapsbypath}) { 88 my @pool = split ('/',$path); 89 $pools{$pool[0]}=1; 90 } 91 92 foreach my $pool (keys %pools) { 93 my ($exitcode, $msg) = check_zpool($pool,2); 94 if ($exitcode > $errlevel) { $errlevel = $exitcode; } 95 chomp $msg; 96 push (@messages, $msg); 97 } 98 99 my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** '); 100 my $message = $warninglevels[$errlevel] . join (', ',@messages); 101 print "$message\n"; 102 exit $errlevel; 103 104} 105 106#################################################################################### 107#################################################################################### 108#################################################################################### 109 110sub monitor_snapshots { 111 112 # nagios plugin format: exit 0,1,2,3 for OK, WARN, CRITICAL, or ERROR. 113 114 # check_snapshot_date - test ZFS fs creation timestamp for recentness 115 # accepts arguments: $filesystem, $warn (in seconds elapsed), $crit (in seconds elapsed) 116 117 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 118 my %datestamp = get_date(); 119 my $errorlevel = 0; 120 my $msg; 121 my @msgs; 122 my @paths; 123 124 foreach my $section (keys %config) { 125 if ($section =~ /^template/) { next; } 126 if (! $config{$section}{'monitor'}) { next; } 127 if ($config{$section}{'process_children_only'}) { next; } 128 129 my $path = $config{$section}{'path'}; 130 push @paths, $path; 131 132 my @types = ('yearly','monthly','weekly','daily','hourly','frequently'); 133 foreach my $type (@types) { 134 if ($config{$section}{$type} == 0) { next; } 135 136 my $smallerperiod = 0; 137 # we need to set the period length in seconds first 138 if ($type eq 'frequently') { $smallerperiod = 1; } 139 elsif ($type eq 'hourly') { $smallerperiod = 60; } 140 elsif ($type eq 'daily') { $smallerperiod = 60*60; } 141 elsif ($type eq 'weekly') { $smallerperiod = 60*60*24; } 142 elsif ($type eq 'monthly') { $smallerperiod = 60*60*24*7; } 143 elsif ($type eq 'yearly') { $smallerperiod = 60*60*24*31; } 144 145 my $typewarn = $type . '_warn'; 146 my $typecrit = $type . '_crit'; 147 my $warn = convertTimePeriod($config{$section}{$typewarn}, $smallerperiod); 148 my $crit = convertTimePeriod($config{$section}{$typecrit}, $smallerperiod); 149 my $elapsed = -1; 150 if (defined $snapsbytype{$path}{$type}{'newest'}) { 151 $elapsed = $snapsbytype{$path}{$type}{'newest'}; 152 } 153 my $dispelapsed = displaytime($elapsed); 154 my $dispwarn = displaytime($warn); 155 my $dispcrit = displaytime($crit); 156 if ( $elapsed > $crit || $elapsed == -1) { 157 if ($crit > 0) { 158 if (! $config{$section}{'monitor_dont_crit'}) { $errorlevel = 2; } 159 if ($elapsed == -1) { 160 push @msgs, "CRIT: $path has no $type snapshots at all!"; 161 } else { 162 push @msgs, "CRIT: $path\'s newest $type snapshot is $dispelapsed old (should be < $dispcrit)"; 163 } 164 } 165 } elsif ($elapsed > $warn) { 166 if ($warn > 0) { 167 if (! $config{$section}{'monitor_dont_warn'} && ($errorlevel < 2) ) { $errorlevel = 1; } 168 push @msgs, "WARN: $path\'s newest $type snapshot is $dispelapsed old (should be < $dispwarn)"; 169 } 170 } else { 171 # push @msgs .= "OK: $path\'s newest $type snapshot is $dispelapsed old \n"; 172 } 173 174 } 175 } 176 177 my @sorted_msgs = sort { lc($a) cmp lc($b) } @msgs; 178 my @sorted_paths = sort { lc($a) cmp lc($b) } @paths; 179 $msg = join (", ", @sorted_msgs); 180 my $paths = join (", ", @sorted_paths); 181 182 if ($msg eq '') { $msg = "OK: all monitored datasets \($paths\) have fresh snapshots"; } 183 184 print "$msg\n"; 185 exit $errorlevel; 186} 187 188 189#################################################################################### 190#################################################################################### 191#################################################################################### 192 193sub monitor_capacity { 194 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 195 my %pools; 196 my @messages; 197 my $errlevel=0; 198 199 # build pool list with corresponding capacity limits 200 foreach my $section (keys %config) { 201 my @pool = split ('/',$section); 202 203 if (scalar @pool == 1 || !defined($pools{$pool[0]}) ) { 204 my %capacitylimits; 205 206 if (!check_capacity_limit($config{$section}{'capacity_warn'})) { 207 die "ERROR: invalid zpool capacity warning limit!\n"; 208 } 209 210 if ($config{$section}{'capacity_warn'} != 0) { 211 $capacitylimits{'warn'} = $config{$section}{'capacity_warn'}; 212 } 213 214 if (!check_capacity_limit($config{$section}{'capacity_crit'})) { 215 die "ERROR: invalid zpool capacity critical limit!\n"; 216 } 217 218 if ($config{$section}{'capacity_crit'} != 0) { 219 $capacitylimits{'crit'} = $config{$section}{'capacity_crit'}; 220 } 221 222 if (%capacitylimits) { 223 $pools{$pool[0]} = \%capacitylimits; 224 } 225 } 226 } 227 228 foreach my $pool (keys %pools) { 229 my $capacitylimitsref = $pools{$pool}; 230 231 my ($exitcode, $msg) = check_zpool_capacity($pool,\%$capacitylimitsref); 232 if ($exitcode > $errlevel) { $errlevel = $exitcode; } 233 chomp $msg; 234 push (@messages, $msg); 235 } 236 237 my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** '); 238 my $message = $warninglevels[$errlevel] . join (', ',@messages); 239 print "$message\n"; 240 exit $errlevel; 241} 242 243#################################################################################### 244#################################################################################### 245#################################################################################### 246 247 248sub prune_snapshots { 249 250 if ($args{'verbose'}) { print "INFO: pruning snapshots...\n"; } 251 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 252 253 my %datestamp = get_date(); 254 my $forcecacheupdate = 0; 255 256 foreach my $section (keys %config) { 257 if ($section =~ /^template/) { next; } 258 if (! $config{$section}{'autoprune'}) { next; } 259 if ($config{$section}{'process_children_only'}) { next; } 260 261 my $path = $config{$section}{'path'}; 262 263 my $period = 0; 264 if (check_prune_defer($config, $section)) { 265 if ($args{'verbose'}) { print "INFO: deferring snapshot pruning ($section)...\n"; } 266 next; 267 } 268 269 foreach my $type (keys %{ $config{$section} }){ 270 unless ($type =~ /ly$/) { next; } 271 272 # we need to set the period length in seconds first 273 if ($type eq 'frequently') { $period = 60 * $config{$section}{'frequent_period'}; } 274 elsif ($type eq 'hourly') { $period = 60*60; } 275 elsif ($type eq 'daily') { $period = 60*60*24; } 276 elsif ($type eq 'weekly') { $period = 60*60*24*7; } 277 elsif ($type eq 'monthly') { $period = 60*60*24*31; } 278 elsif ($type eq 'yearly') { $period = 60*60*24*365.25; } 279 280 # avoid pissing off use warnings by not executing this block if no matching snaps exist 281 if (defined $snapsbytype{$path}{$type}{'sorted'}) { 282 my @sorted = split (/\|/,$snapsbytype{$path}{$type}{'sorted'}); 283 284 # if we say "daily=30" we really mean "don't keep any dailies more than 30 days old", etc 285 my $maxage = ( time() - $config{$section}{$type} * $period ); 286 # but if we say "daily=30" we ALSO mean "don't get rid of ANY dailies unless we have more than 30". 287 my $minsnapsthistype = $config{$section}{$type}; 288 289 # how many total snaps of this type do we currently have? 290 my $numsnapsthistype = scalar (@sorted); 291 292 my @prunesnaps; 293 foreach my $snap( @sorted ){ 294 # print "snap $path\@$snap has age $snaps{$path}{$snap}{'ctime'}, maxage is $maxage.\n"; 295 if ( ($snaps{$path}{$snap}{'ctime'} < $maxage) && ($numsnapsthistype > $minsnapsthistype) ) { 296 my $fullpath = $path . '@' . $snap; 297 push(@prunesnaps,$fullpath); 298 # we just got rid of a snap, so we now have one fewer, duh 299 $numsnapsthistype--; 300 } 301 } 302 303 if ((scalar @prunesnaps) > 0) { 304 # print "found some snaps to prune!\n" 305 if (checklock('sanoid_pruning')) { 306 writelock('sanoid_pruning'); 307 foreach my $snap( @prunesnaps ){ 308 if ($args{'verbose'}) { print "INFO: pruning $snap ... \n"; } 309 if (!$args{'force-prune'} && iszfsbusy($path)) { 310 if ($args{'verbose'}) { print "INFO: deferring pruning of $snap - $path is currently in zfs send or receive.\n"; } 311 } else { 312 if (! $args{'readonly'}) { 313 if (system($zfs, "destroy", $snap) == 0) { 314 $pruned{$snap} = 1; 315 my $dataset = (split '@', $snap)[0]; 316 my $snapname = (split '@', $snap)[1]; 317 if ($config{$dataset}{'pruning_script'}) { 318 $ENV{'SANOID_TARGET'} = $dataset; 319 $ENV{'SANOID_SNAPNAME'} = $snapname; 320 if ($args{'verbose'}) { print "executing pruning_script '".$config{$dataset}{'pruning_script'}."' on dataset '$dataset'\n"; } 321 my $ret = runscript('pruning_script',$dataset); 322 323 delete $ENV{'SANOID_TARGET'}; 324 delete $ENV{'SANOID_SNAPNAME'}; 325 } 326 } else { 327 warn "could not remove $snap : $?"; 328 } 329 } 330 } 331 } 332 removelock('sanoid_pruning'); 333 removecachedsnapshots(0); 334 } else { 335 if ($args{'verbose'}) { print "INFO: deferring snapshot pruning - valid pruning lock held by other sanoid process.\n"; } 336 } 337 } 338 } 339 } 340 } 341 342 # if there were any deferred cache updates, 343 # do them now and wait if necessary 344 removecachedsnapshots(1); 345} # end prune_snapshots 346 347 348#################################################################################### 349#################################################################################### 350#################################################################################### 351 352sub take_snapshots { 353 354 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 355 356 my %datestamp = get_date(); 357 my $forcecacheupdate = 0; 358 359 my @newsnaps; 360 361 # get utc timestamp of the current day for DST check 362 my $daystartUtc = timelocal(0, 0, 0, $datestamp{'mday'}, ($datestamp{'mon'}-1), $datestamp{'year'}); 363 my ($isdst) = (localtime($daystartUtc))[8]; 364 my $dstOffset = 0; 365 366 if ($isdst ne $datestamp{'isdst'}) { 367 # current dst is different then at the beginning og the day 368 if ($isdst) { 369 # DST ended in the current day 370 $dstOffset = 60*60; 371 } 372 } 373 374 if ($args{'verbose'}) { print "INFO: taking snapshots...\n"; } 375 foreach my $section (keys %config) { 376 if ($section =~ /^template/) { next; } 377 if (! $config{$section}{'autosnap'}) { next; } 378 if ($config{$section}{'process_children_only'}) { next; } 379 380 my $path = $config{$section}{'path'}; 381 my @types = ('yearly','monthly','weekly','daily','hourly','frequently'); 382 383 foreach my $type (@types) { 384 if ($config{$section}{$type} > 0) { 385 386 my $newestage; # in seconds 387 if (defined $snapsbytype{$path}{$type}{'newest'}) { 388 $newestage = $snapsbytype{$path}{$type}{'newest'}; 389 } else{ 390 $newestage = 9999999999999999; 391 } 392 393 # for use with localtime: @preferredtime will be most recent preferred snapshot time in ($sec,$min,$hour,$mon-1,$year) format 394 my @preferredtime; 395 my $lastpreferred; 396 397 # to avoid duplicates with DST 398 my $handleDst = 0; 399 400 if ($type eq 'frequently') { 401 my $frequentslice = int($datestamp{'min'} / $config{$section}{'frequent_period'}); 402 403 push @preferredtime,0; # try to hit 0 seconds 404 push @preferredtime,$frequentslice * $config{$section}{'frequent_period'}; 405 push @preferredtime,$datestamp{'hour'}; 406 push @preferredtime,$datestamp{'mday'}; 407 push @preferredtime,($datestamp{'mon'}-1); # january is month 0 408 push @preferredtime,$datestamp{'year'}; 409 $lastpreferred = timelocal(@preferredtime); 410 if ($lastpreferred > time()) { $lastpreferred -= 60 * $config{$section}{'frequent_period'}; } # preferred time is later this frequent period - so look at last frequent period 411 } elsif ($type eq 'hourly') { 412 push @preferredtime,0; # try to hit 0 seconds 413 push @preferredtime,$config{$section}{'hourly_min'}; 414 push @preferredtime,$datestamp{'hour'}; 415 push @preferredtime,$datestamp{'mday'}; 416 push @preferredtime,($datestamp{'mon'}-1); # january is month 0 417 push @preferredtime,$datestamp{'year'}; 418 $lastpreferred = timelocal(@preferredtime); 419 420 if ($dstOffset ne 0) { 421 # timelocal doesn't take DST into account 422 $lastpreferred += $dstOffset; 423 # DST ended, avoid duplicates 424 $handleDst = 1; 425 } 426 if ($lastpreferred > time()) { $lastpreferred -= 60*60; } # preferred time is later this hour - so look at last hour's 427 } elsif ($type eq 'daily') { 428 push @preferredtime,0; # try to hit 0 seconds 429 push @preferredtime,$config{$section}{'daily_min'}; 430 push @preferredtime,$config{$section}{'daily_hour'}; 431 push @preferredtime,$datestamp{'mday'}; 432 push @preferredtime,($datestamp{'mon'}-1); # january is month 0 433 push @preferredtime,$datestamp{'year'}; 434 $lastpreferred = timelocal(@preferredtime); 435 436 # timelocal doesn't take DST into account 437 $lastpreferred += $dstOffset; 438 439 # check if the planned time has different DST flag than the current 440 my ($isdst) = (localtime($lastpreferred))[8]; 441 if ($isdst ne $datestamp{'isdst'}) { 442 if (!$isdst) { 443 # correct DST difference 444 $lastpreferred -= 60*60; 445 } 446 } 447 448 if ($lastpreferred > time()) { 449 $lastpreferred -= 60*60*24; 450 451 if ($dstOffset ne 0) { 452 # because we are going back one day 453 # the DST difference has to be accounted 454 # for in reverse now 455 $lastpreferred -= 2*$dstOffset; 456 } 457 } # preferred time is later today - so look at yesterday's 458 } elsif ($type eq 'weekly') { 459 # calculate offset in seconds for the desired weekday 460 my $offset = 0; 461 if ($config{$section}{'weekly_wday'} < $datestamp{'wday'}) { 462 $offset += 7; 463 } 464 $offset += $config{$section}{'weekly_wday'} - $datestamp{'wday'}; 465 $offset *= 60*60*24; # full day 466 467 push @preferredtime,0; # try to hit 0 seconds 468 push @preferredtime,$config{$section}{'weekly_min'}; 469 push @preferredtime,$config{$section}{'weekly_hour'}; 470 push @preferredtime,$datestamp{'mday'}; 471 push @preferredtime,($datestamp{'mon'}-1); # january is month 0 472 push @preferredtime,$datestamp{'year'}; 473 $lastpreferred = timelocal(@preferredtime); 474 $lastpreferred += $offset; 475 if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*7; } # preferred time is later this week - so look at last week's 476 } elsif ($type eq 'monthly') { 477 push @preferredtime,0; # try to hit 0 seconds 478 push @preferredtime,$config{$section}{'monthly_min'}; 479 push @preferredtime,$config{$section}{'monthly_hour'}; 480 push @preferredtime,$config{$section}{'monthly_mday'}; 481 push @preferredtime,($datestamp{'mon'}-1); # january is month 0 482 push @preferredtime,$datestamp{'year'}; 483 $lastpreferred = timelocal(@preferredtime); 484 if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*31; } # preferred time is later this month - so look at last month's 485 } elsif ($type eq 'yearly') { 486 push @preferredtime,0; # try to hit 0 seconds 487 push @preferredtime,$config{$section}{'yearly_min'}; 488 push @preferredtime,$config{$section}{'yearly_hour'}; 489 push @preferredtime,$config{$section}{'yearly_mday'}; 490 push @preferredtime,($config{$section}{'yearly_mon'}-1); # january is month 0 491 push @preferredtime,$datestamp{'year'}; 492 $lastpreferred = timelocal(@preferredtime); 493 if ($lastpreferred > time()) { $lastpreferred -= 60*60*24*31*365.25; } # preferred time is later this year - so look at last year 494 } else { 495 warn "WARN: unknown interval type $type in config!"; 496 next; 497 } 498 499 # reconstruct our human-formatted most recent preferred snapshot time into an epoch time, to compare with the epoch of our most recent snapshot 500 my $maxage = time()-$lastpreferred; 501 502 if ( $newestage > $maxage ) { 503 # update to most current possible datestamp 504 %datestamp = get_date(); 505 # print "we should have had a $type snapshot of $path $maxage seconds ago; most recent is $newestage seconds old.\n"; 506 507 my $flags = ""; 508 # use zfs (atomic) recursion if specified in config 509 if ($config{$section}{'zfs_recursion'}) { 510 $flags .= "r"; 511 } 512 if ($handleDst) { 513 $flags .= "d"; 514 } 515 516 if ($flags ne "") { 517 push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type\@$flags"); 518 } else { 519 push(@newsnaps, "$path\@autosnap_$datestamp{'sortable'}_$type"); 520 } 521 } 522 } 523 } 524 } 525 526 if ( (scalar(@newsnaps)) > 0) { 527 foreach my $snap ( @newsnaps ) { 528 my $extraMessage = ""; 529 my @split = split '@', $snap, -1; 530 my $recursiveFlag = 0; 531 my $dstHandling = 0; 532 if (scalar(@split) == 3) { 533 my $flags = $split[2]; 534 if (index($flags, "r") != -1) { 535 $recursiveFlag = 1; 536 $extraMessage = " (zfs recursive)"; 537 chop $snap; 538 } 539 if (index($flags, "d") != -1) { 540 $dstHandling = 1; 541 chop $snap; 542 } 543 chop $snap; 544 } 545 my $dataset = $split[0]; 546 my $snapname = $split[1]; 547 my $presnapshotfailure = 0; 548 my $ret = 0; 549 if ($config{$dataset}{'pre_snapshot_script'}) { 550 $ENV{'SANOID_TARGET'} = $dataset; 551 $ENV{'SANOID_SNAPNAME'} = $snapname; 552 if ($args{'verbose'}) { print "executing pre_snapshot_script '".$config{$dataset}{'pre_snapshot_script'}."' on dataset '$dataset'\n"; } 553 554 if (!$args{'readonly'}) { 555 $ret = runscript('pre_snapshot_script',$dataset); 556 } 557 558 delete $ENV{'SANOID_TARGET'}; 559 delete $ENV{'SANOID_SNAPNAME'}; 560 561 if ($ret != 0) { 562 # warning was already thrown by runscript function 563 $config{$dataset}{'no_inconsistent_snapshot'} and next; 564 $presnapshotfailure = 1; 565 } 566 } 567 if ($args{'verbose'}) { print "taking snapshot $snap$extraMessage\n"; } 568 if (!$args{'readonly'}) { 569 my $stderr; 570 my $exit; 571 ($stderr, $exit) = tee_stderr { 572 if ($recursiveFlag) { 573 system($zfs, "snapshot", "-r", "$snap"); 574 } else { 575 system($zfs, "snapshot", "$snap"); 576 } 577 }; 578 579 $exit == 0 or do { 580 if ($dstHandling) { 581 if ($stderr =~ /already exists/) { 582 $exit = 0; 583 $snap =~ s/_([a-z]+)$/dst_$1/g; 584 if ($args{'verbose'}) { print "taking dst snapshot $snap$extraMessage\n"; } 585 if ($recursiveFlag) { 586 system($zfs, "snapshot", "-r", "$snap") == 0 587 or warn "CRITICAL ERROR: $zfs snapshot -r $snap failed, $?"; 588 } else { 589 system($zfs, "snapshot", "$snap") == 0 590 or warn "CRITICAL ERROR: $zfs snapshot $snap failed, $?"; 591 } 592 } 593 } 594 }; 595 596 $exit == 0 or do { 597 if ($recursiveFlag) { 598 warn "CRITICAL ERROR: $zfs snapshot -r $snap failed, $?"; 599 } else { 600 warn "CRITICAL ERROR: $zfs snapshot $snap failed, $?"; 601 } 602 }; 603 } 604 if ($config{$dataset}{'post_snapshot_script'}) { 605 if (!$presnapshotfailure or $config{$dataset}{'force_post_snapshot_script'}) { 606 $ENV{'SANOID_TARGET'} = $dataset; 607 $ENV{'SANOID_SNAPNAME'} = $snapname; 608 if ($args{'verbose'}) { print "executing post_snapshot_script '".$config{$dataset}{'post_snapshot_script'}."' on dataset '$dataset'\n"; } 609 610 if (!$args{'readonly'}) { 611 runscript('post_snapshot_script',$dataset); 612 } 613 614 delete $ENV{'SANOID_TARGET'}; 615 delete $ENV{'SANOID_SNAPNAME'}; 616 } 617 } 618 } 619 $forcecacheupdate = 1; 620 %snaps = getsnaps(%config,$cacheTTL,$forcecacheupdate); 621 } 622} 623 624#################################################################################### 625#################################################################################### 626#################################################################################### 627 628sub blabber { 629 630 my ($config, $snaps, $snapsbytype, $snapsbypath) = @_; 631 632 $Data::Dumper::Sortkeys = 1; 633 print "****** CONFIGS ******\n"; 634 print Dumper(\%config); 635 #print "****** SNAPSHOTS ******\n"; 636 #print Dumper(\%snaps); 637 #print "****** SNAPSBYTYPE ******\n"; 638 #print Dumper(\%snapsbytype); 639 #print "****** SNAPSBYPATH ******\n"; 640 #print Dumper(\%snapsbypath); 641 642 print "\n"; 643 644 foreach my $section (keys %config) { 645 my $path = $config{$section}{'path'}; 646 print "Filesystem $path has:\n"; 647 print " $snapsbypath{$path}{'numsnaps'} total snapshots "; 648 if ($snapsbypath{$path}{'numsnaps'} == 0) { 649 print "(no current snapshots)" 650 } else { 651 print "(newest: "; 652 my $newest = sprintf("%.1f",$snapsbypath{$path}{'newest'} / 60 / 60); 653 print "$newest hours old)\n"; 654 655 foreach my $type (keys %{ $snapsbytype{$path} }){ 656 print " $snapsbytype{$path}{$type}{'numsnaps'} $type\n"; 657 print " desired: $config{$section}{$type}\n"; 658 print " newest: "; 659 my $newest = sprintf("%.1f",($snapsbytype{$path}{$type}{'newest'} / 60 / 60)); 660 print "$newest hours old, named $snapsbytype{$path}{$type}{'newestname'}\n"; 661 } 662 } 663 print "\n\n"; 664 } 665 666} # end blabber 667 668 669#################################################################################### 670#################################################################################### 671#################################################################################### 672 673 674sub getsnapsbytype { 675 676 my ($config, $snaps) = @_; 677 my %snapsbytype; 678 679 # iterate through each module section - each section is a single ZFS path 680 foreach my $section (keys %config) { 681 my $path = $config{$section}{'path'}; 682 683 my %rawsnaps; 684 foreach my $name (keys %{ $snaps{$path} }){ 685 my $type = $snaps{$path}{$name}{'type'}; 686 $rawsnaps{$type}{$name} = $snaps{$path}{$name}{'ctime'} 687 } 688 689 # iterate through snapshots of each type, ordered by creation time of each snapshot within that type 690 foreach my $type (keys %rawsnaps) { 691 $snapsbytype{$path}{$type}{'numsnaps'} = scalar (keys %{ $rawsnaps{$type} }); 692 my @sortedsnaps; 693 foreach my $name ( 694 sort { $rawsnaps{$type}{$a} <=> $rawsnaps{$type}{$b} } keys %{ $rawsnaps{$type} } 695 ) { 696 push @sortedsnaps, $name; 697 $snapsbytype{$path}{$type}{'newest'} = (time-$snaps{$path}{$name}{'ctime'}); 698 $snapsbytype{$path}{$type}{'newestname'} = $name; 699 } 700 $snapsbytype{$path}{$type}{'sorted'} = join ('|',@sortedsnaps); 701 } 702 } 703 704 return %snapsbytype; 705 706} # end getsnapsbytype 707 708 709#################################################################################### 710#################################################################################### 711#################################################################################### 712 713 714sub getsnapsbypath { 715 716 my ($config,$snaps) = @_; 717 my %snapsbypath; 718 719 # iterate through each module section - each section is a single ZFS path 720 foreach my $section (keys %config) { 721 my $path = $config{$section}{'path'}; 722 $snapsbypath{$path}{'numsnaps'} = scalar (keys %{ $snaps{$path} }); 723 724 # iterate through snapshots of each type, ordered by creation time of each snapshot within that type 725 my %rawsnaps; 726 foreach my $snapname ( keys %{ $snaps{$path} } ) { 727 $rawsnaps{$path}{$snapname} = $snaps{$path}{$snapname}{'ctime'}; 728 } 729 my @sortedsnaps; 730 foreach my $snapname ( 731 sort { $rawsnaps{$path}{$a} <=> $rawsnaps{$path}{$b} } keys %{ $rawsnaps{$path} } 732 ) { 733 push @sortedsnaps, $snapname; 734 $snapsbypath{$path}{'newest'} = (time-$snaps{$path}{$snapname}{'ctime'}); 735 } 736 my $sortedsnaps = join ('|',@sortedsnaps); 737 $snapsbypath{$path}{'sorted'} = $sortedsnaps; 738 } 739 740 return %snapsbypath; 741 742} # end getsnapsbypath 743 744 745 746 747#################################################################################### 748#################################################################################### 749#################################################################################### 750 751sub getsnaps { 752 753 my ($config, $cacheTTL, $forcecacheupdate) = @_; 754 755 my @rawsnaps; 756 757 my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = stat($cache); 758 759 if ( $forcecacheupdate || ! -f $cache || (time() - $mtime) > $cacheTTL ) { 760 if (checklock('sanoid_cacheupdate')) { 761 writelock('sanoid_cacheupdate'); 762 if ($args{'verbose'}) { 763 if ($args{'force-update'}) { 764 print "INFO: cache forcibly expired - updating from zfs list.\n"; 765 } else { 766 print "INFO: cache expired - updating from zfs list.\n"; 767 } 768 } 769 open FH, "$zfs get -Hrpt snapshot creation |"; 770 @rawsnaps = <FH>; 771 close FH; 772 773 open FH, "> $cache" or die 'Could not write to $cache!\n'; 774 print FH @rawsnaps; 775 close FH; 776 removelock('sanoid_cacheupdate'); 777 } else { 778 if ($args{'verbose'}) { print "INFO: deferring cache update - valid cache update lock held by another sanoid process.\n"; } 779 open FH, "< $cache"; 780 @rawsnaps = <FH>; 781 close FH; 782 } 783 } else { 784 # if ($args{'debug'}) { print "DEBUG: cache not expired (" . (time() - $mtime) . " seconds old with TTL of $cacheTTL): pulling snapshot list from cache.\n"; } 785 open FH, "< $cache"; 786 @rawsnaps = <FH>; 787 close FH; 788 } 789 790 foreach my $snap (@rawsnaps) { 791 my ($fs,$snapname,$snapdate) = ($snap =~ m/(.*)\@(.*ly)\t*creation\t*(\d*)/); 792 793 # avoid pissing off use warnings 794 if (defined $snapname) { 795 my ($snaptype) = ($snapname =~ m/.*_(\w*ly)/); 796 if ($snapname =~ /^autosnap/) { 797 $snaps{$fs}{$snapname}{'ctime'}=$snapdate; 798 $snaps{$fs}{$snapname}{'type'}=$snaptype; 799 } 800 } 801 } 802 803 return %snaps; 804} 805 806#################################################################################### 807#################################################################################### 808#################################################################################### 809 810sub init { 811 my ($conf_file, $default_conf_file) = @_; 812 my %config; 813 814 unless (-e $default_conf_file ) { die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!"; } 815 unless (-e $conf_file ) { die "FATAL: cannot load $conf_file - please create a valid local config file before running sanoid!"; } 816 817 tie my %defaults, 'Config::IniFiles', ( -file => $default_conf_file ) or die "FATAL: cannot load $default_conf_file - please restore a clean copy, this is not a user-editable file!"; 818 tie my %ini, 'Config::IniFiles', ( -file => $conf_file ) or die "FATAL: cannot load $conf_file - please create a valid local config file before running sanoid!"; 819 820 # we'll use these later to normalize potentially true and false values on any toggle keys 821 my @toggles = ('autosnap','autoprune','monitor_dont_warn','monitor_dont_crit','monitor','recursive','process_children_only','skip_children','no_inconsistent_snapshot','force_post_snapshot_script'); 822 # recursive is defined as toggle but can also have the special value "zfs", it is kept to be backward compatible 823 824 my @istrue=(1,"true","True","TRUE","yes","Yes","YES","on","On","ON"); 825 my @isfalse=(0,"false","False","FALSE","no","No","NO","off","Off","OFF"); 826 827 # check if default configuration file is up to date 828 my $defaults_version = 1; 829 if (defined $defaults{'version'}{'version'}) { 830 $defaults_version = $defaults{'version'}{'version'}; 831 delete $defaults{'version'}; 832 } 833 834 if ($defaults_version < $MINIMUM_DEFAULTS_VERSION) { 835 die "FATAL: you're using sanoid.defaults.conf v$defaults_version, this version of sanoid requires a minimum sanoid.defaults.conf v$MINIMUM_DEFAULTS_VERSION"; 836 } 837 838 foreach my $section (keys %ini) { 839 840 # first up - die with honor if unknown parameters are set in any modules or templates by the user. 841 foreach my $key (keys %{$ini{$section}}) { 842 if (! defined ($defaults{'template_default'}{$key})) { 843 die "FATAL ERROR: I don't understand the setting $key you've set in \[$section\] in $conf_file.\n"; 844 } 845 } 846 847 if ($section =~ /^template_/) { next; } # don't process templates directly 848 849 # only set defaults on sections that haven't already been initialized - this allows us to override values 850 # for sections directly when they've already been defined recursively, without starting them over from scratch. 851 if (! defined ($config{$section}{'initialized'})) { 852 if ($args{'debug'}) { print "DEBUG: initializing \$config\{$section\} with default values from $default_conf_file.\n"; } 853 # set default values from %defaults, which can then be overriden by template 854 # and/or local settings within the module. 855 foreach my $key (keys %{$defaults{'template_default'}}) { 856 if (! ($key =~ /template|recursive|children_only/)) { 857 $config{$section}{$key} = $defaults{'template_default'}{$key}; 858 } 859 } 860 861 # override with values from user-defined default template, if any 862 863 foreach my $key (keys %{$ini{'template_default'}}) { 864 if ($key =~ /template|recursive/) { 865 warn "ignored key '$key' from user-defined default template.\n"; 866 next; 867 } 868 if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value from user-defined default template.\n"; } 869 $config{$section}{$key} = $ini{'template_default'}{$key}; 870 } 871 } 872 873 # override with values from user-defined templates applied to this module, 874 # in the order they were specified (ie use_template = default,production,mytemplate) 875 if (defined $ini{$section}{'use_template'}) { 876 my @templates = split (' *, *',$ini{$section}{'use_template'}); 877 foreach my $rawtemplate (@templates) { 878 # strip trailing whitespace 879 $rawtemplate =~ s/\s+$//g; 880 881 my $template = 'template_'.$rawtemplate; 882 foreach my $key (keys %{$ini{$template}}) { 883 if ($key =~ /template|recursive/) { 884 warn "ignored key '$key' from '$rawtemplate' template.\n"; 885 next; 886 } 887 if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value from user-defined template $template.\n"; } 888 $config{$section}{$key} = $ini{$template}{$key}; 889 } 890 } 891 } 892 893 # override with any locally set values in the module itself 894 foreach my $key (keys %{$ini{$section}} ) { 895 if (! ($key =~ /template|recursive|skip_children/)) { 896 if ($args{'debug'}) { print "DEBUG: overriding $key on $section with value directly set in module.\n"; } 897 $config{$section}{$key} = $ini{$section}{$key}; 898 } 899 } 900 901 # make sure that true values are true and false values are false for any toggled values 902 foreach my $toggle(@toggles) { 903 foreach my $true (@istrue) { 904 if (defined $config{$section}{$toggle} && $config{$section}{$toggle} eq $true) { $config{$section}{$toggle} = 1; } 905 } 906 foreach my $false (@isfalse) { 907 if (defined $config{$section}{$toggle} && $config{$section}{$toggle} eq $false) { $config{$section}{$toggle} = 0; } 908 } 909 } 910 911 # section path is the section name, unless section path has been explicitly defined 912 if (defined ($ini{$section}{'path'})) { 913 $config{$section}{'path'} = $ini{$section}{'path'}; 914 } else { 915 $config{$section}{'path'} = $section; 916 } 917 918 # how 'bout some recursion? =) 919 if ($config{$section}{'zfs_recursion'} && $config{$section}{'zfs_recursion'} == 1 && $config{$section}{'autosnap'} == 1) { 920 warn "ignored autosnap configuration for '$section' because it's part of a zfs recursion.\n"; 921 $config{$section}{'autosnap'} = 0; 922 } 923 924 my $recursive = $ini{$section}{'recursive'} && grep( /^$ini{$section}{'recursive'}$/, @istrue ); 925 my $zfsRecursive = $ini{$section}{'recursive'} && $ini{$section}{'recursive'} =~ /zfs/i; 926 my $skipChildren = $ini{$section}{'skip_children'} && grep( /^$ini{$section}{'skip_children'}$/, @istrue ); 927 my @datasets; 928 if ($zfsRecursive || $recursive || $skipChildren) { 929 if ($zfsRecursive) { 930 $config{$section}{'zfs_recursion'} = 1; 931 } 932 933 @datasets = getchilddatasets($config{$section}{'path'}); 934 DATASETS: foreach my $dataset(@datasets) { 935 chomp $dataset; 936 937 if ($zfsRecursive) { 938 # don't try to take the snapshot ourself, recursive zfs snapshot will take care of that 939 $config{$dataset}{'autosnap'} = 0; 940 941 foreach my $key (keys %{$config{$section}} ) { 942 if (! ($key =~ /template|recursive|children_only|autosnap/)) { 943 if ($args{'debug'}) { print "DEBUG: recursively setting $key from $section to $dataset.\n"; } 944 $config{$dataset}{$key} = $config{$section}{$key}; 945 } 946 } 947 } else { 948 if ($skipChildren) { 949 if ($args{'debug'}) { print "DEBUG: ignoring $dataset.\n"; } 950 delete $config{$dataset}; 951 next DATASETS; 952 } 953 954 foreach my $key (keys %{$config{$section}} ) { 955 if (! ($key =~ /template|recursive|children_only/)) { 956 if ($args{'debug'}) { print "DEBUG: recursively setting $key from $section to $dataset.\n"; } 957 $config{$dataset}{$key} = $config{$section}{$key}; 958 } 959 } 960 } 961 962 $config{$dataset}{'path'} = $dataset; 963 $config{$dataset}{'initialized'} = 1; 964 } 965 } 966 967 968 969 } 970 971 return %config; 972} # end sub init 973 974#################################################################################### 975#################################################################################### 976#################################################################################### 977 978sub get_date { 979 my %datestamp; 980 ($datestamp{'sec'},$datestamp{'min'},$datestamp{'hour'},$datestamp{'mday'},$datestamp{'mon'},$datestamp{'year'},$datestamp{'wday'},$datestamp{'yday'},$datestamp{'isdst'}) = localtime(time); 981 $datestamp{'year'} += 1900; 982 $datestamp{'unix_time'} = (((((((($datestamp{'year'} - 1971) * 365) + $datestamp{'yday'}) * 24) + $datestamp{'hour'}) * 60) + $datestamp{'min'}) * 60) + $datestamp{'sec'}; 983 $datestamp{'sec'} = sprintf ("%02u", $datestamp{'sec'}); 984 $datestamp{'min'} = sprintf ("%02u", $datestamp{'min'}); 985 $datestamp{'hour'} = sprintf ("%02u", $datestamp{'hour'}); 986 $datestamp{'mday'} = sprintf ("%02u", $datestamp{'mday'}); 987 $datestamp{'mon'} = sprintf ("%02u", ($datestamp{'mon'} + 1)); 988 $datestamp{'noseconds'} = "$datestamp{'year'}-$datestamp{'mon'}-$datestamp{'mday'}_$datestamp{'hour'}:$datestamp{'min'}"; 989 $datestamp{'sortable'} = "$datestamp{'noseconds'}:$datestamp{'sec'}"; 990 return %datestamp; 991} 992 993#################################################################################### 994#################################################################################### 995#################################################################################### 996 997 998sub displaytime { 999 # take a time in seconds, return it in human readable form 1000 my ($elapsed) = @_; 1001 1002 my $days = int ($elapsed / 60 / 60 / 24); 1003 $elapsed -= $days * 60 * 60 * 24; 1004 my $hours = int ($elapsed / 60 / 60); 1005 $elapsed -= $hours * 60 * 60; 1006 my $minutes = int ($elapsed / 60); 1007 $elapsed -= $minutes * 60; 1008 my $seconds = int($elapsed); 1009 my $humanreadable; 1010 if ($days) { $humanreadable .= " $days" . 'd'; } 1011 if ($hours || $days) { $humanreadable .= " $hours" . 'h'; } 1012 if ($minutes || $hours || $days) { $humanreadable .= " $minutes" . 'm'; } 1013 $humanreadable .= " $seconds" . 's'; 1014 $humanreadable =~ s/^ //; 1015 return $humanreadable; 1016} 1017 1018 1019#################################################################################### 1020#################################################################################### 1021#################################################################################### 1022 1023sub check_zpool() { 1024 # check_zfs Nagios plugin for monitoring Sun ZFS zpools 1025 # Copyright (c) 2007 1026 # original Written by Nathan Butcher 1027 # adapted for use within Sanoid framework by Jim Salter (2014) 1028 # 1029 # Released under the GNU Public License 1030 # 1031 # This program is free software; you can redistribute it and/or modify 1032 # it under the terms of the GNU General Public License as published by 1033 # the Free Software Foundation; either version 2 of the License, or 1034 # (at your option) any later version. 1035 # 1036 # This program is distributed in the hope that it will be useful, 1037 # but WITHOUT ANY WARRANTY; without even the implied warranty of 1038 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1039 # GNU General Public License for more details. 1040 # 1041 # You should have received a copy of the GNU General Public License 1042 # along with this program; if not, write to the Free Software 1043 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 1044 1045 # Version: 0.9.2 1046 # Date : 24th July 2007 1047 # This plugin has tested on FreeBSD 7.0-CURRENT and Solaris 10 1048 # With a bit of fondling, it could be expanded to recognize other OSes in 1049 # future (e.g. if FUSE Linux gets off the ground) 1050 1051 # Verbose levels:- 1052 # 1 - Only alert us of zpool health and size stats 1053 # 2 - ...also alert us of failed devices when things go bad 1054 # 3 - ...alert us of the status of all devices regardless of health 1055 # 1056 # Usage: check_zfs <zpool> <verbose level 1-3> 1057 # Example: check_zfs zeepool 1 1058 # ZPOOL zeedata : ONLINE {Size:3.97G Used:183K Avail:3.97G Cap:0%} 1059 1060 1061 my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2); 1062 my $state="UNKNOWN"; 1063 my $msg="FAILURE"; 1064 1065 my $pool=shift; 1066 my $verbose=shift; 1067 1068 my $size=""; 1069 my $used=""; 1070 my $avail=""; 1071 my $cap=""; 1072 my $health=""; 1073 my $dmge=""; 1074 my $dedup=""; 1075 1076 if ($verbose < 1 || $verbose > 3) { 1077 print "Verbose levels range from 1-3\n"; 1078 exit $ERRORS{$state}; 1079 } 1080 1081 my $statcommand="$zpool list -o name,size,cap,health,free $pool"; 1082 1083 if (! open STAT, "$statcommand|") { 1084 print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n"); 1085 exit $ERRORS{$state}; 1086 } 1087 1088 # chuck the header line 1089 my $header = <STAT>; 1090 1091 # find and parse the line with values for the pool 1092 while(<STAT>) { 1093 chomp; 1094 if (/^${pool}\s+/) { 1095 my @row = split (/ +/); 1096 my $name; 1097 ($name, $size, $cap, $health, $avail) = @row; 1098 } 1099 } 1100 1101 # Tony: Debuging 1102 # print "Size: $size \t Used: $used \t Avai: $avail \t Cap: $cap \t Health: $health\n"; 1103 1104 close(STAT); 1105 1106 ## check for valid zpool list response from zpool 1107 if (! $health ) { 1108 $state = "CRITICAL"; 1109 $msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool; 1110 print $state, " ", $msg; 1111 exit ($ERRORS{$state}); 1112 } 1113 1114 ## determine health of zpool and subsequent error status 1115 if ($health eq "ONLINE" ) { 1116 $state = "OK"; 1117 } else { 1118 if ($health eq "DEGRADED") { 1119 $state = "WARNING"; 1120 } else { 1121 $state = "CRITICAL"; 1122 } 1123 } 1124 1125 ## get more detail on possible device failure 1126 ## flag to detect section of zpool status involving our zpool 1127 my $poolfind=0; 1128 1129 $statcommand="$zpool status $pool"; 1130 if (! open STAT, "$statcommand|") { 1131 $state = 'CRITICAL'; 1132 print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n"); 1133 exit $ERRORS{$state}; 1134 } 1135 1136 ## go through zfs status output to find zpool fses and devices 1137 while(<STAT>) { 1138 chomp; 1139 1140 if (/^\s${pool}/ && $poolfind==1) { 1141 $poolfind=2; 1142 next; 1143 } elsif ( $poolfind==1 ) { 1144 $poolfind=0; 1145 } 1146 1147 if (/NAME\s+STATE\s+READ\s+WRITE\s+CKSUM/) { 1148 $poolfind=1; 1149 } 1150 1151 if ( /^$/ ) { 1152 $poolfind=0; 1153 } 1154 1155 if ($poolfind == 2) { 1156 1157 ## special cases pertaining to full verbose 1158 if (/^\sspares/) { 1159 next unless $verbose == 3; 1160 $dmge=$dmge . "[SPARES]:- "; 1161 next; 1162 } 1163 if (/^\s{5}spare\s/) { 1164 next unless $verbose == 3; 1165 my ($sta) = /spare\s+(\S+)/; 1166 $dmge=$dmge . "[SPARE:${sta}]:- "; 1167 next; 1168 } 1169 if (/^\s{5}replacing\s/) { 1170 next unless $verbose == 3; 1171 my $perc; 1172 my ($sta) = /^\s+\S+\s+(\S+)/; 1173 if (/%/) { 1174 ($perc) = /([0-9]+%)/; 1175 } else { 1176 $perc = "working"; 1177 } 1178 $dmge=$dmge . "[REPLACING:${sta} (${perc})]:- "; 1179 next; 1180 } 1181 1182 ## other cases 1183 my ($dev, $sta, $read, $write, $cksum) = /^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/; 1184 1185 if (!defined($sta)) { 1186 # cache and logs are special and don't have a status 1187 next; 1188 } 1189 1190 ## pool online, not degraded thanks to dead/corrupted disk 1191 if ($state eq "OK" && $sta eq "UNAVAIL") { 1192 $state="WARNING"; 1193 1194 ## switching to verbose level 2 to explain weirdness 1195 if ($verbose == 1) { 1196 $verbose =2; 1197 } 1198 } 1199 1200 ## no display for verbose level 1 1201 next if ($verbose==1); 1202 ## don't display working devices for verbose level 2 1203 if ($verbose==2 && ($state eq "OK" || $sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE")) { 1204 # check for io/checksum errors 1205 1206 my @vdeverr = (); 1207 if ($read != 0) { push @vdeverr, "read" }; 1208 if ($write != 0) { push @vdeverr, "write" }; 1209 if ($cksum != 0) { push @vdeverr, "cksum" }; 1210 1211 if (scalar @vdeverr) { 1212 $dmge=$dmge . "(" . $dev . ":" . join(", ", @vdeverr) . " errors) "; 1213 if ($state eq "OK") { $state = "WARNING" }; 1214 } 1215 1216 next; 1217 } 1218 1219 ## show everything else 1220 if (/^\s{3}(\S+)/) { 1221 $dmge=$dmge . "<" . $dev . ":" . $sta . "> "; 1222 } elsif (/^\s{7}(\S+)/) { 1223 $dmge=$dmge . "(" . $dev . ":" . $sta . ") "; 1224 } else { 1225 $dmge=$dmge . $dev . ":" . $sta . " "; 1226 } 1227 } 1228 } 1229 1230 ## calling all goats! 1231 1232 $msg = sprintf "ZPOOL %s : %s {Size:%s Free:%s Cap:%s} %s\n", $pool, $health, $size, $avail, $cap, $dmge; 1233 $msg = "$state $msg"; 1234 return ($ERRORS{$state},$msg); 1235} # end check_zpool() 1236 1237sub check_capacity_limit { 1238 my $value = shift; 1239 1240 if (!defined($value) || $value !~ /^\d+\z/) { 1241 return undef; 1242 } 1243 1244 if ($value < 0 || $value > 100) { 1245 return undef; 1246 } 1247 1248 return 1 1249} 1250 1251sub check_zpool_capacity() { 1252 my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2); 1253 my $state="UNKNOWN"; 1254 my $msg="FAILURE"; 1255 1256 my $pool=shift; 1257 my $capacitylimitsref=shift; 1258 my %capacitylimits=%$capacitylimitsref; 1259 1260 my $statcommand="$zpool list -H -o cap $pool"; 1261 1262 if (! open STAT, "$statcommand|") { 1263 print ("$state '$statcommand' command returns no result!\n"); 1264 exit $ERRORS{$state}; 1265 } 1266 1267 my $line = <STAT>; 1268 close(STAT); 1269 1270 chomp $line; 1271 my @row = split(/ +/, $line); 1272 my $cap=$row[0]; 1273 1274 ## check for valid capacity value 1275 if ($cap !~ m/^[0-9]{1,3}%$/ ) { 1276 $state = "CRITICAL"; 1277 $msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool; 1278 print $state, " ", $msg; 1279 exit ($ERRORS{$state}); 1280 } 1281 1282 $state="OK"; 1283 1284 # check capacity 1285 my $capn = $cap; 1286 $capn =~ s/\D//g; 1287 1288 if (defined($capacitylimits{"warn"})) { 1289 if ($capn >= $capacitylimits{"warn"}) { 1290 $state = "WARNING"; 1291 } 1292 } 1293 1294 if (defined($capacitylimits{"crit"})) { 1295 if ($capn >= $capacitylimits{"crit"}) { 1296 $state = "CRITICAL"; 1297 } 1298 } 1299 1300 $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap; 1301 $msg = "$state $msg"; 1302 return ($ERRORS{$state},$msg); 1303} # end check_zpool_capacity() 1304 1305sub check_prune_defer { 1306 my ($config, $section) = @_; 1307 1308 my $limit = $config{$section}{"prune_defer"}; 1309 1310 if (!check_capacity_limit($limit)) { 1311 die "ERROR: invalid prune_defer limit!\n"; 1312 } 1313 1314 if ($limit eq 0) { 1315 return 0; 1316 } 1317 1318 my @parts = split /\//, $section, 2; 1319 my $pool = $parts[0]; 1320 1321 if (exists $capacitycache{$pool}) { 1322 } else { 1323 $capacitycache{$pool} = get_zpool_capacity($pool); 1324 } 1325 1326 if ($limit < $capacitycache{$pool}) { 1327 return 0; 1328 } 1329 1330 return 1; 1331} 1332 1333sub get_zpool_capacity { 1334 my $pool = shift; 1335 1336 my $statcommand="$zpool list -H -o cap $pool"; 1337 1338 if (! open STAT, "$statcommand|") { 1339 die "ERROR: '$statcommand' command returns no result!\n"; 1340 } 1341 1342 my $line = <STAT>; 1343 close(STAT); 1344 1345 chomp $line; 1346 my @row = split(/ +/, $line); 1347 my $cap=$row[0]; 1348 1349 ## check for valid capacity value 1350 if ($cap !~ m/^[0-9]{1,3}%$/ ) { 1351 die "ERROR: '$statcommand' command returned invalid capacity value ($cap)!\n"; 1352 } 1353 1354 $cap =~ s/\D//g; 1355 1356 return $cap; 1357} 1358 1359###################################################################################################### 1360###################################################################################################### 1361###################################################################################################### 1362###################################################################################################### 1363###################################################################################################### 1364 1365sub checklock { 1366 # take argument $lockname. 1367 # 1368 # read /var/run/$lockname.lock for a pid on first line and a mutex on second line. 1369 # 1370 # check process list to see if the pid from /var/run/$lockname.lock is still active with 1371 # the original mutex found in /var/run/$lockname.lock. 1372 # 1373 # return: 1374 # 0 if lock is present and valid for another process 1375 # 1 if no lock is present 1376 # 2 if lock is present, but we own the lock 1377 # 1378 # shorthand - any true return indicates we are clear to lock; a false return indicates 1379 # that somebody else already has the lock and therefore we cannot. 1380 # 1381 1382 my $lockname = shift; 1383 my $lockfile = "/var/run/$lockname.lock"; 1384 1385 if (! -e $lockfile) { 1386 # no lockfile 1387 return 1; 1388 } 1389 # make sure lockfile contains something 1390 if ( -z $lockfile) { 1391 # zero size lockfile, something is wrong 1392 die "ERROR: something is wrong! $lockfile is empty\n"; 1393 } 1394 1395 # lockfile exists. read pid and mutex from it. see if it's our pid. if not, see if 1396 # there's still a process running with that pid and with the same mutex. 1397 1398 open FH, "< $lockfile" or die "ERROR: unable to open $lockfile"; 1399 my @lock = <FH>; 1400 close FH; 1401 # if we didn't get exactly 2 items from the lock file there is a problem 1402 if (scalar(@lock) != 2) { 1403 die "ERROR: $lockfile is invalid.\n" 1404 } 1405 1406 my $lockmutex = pop(@lock); 1407 my $lockpid = pop(@lock); 1408 1409 chomp $lockmutex; 1410 chomp $lockpid; 1411 1412 if ($lockpid == $$) { 1413 # we own the lockfile. no need to check any further. 1414 return 2; 1415 } 1416 open PL, "$pscmd -p $lockpid -o args= |"; 1417 my @processlist = <PL>; 1418 close PL; 1419 1420 my $checkmutex = pop(@processlist); 1421 chomp $checkmutex; 1422 1423 if ($checkmutex eq $lockmutex) { 1424 # lock exists, is valid, is not owned by us - return false 1425 return 0; 1426 } else { 1427 # lock is present but not valid - remove and return true 1428 unlink $lockfile; 1429 return 1; 1430 } 1431} 1432 1433sub removelock { 1434 # take argument $lockname. 1435 # 1436 # make sure /var/run/$lockname.lock actually belongs to me (contains my pid and mutex) 1437 # and remove it if it does, die if it doesn't. 1438 1439 my $lockname = shift; 1440 my $lockfile = "/var/run/$lockname.lock"; 1441 1442 if (checklock($lockname) == 2) { 1443 unlink $lockfile; 1444 return; 1445 } elsif (checklock($lockname) == 1) { 1446 die "ERROR: No valid lockfile found - Did a rogue process or user update or delete it?\n"; 1447 } else { 1448 die "ERROR: A valid lockfile exists but does not belong to me! I refuse to remove it.\n"; 1449 } 1450} 1451 1452sub writelock { 1453 # take argument $lockname. 1454 # 1455 # write a lockfile to /var/run/$lockname.lock with first line 1456 # being my pid and second line being my mutex. 1457 1458 my $lockname = shift; 1459 my $lockfile = "/var/run/$lockname.lock"; 1460 1461 # die honorably rather than overwriting a valid, existing lock 1462 if (! checklock($lockname)) { 1463 die "ERROR: Valid lock already exists - I refuse to overwrite it. Committing seppuku now.\n"; 1464 } 1465 1466 my $pid = $$; 1467 1468 open PL, "$pscmd -p $$ -o args= |"; 1469 my @processlist = <PL>; 1470 close PL; 1471 1472 my $mutex = pop(@processlist); 1473 chomp $mutex; 1474 1475 open FH, "> $lockfile"; 1476 print FH "$pid\n"; 1477 print FH "$mutex\n"; 1478 close FH; 1479} 1480 1481sub iszfsbusy { 1482 # check to see if ZFS filesystem passed in as argument currently has a zfs send or zfs receive process referencing it. 1483 # return true if busy (currently being sent or received), return false if not. 1484 1485 my $fs = shift; 1486 # if (args{'debug'}) { print "DEBUG: checking to see if $fs on is already in zfs receive using $pscmd -Ao args= ...\n"; } 1487 1488 open PL, "$pscmd -Ao args= |"; 1489 my @processes = <PL>; 1490 close PL; 1491 1492 foreach my $process (@processes) { 1493 # if ($args{'debug'}) { print "DEBUG: checking process $process...\n"; } 1494 if ($process =~ /zfs *(send|receive|recv).*$fs/) { 1495 # there's already a zfs send/receive process for our target filesystem - return true 1496 # if ($args{'debug'}) { print "DEBUG: process $process matches target $fs!\n"; } 1497 return 1; 1498 } 1499 } 1500 1501 # no zfs receive processes for our target filesystem found - return false 1502 return 0; 1503} 1504 1505#######################################################################################################################3 1506#######################################################################################################################3 1507#######################################################################################################################3 1508 1509sub getchilddatasets { 1510 # for later, if we make sanoid itself support sudo use 1511 my $fs = shift; 1512 my $mysudocmd = ''; 1513 1514 my $getchildrencmd = "$mysudocmd $zfs list -o name -t filesystem,volume -Hr $fs |"; 1515 if ($args{'debug'}) { print "DEBUG: getting list of child datasets on $fs using $getchildrencmd...\n"; } 1516 open FH, $getchildrencmd; 1517 my @children = <FH>; 1518 close FH; 1519 1520 # parent dataset is the first element 1521 shift @children; 1522 1523 return @children; 1524} 1525 1526#######################################################################################################################3 1527#######################################################################################################################3 1528#######################################################################################################################3 1529 1530sub removecachedsnapshots { 1531 my $wait = shift; 1532 1533 if (not %pruned) { 1534 return; 1535 } 1536 1537 my $unlocked = checklock('sanoid_cacheupdate'); 1538 1539 if ($wait != 1 && not $unlocked) { 1540 if ($args{'verbose'}) { print "INFO: deferring cache update (snapshot removal) - valid cache update lock held by another sanoid process.\n"; } 1541 return; 1542 } 1543 1544 # wait until we can get a lock to do our cache changes 1545 while (not $unlocked) { 1546 if ($args{'verbose'}) { print "INFO: waiting for cache update lock held by another sanoid process.\n"; } 1547 sleep(10); 1548 $unlocked = checklock('sanoid_cacheupdate'); 1549 } 1550 1551 writelock('sanoid_cacheupdate'); 1552 1553 if ($args{'verbose'}) { 1554 print "INFO: removing destroyed snapshots from cache.\n"; 1555 } 1556 open FH, "< $cache"; 1557 my @rawsnaps = <FH>; 1558 close FH; 1559 1560 open FH, "> $cache" or die 'Could not write to $cache!\n'; 1561 foreach my $snapline ( @rawsnaps ) { 1562 my @columns = split("\t", $snapline); 1563 my $snap = $columns[0]; 1564 print FH $snapline unless ( exists($pruned{$snap}) ); 1565 } 1566 close FH; 1567 1568 removelock('sanoid_cacheupdate'); 1569 %snaps = getsnaps(\%config,$cacheTTL,$forcecacheupdate); 1570 1571 # clear hash 1572 undef %pruned; 1573} 1574 1575#######################################################################################################################3 1576#######################################################################################################################3 1577#######################################################################################################################3 1578 1579sub runscript { 1580 my $key=shift; 1581 my $dataset=shift; 1582 1583 my $timeout=$config{$dataset}{'script_timeout'}; 1584 1585 my $ret; 1586 eval { 1587 if ($timeout gt 0) { 1588 local $SIG{ALRM} = sub { die "alarm\n" }; 1589 alarm $timeout; 1590 } 1591 $ret = system($config{$dataset}{$key}); 1592 alarm 0; 1593 }; 1594 if ($@) { 1595 if ($@ eq "alarm\n") { 1596 warn "WARN: $key didn't finish in the allowed time!"; 1597 } else { 1598 warn "CRITICAL ERROR: $@"; 1599 } 1600 return -1; 1601 } else { 1602 if ($ret != 0) { 1603 warn "WARN: $key failed, $?"; 1604 } 1605 } 1606 1607 return $ret; 1608} 1609 1610#######################################################################################################################3 1611#######################################################################################################################3 1612#######################################################################################################################3 1613 1614sub convertTimePeriod { 1615 my $value=shift; 1616 my $period=shift; 1617 1618 if ($value =~ /^\d+[yY]$/) { 1619 $period = 60*60*24*31*365; 1620 chop $value; 1621 } elsif ($value =~ /^\d+[wW]$/) { 1622 $period = 60*60*24*7; 1623 chop $value; 1624 } elsif ($value =~ /^\d+[dD]$/) { 1625 $period = 60*60*24; 1626 chop $value; 1627 } elsif ($value =~ /^\d+[hH]$/) { 1628 $period = 60*60; 1629 chop $value; 1630 } elsif ($value =~ /^\d+[mM]$/) { 1631 $period = 60; 1632 chop $value; 1633 } elsif ($value =~ /^\d+[sS]$/) { 1634 $period = 1; 1635 chop $value; 1636 } elsif ($value =~ /^\d+$/) { 1637 # no unit, provided fallback period is used 1638 } else { 1639 # invalid value, return smallest valid value as fallback 1640 # (will trigger a warning message for monitoring for sure) 1641 return 1; 1642 } 1643 1644 return $value * $period; 1645} 1646 1647__END__ 1648 1649=head1 NAME 1650 1651sanoid - ZFS snapshot management and replication tool 1652 1653=head1 SYNOPSIS 1654 1655sanoid [options] 1656 1657Assumes --cron --verbose if no other arguments (other than configdir) are specified 1658 1659Options: 1660 1661 --configdir=DIR Specify a directory to find config file sanoid.conf 1662 1663 --cron Creates snapshots and purges expired snapshots 1664 --verbose Prints out additional information during a sanoid run 1665 --readonly Simulates creation/deletion of snapshots 1666 --quiet Suppresses non-error output 1667 --force-update Clears out sanoid's zfs snapshot cache 1668 1669 --monitor-health Reports on zpool "health", in a Nagios compatible format 1670 --monitor-capacity Reports on zpool capacity, in a Nagios compatible format 1671 --monitor-snapshots Reports on snapshot "health", in a Nagios compatible format 1672 --take-snapshots Creates snapshots as specified in sanoid.conf 1673 --prune-snapshots Purges expired snapshots as specified in sanoid.conf 1674 --force-prune Purges expired snapshots even if a send/recv is in progress 1675 1676 --help Prints this helptext 1677 --version Prints the version number 1678 --debug Prints out a lot of additional information during a sanoid run 1679