#!/usr/bin/perl -w
#
# matapicos v2.2 - Vins Vilaplana <vins at terra dot es>
#
# Translated by Humberto Rossetti Baptista <humberto at baptista dot name>
# slight adjustments and code cleanup too :-)
#
# Purpose:
#   Remove spikes from an RRDtool database.  The database is dumped to XML,
#   every data row that contains an outlier is replaced by the previous data
#   row, and the result is restored over the original file (which is kept as
#   "<name>.old").
#
#   Two outlier tests are available:
#     * bin-based (default): values are grouped per data source by their
#       decimal exponent; a value whose exponent accounts for less than
#       $LIMIT percent of that data source's values is a spike.
#     * value-based (-t): a value greater than $THRESH is a spike.
#
# Changes:
#  - 2007/02/27 - knobi@knobisoft.de - Various changes:
#       Add value-based chopping (-t value)
#       Add analysis only mode (-a)
#       Control verbose/debug output using -d and -v
#       Add -h help option
#       Move to using the Getopt::Std package
#       Use "strict" mode
#  - 2006/01/12 - vins@terra.es - "$!" takes other values in some perl
#       interpreters (e.g. FreeBSD 4.11-R). Thanks to Atle Veka!
#

use strict;
use warnings;
use File::Temp qw(tempdir);
use Getopt::Std;

my %opt = ();
getopts("adhl:t:v", \%opt);

my $DEBUG   = 0;
my $ANALYZE = 0;
my $VERBOSE = 0;

# Limit % for cutting. Any peak representing less than this % will be cut.
my $LIMIT = 0.6;    # this really is a percentage: 0.6 means 0.6% (not 0.006%)

# Threshold for cutting. Values above it will be chopped if "-t" is used.
my $THRESH = 1.01e300;    # just a very high default

# Flag: 1 = bin-based chopping, 0 = threshold-based chopping.
my $BINNING = 1;

# A row is only processed if it holds at least one numeric (non-NaN) value.
# Whitespace around the number is optional so that dumps without padding
# inside <v>...</v> are recognised as well.
my $ROW_HAS_VALUE_RE = qr/<v>\s*\d\.\d+e.\d+\s*<\/v>/;

# Captures a value ($1) and the digits of its exponent ($2).  Neither the
# sign of the mantissa nor the sign of the exponent is captured, so values
# are binned and compared without them.
my $VALUE_RE = qr/(\d\.\d+e.(\d+))/;

if ($opt{h} || !@ARGV) {
    print "REMOVESPIKES: Remove spikes from RRDtool databases.\n\n";
    print "Usage:\n";
    print "$0 -d -a [-l number] [-t maxval] name_of_database\n\n";
    print "Where:\n";
    print " -d enables debug messages\n";
    print " -a runs only the analysis phase of the script\n";
    print " -h prints this message\n";
    print " -l sets the % limit of spikes bin-based chopping (default: $LIMIT)\n";
    print " -t sets the value above which records are chopped. Disabled by default.\n";
    print " Enabling value-based chopping will disable bin-based chopping\n\n";
    print " -v Verbose mode. Shows some information\n";
    print " name_of_database is the rrd file to be treated.\n";
    exit;
}

if ($opt{d}) {
    $DEBUG   = 1;
    $VERBOSE = 1;
    print "Enabling DEBUG mode\n";
}

if ($opt{a}) {
    $ANALYZE = 1;
    print "Running in ANALYZE mode\n";
}

if ($opt{v}) {
    $VERBOSE = 1;
    print "Running in VERBOSE mode\n";
}

# "defined" rather than truthiness, so that an explicit 0 is honoured.
if (defined $opt{l}) {
    $LIMIT = $opt{l};
    print "Limit for bin-based chopping set to $LIMIT\n" if $VERBOSE;
}

if (defined $opt{t}) {
    $THRESH  = $opt{t};
    $BINNING = 0;
    printf("Max Value set to %g, disabling bin-based chopping\n", $THRESH) if $VERBOSE;
}

my $rrd_file = $ARGV[0];

# Work inside a private temporary directory: several users can run the
# script simultaneously, nobody else can pre-create our file names, and the
# directory is removed automatically on every exit path (normal end,
# analysis-only exit, or die).
my $tempdir  = tempdir('matapicos.XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $tempfile = "$tempdir/matapicos.dump";
my $xmlfile  = "$tempfile.xml";

###########################################################################
# Helpers

# Split one line of the XML dump into its parts.
# Returns (prefix, row, cells) where prefix is the text before "<row>",
# row is the text from "<row>" to the end of the line, and cells is an array
# reference holding one string per data source.  Returns an empty list when
# the line is not a data row containing at least one number.
sub parse_row {
    my ($line) = @_;

    return unless $line =~ $ROW_HAS_VALUE_RE;
    return unless $line =~ /^(.*?)(<row>.*)$/;
    my ($prefix, $row) = ($1, $2);

    my @cells = split /<\/v>/, $row;
    pop @cells;    # what follows the last </v> is not a data source

    return ($prefix, $row, \@cells);
}

# Build the two-digit (zero padded) key used to identify a data source.
sub ds_key {
    my ($index) = @_;
    return sprintf '%02d', $index;
}

# Tell whether any data source in a row holds a spike.
# $cells is the array reference produced by parse_row(); $percent_of maps
# "DS:exponent" to the percentage of that DS's values having that exponent.
# Returns 1 if the row must be chopped, 0 otherwise.
sub row_has_spike {
    my ($cells, $percent_of) = @_;

    for my $index (0 .. $#{$cells}) {
        next unless $cells->[$index] =~ $VALUE_RE;    # skips NaN
        my ($value, $exponent) = ($1, $2);

        if ($BINNING) {
            my $bin = ds_key($index) . ":$exponent";
            return 1 if $percent_of->{$bin} < $LIMIT;
        }
        else {
            return 1 if $value > $THRESH;
        }
    }

    return 0;
}

###########################################################################
# Dump the rrd database to the temporary file (as XML).
# rrdtool is run without a shell so that the file name is never interpreted.
open(my $dump_fh, '-|', 'rrdtool', 'dump', $rrd_file)
    or die "$0: Cannot run rrdtool dump on $rrd_file: $!\n";
open(my $tmp_fh, '>', $tempfile)
    or die "$0: Cannot open $tempfile for writing: $!\n";
while (my $line = <$dump_fh>) {
    print {$tmp_fh} $line;
}
close($tmp_fh)
    or die "$0: Cannot write $tempfile: $!\n";
close($dump_fh)
    or die "$0: rrdtool dump of $rrd_file failed (status $?)\n";

###########################################################################
# Scan the XML dump, counting per data source how often each exponent occurs.
my %count_of;    # "DS:exponent" => number of values
my %total_of;    # "DS"          => number of numeric values

open(my $in_fh, '<', $tempfile)
    or die "$0: Cannot open file $tempfile:\n $!\n";

while (my $line = <$in_fh>) {
    chomp $line;
    my (undef, undef, $cells) = parse_row($line) or next;

    for my $index (0 .. $#{$cells}) {
        next unless $cells->[$index] =~ $VALUE_RE;    # skips NaN
        my $ds = ds_key($index);
        $count_of{"$ds:$2"}++;
        $total_of{$ds}++;
    }
}

close($in_fh);

###########################################################################
# Turn the counts into the percentage each exponent represents within its DS.
my %percent_of;
for my $bin (keys %count_of) {
    my ($ds) = $bin =~ /^(\d+):/;
    $percent_of{$bin} = (100 * $count_of{$bin}) / $total_of{$ds};
}

if ($DEBUG || $ANALYZE) {
    # Dump the percentages for debugging / analysis purposes.
    print "--percentages--\n";
    for my $bin (sort keys %count_of) {
        my ($ds) = $bin =~ /^(\d+):/;
        print $bin . "--" . $count_of{$bin} . "/" . $total_of{$ds} . " = " . $percent_of{$bin} . "%\n";
    }
    print "---------------\n\n";
    exit if $ANALYZE;
}

###########################################################################
# Read the XML dump again and write a new one with the spikes removed.
open($in_fh, '<', $tempfile)
    or die "$0: Cannot open $tempfile for reading: $!\n";
open(my $out_fh, '>', $xmlfile)
    or die "$0: Cannot open $xmlfile for writing: $!\n";

my $previous_row = '';    # last row written; empty until the first data row
my $chopped      = 0;     # number of rows replaced

while (my $line = <$in_fh>) {
    chomp $line;

    if (my ($prefix, $row, $cells) = parse_row($line)) {
        # The very first data row has no predecessor and is never chopped.
        if ($previous_row ne '' && row_has_spike($cells, \%percent_of)) {
            print "Chopping peak at $prefix\n" if $DEBUG;
            $row  = $previous_row;       # repeat the previous row's values
            $line = $prefix . $row;      # but keep this row's timestamp
            $chopped++;
        }
        $previous_row = $row;
    }

    print {$out_fh} "$line\n";
}

close($in_fh);
close($out_fh)
    or die "$0: Cannot write $xmlfile: $!\n";

###########################################################################
# Move the new data into the place of the original database; the original
# is kept as "<name>.old".  Nothing is touched when no spike was found.
if ($chopped == 0) {
    print "No peaks found.!\n" if $VERBOSE;
}
else {
    my $backup = "$rrd_file.old";

    rename($rrd_file, $backup)
        or die "$0: Cannot rename $rrd_file to $backup: $!\n";

    if (system('rrdtool', 'restore', $xmlfile, $rrd_file) != 0) {
        my $status = $?;

        # Do not leave the user without a database: put the original back
        # unless rrdtool already created something under that name.
        rename($backup, $rrd_file) unless -e $rrd_file;

        die "$0: Unable to execute the rrdtool restore on $rrd_file (status $status)\n";
    }
}

# The temporary directory and the files in it are removed automatically
# when the script exits (tempdir CLEANUP => 1).