1#!/usr/bin/perl -w
2#
3# matapicos v2.2 - Vins Vilaplana <vins at terra dot es)
4#
5# Translated by Humberto Rossetti Baptista <humberto at baptista dot name)
6# slight adjustments and code cleanup too :-)
7#
8# Changes:
9#  - 2007/02/27 - knobi@knobisoft.de - Various changes:
10#                    Add value-based chopping (-t value)
11#                    Add analysis only mode (-a)
12#                    Controll verbose/debug output using -d and -v
13#                    Add -h help option
14#                    Move to using the Getopt::Std package
15#                    Use "strict" mode
16#  - 2006/01/12 - vins@terra.es - "$!" takes other values in some perl interpreters (e.g. FreeBSD 4.11-R). Thanks to Atle Veka!
17#
18
19use strict;
20use Getopt::Std;
21my %opt=();
22getopts("adhl:t:v",\%opt);
23
24my (@dump,%exp,@cols,@dbak,%tot,%por);
25my ($linea,$linbak,$lino,$cdo,$tresto,$tstamp,$a,$b,$c,$cont);
26my $DEBUG = 0;
27my $ANALYZE = 0;
28my $VERBOSE = 0;
29
30# Limit % for cutting. Any peak representing less than this % will be cut
31my $LIMIT=0.6; # obs this is really %, so 0.6 means 0.6% (and not 0.006%!)
32
33# Threshhold for cutting. Values above it will be chopped if "-t" is used
34my $THRESH=1.01e300; # Just set it to a very high default
35
36# Flag to indicate whether we are doing "binning" or threshold based chopping
37my $BINNING=1;
38
39if ($opt{h} || ($#ARGV < 0)) {
40   print "REMOVESPIKES: Remove spikes from RRDtool databases.\n\n";
41   print "Usage:\n";
42   print "$0 -d -a [-l number] [-t maxval] name_of_database\n\n";
43   print "Where:\n";
44   print "  -d enables debug messages\n";
45   print "  -a runs only the analysis phase of the script\n";
46   print "  -h prints this message\n";
47   print "  -l sets the % limit of spikes bin-based chopping (default: $LIMIT)\n";
48   print "  -t sets the value above which records are chopped. Disabled by default.\n";
49   print "     Enabling value-based chopping will disable bin-based chopping\n\n";
50   print "  -v Verbose mode. Shows some information\n";
51   print "  name_of_database is the rrd file to be treated.\n";
52   exit;
53}
54
55if ($opt{d}) {
56   $DEBUG = 1;
57   $VERBOSE = 1;
58   print "Enabling DEBUG mode\n";
59}
60
61if ($opt{a}) {
62   $ANALYZE = 1;
63   print "Running in ANALYZE mode\n";
64}
65
66if ($opt{v}) {
67   $VERBOSE = 1;
68   print "Running in VERBOSE mode\n";
69}
70
71if ($opt{l}) {
72   $LIMIT=$opt{l};
73   print "Limit for bin-based chopping set to $LIMIT\n" if $VERBOSE;
74}
75
76if ($opt{t}) {
77   $THRESH=$opt{t};
78   $BINNING=0;
79   printf("Max Value set to %g, disabling bin-based chopping\n",$THRESH) if $VERBOSE;
80}
81
82# temporary filename:
83# safer this way, so many users can run this script simultaneusly
84my $tempfile="/tmp/matapicos.dump.$$";
85
86###########################################################################
87# Dump the rrd database to the temporary file (as XML)
88system("rrdtool dump $ARGV[0] > $tempfile") == 0 or die "\n";
89
90# Scan the XML dump checking the variations and exponent deviations
91open(FICH,"<$tempfile")
92   || die "$0: Cannot open file $tempfile:\n $! - $@";
93
94while (<FICH>) {
95  chomp;
96  $linea=$_;
97  $cdo=0;
98  if ($linea=~/^(.*)<row>/) { $tstamp=$1; }
99  if ($linea=~/(<row>.*)$/) { $tresto=$1; }
100  if (/<v>\s\d\.\d+e.(\d+)\s<\/v>/) {
101    @dump = split(/<\/v>/, $tresto);
102    for ($lino=0; $lino<=$#dump-1; $lino++) {   # scans DS's within each row
103      if ( $dump[$lino]=~/\d\.\d+e.(\d+)\s/ ) { # make sure it is a number (and not NaN)
104        $a=substr("0$lino",-2).":".$1;
105        $exp{$a}++;                             # store exponents
106        $tot{substr("0$lino",-2)}++;            # and keep a per DS total
107      }
108    }
109  }
110}
111
112close FICH;
113
114###########################################################################
115# Scan the hash to get the percentage variation of each value
116foreach $lino (sort keys %exp) {
117  ($a)=$lino=~/^(\d+)\:/;
118  $por{$lino}=(100*$exp{$lino})/$tot{$a};
119}
120
121if ($DEBUG || $ANALYZE) {
122   # Dumps percentages for debugging purposes
123   print "--percentages--\n";
124   foreach $lino (sort keys %exp) {
125     print $lino."--".$exp{$lino}."/";
126     ($a)=$lino=~/^(\d+)\:/;
127     print $tot{$a}." = ".$por{$lino}."%\n";
128   }
129   print "---------------\n\n";
130   exit if $ANALYZE;
131}
132
133
134###########################################################################
135# Open the XML dump, and create a new one removing the spikes:
136open(FICH,"<$tempfile") ||
137   die "$0: Cannot open $tempfile for reading: $!-$@";
138open(FSAL,">$tempfile.xml")  ||
139   die "$0: Cannot open $tempfile.xml for writing: $!-$@";
140
141$linbak='';
142$cont=0;
143while (<FICH>) {
144  chomp;
145  $linea=$_;
146  $cdo=0;
147  if ($linea=~/^(.*)<row>/) { $tstamp=$1; }     # Grab timestamp
148  if ($linea=~/(<row>.*)$/) { $tresto=$1; }     # grab rest-of-line :-)
149  if (/<v>\s\d\.\d+e.(\d+)\s<\/v>/) {           # are there DS's?
150    @dump=split(/<\/v>/, $tresto);              # split them
151    if ($linbak ne '') {
152      for ($lino=0;$lino<=$#dump-1;$lino++) {   # for each DS:
153        if ($dump[$lino]=~/\d\.\d+e.(\d+)\s/) { # grab number (and not a NaN)
154	  $c=$&;
155          $a=$1*1;                              # and exponent
156          $b=substr("0$lino",-2).":$1";         # calculate the max percentage of this DS
157          if (($BINNING &&                      #
158		($por{$b}< $LIMIT)) ||          # if this line represents less than $LIMIT
159	      (!$BINNING &&			#
160		($c > $THRESH))) {              # or the value is larger then $THRESH
161            $linea=$tstamp.$linbak;             # we dump it
162            $cdo=1;
163            $tresto=$linbak;
164          }
165        }
166      }
167    }
168    $linbak=$tresto;
169    if ($cdo==1) {
170      print "Chopping peak at $tstamp\n" if $DEBUG;
171      $cont++; }
172  }
173
174  print FSAL "$linea\n";
175}
176close FICH;
177close FSAL;
178
179###########################################################################
180# Cleanup and move new file to the place of original one
181# and original one gets backed up.
182if ($cont == 0 && $VERBOSE) { print "No peaks found.!\n"; }
183else {
184  rename($ARGV[0],"$ARGV[0].old");
185  $lino="rrdtool restore $tempfile.xml $ARGV[0]";
186  system($lino);
187  die "$0: Unable to execute the rrdtool restore on $ARGV[0] - $! - $@\n" if $? != 0;
188}
189
190# cleans up the files created
191unlink("$tempfile");
192unlink("$tempfile.xml");
193
194