#!/usr/local/bin/perl -T

#------------------------------------------------------------------------------
# This is amavisd-nanny, a program to show the status
# and keep an eye on the health of child processes in amavisd-new.
#
# Author: Mark Martinec <Mark.Martinec@ijs.si>
#
# Copyright (c) 2004-2014, Mark Martinec
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of the Jozef Stefan Institute.

# (the above license is the 2-clause BSD license, also known as
#  a "Simplified BSD License", and pertains to this program only)
#
# Patches and problem reports are welcome.
# The latest version of this program is available at:
#   http://www.ijs.si/software/amavisd/
#------------------------------------------------------------------------------

use strict;
use re 'taint';
use warnings;
no warnings 'uninitialized';

use Errno qw(ESRCH ENOENT);
use POSIX qw(strftime);
use Time::HiRes ();
use BerkeleyDB;

use vars qw($VERSION);  $VERSION = 1.401;

my($idlettl) = 3*60*60; # idle children are sent a SIGTERM
                        #   after this many seconds
my($activettl) = 10*60; # stuck active children are sent a SIGTERM
                        #   after this many seconds

my($dbfile) = 'nanny.db';
my($db_home) =  # DB databases directory
  defined $ENV{'AMAVISD_DB_HOME'} ? $ENV{'AMAVISD_DB_HOME'} : '/var/amavis/db';
my($wakeuptime) = 2;  # -w, sleep time in seconds, may be fractional
my($repeatcount);     # -c, repeat count (when defined)

# fmt_age($t, $state_bar, $idling)
#
# Formats one process's age for display and returns the string
# "<days-or-space><h>:<mm>:<ss> <bar>".
#
#   $t          age in seconds (truncated to an integer here)
#   $state_bar  string of state characters accumulated by the main loop
#               for a busy process; ignored when $idling is true
#   $idling     true when the process is idle
#
# The bar is cut from a 35-character template (three groups of nine fill
# characters each followed by ':', then five more fill characters).  An idle
# process gets '.' fill and a bar of $t characters; a busy process gets '='
# fill and a bar as long as $state_bar.  Either way the bar never exceeds
# the template length.  For a busy process every non-space state character
# is then overlaid onto the '=' positions, so the ':' markers stay visible.
#
sub fmt_age($$$) {
  my($t,$state_bar,$idling) = @_;
  $t = int($t);
  my($char) = $idling ? '.' : '=';
  my($bar_l) = $idling ? $t : length($state_bar);
  my($bar) = substr( ($char x 9 . ':') x 3 . $char x 5, 0,$bar_l);
  if (!$idling) {
    # state history longer than the bar: keep its head, then its most
    # recent state character, then a '>' marking the overflow
    $state_bar = substr($state_bar,0,length($bar)-2) . substr($state_bar,-1,1)
                 . '>'  if length($state_bar) > length($bar);
    for my $j (0 .. length($bar)-1) {
      substr($bar,$j,1) = substr($state_bar,$j,1)
        if substr($bar,$j,1) eq '=' && substr($state_bar,$j,1) ne ' ';
    }
  }
  # split the age into days, hours, minutes and seconds
  my($s) = $t % 60;  $t = int($t/60);
  my($m) = $t % 60;  $t = int($t/60);
  my($h) = $t % 24;  $t = int($t/24);
  my($d) = $t;
  my($str) = sprintf("%d:%02d:%02d", $h,$m,$s);
  $str = (!$d ? " " : sprintf("%dd",$d)) . $str;
  $str . ' ' . $bar;
}

# main program starts here
  my($normal_termination) = 0;
  $SIG{INT} = sub { die "\n" };  # do the END code block when interrupted

  # command line: option/value pairs only; anything else prints the help
  while (@ARGV) {
    my($opt) = shift @ARGV;
    my($val) = shift @ARGV;
    if ($opt eq '-w' && $val =~ /^\+?\d+(?:\.\d*)?\z/) { $wakeuptime = $val }
    elsif ($opt eq '-c' && $val =~ /^[+-]?\d+\z/) { $repeatcount = $val }
    else {
      print <<'EOD';
States legend:
  A  accepted a connection
  b  begin with a protocol for accepting a request
  m  'MAIL FROM' smtp command started a new transaction in the same session
  d  transferring data from MTA to amavisd
  =  content checking just started
  G  generating and verifying unique mail_id
  D  decoding of mail parts
  V  virus scanning
  S  spam scanning
  P  pen pals database lookup and updates
  r  preparing results
  Q  quarantining and preparing/sending notifications
  F  forwarding mail to MTA
  .  content checking just finished
  sp space indicates idle (elapsed bar is showing dots)

EOD
      die "Usage: $0 [-c <count>] [-w <wait-interval>]\n";
    }
  }
  print <<'EOD';
process-id task-id     elapsed in    elapsed-bar (dots indicate idle)
           or state   idle or busy
EOD

  my(%waittime);  # associative array on pid
  my($env,$db,$old_db_inode,@dbstat,$cursor);
  # these two hashes persist across passes; everything else is per-pass
  my(%proc_last_timestamp, %proc_state_bars);
  for (;;) {
    last  if defined $repeatcount && $repeatcount <= 0;

    # (re)open the database when it appears or when its inode changes
    @dbstat = stat("$db_home/$dbfile");
    my($errn) = @dbstat ? 0 : 0+$!;
    $errn==0 || $errn==ENOENT  or die "stat $db_home/$dbfile: $!";
    if (defined $db && $old_db_inode != $dbstat[1]) {
      $db->db_close==0 or die "BDB db_close error: $BerkeleyDB::Error $!";
      undef $db;
      printf STDERR ("Reopening nanny database %s/%s\n", $db_home,$dbfile);
    }
    if (!defined $db && $errn==0) {
      $old_db_inode = $dbstat[1];
      $env = BerkeleyDB::Env->new(
        -Home => $db_home, -Flags => DB_INIT_CDB | DB_INIT_MPOOL,
        -ErrFile => \*STDOUT, -Verbose => 1);
      defined $env or die "BDB no env: $BerkeleyDB::Error $!";
      $db = BerkeleyDB::Hash->new(-Filename => $dbfile, -Env => $env);
      defined $db or die "BDB no dbN 1: $BerkeleyDB::Error $!";
    }
    $| = 0;
    my(%proc_timestamp, %proc_state, %proc_task_id);
    my($stat,$key,$val); my($now);
    my($eval_stat,$interrupt); $interrupt = '';
    if (!defined $db) {
      printf STDERR ("No nanny database %s/%s; waiting...\n",
                     $db_home,$dbfile);
    } else {
      $repeatcount--  if defined $repeatcount && $repeatcount > 0;
      print "\n";
      # while the read cursor is open, signals are only recorded; the
      # recorded signal is re-sent once the handlers have been restored
      my($h1) = sub { $interrupt = $_[0] };
      local(@SIG{qw(INT HUP TERM TSTP QUIT ALRM USR1 USR2)}) = ($h1) x 8;
      eval {
        $cursor = $db->db_cursor;  # obtain read lock
        defined $cursor or die "db_cursor error: $BerkeleyDB::Error";
        $now = Time::HiRes::time; local($1,$2);
        my($now_utc_iso8601) = strftime("%Y%m%dT%H%M%S",gmtime(int($now)));
        while ( ($stat=$cursor->c_get($key,$val,DB_NEXT)) == 0 ) {
          # a value is "<timestamp> <task-id>", where the task-id may be
          # prefixed by a single non-digit state character
          if ($val !~ /^(\d+(?:\.\d*)?) (.*?) *\z/s) {
            print STDERR "Bad db entry: $key, $val\n";
          } else {
            $proc_timestamp{$key} = $1; my($task_id) = $2;
            $proc_state{$key} = $1  if $task_id =~ s/^([^0-9])//;
            $proc_task_id{$key} = $task_id;
            if (!exists $proc_state_bars{$key}) {  # new process appeared
              $proc_last_timestamp{$key} = $proc_timestamp{$key};
              $proc_state_bars{$key} = '';
            }
          }
        }
        $stat==DB_NOTFOUND  or die "c_get: $BerkeleyDB::Error $!";
        $cursor->c_close==0 or die "c_close error: $BerkeleyDB::Error";
        $cursor = undef;
      };
      # Assign to the $eval_stat declared before this block.  Declaring a
      # new lexical here would hide the error from the check below, and
      # every database failure would be silently ignored.
      $eval_stat = $@;
      if (defined $db) {
        $cursor->c_close  if defined $cursor;  # unlock, ignoring status
        $cursor = undef;
      }
    }
    if ($interrupt ne '') { kill($interrupt,$$) }  # resignal, ignoring status
    elsif ($eval_stat ne '') { chomp($eval_stat); die "BDB $eval_stat\n" }

    # forget processes no longer listed in the database; both persistent
    # hashes are pruned so that neither grows without bound
    for my $key (keys(%proc_state_bars)) {  # remove old entries
      if (!exists($proc_timestamp{$key})) {
        delete $proc_state_bars{$key};
        delete $proc_last_timestamp{$key};
      }
    }

    my(@to_be_removed,@killed);
    for my $pid (sort {$a<=>$b} keys %proc_timestamp) {
      $proc_state{$pid} = ' '  if $proc_state{$pid} eq '';
      my($idling) = $proc_task_id{$pid} eq '' &&
                    $proc_state{$pid} =~ /^[. ]?\z/s;
      my($age) = $now - $proc_timestamp{$pid};
      if ($idling) { $proc_state_bars{$pid} = '' }
      else {
        # a changed timestamp starts a fresh state bar
        $proc_state_bars{$pid} = ''
          if $proc_timestamp{$pid} ne $proc_last_timestamp{$pid};
        my($len) = int($age+0.5);
        $len = 1  if $len < 1;
        my($str) = $proc_state_bars{$pid};
        if ($len > length($str)) {  # replicate last character to desired size
          my($ch) = $str eq '' ? '=' : substr($str,-1,1);
          $str .= $ch x ($len - length($str));
        }
        substr($str,$len-1,1) = $proc_state{$pid};
        $proc_state_bars{$pid} = $str;
      }
      $proc_last_timestamp{$pid} = $proc_timestamp{$pid};
      my($ttl) = $idling ? $idlettl : $activettl;
      my($n) = kill(0,$pid);  # test if the process is still there
      if ($n == 0 && $! != ESRCH) {
        die "Can't check the process $pid: $!";
      } elsif ($n == 0) {  # ESRCH means there is no such process
        printf STDERR ("PID %s: %-11s went away %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
        push(@to_be_removed, $pid);
      } elsif ($age <= $ttl) {  # all ok
        printf STDERR ("PID %s: %-11s %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
      } else {  # send a SIGTERM
        printf STDERR ("PID %s: %-11s terminated %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
        if (kill('TERM',$pid) || $! == ESRCH) { push(@killed,$pid) }
        else { warn "Can't SIGTERM $pid: $!" }
      }
    }

    # drop database records of processes that have gone away
    if (@to_be_removed) {
      local($SIG{'INT'}) = 'IGNORE';
      $cursor = $db->db_cursor(DB_WRITECURSOR);  # obtain a write lock
      defined $cursor or die "BDB db_cursor error: $BerkeleyDB::Error";
      for my $key (@to_be_removed) {
        my($val); my($stat) = $cursor->c_get($key,$val,DB_SET);
        $stat==0 || $stat==DB_NOTFOUND
          or die "BDB c_get: $BerkeleyDB::Error, $!.";
        if ($stat==0) {  # remove existing entry
          $cursor->c_del==0 or die "BDB c_del: $BerkeleyDB::Error, $!.";
        }
      }
      $cursor->c_close==0 or die "BDB c_close error: $BerkeleyDB::Error";
      $cursor = undef;
    }

    # wait for the processes sent a SIGTERM to exit; a process still
    # present after more than 30 checks is sent a SIGKILL
    my($delay) = 1;  # seconds
    while (@killed) {
      Time::HiRes::sleep($delay); $delay = 2;
      for my $pid (@killed) {
        $waittime{$pid}++;
        printf STDERR ("PID %s: sending SIGKILL in %d s\n",
                       $pid, 30-$waittime{$pid});
        if ($waittime{$pid} > 30) {  # send a SIGKILL
          kill('KILL',$pid) or $! == ESRCH or warn "Can't SIGKILL $pid: $!";
          $waittime{$pid} = 0;
        } elsif (kill(0,$pid)) {  # process is still there
        } elsif ($! != ESRCH) {   # problem?
          warn "Can't check process $pid: $!";
        } else {  # no longer around
          # the format has two conversions, so the task-id (or state)
          # is passed as well, matching the other per-PID messages
          printf STDERR ("PID %s: %-11s successfully killed\n",
                         $pid, $proc_task_id{$pid} || $proc_state{$pid});
          delete($waittime{$pid});
          $pid = undef;  # $pid aliases the @killed element; filtered below
        }
      }
      @killed = grep {defined} @killed;
      printf STDERR ("Waiting for the process to terminate: %s\n",
                     join(', ',@killed))  if @killed;
    }
    $| = 1;
    last  if defined $repeatcount && $repeatcount <= 0;
    Time::HiRes::sleep($wakeuptime)  if $wakeuptime > 0;
  } # forever
  $normal_termination = 1;

# close the cursor and the database on any exit path
END {
  if (defined $db) {
    $cursor->c_close  if defined $cursor;  # ignoring status
    $db->db_close==0 or die "BDB db_close error: $BerkeleyDB::Error $!";
  }
  print STDERR "exited\n"  if !$normal_termination;
}