1#!/usr/local/bin/perl -T
2
3#------------------------------------------------------------------------------
4# This is amavisd-nanny, a program to show the status
5# and keep an eye on the health of child processes in amavisd-new.
6#
7# Author: Mark Martinec <Mark.Martinec@ijs.si>
8#
9# Copyright (c) 2004-2014, Mark Martinec
10# All rights reserved.
11#
12# Redistribution and use in source and binary forms, with or without
13# modification, are permitted provided that the following conditions
14# are met:
15# 1. Redistributions of source code must retain the above copyright notice,
16#    this list of conditions and the following disclaimer.
17# 2. Redistributions in binary form must reproduce the above copyright notice,
18#    this list of conditions and the following disclaimer in the documentation
19#    and/or other materials provided with the distribution.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
25# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31# POSSIBILITY OF SUCH DAMAGE.
32#
33# The views and conclusions contained in the software and documentation are
34# those of the authors and should not be interpreted as representing official
35# policies, either expressed or implied, of the Jozef Stefan Institute.
36
37# (the above license is the 2-clause BSD license, also known as
38#  a "Simplified BSD License", and pertains to this program only)
39#
40# Patches and problem reports are welcome.
41# The latest version of this program is available at:
42#   http://www.ijs.si/software/amavisd/
43#------------------------------------------------------------------------------
44
45use strict;
46use re 'taint';
47use warnings;
48no warnings 'uninitialized';
49
50use Errno qw(ESRCH ENOENT);
51use POSIX qw(strftime);
52use Time::HiRes ();
53use BerkeleyDB;
54
55use vars qw($VERSION);  $VERSION = 1.401;
56
# Time-to-live limits in seconds; a child older than its limit gets a SIGTERM.
my $idlettl   = 3 * 60 * 60;  # limit applied to idle children
my $activettl = 10 * 60;      # limit applied to active (stuck) children

my $dbfile  = 'nanny.db';        # file name of the nanny database
my $db_home = '/var/amavis/db';  # DB databases directory (default)
# the environment variable, when set, overrides the default directory
$db_home = $ENV{'AMAVISD_DB_HOME'}  if defined $ENV{'AMAVISD_DB_HOME'};

my $wakeuptime = 2;  # -w, sleep time in seconds, may be fractional
my $repeatcount;     # -c, repeat count (when defined)
67
# Format the age of a process as "[Nd]H:MM:SS <bar>".
#   $t         - elapsed time in seconds (any fraction is discarded)
#   $state_bar - string of state characters, used only when not idling
#   $idling    - true when the process is idle
# The bar is cut from a 35-character ruler which has a ':' at every tenth
# position. An idle bar consists of dots and is $t characters long (at most
# 35). A busy bar is as long as $state_bar (at most 35) and shows the
# non-blank state characters in place of the '=' filler; a $state_bar that
# does not fit is shortened to end in its last character followed by '>'.
# The day count is replaced by two spaces when it is zero.
sub fmt_age($$$) {
  my($t,$state_bar,$idling) = @_;
  $t = int($t);

  # build the ruler and cut the bar to size
  my $fill   = $idling ? '.' : '=';
  my $decade = $fill x 9 . ':';
  my $ruler  = $decade x 3 . $fill x 5;
  my $bar    = substr($ruler, 0, $idling ? $t : length($state_bar));

  if (!$idling) {
    my $width = length($bar);
    if (length($state_bar) > $width) {  # too long, keep the head and the tail
      $state_bar = substr($state_bar, 0, $width-2)
                   . substr($state_bar, -1, 1) . '>';
    }
    # overlay the state characters on the filler; ':' marks stay in place
    my @state = split(//, $state_bar);
    my @cell  = split(//, $bar);
    for my $j (0 .. $#cell) {
      next  if $cell[$j] ne '=';
      next  if $state[$j] eq ' ';
      $cell[$j] = $state[$j];
    }
    $bar = join('', @cell);
  }

  # peel off seconds, minutes and hours; what remains in $t are whole days
  my @parts;
  for my $radix (60, 60, 24) {
    push(@parts, $t % $radix);
    $t = int($t / $radix);
  }
  my($sec,$min,$hour) = @parts;
  my $days  = $t ? sprintf("%dd", $t) : "  ";
  my $clock = sprintf("%d:%02d:%02d", $hour, $min, $sec);
  return $days . $clock . ' ' . $bar;
}
90
# main program starts here
  # stays false until the main loop ends on its own; consulted by END
  my $normal_termination = 0;
  # turn an interrupt into an exception so that the END code block is run
  $SIG{INT} = sub { die "\n" };

  # Command line: options always come in pairs of an option and its value.
  # Anything not recognized prints the states legend and a usage message.
  while (@ARGV) {
    my($opt,$val) = splice(@ARGV, 0, 2);
    if ($opt eq '-w' && $val =~ /^\+?\d+(?:\.\d*)?\z/) {
      $wakeuptime = $val;   # sleep time between rounds, may be fractional
      next;
    }
    if ($opt eq '-c' && $val =~ /^[+-]?\d+\z/) {
      $repeatcount = $val;  # number of rounds to run
      next;
    }
    my $legend = <<'EOD';
States legend:
  A  accepted a connection
  b  begin with a protocol for accepting a request
  m  'MAIL FROM' smtp command started a new transaction in the same session
  d  transferring data from MTA to amavisd
  =  content checking just started
  G  generating and verifying unique mail_id
  D  decoding of mail parts
  V  virus scanning
  S  spam scanning
  P  pen pals database lookup and updates
  r  preparing results
  Q  quarantining and preparing/sending notifications
  F  forwarding mail to MTA
  .  content checking just finished
  sp space indicates idle (elapsed bar is showing dots)

EOD
    print $legend;
    die "Usage: $0 [-c <count>] [-w <wait-interval>]\n";
  }

  # column headings for the status lines
  my $heading = <<'EOD';
process-id task-id     elapsed in    elapsed-bar (dots indicate idle)
           or state   idle or busy
EOD
  print $heading;
126
127  my(%waittime); # associative array on pid
128  my($env,$db,$old_db_inode,@dbstat,$cursor);
129  my(%proc_last_timestamp, %proc_state_bars);
130  for (;;) {
131    last  if defined $repeatcount && $repeatcount <= 0;
132    @dbstat = stat("$db_home/$dbfile");
133    my($errn) = @dbstat ? 0 : 0+$!;
134    $errn==0 || $errn==ENOENT  or die "stat $db_home/$dbfile: $!";
135    if (defined $db && $old_db_inode != $dbstat[1]) {
136      $db->db_close==0 or die "BDB db_close error: $BerkeleyDB::Error $!";
137      undef $db;
138      printf STDERR ("Reopening nanny database %s/%s\n", $db_home,$dbfile);
139    }
140    if (!defined $db && $errn==0) {
141      $old_db_inode = $dbstat[1];
142      $env = BerkeleyDB::Env->new(
143        -Home => $db_home, -Flags => DB_INIT_CDB | DB_INIT_MPOOL,
144        -ErrFile => \*STDOUT, -Verbose => 1);
145      defined $env or die "BDB no env: $BerkeleyDB::Error $!";
146      $db = BerkeleyDB::Hash->new(-Filename => $dbfile, -Env => $env);
147      defined $db or die "BDB no dbN 1: $BerkeleyDB::Error $!";
148    }
149    $| = 0;
150    my(%proc_timestamp, %proc_state, %proc_task_id);
151    my($stat,$key,$val); my($now);
152    my($eval_stat,$interrupt); $interrupt = '';
153    if (!defined $db) {
154      printf STDERR ("No nanny database %s/%s; waiting...\n",
155                     $db_home,$dbfile);
156    } else {
157      $repeatcount--  if defined $repeatcount && $repeatcount > 0;
158      print "\n";
159      my($h1) = sub { $interrupt = $_[0] };
160      local(@SIG{qw(INT HUP TERM TSTP QUIT ALRM USR1 USR2)}) = ($h1) x 8;
161      eval {
162        $cursor = $db->db_cursor;  # obtain read lock
163        defined $cursor or die "db_cursor error: $BerkeleyDB::Error";
164        $now = Time::HiRes::time; local($1,$2);
165        my($now_utc_iso8601) = strftime("%Y%m%dT%H%M%S",gmtime(int($now)));
166        while ( ($stat=$cursor->c_get($key,$val,DB_NEXT)) == 0 ) {
167          if ($val !~ /^(\d+(?:\.\d*)?) (.*?) *\z/s) {
168            print STDERR "Bad db entry: $key, $val\n";
169          } else {
170            $proc_timestamp{$key} = $1; my($task_id) = $2;
171            $proc_state{$key} = $1  if $task_id =~ s/^([^0-9])//;
172            $proc_task_id{$key} = $task_id;
173            if (!exists $proc_state_bars{$key}) {  # new process appeared
174              $proc_last_timestamp{$key} = $proc_timestamp{$key};
175              $proc_state_bars{$key} = '';
176            }
177          }
178        }
179        $stat==DB_NOTFOUND  or die "c_get: $BerkeleyDB::Error $!";
180        $cursor->c_close==0 or die "c_close error: $BerkeleyDB::Error";
181        $cursor = undef;
182      };
183      my($eval_stat) = $@;
184      if (defined $db) {
185        $cursor->c_close  if defined $cursor;  # unlock, ignoring status
186        $cursor = undef;
187      }
188    }
189    if ($interrupt ne '') { kill($interrupt,$$) }  # resignal, ignoring status
190    elsif ($eval_stat ne '') { chomp($eval_stat); die "BDB $eval_stat\n" }
191    for my $key (keys(%proc_state_bars)) {  # remove old entries
192      if (!exists($proc_timestamp{$key})) {
193        delete $proc_timestamp{$key};
194        delete $proc_task_id{$key};
195        delete $proc_state_bars{$key};
196      }
197    }
198    my(@to_be_removed,@killed);
199    for my $pid (sort {$a<=>$b} keys %proc_timestamp) {
200      $proc_state{$pid} = ' '  if $proc_state{$pid} eq '';
201      my($idling) = $proc_task_id{$pid} eq '' &&
202                    $proc_state{$pid} =~ /^[. ]?\z/s;
203      my($age) = $now - $proc_timestamp{$pid};
204      if ($idling) { $proc_state_bars{$pid} = '' }
205      else {
206        $proc_state_bars{$pid} = ''
207          if $proc_timestamp{$pid} ne $proc_last_timestamp{$pid};
208        my($len) = int($age+0.5);
209        $len = 1  if $len < 1;
210        my($str) = $proc_state_bars{$pid};
211        if ($len > length($str)) {  # replicate last character to desired size
212          my($ch) = $str eq '' ? '=' : substr($str,-1,1);
213          $str .= $ch x ($len - length($str));
214        }
215        substr($str,$len-1,1) = $proc_state{$pid};
216        $proc_state_bars{$pid} = $str;
217      }
218      $proc_last_timestamp{$pid} = $proc_timestamp{$pid};
219      my($ttl) = $idling ? $idlettl : $activettl;
220      my($n) = kill(0,$pid);  # test if the process is still there
221      if ($n == 0 && $! != ESRCH) {
222        die "Can't check the process $pid: $!";
223      } elsif ($n == 0) {  # ESRCH means there is no such process
224        printf STDERR ("PID %s: %-11s went away %s\n",
225                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
226                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
227        push(@to_be_removed, $pid);
228      } elsif ($age <= $ttl) {     # all ok
229        printf STDERR ("PID %s: %-11s %s\n",
230                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
231                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
232      } else {                                            # send a SIGTERM
233        printf STDERR ("PID %s: %-11s terminated %s\n",
234                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
235                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
236        if (kill('TERM',$pid) || $! == ESRCH) { push(@killed,$pid) }
237        else { warn "Can't SIGTERM $pid: $!" }
238      }
239    }
240    if (@to_be_removed) {
241      local($SIG{'INT'}) = 'IGNORE';
242      $cursor = $db->db_cursor(DB_WRITECURSOR);  # obtain a write lock
243      defined $cursor or die "BDB db_cursor error: $BerkeleyDB::Error";
244      for my $key (@to_be_removed) {
245        my($val); my($stat) = $cursor->c_get($key,$val,DB_SET);
246        $stat==0 || $stat==DB_NOTFOUND
247          or die "BDB c_get: $BerkeleyDB::Error, $!.";
248        if ($stat==0) {  # remove existing entry
249          $cursor->c_del==0 or die "BDB c_del: $BerkeleyDB::Error, $!.";
250        }
251      }
252      $cursor->c_close==0 or die "BDB c_close error: $BerkeleyDB::Error";
253      $cursor = undef;
254    }
255    my($delay) = 1;  # seconds
256    while (@killed) {
257      Time::HiRes::sleep($delay); $delay = 2;
258      for my $pid (@killed) {
259        $waittime{$pid}++;
260        printf STDERR ("PID %s: sending SIGKILL in %d s\n",
261                       $pid, 30-$waittime{$pid});
262        if ($waittime{$pid} > 30) {  # send a SIGKILL
263          kill('KILL',$pid) or $! == ESRCH or warn "Can't SIGKILL $pid: $!";
264          $waittime{$pid} = 0;
265        } elsif (kill(0,$pid)) {  # process is still there
266        } elsif ($! != ESRCH) {   # problem?
267          warn "Can't check process $pid: $!";
268        } else {                  # no longer around
269          printf STDERR ("PID %s: %-11s successfully killed\n", $pid);
270          delete($waittime{$pid});
271          $pid = undef;
272        }
273      }
274      @killed = grep {defined} @killed;
275      printf STDERR ("Waiting for the process to terminate: %s\n",
276                     join(', ',@killed))  if @killed;
277    }
278    $| = 1;
279    last  if defined $repeatcount && $repeatcount <= 0;
280    Time::HiRes::sleep($wakeuptime)  if $wakeuptime > 0;
281  } # forever
282  $normal_termination = 1;
283
END {
  # Cleanup on the way out: close a cursor which may still be open, then
  # the database itself; a failure to close the database is fatal.
  if (defined $db) {
    if (defined $cursor) { $cursor->c_close }  # ignoring status
    my $rc = $db->db_close;
    $rc == 0 or die "BDB db_close error: $BerkeleyDB::Error $!";
  }
  # let the user know when the main loop did not run to its end
  if (!$normal_termination) { print STDERR "exited\n" }
}
291