1#!/usr/bin/perl
2#
3# fixproc [-min n] [-max n] [-check | -kill | -restart | -exist | -fix] proc ...
4#
5# fixproc exit code:
6# 	0	ok
7# 	1	check failed
8# 	2	cannot restart
9# 	3	cannot kill
10# 	4	fix failed	if fix is defined as kill or restart, then
# 				cannot kill or cannot restart is returned instead
12# 	10	fixproc error
13#
14#
15# Fixes a process named "proc" by performing the specified action.  The
16# actions can be check, kill, restart, exist, or fix.  The action is specified
17# on the command line or is read from a default database, which describes
18# the default action to take for each process.  The database format and
19# the meaning of each action are described below.
20#
21# database format
22# ---------------
23#
24# name	foo			required
25# cmd	/a/b/name args		required
26# min	number			optional, defaults to 1
27# max	number			optional, defaults to 1
28#
29# check	{null, exist, shell}	optional, defaults to exist if not defined
30# [shell command		shell commands needed only if check=shell
31#  ...
32#  shell command
33#  end_shell]			keyword end_shell marks end of shell commands
34# fix	{kill, restart, shell}	required
35# [shell command			shell commands needed only if fix=shell
36#  ...
37#  shell command
38#  end_shell]			keyword end_shell marks end of shell commands
39#
40# Blank lines and lines beginning with "#" are ignored.
41#
42#
43# Example:
44#
45# name	test1
46# cmd	nice /home/kong/z/test1 > /dev/null &
47# max	2
48# fix	shell
49# 	xterm&
50# 	nice /home/kong/z/test1 > /dev/null &
51# 	end_shell
52#
53#
54# actions
55# -------
56# There are 5 possible actions:  kill, restart, fix, exist, check.  Fix is
57# defined to be the kill action, the restart action, or a series of shell
58# commands.  Check is optionally defined in the database.  If check is not
59# defined, it defaults to exist.
60#
61# If the action is specified on the cmd line, it is executed regardless of
# check.  The commands executed for each action type are as follows:
63#
64#   switch action:
65# 	kill:
# 	  kill process, wait 2 seconds, kill -9 if still exist
67# 	  if still exist
68# 	    return "cannot kill"
69# 	  else
70# 	    return "ok"
71#
72# 	restart:
73# 	  execute kill
74# 	  if kill returned "cannot kill"
75# 	    return "cannot kill"
76# 	  restart by issuing cmd to shell
77# 	  if check defined
78# 	    execute check
79# 	    if check succeeds
80# 	      return "ok"
81# 	    else
82# 	      return "cannot restart"
83#
84# 	fix:
85# 	  if fix=kill
86# 	    execute kill
87# 	  else if fix=restart
88# 	    execute restart
89# 	  else
90# 	    execute shell commands
91# 	    execute check
92#
93# 	check:
94# 	  if check defined as null
95# 	    return "fixproc error"
96# 	  else
97# 	    execute check
98# 	    if check succeeds
99# 	      return (execute exist)
100# 	    return "check failed"
101#
102# 	exist:
103# 	  if proc exists in ps && (min <= num. of processes <= max)
104# 	    return "ok"
105# 	  else
106# 	    return "check failed"
107#
108#
109# If the action is not specified on the cmd line, the default action is the
110# fix action defined in the database.  Fix is only executed if check fails:
111#
112# 	if fix defined
113# 	  if check is not defined as null
114# 	    execute check
115# 	    if check succeeds
116# 	      return "ok"
117# 	  execute action defined for fix
118# 	else
119# 	  return "fixproc error"
120#
121#
122# If proc is not specified on the command line, return "fixproc error."
123# Multiple proc's can be defined on the cmd line.   When an error occurs
124# when multiple proc's are specified, the first error encountered halts the
125# script.
126#
127# For check shell scripts, any non-zero exit code means the check has failed.
128#
129#
130# Timothy Kong		3/1995
131
132use File::Temp qw(tempfile);
133
# location of the process database (format described at the top of this file)
$database_file = '/local/etc/fixproc.conf';

$debug = 0;			# specify debug level using -dN
				# currently defined: -d1

# exit codes (see "fixproc exit code" at the top of this file)
$no_error = 0;
$check_failed_error = 1;
$cannot_restart_error = 2;
$cannot_kill_error = 3;
$cannot_fix_error = 4;
$fixproc_error = 10;

# command line settings, filled in by read_args
$min = 1;
$max = 1;
$cmd_line_action = '';		# '' means "take the action from the database"
@proc_list = ();		# process names given on the command line

# database entries, keyed by process name, filled in by read_database
%min = ();
%max = ();
%cmd = ();
%check = ();			# keyword from the database, or an index into
				# @shell_lines when check is "shell"
%fix = ();			# keyword from the database, or an index into
				# @shell_lines when fix is "shell"

# Lines of every check/fix shell script found in the database.  Each script
# starts with $shell_header and is terminated by $shell_end_marker.
@shell_lines = ();

$shell_header = "#!/bin/sh\n";
$shell_end_marker = 'shell_end_marker';

&read_args();
&read_database();
# &dump_database();		# debug only

# A -min or -max given on the command line replaces the value of every
# database entry.  A value of 1 cannot be told apart from "option not given",
# so it leaves the database values untouched.
if ($min != 1)
  {
    for $name ( keys (%min) )
      {
        $min{$name} = $min;
      }
  }
if ($max != 1)
  {
    for $name ( keys (%max) )
      {
        $max{$name} = $max;
      }
  }

# Work on one process at a time.  The first failure halts the script and
# becomes its exit code, as documented at the top of this file.
for $proc ( @proc_list )
  {
    $error_code = &work_on_proc ($proc);

    if ($error_code)
      {
        print STDERR "error_code = $error_code\n";
        exit $error_code;
      }
  }
190
191
# create an executable shell script file
#
# Arguments:
#   $i     index into @shell_lines of the first line of the script
#   $fh    open, writable file handle for the script file
#   $file  path name of the file behind $fh
#
# Writes @shell_lines[$i ...] to $fh, stopping at (and not writing) the next
# $shell_end_marker, then closes $fh and makes $file mode 0755.
#
# Dies if no end marker follows $i, or if the file cannot be closed or made
# executable.
sub create_sh_script
{
  my ($i, $fh, $file) = @_;
  my $reason;

  printf (STDERR "create_sh_script\n") if ($debug > 0);

  # Never walk past the end of @shell_lines: without this bound a missing
  # end marker would loop forever.
  while ($i <= $#shell_lines && $shell_lines[$i] ne $shell_end_marker)
    {
      print {$fh} $shell_lines[$i];
      $i++;
    }
  if ($i > $#shell_lines)
    {
      $! = $fixproc_error;
      die "$0: shell script in database has no end marker\n";
    }

  # buffered write errors only show up when the handle is closed
  if (! close ($fh))
    {
      $reason = "$!";
      $! = $fixproc_error;	# die exits with the value of $!
      die "$0: can't write $file: $reason\n";
    }
  if (chmod (0755, $file) != 1)
    {
      $reason = "$!";
      $! = $fixproc_error;
      die "$0: can't chmod $file: $reason\n";
    }
}
210
211
# Apply the fix defined in the database for a process.
#
# Argument:
#   $proc  process name (key into %fix)
#
# Returns:
#   fix "kill"     the result of do_kill
#   fix "restart"  the result of do_restart
#   otherwise      $fix{$proc} is taken as an index into @shell_lines; the
#                  script found there is run, and the result is
#                  $cannot_fix_error if it did not exit with status 0,
#                  else the result of do_exist
#
# Dies with "internal error 4" if no fix is recorded for $proc.
sub do_fix
{
  my ($proc) = @_;

  printf (STDERR "do_fix\n") if ($debug > 0);

  if ($fix{$proc} eq '')
    {
      $! = $fixproc_error;
      die "$0: internal error 4\n";
    }

  return &do_kill ($proc) if ($fix{$proc} eq 'kill');
  return &do_restart ($proc) if ($fix{$proc} eq 'restart');

  # it must be "shell", so execute the shell script defined in database
  my ($tmpfh, $tmpfile) = tempfile("fix_XXXXXXXX", DIR => "/tmp");

  &create_sh_script ($fix{$proc}, $tmpfh, $tmpfile);

  # system returns the raw wait status: non-zero covers a non-zero exit
  # code, death by signal, and failure to start the script at all
  my $status = system ($tmpfile);
  unlink ($tmpfile);
  return $cannot_fix_error if ($status != 0);

  # sleep needed here?
  return &do_exist ($proc);
}
246
247
# Run the check defined in the database for a process.
#
# Argument:
#   $proc  process name (key into %check)
#
# Returns:
#   check "exist"  the result of do_exist
#   otherwise      $check{$proc} is taken as an index into @shell_lines; the
#                  script found there is run, and the result is
#                  $check_failed_error if it did not exit with status 0,
#                  else the result of do_exist
#
# Dies with "internal error 2" if no check is recorded for $proc.
sub do_check
{
  my ($proc) = @_;

  printf (STDERR "do_check\n") if ($debug > 0);

  if ($check{$proc} eq '')
    {
      $! = $fixproc_error;
      die "$0: internal error 2\n";
    }

  if ($check{$proc} ne 'exist')
    {
      # if not "exist", then it must be "shell", so execute the shell script
      # defined in database

      my ($tmpfh, $tmpfile) = tempfile("check_XXXXXXXX", DIR => "/tmp");

      # the check script lives at $check{$proc}, not at $fix{$proc}
      &create_sh_script ($check{$proc}, $tmpfh, $tmpfile);

      # any non-zero wait status means the check has failed
      my $status = system ($tmpfile);
      unlink ($tmpfile);
      return $check_failed_error if ($status != 0);

      # check passed, continue
    }
  return &do_exist ($proc);
}
278
279
# Check that the number of running processes matching a name is in range.
#
# Argument:
#   $proc  process name; also the key into %min and %max
#
# Counts the lines of "ps -e" output that grep matches against $proc.
#
# Returns $no_error if $min{$proc} <= count <= $max{$proc}, else
# $check_failed_error.  Dies if the ps pipeline cannot be started.
sub do_exist
{
  my ($proc) = @_;
  my $pattern = $proc;
  my $ps;

  printf (STDERR "do_exist\n") if ($debug > 0);

  # Single-quote the name for the shell so that spaces or metacharacters in
  # it cannot change the command being run.
  $pattern =~ s/'/'\\''/g;

  # do ps, check to see if min <= no. of processes <= max
  if (! open ($ps, '-|', "/bin/ps -e | /bin/grep -e '$pattern' | /bin/wc -l"))
    {
      $! = $fixproc_error;	# die exits with the value of $!
      die "$0: can't run ps-grep-wc command\n";
    }
  my $proc_count = <$ps>;
  close ($ps);

  # no output at all counts as "no process found"
  $proc_count = 0 if (! defined ($proc_count));
  $proc_count += 0;		# drop the blanks and newline printed by wc

  if (($proc_count < $min{$proc}) || ($proc_count > $max{$proc}))
    {
      return $check_failed_error;
    }
  return $no_error;
}
297
298
# Return the process ids of all processes whose "ps -e" line matches $proc.
# This script's own process id is left out so it never signals itself.
# Dies if ps cannot be run or a line of its output has no leading pid.
sub find_proc_pids
{
  my ($proc) = @_;
  my $pattern = $proc;
  my @pids = ();
  my $ps;

  # Single-quote the name for the shell so that spaces or metacharacters in
  # it cannot change the command being run.
  $pattern =~ s/'/'\\''/g;

  if (! open ($ps, '-|', "/bin/ps -e | /bin/grep -e '$pattern'"))
    {
      $! = $fixproc_error;	# die exits with the value of $!
      die "$0: can't run ps-grep command\n";
    }
  while (my $line = <$ps>)
    {
      # match the first field of ps -e
      if ($line !~ /^\s*(\d+)\s/)
        {
          $! = $fixproc_error;
          die "$0: can't match ps -e output\n";
        }
      push (@pids, $1) if ($1 != $$);
    }
  close ($ps);
  return @pids;
}


# Kill every process matching a name.
#
# Argument:
#   $proc  process name, matched against the output of "ps -e"
#
# Sends SIGTERM, waits 2 seconds, sends SIGKILL to whatever is left, waits
# another 2 seconds and looks again.
#
# Returns $no_error when no matching process is left, $cannot_kill_error
# when one survived SIGKILL.
sub do_kill
{
  my ($proc) = @_;

  printf (STDERR "do_kill\n") if ($debug > 0);

  # first try kill
  kill ('TERM', &find_proc_pids ($proc));

  # if process still exist, try kill -9
  sleep 2;
  my @survivors = &find_proc_pids ($proc);
  return $no_error if (! @survivors);
  kill ('KILL', @survivors);

  # see if kill -9 worked
  sleep 2;
  @survivors = &find_proc_pids ($proc);
  return $cannot_kill_error if (@survivors);	# a process still exist
  return $no_error;		# good, all dead
}
345
346
# Kill a process and start it again with the command from the database.
#
# Argument:
#   $proc  process name (key into %cmd and %check)
#
# Returns:
#   the error from do_kill if the old process could not be killed;
#   $no_error if check is "null" (nothing to verify) or do_check succeeds;
#   $cannot_restart_error if do_check fails after the restart.
#
# Dies with "internal error 3" if no cmd is recorded for $proc.  This is
# tested before anything is killed, so a process that cannot be restarted is
# left running.
sub do_restart
{
  my ($proc) = @_;

  printf (STDERR "do_restart\n") if ($debug > 0);

  if ($cmd{$proc} eq '')
    {
      $! = $fixproc_error;
      die "$0: internal error 3\n";
    }

  my $error_code = &do_kill ($proc);
  return $error_code if ($error_code != $no_error);

  system ($cmd{$proc});
  # sleep needed here?

  return $no_error if ($check{$proc} eq 'null');
  return $no_error if (&do_check ($proc) == $no_error);
  return $cannot_restart_error;
}
365
366
# Perform the requested action on one process.
#
# Argument:
#   $proc  process name as given on the command line
#
# With no action on the command line ($cmd_line_action is ''), the database
# decides: unless check is "null", do_check is run and its result is returned
# when it is anything other than $check_failed_error (so a passing check
# returns $no_error); otherwise do_fix is run.
#
# With an action on the command line, that action is executed regardless of
# check.  The "check" action exits with $fixproc_error when check is "null".
#
# Returns the error code of the action performed.
#
# Dies if $proc has no database entry and the action is anything but "kill"
# (kill needs only the process name), or with "internal error 1" on an
# unknown action.
sub work_on_proc
{
  my ($proc) = @_;

  printf (STDERR "work_on_proc\n") if ($debug > 0);

  if (($cmd_line_action ne 'kill') && ($cmd{$proc} eq ''))
    {
      $! = $fixproc_error;
      die "$0: process $proc not defined in database\n";
    }

  if ($cmd_line_action eq '')
    {
      # perform action from database

      if ($check{$proc} ne 'null')
        {
          my $error_code = &do_check ($proc);
          return $error_code if ($error_code != $check_failed_error);
        }
      return &do_fix ($proc);
    }

  # perform action from command line

  return &do_kill ($proc) if ($cmd_line_action eq 'kill');
  return &do_restart ($proc) if ($cmd_line_action eq 'restart');
  return &do_fix ($proc) if ($cmd_line_action eq 'fix');
  return &do_exist ($proc) if ($cmd_line_action eq 'exist');

  if ($cmd_line_action eq 'check')
    {
      exit $fixproc_error if ($check{$proc} eq 'null');
      return &do_check ($proc);
    }

  $! = $fixproc_error;
  die "$0: internal error 1\n";
}
424
425
# Debug aid: print every database entry to standard output, roughly in the
# format of the database file.  Takes no arguments.
#
# A check or fix value made up of digits only is an index into @shell_lines;
# it is printed as "shell" followed by the stored script lines up to the end
# marker.  Any other value is printed as it is.
sub dump_database
{
  for my $name (keys (%cmd))
    {
      printf ("name\t%s\n", $name);
      printf ("cmd\t%s\n", $cmd{$name});
      printf ("min\t%s\n", $min{$name});
      printf ("max\t%s\n", $max{$name});

      for my $action (['check', \%check], ['fix', \%fix])
        {
          my ($keyword, $table) = @$action;
          my $value = $table->{$name};

          # anchored, so that a keyword which merely contains a digit is not
          # mistaken for a script index
          if ($value =~ /^[0-9]+\z/)
            {
              printf ("%s\tshell\n", $keyword);
              for (my $i = $value;
                   $i <= $#shell_lines
                     && $shell_lines[$i] ne $shell_end_marker;
                   $i++)
                {
                  printf ("%s", $shell_lines[$i]);
                }
            }
          else
            {
              printf ("%s\t%s\n", $keyword, $value);
            }
        }
      printf ("\n");
    }
}
467
468
# Read the process database $database_file into %cmd, %min, %max, %check,
# %fix and @shell_lines.  Takes no arguments.
#
# Blank lines and lines beginning with "#" are ignored everywhere, including
# inside shell blocks.  Every other line is "keyword value".  A "name" line
# starts a new entry; finish_db_entry is called on the previous entry at that
# point and once more at end of file.
#
# For "check shell" and "fix shell" the lines up to "end_shell" are appended
# to @shell_lines (preceded by $shell_header, followed by $shell_end_marker)
# and the index of the header is stored in %check or %fix.
#
# Dies with $! set to $fixproc_error if the file cannot be opened, on a
# syntax error, on a keyword given before "name" or given twice, on a
# non-numeric min or max, on a check or fix value the format does not
# define, or on a shell block without end_shell.  Unknown keywords are
# skipped with a warning.
sub read_database
{
  my $name = '';
  my $in_shell_lines = 0;
  my %table_for = (cmd => \%cmd, min => \%min, max => \%max,
                   check => \%check, fix => \%fix);
  my %legal_value = (check => 'null|exist|shell',
                     fix => 'kill|restart|shell');
  # die exits with the value of $!, so set it immediately before dying
  my $fail = sub { $! = $fixproc_error; die @_; };
  my $db;

  if (! open ($db, '<', $database_file))
    {
      my $reason = "$!";
      $fail->("$0: cannot open database file $database_file: $reason\n");
    }

  while (my $line = <$db>)
    {
      # ignore blank lines or lines beginning with "#"
      next if (($line !~ /\S/) || ($line =~ /^[ \t]*#/));

      if ($in_shell_lines)
        {
          if ($line =~ /^\s*end_shell\s*$/)
            {
              $in_shell_lines = 0;
              push (@shell_lines, $shell_end_marker);
            }
          else
            {
              push (@shell_lines, $line);
            }
          next;
        }

      # non-greedy value, so trailing blanks are not part of it
      my ($keyword, $value) = ($line =~ /^\s*(\S+)\s+(\S.*?)\s*$/);
      $fail->("$0: syntax error in database\n$line")
        if (! defined ($keyword));

      if ($keyword eq 'name')
        {
          &finish_db_entry ($name);
          $name = $value;
          next;
        }

      my $table = $table_for{$keyword};
      if (! defined ($table))
        {
          warn "$0: unknown keyword $keyword ignored in database\n$line";
          next;
        }

      $fail->("$0: $keyword specified before name in database\n$line\n")
        if ($name eq '');
      if ($table->{$name} ne '')
        {
          $fail->("$0: cmd specified multiple times for $name in database\n")
            if ($keyword eq 'cmd');
          $fail->("$0: $keyword specified multiple times in database\n$line\n");
        }

      if (($keyword eq 'min') || ($keyword eq 'max'))
        {
          $fail->("$0: non-numeric $keyword value in database\n$line\n")
            if ($value !~ /^[0-9]+\z/);
        }
      elsif (($keyword eq 'check') || ($keyword eq 'fix'))
        {
          $fail->("$0: invalid $keyword value $value in database\n$line\n")
            if ($value !~ /^(?:$legal_value{$keyword})\z/);
          if ($value eq 'shell')
            {
              # a numeric check/fix value is a pointer into @shell_lines
              # where the shell commands are kept
              push (@shell_lines, $shell_header);
              $value = $#shell_lines;
              $in_shell_lines = 1;
            }
        }
      $table->{$name} = $value;
    }
  close ($db);

  # without its end marker the last script could not be written out later
  $fail->("$0: missing end_shell at end of database\n")
    if ($in_shell_lines);

  &finish_db_entry ($name);
}
598
599
# Close off the database entry that has just been read.
#
# Argument:
#   $name  name of the entry; the empty string (no entry read yet) is
#          accepted and nothing is done for it
#
# Dies if the entry has no fix or no cmd.  An entry without check, max or
# min gets the defaults "exist", 1 and 1.
sub finish_db_entry
{
  my $name = $_[$#_];

  return if ($name eq '');

  # fix and cmd are required
  $! = $fixproc_error;
  if ($fix{$name} eq '')
    {
      die "$0: fix not defined for $name in database\n";
    }
  if ($cmd{$name} eq '')
    {
      die "$0: cmd not defined for $name in database\n";
    }

  # check, max and min are optional
  for my $optional ([\%check, 'exist'], [\%max, 1], [\%min, 1])
    {
      my ($table, $default) = @$optional;
      $table->{$name} = $default if ($table->{$name} eq '');
    }
}
616
617
# Parse @ARGV.  Takes no arguments.
#
#   -min N, -max N    stored in $min / $max; N must be all digits
#   -kill, -check, -restart, -exist, -fix
#                     stored (without the dash) in $cmd_line_action
#   -dN               one-digit debug level, stored in $debug
#   anything else not starting with "-"
#                     appended to @proc_list as a process name
#
# Dies with $! set to $fixproc_error on an unknown switch, on a -min or -max
# without a numeric value, when no process is named, or when more than one
# action is given.
sub read_args
{
  my %action_for = map { ("-$_" => $_) } qw(kill check restart exist fix);
  my $action_arg_count = 0;
  my $i = 0;

  while ($i <= $#ARGV)
    {
      my $arg = $ARGV[$i];

      if (($arg eq '-min') || ($arg eq '-max'))
        {
          # undefined when the switch is the last argument
          my $value = $ARGV[$i+1];
          if (! defined ($value) || ($value !~ /^\d+\z/))
            {
              $! = $fixproc_error;
              die "$0: numeric arg missing after -min or -max\n";
            }
          if ($arg eq '-min')
            {
              $min = $value;
            }
          else
            {
              $max = $value;
            }
          $i += 2;
          next;
        }

      if (exists ($action_for{$arg}))
        {
          $cmd_line_action = $action_for{$arg};
          $action_arg_count++;
        }
      elsif ($arg =~ /^-d(\d)$/)
        {
          $debug = $1;
        }
      elsif ($arg =~ /^-/)
        {
          $! = $fixproc_error;
          die "$0: unknown switch $arg\n";
        }
      else
        {
          push (@proc_list, $arg);
        }
      $i++;
    }

  $! = $fixproc_error;
  die "$0: no process specified\n" if ($#proc_list == -1);
  die "$0: more than one action specified\n" if ($action_arg_count > 1);
}
694
695