#!/usr/bin/perl
#
# fixproc [-min n] [-max n] [-check | -kill | -restart | -exist | -fix] proc ...
#
# fixproc exit code:
#     0   ok
#     1   check failed
#     2   cannot restart
#     3   cannot kill
#     4   fix failed      if fix is defined as kill or restart, then
#                         cannot kill or cannot restart is returned instead
#     10  fixproc error
#
#
# Fixes a process named "proc" by performing the specified action.  The
# actions can be check, kill, restart, exist, or fix.  The action is specified
# on the command line or is read from a default database, which describes
# the default action to take for each process.  The database format and
# the meaning of each action are described below.
#
# database format
# ---------------
#
# name    foo                     required
# cmd     /a/b/name args          required
# min     number                  optional, defaults to 1
# max     number                  optional, defaults to 1
#
# check   {null, exist, shell}    optional, defaults to exist if not defined
#    [shell command               shell commands needed only if check=shell
#     ...
#     shell command
#     end_shell]                  keyword end_shell marks end of shell commands
# fix     {kill, restart, shell}  required
#    [shell command               shell commands needed only if fix=shell
#     ...
#     shell command
#     end_shell]                  keyword end_shell marks end of shell commands
#
# Blank lines and lines beginning with "#" are ignored.
#
#
# Example:
#
# name    test1
# cmd     nice /home/kong/z/test1 > /dev/null &
# max     2
# fix     shell
#     xterm&
#     nice /home/kong/z/test1 > /dev/null &
#     end_shell
#
#
# actions
# -------
# There are 5 possible actions: kill, restart, fix, exist, check.  Fix is
# defined to be the kill action, the restart action, or a series of shell
# commands.  Check is optionally defined in the database.  If check is not
# defined, it defaults to exist.
#
# If the action is specified on the cmd line, it is executed regardless of
# check.  The commands executed for each action type are as follows:
#
#   switch action:
#       kill:
#           kill process, wait 2 seconds, kill -9 if it still exists
#           if it still exists
#               return "cannot kill"
#           else
#               return "ok"
#
#       restart:
#           execute kill
#           if kill returned "cannot kill"
#               return "cannot kill"
#           restart by issuing cmd to shell
#           if check defined
#               execute check
#               if check succeeds
#                   return "ok"
#               else
#                   return "cannot restart"
#
#       fix:
#           if fix=kill
#               execute kill
#           else if fix=restart
#               execute restart
#           else
#               execute shell commands
#               execute check
#
#       check:
#           if check defined as null
#               return "fixproc error"
#           else
#               execute check
#               if check succeeds
#                   return (execute exist)
#               return "check failed"
#
#       exist:
#           if proc exists in ps && (min <= num. of processes <= max)
#               return "ok"
#           else
#               return "check failed"
#
#
# If the action is not specified on the cmd line, the default action is the
# fix action defined in the database.  Fix is only executed if check fails:
#
#   if fix defined
#       if check is not defined as null
#           execute check
#           if check succeeds
#               return "ok"
#       execute action defined for fix
#   else
#       return "fixproc error"
#
#
# If proc is not specified on the command line, return "fixproc error."
# Multiple procs can be defined on the cmd line.  When an error occurs
# while multiple procs are specified, the first error encountered halts the
# script.
#
# For check shell scripts, any non-zero exit code means the check has failed.
#
#
# Timothy Kong		3/1995

use strict;
use warnings;

use File::Temp qw(tempfile);

my $database_file = '/local/etc/fixproc.conf';

my $debug = 0;              # debug level, set with -dN
                            # currently defined: -d1

# Return codes of the action subs; also used as the script's exit code.
my $no_error             = 0;
my $check_failed_error   = 1;
my $cannot_restart_error = 2;
my $cannot_kill_error    = 3;
my $cannot_fix_error     = 4;
my $fixproc_error        = 10;

# Command-line state, filled in by read_args().
my $min_override;           # value of -min, undef when not given
my $max_override;           # value of -max, undef when not given
my $cmd_line_action = '';   # kill, check, restart, exist, fix, or '' (use database)
my @proc_list       = ();

# Database contents, keyed by process name, filled in by read_database().
# $check{name} / $fix{name} hold either a keyword or, for "shell", the index
# in @shell_lines where that entry's script starts.
my %min         = ();
my %max         = ();
my %cmd         = ();
my %check       = ();
my %fix         = ();
my @shell_lines = ();

my $shell_header     = "#!/bin/sh\n";
my $shell_end_marker = 'shell_end_marker';

read_args();
read_database();
# dump_database();          # debug only

# -min / -max on the command line replace the limits of every database entry
if (defined $min_override)
{
    for my $name (keys %min)
    {
        $min{$name} = $min_override;
    }
}
if (defined $max_override)
{
    for my $name (keys %max)
    {
        $max{$name} = $max_override;
    }
}

# work on one process at a time; the first error halts the script and
# becomes its exit code
for my $proc (@proc_list)
{
    my $error_code = work_on_proc($proc);
    if ($error_code)
    {
        print STDERR "error_code = $error_code\n";
        exit $error_code;
    }
}
exit $no_error;


# Print $message on STDERR and exit with the "fixproc error" code.
sub fixproc_die
{
    my ($message) = @_;

    print STDERR $message;
    exit $fixproc_error;
}


# True if $value is defined and not the empty string.
sub is_set
{
    my ($value) = @_;

    return (defined $value && $value ne '') ? 1 : 0;
}


# Return the lines of "/bin/ps -e" output that contain $proc as a literal
# substring.  ps is started without a shell so the process name (which
# comes from the command line) can never be interpreted as shell syntax.
sub ps_lines_matching
{
    my ($proc) = @_;

    # an empty name would match every process on the system
    return () if (!is_set($proc));

    open(my $ps, '-|', '/bin/ps', '-e')
        || fixproc_die("$0: can't run ps command\n");
    my @matches = grep { index($_, $proc) >= 0 } <$ps>;
    close($ps);
    return @matches;
}


# Return the PIDs (first field of "ps -e") of all processes matching $proc.
sub matching_pids
{
    my ($proc) = @_;
    my @pids = ();

    for my $line (ps_lines_matching($proc))
    {
        if ($line !~ /^\s*(\d+)\s/)
        {
            fixproc_die("$0: can't match ps -e output\n");
        }
        push(@pids, $1);
    }
    return @pids;
}


# Write the shell commands stored in @shell_lines, starting at index $i and
# ending before the next end marker, to the open handle $fh, then close it
# and make $file executable.
sub create_sh_script
{
    my ($i, $fh, $file) = @_;

    print STDERR "create_sh_script\n" if ($debug > 0);

    # the index bound guards against a block that was never terminated
    while ($i <= $#shell_lines && $shell_lines[$i] ne $shell_end_marker)
    {
        print {$fh} $shell_lines[$i];
        $i++;
    }
    close($fh) || fixproc_die("$0: cannot write $file: $!\n");
    chmod(0700, $file) || fixproc_die("$0: cannot chmod $file: $!\n");
}


# Run the shell commands starting at $shell_lines[$index] from a temporary
# script named ${prefix}_XXXXXXXX in /tmp.  Returns true if the script could
# be run and exited with status 0.
sub run_shell_script
{
    my ($index, $prefix) = @_;

    my ($tmpfh, $tmpfile)
        = tempfile("${prefix}_XXXXXXXX", DIR => '/tmp', UNLINK => 1);
    create_sh_script($index, $tmpfh, $tmpfile);

    my $status = system($tmpfile);
    unlink($tmpfile);
    return ($status == 0);
}


# Perform the fix action defined in the database for $proc: kill, restart,
# or a shell script followed by an existence check.  Returns an error code.
sub do_fix
{
    my ($proc) = @_;

    print STDERR "do_fix\n" if ($debug > 0);

    my $action = $fix{$proc};
    fixproc_die("$0: internal error 4\n") if (!is_set($action));

    return do_kill($proc)    if ($action eq 'kill');
    return do_restart($proc) if ($action eq 'restart');

    # it must be "shell": $action is the index of the script in @shell_lines
    return $cannot_fix_error if (!run_shell_script($action, 'fix'));
    # sleep needed here?
    return do_exist($proc);
}


# Perform the check defined in the database for $proc.  A shell check must
# exit 0; every check finishes with the existence check.  Returns
# $no_error or $check_failed_error.
sub do_check
{
    my ($proc) = @_;

    print STDERR "do_check\n" if ($debug > 0);

    my $action = $check{$proc};
    fixproc_die("$0: internal error 2\n")
        if (!is_set($action) || $action eq 'null');

    if ($action ne 'exist')
    {
        # it must be "shell": $action is the index of the check script
        return $check_failed_error if (!run_shell_script($action, 'check'));
        # check passed, continue
    }
    return do_exist($proc);
}


# Check that min <= number of processes matching $proc <= max.
# Returns $no_error or $check_failed_error.
sub do_exist
{
    my ($proc) = @_;

    print STDERR "do_exist\n" if ($debug > 0);

    # processes that are not in the database fall back to the command-line
    # limits, or to 1
    my $lowest  = defined $min{$proc}     ? $min{$proc}
                : defined $min_override   ? $min_override
                :                           1;
    my $highest = defined $max{$proc}     ? $max{$proc}
                : defined $max_override   ? $max_override
                :                           1;

    my @lines      = ps_lines_matching($proc);
    my $proc_count = scalar @lines;

    if ($proc_count < $lowest || $proc_count > $highest)
    {
        return $check_failed_error;
    }
    return $no_error;
}


# Kill every process matching $proc: SIGTERM first, then SIGKILL for
# whatever is left after 2 seconds.  Returns $no_error or
# $cannot_kill_error.
sub do_kill
{
    my ($proc) = @_;

    print STDERR "do_kill\n" if ($debug > 0);

    # first try kill
    kill('TERM', matching_pids($proc));

    # if process still exist, try kill -9
    sleep 2;
    my @survivors = matching_pids($proc);
    return $no_error if (!@survivors);
    kill('KILL', @survivors);

    # see if kill -9 worked
    sleep 2;
    my @remaining = ps_lines_matching($proc);
    return @remaining ? $cannot_kill_error : $no_error;
}


# Kill $proc, start it again with the database cmd, and run the check
# (unless check is null).  Returns $no_error, $cannot_kill_error or
# $cannot_restart_error.
sub do_restart
{
    my ($proc) = @_;

    print STDERR "do_restart\n" if ($debug > 0);

    my $error_code = do_kill($proc);
    return $error_code if ($error_code != $no_error);

    fixproc_die("$0: internal error 3\n") if (!is_set($cmd{$proc}));

    # cmd comes from the database and may use shell syntax (redirection, &)
    system($cmd{$proc});
    # sleep needed here?

    return $no_error if ($check{$proc} eq 'null');
    return (do_check($proc) == $no_error) ? $no_error : $cannot_restart_error;
}


# Perform the requested action on $proc: the command-line action if one was
# given, otherwise check followed (only if the check fails) by the database
# fix.  Returns an error code.
sub work_on_proc
{
    my ($proc) = @_;

    print STDERR "work_on_proc\n" if ($debug > 0);

    # kill and exist can work on any process name; everything else needs
    # the database entry
    if (   $cmd_line_action ne 'kill'
        && $cmd_line_action ne 'exist'
        && !is_set($cmd{$proc}))
    {
        fixproc_die("$0: process $proc not defined in database\n");
    }

    if ($cmd_line_action eq '')
    {
        # perform action from database

        if ($check{$proc} ne 'null')
        {
            my $error_code = do_check($proc);
            return $error_code if ($error_code != $check_failed_error);
        }
        return do_fix($proc);
    }

    # perform action from command line

    return do_kill($proc)    if ($cmd_line_action eq 'kill');
    return do_restart($proc) if ($cmd_line_action eq 'restart');
    return do_fix($proc)     if ($cmd_line_action eq 'fix');
    return do_exist($proc)   if ($cmd_line_action eq 'exist');

    if ($cmd_line_action eq 'check')
    {
        if ($check{$proc} eq 'null')
        {
            fixproc_die("$0: check is defined as null for $proc\n");
        }
        return do_check($proc);
    }

    fixproc_die("$0: internal error 1\n");
}


# Print one check/fix setting: either its keyword or, for a shell entry,
# the lines of its script.
sub dump_action
{
    my ($keyword, $value) = @_;

    if ($value =~ /\A\d+\z/)
    {
        printf("%s\tshell\n", $keyword);
        my $i = $value;
        while ($i <= $#shell_lines && $shell_lines[$i] ne $shell_end_marker)
        {
            print STDOUT $shell_lines[$i];
            $i++;
        }
    }
    else
    {
        printf("%s\t%s\n", $keyword, $value);
    }
}


# Print the database as read (debug aid).
sub dump_database
{
    for my $name (keys %cmd)
    {
        printf("name\t%s\n", $name);
        printf("cmd\t%s\n",  $cmd{$name});
        printf("min\t%s\n",  $min{$name});
        printf("max\t%s\n",  $max{$name});
        dump_action('check', $check{$name});
        dump_action('fix',   $fix{$name});
        printf("\n");
    }
}


# Read $database_file into %cmd, %min, %max, %check, %fix and @shell_lines.
# Any syntax error is fatal.
sub read_database
{
    my $in_shell_lines = 0;     # true while inside a check/fix shell block
    my $name           = '';    # entry currently being read

    open(my $db, '<', $database_file)
        || fixproc_die("$0: cannot open database file $database_file: $!\n");

    while (my $line = <$db>)
    {
        # ignore blank lines or lines beginning with "#"
        next if ($line !~ /\S/ || $line =~ /^[ \t]*#/);

        if ($in_shell_lines)
        {
            if ($line =~ /^\s*end_shell\s*$/)
            {
                $in_shell_lines = 0;
                push(@shell_lines, $shell_end_marker);
            }
            else
            {
                push(@shell_lines, $line);
            }
            next;
        }

        # "keyword value", value trimmed of trailing white space
        my ($keyword, $value) = ($line =~ /^\s*(\S+)\s+(\S.*?)\s*$/);
        if (!defined $keyword)
        {
            fixproc_die("$0: syntax error in database\n$line");
        }

        if ($keyword eq 'name')
        {
            finish_db_entry($name);
            $name = $value;
        }
        elsif ($keyword eq 'cmd')
        {
            fixproc_die("$0: cmd specified before name in database\n$line\n")
                if ($name eq '');
            fixproc_die("$0: cmd specified multiple times for $name in database\n")
                if (is_set($cmd{$name}));
            $cmd{$name} = $value;
        }
        elsif ($keyword eq 'min' || $keyword eq 'max')
        {
            my $limits = ($keyword eq 'min') ? \%min : \%max;

            fixproc_die("$0: $keyword specified before name in database\n$line\n")
                if ($name eq '');
            fixproc_die("$0: $keyword specified multiple times in database\n$line\n")
                if (is_set($limits->{$name}));
            fixproc_die("$0: non-numeric $keyword value in database\n$line\n")
                if ($value !~ /\A\d+\z/);
            $limits->{$name} = $value;
        }
        elsif ($keyword eq 'check' || $keyword eq 'fix')
        {
            my $actions = ($keyword eq 'check') ? \%check : \%fix;
            my $allowed = ($keyword eq 'check')
                        ? qr/\A(?:null|exist)\z/
                        : qr/\A(?:kill|restart)\z/;

            fixproc_die("$0: $keyword specified before name in database\n$line\n")
                if ($name eq '');
            fixproc_die("$0: $keyword specified multiple times in database\n$line\n")
                if (is_set($actions->{$name}));

            if ($value eq 'shell')
            {
                # a numeric value is a pointer into @shell_lines where the
                # shell commands are kept
                push(@shell_lines, $shell_header);
                $actions->{$name} = $#shell_lines;
                $in_shell_lines = 1;
            }
            elsif ($value =~ $allowed)
            {
                $actions->{$name} = $value;
            }
            else
            {
                fixproc_die("$0: invalid $keyword value in database\n$line\n");
            }
        }
        # any other keyword is ignored
    }
    close($db);

    # without its end marker the script could never be written out
    fixproc_die("$0: missing end_shell at end of database\n")
        if ($in_shell_lines);

    finish_db_entry($name);
}


# Validate the database entry $name once all its lines have been read and
# fill in the defaults (check = exist, min = max = 1).
sub finish_db_entry
{
    my ($name) = @_;

    return if ($name eq '');

    fixproc_die("$0: fix not defined for $name in database\n")
        if (!is_set($fix{$name}));
    fixproc_die("$0: cmd not defined for $name in database\n")
        if (!is_set($cmd{$name}));
    $check{$name} = 'exist' if (!is_set($check{$name}));
    $max{$name}   = 1       if (!is_set($max{$name}));
    $min{$name}   = 1       if (!is_set($min{$name}));
}


# Parse @ARGV into $min_override, $max_override, $cmd_line_action, $debug
# and @proc_list.  Any usage error is fatal.
sub read_args
{
    my $action_arg_count = 0;
    my $i                = 0;

    while ($i <= $#ARGV)
    {
        my $arg = $ARGV[$i];

        if ($arg eq '-min' || $arg eq '-max')
        {
            my $value = $ARGV[$i + 1];
            if (!defined $value || $value !~ /\A\d+\z/)
            {
                fixproc_die("$0: numeric arg missing after -min or -max\n");
            }
            if ($arg eq '-min')
            {
                $min_override = $value;
            }
            else
            {
                $max_override = $value;
            }
            $i += 2;
        }
        elsif ($arg =~ /\A-(kill|check|restart|exist|fix)\z/)
        {
            $cmd_line_action = $1;
            $action_arg_count++;
            $i++;
        }
        elsif ($arg =~ /\A-d(\d)\z/)
        {
            $debug = $1;
            $i++;
        }
        elsif ($arg =~ /^-/)
        {
            fixproc_die("$0: unknown switch $arg\n");
        }
        elsif ($arg eq '')
        {
            fixproc_die("$0: empty process name\n");
        }
        else
        {
            push(@proc_list, $arg);
            $i++;
        }
    }

    fixproc_die("$0: no process specified\n") if (!@proc_list);
    fixproc_die("$0: more than one action specified\n")
        if ($action_arg_count > 1);
}