1# -*- cperl -*- 2# Copyright (c) 2004-2006 MySQL AB, 2008 Sun Microsystems, Inc. 3# Use is subject to license terms. 4# 5# This program is free software; you can redistribute it and/or modify 6# it under the terms of the GNU General Public License, version 2.0, 7# as published by the Free Software Foundation. 8# 9# This program is also distributed with certain software (including 10# but not limited to OpenSSL) that is licensed under separate terms, 11# as designated in a particular file or component or in included license 12# documentation. The authors of MySQL hereby grant you an additional 13# permission to link the program and your derivative works with the 14# separately licensed software that they have included with MySQL. 15# 16# This program is distributed in the hope that it will be useful, 17# but WITHOUT ANY WARRANTY; without even the implied warranty of 18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19# GNU General Public License, version 2.0, for more details. 20# 21# You should have received a copy of the GNU General Public License 22# along with this program; if not, write to the Free Software 23# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25# This is a library file used by the Perl version of mysql-test-run, 26# and is part of the translation of the Bourne shell script with the 27# same name. 28 29use Socket; 30use Errno; 31use strict; 32 33use POSIX qw(WNOHANG SIGHUP); 34 35sub mtr_run ($$$$$$;$); 36sub mtr_spawn ($$$$$$;$); 37sub mtr_check_stop_servers ($); 38sub mtr_kill_leftovers (); 39sub mtr_wait_blocking ($); 40sub mtr_record_dead_children (); 41sub mtr_ndbmgm_start($$); 42sub mtr_mysqladmin_start($$$); 43sub mtr_exit ($); 44sub sleep_until_file_created ($$$); 45sub mtr_kill_processes ($); 46sub mtr_ping_with_timeout($); 47sub mtr_ping_port ($); 48 49# Local function 50sub spawn_impl ($$$$$$$); 51 52############################################################################## 53# 54# Execute an external command 55# 56############################################################################## 57 58sub mtr_run ($$$$$$;$) { 59 my $path= shift; 60 my $arg_list_t= shift; 61 my $input= shift; 62 my $output= shift; 63 my $error= shift; 64 my $pid_file= shift; # Not used 65 my $spawn_opts= shift; 66 67 return spawn_impl($path,$arg_list_t,'run',$input,$output,$error, 68 $spawn_opts); 69} 70 71sub mtr_run_test ($$$$$$;$) { 72 my $path= shift; 73 my $arg_list_t= shift; 74 my $input= shift; 75 my $output= shift; 76 my $error= shift; 77 my $pid_file= shift; # Not used 78 my $spawn_opts= shift; 79 80 return spawn_impl($path,$arg_list_t,'test',$input,$output,$error, 81 $spawn_opts); 82} 83 84sub mtr_spawn ($$$$$$;$) { 85 my $path= shift; 86 my $arg_list_t= shift; 87 my $input= shift; 88 my $output= shift; 89 my $error= shift; 90 my $pid_file= shift; # Not used 91 my $spawn_opts= shift; 92 93 return spawn_impl($path,$arg_list_t,'spawn',$input,$output,$error, 94 $spawn_opts); 95} 96 97 98 99sub spawn_impl ($$$$$$$) { 100 my $path= shift; 101 my $arg_list_t= shift; 102 my $mode= shift; 103 my $input= shift; 104 my $output= shift; 105 my $error= shift; 106 my $spawn_opts= shift; 107 108 if ( $::opt_script_debug ) 109 { 110 mtr_report(""); 111 mtr_debug("-" x 73); 112 mtr_debug("STDIN $input") if $input; 113 mtr_debug("STDOUT $output") if $output; 114 mtr_debug("STDERR $error") if $error; 115 mtr_debug("$mode: $path ", join(" ",@$arg_list_t)); 116 mtr_debug("spawn options:"); 117 if ($spawn_opts) 118 { 119 foreach my $key (sort keys %{$spawn_opts}) 120 { 121 mtr_debug(" - $key: $spawn_opts->{$key}"); 122 } 123 } 124 else 125 { 126 mtr_debug(" none"); 127 } 128 mtr_debug("-" x 73); 129 mtr_report(""); 130 } 131 132 mtr_error("Can't spawn with empty \"path\"") unless defined $path; 133 134 135 FORK: 136 { 137 my $pid= fork(); 138 139 if ( ! defined $pid ) 140 { 141 if ( $! == $!{EAGAIN} ) # See "perldoc Errno" 142 { 143 mtr_warning("Got EAGAIN from fork(), sleep 1 second and redo"); 144 sleep(1); 145 redo FORK; 146 } 147 148 mtr_error("$path ($pid) can't be forked, error: $!"); 149 150 } 151 152 if ( $pid ) 153 { 154 select(STDOUT) if $::glob_win32_perl; 155 return spawn_parent_impl($pid,$mode,$path); 156 } 157 else 158 { 159 # Child, redirect output and exec 160 161 $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't 162 163 my $log_file_open_mode = '>'; 164 165 if ($spawn_opts and $spawn_opts->{'append_log_file'}) 166 { 167 $log_file_open_mode = '>>'; 168 } 169 170 if ( $output ) 171 { 172 if ( $::glob_win32_perl ) 173 { 174 # Don't redirect stdout on ActiveState perl since this is 175 # just another thread in the same process. 176 } 177 elsif ( ! open(STDOUT,$log_file_open_mode,$output) ) 178 { 179 mtr_child_error("can't redirect STDOUT to \"$output\": $!"); 180 } 181 } 182 183 if ( $error ) 184 { 185 if ( !$::glob_win32_perl and $output eq $error ) 186 { 187 if ( ! open(STDERR,">&STDOUT") ) 188 { 189 mtr_child_error("can't dup STDOUT: $!"); 190 } 191 } 192 else 193 { 194 if ( ! open(STDERR,$log_file_open_mode,$error) ) 195 { 196 mtr_child_error("can't redirect STDERR to \"$error\": $!"); 197 } 198 } 199 } 200 201 if ( $input ) 202 { 203 if ( ! open(STDIN,"<",$input) ) 204 { 205 mtr_child_error("can't redirect STDIN to \"$input\": $!"); 206 } 207 } 208 209 if ( ! exec($path,@$arg_list_t) ) 210 { 211 mtr_child_error("failed to execute \"$path\": $!"); 212 } 213 mtr_error("Should never come here 1!"); 214 } 215 mtr_error("Should never come here 2!"); 216 } 217 mtr_error("Should never come here 3!"); 218} 219 220 221sub spawn_parent_impl { 222 my $pid= shift; 223 my $mode= shift; 224 my $path= shift; 225 226 if ( $mode eq 'run' or $mode eq 'test' ) 227 { 228 if ( $mode eq 'run' ) 229 { 230 # Simple run of command, wait blocking for it to return 231 my $ret_pid= waitpid($pid,0); 232 if ( $ret_pid != $pid ) 233 { 234 # The "simple" waitpid has failed, print debug info 235 # and try to handle the error 236 mtr_warning("waitpid($pid, 0) returned $ret_pid " . 237 "when waiting for '$path', error: '$!'"); 238 if ( $ret_pid == -1 ) 239 { 240 # waitpid returned -1, that would indicate the process 241 # no longer exist and waitpid couldn't wait for it. 242 return 1; 243 } 244 mtr_error("Error handling failed"); 245 } 246 247 return mtr_process_exit_status($?); 248 } 249 else 250 { 251 # We run mysqltest and wait for it to return. But we try to 252 # catch dying mysqld processes as well. 253 # 254 # We do blocking waitpid() until we get the return from the 255 # "mysqltest" call. But if a mysqld process dies that we 256 # started, we take this as an error, and kill mysqltest. 257 258 259 my $exit_value= -1; 260 my $saved_exit_value; 261 my $ret_pid; # What waitpid() returns 262 263 while ( ($ret_pid= waitpid(-1,0)) != -1 ) 264 { 265 # Someone terminated, don't know who. Collect 266 # status info first before $? is lost, 267 # but not $exit_value, this is flagged from 268 269 my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid); 270 if ( $timer_name ) 271 { 272 if ( $timer_name eq "suite" ) 273 { 274 # We give up here 275 print STDERR "\n"; 276 kill(9, $pid); # Kill mysqltest 277 mtr_kill_leftovers(); # Kill servers the hard way 278 mtr_error("Test suite timeout"); 279 } 280 elsif ( $timer_name eq "testcase" ) 281 { 282 $saved_exit_value= 63; # Mark as timeout 283 kill(9, $pid); # Kill mysqltest 284 next; # Go on and catch the termination 285 } 286 } 287 288 if ( $ret_pid == $pid ) 289 { 290 # We got termination of mysqltest, we are done 291 $exit_value= mtr_process_exit_status($?); 292 last; 293 } 294 295 # One of the child processes died, unless this was expected 296 # mysqltest should be killed and test aborted 297 298 check_expected_crash_and_restart($ret_pid); 299 } 300 301 if ( $ret_pid != $pid ) 302 { 303 # We terminated the waiting because a "mysqld" process died. 304 # Kill the mysqltest process. 305 mtr_verbose("Kill mysqltest because another process died"); 306 kill(9,$pid); 307 308 $ret_pid= waitpid($pid,0); 309 310 if ( $ret_pid != $pid ) 311 { 312 mtr_error("$path ($pid) got lost somehow"); 313 } 314 } 315 316 return $saved_exit_value || $exit_value; 317 } 318 } 319 else 320 { 321 # We spawned a process we don't wait for 322 return $pid; 323 } 324} 325 326 327# ---------------------------------------------------------------------- 328# We try to emulate how an Unix shell calculates the exit code 329# ---------------------------------------------------------------------- 330 331sub mtr_process_exit_status { 332 my $raw_status= shift; 333 334 if ( $raw_status & 127 ) 335 { 336 return ($raw_status & 127) + 128; # Signal num + 128 337 } 338 else 339 { 340 return $raw_status >> 8; # Exit code 341 } 342} 343 344 345############################################################################## 346# 347# Kill processes left from previous runs 348# 349############################################################################## 350 351 352# Kill all processes(mysqld, ndbd, ndb_mgmd and im) that would conflict with 353# this run 354# Make sure to remove the PID file, if any. 355# kill IM manager first, else it will restart the servers 356sub mtr_kill_leftovers () { 357 358 mtr_report("Killing Possible Leftover Processes"); 359 mtr_debug("mtr_kill_leftovers(): started."); 360 361 my @kill_pids; 362 my %admin_pids; 363 364 foreach my $srv (@{$::master}, @{$::slave}) 365 { 366 mtr_debug(" - mysqld " . 367 "(pid: $srv->{pid}; " . 368 "pid file: '$srv->{path_pid}'; " . 369 "socket: '$srv->{path_sock}'; ". 370 "port: $srv->{port})"); 371 372 my $pid= mtr_mysqladmin_start($srv, "shutdown", 20); 373 374 # Save the pid of the mysqladmin process 375 $admin_pids{$pid}= 1; 376 377 push(@kill_pids,{ 378 pid => $srv->{'pid'}, 379 pidfile => $srv->{'path_pid'}, 380 sockfile => $srv->{'path_sock'}, 381 port => $srv->{'port'}, 382 }); 383 $srv->{'pid'}= 0; # Assume we are done with it 384 } 385 386 if ( ! $::opt_skip_ndbcluster ) 387 { 388 389 foreach my $cluster (@{$::clusters}) 390 { 391 392 # Don't shut down a "running" cluster 393 next if $cluster->{'use_running'}; 394 395 mtr_debug(" - cluster " . 396 "(pid: $cluster->{pid}; " . 397 "pid file: '$cluster->{path_pid})"); 398 399 my $pid= mtr_ndbmgm_start($cluster, "shutdown"); 400 401 # Save the pid of the ndb_mgm process 402 $admin_pids{$pid}= 1; 403 404 push(@kill_pids,{ 405 pid => $cluster->{'pid'}, 406 pidfile => $cluster->{'path_pid'} 407 }); 408 409 $cluster->{'pid'}= 0; # Assume we are done with it 410 411 foreach my $ndbd (@{$cluster->{'ndbds'}}) 412 { 413 mtr_debug(" - ndbd " . 414 "(pid: $ndbd->{pid}; " . 415 "pid file: '$ndbd->{path_pid})"); 416 417 push(@kill_pids,{ 418 pid => $ndbd->{'pid'}, 419 pidfile => $ndbd->{'path_pid'}, 420 }); 421 $ndbd->{'pid'}= 0; # Assume we are done with it 422 } 423 } 424 } 425 426 # Wait for all the admin processes to complete 427 mtr_wait_blocking(\%admin_pids); 428 429 # If we trusted "mysqladmin --shutdown_timeout= ..." we could just 430 # terminate now, but we don't (FIXME should be debugged). 431 # So we try again to ping and at least wait the same amount of time 432 # mysqladmin would for all to die. 433 434 mtr_ping_with_timeout(\@kill_pids); 435 436 # We now have tried to terminate nice. We have waited for the listen 437 # port to be free, but can't really tell if the mysqld process died 438 # or not. We now try to find the process PID from the PID file, and 439 # send a kill to that process. Note that Perl let kill(0,@pids) be 440 # a way to just return the numer of processes the kernel can send 441 # signals to. So this can be used (except on Cygwin) to determine 442 # if there are processes left running that we cound out might exists. 443 # 444 # But still after all this work, all we know is that we have 445 # the ports free. 446 447 # We scan the "var/run/" directory for other process id's to kill 448 449 my $rundir= "$::opt_vardir/run"; 450 451 mtr_debug("Processing PID files in directory '$rundir'..."); 452 453 if ( -d $rundir ) 454 { 455 opendir(RUNDIR, $rundir) 456 or mtr_error("can't open directory \"$rundir\": $!"); 457 458 my @pids; 459 460 while ( my $elem= readdir(RUNDIR) ) 461 { 462 # Only read pid from files that end with .pid 463 if ( $elem =~ /.*[.]pid$/) 464 { 465 my $pidfile= "$rundir/$elem"; 466 467 if ( -f $pidfile ) 468 { 469 mtr_debug("Processing PID file: '$pidfile'..."); 470 471 my $pid= mtr_get_pid_from_file($pidfile); 472 473 mtr_debug("Got pid: $pid from file '$pidfile'"); 474 475 if ( $::glob_cygwin_perl or kill(0, $pid) ) 476 { 477 mtr_debug("There is process with pid $pid -- scheduling for kill."); 478 push(@pids, $pid); # We know (cygwin guess) it exists 479 } 480 else 481 { 482 mtr_debug("There is no process with pid $pid -- skipping."); 483 } 484 } 485 } 486 else 487 { 488 mtr_warning("Found non pid file $elem in $rundir") 489 if -f "$rundir/$elem"; 490 next; 491 } 492 } 493 closedir(RUNDIR); 494 495 if ( @pids ) 496 { 497 mtr_debug("Killing the following processes with PID files: " . 498 join(' ', @pids) . "..."); 499 500 start_reap_all(); 501 502 if ( $::glob_cygwin_perl ) 503 { 504 # We have no (easy) way of knowing the Cygwin controlling 505 # process, in the PID file we only have the Windows process id. 506 system("kill -f " . join(" ",@pids)); # Hope for the best.... 507 mtr_debug("Sleep 5 seconds waiting for processes to die"); 508 sleep(5); 509 } 510 else 511 { 512 my $retries= 10; # 10 seconds 513 do 514 { 515 mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids)); 516 kill(9, @pids); 517 mtr_report("Sleep 1 second waiting for processes to die"); 518 sleep(1) # Wait one second 519 } while ( $retries-- and kill(0, @pids) ); 520 521 if ( kill(0, @pids) ) # Check if some left 522 { 523 mtr_warning("can't kill process(es) " . join(" ", @pids)); 524 } 525 } 526 527 stop_reap_all(); 528 } 529 } 530 else 531 { 532 mtr_debug("Directory for PID files ($rundir) does not exist."); 533 } 534 535 # We may have failed everything, but we now check again if we have 536 # the listen ports free to use, and if they are free, just go for it. 537 538 mtr_debug("Checking known mysqld servers..."); 539 540 foreach my $srv ( @kill_pids ) 541 { 542 if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) ) 543 { 544 mtr_warning("can't kill old process holding port $srv->{'port'}"); 545 } 546 } 547 548 mtr_debug("mtr_kill_leftovers(): finished."); 549} 550 551 552# 553# Check that all processes in "spec" are shutdown gracefully 554# else kill them off hard 555# 556sub mtr_check_stop_servers ($) { 557 my $spec= shift; 558 559 # Return if no processes are defined 560 return if ! @$spec; 561 562 mtr_verbose("mtr_check_stop_servers"); 563 564 # ---------------------------------------------------------------------- 565 # Wait until servers in "spec" has stopped listening 566 # to their ports or timeout occurs 567 # ---------------------------------------------------------------------- 568 mtr_ping_with_timeout(\@$spec); 569 570 # ---------------------------------------------------------------------- 571 # Use waitpid() nonblocking for a little while, to see how 572 # many process's will exit sucessfully. 573 # This is the normal case. 574 # ---------------------------------------------------------------------- 575 my $wait_counter= 50; # Max number of times to redo the loop 576 foreach my $srv ( @$spec ) 577 { 578 my $pid= $srv->{'pid'}; 579 my $ret_pid; 580 if ( $pid ) 581 { 582 $ret_pid= waitpid($pid,&WNOHANG); 583 if ($ret_pid == $pid) 584 { 585 mtr_verbose("Caught exit of process $ret_pid"); 586 $srv->{'pid'}= 0; 587 } 588 elsif ($ret_pid == 0) 589 { 590 mtr_verbose("Process $pid is still alive"); 591 if ($wait_counter-- > 0) 592 { 593 # Give the processes more time to exit 594 select(undef, undef, undef, (0.1)); 595 redo; 596 } 597 } 598 else 599 { 600 mtr_warning("caught exit of unknown child $ret_pid"); 601 } 602 } 603 } 604 605 # ---------------------------------------------------------------------- 606 # The processes that haven't yet exited need to 607 # be killed hard, put them in "kill_pids" hash 608 # ---------------------------------------------------------------------- 609 my %kill_pids; 610 foreach my $srv ( @$spec ) 611 { 612 my $pid= $srv->{'pid'}; 613 if ( $pid ) 614 { 615 # Server is still alive, put it in list to be hard killed 616 if ($::glob_win32_perl) 617 { 618 # Kill the real process if it's known 619 $pid= $srv->{'real_pid'} if ($srv->{'real_pid'}); 620 } 621 $kill_pids{$pid}= 1; 622 623 # Write a message to the process's error log (if it has one) 624 # that it's being killed hard. 625 if ( defined $srv->{'errfile'} ) 626 { 627 mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n"); 628 } 629 mtr_warning("Forcing kill of process $pid"); 630 631 } 632 else 633 { 634 # Server is dead, remove the pidfile if it exists 635 # 636 # Race, could have been removed between test with -f 637 # and the unlink() below, so better check again with -f 638 if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and 639 -f $srv->{'pidfile'} ) 640 { 641 mtr_error("can't remove $srv->{'pidfile'}"); 642 } 643 } 644 } 645 646 if ( ! keys %kill_pids ) 647 { 648 # All processes has exited gracefully 649 return; 650 } 651 652 mtr_kill_processes(\%kill_pids); 653 654 # ---------------------------------------------------------------------- 655 # All processes are killed, cleanup leftover files 656 # ---------------------------------------------------------------------- 657 { 658 my $errors= 0; 659 foreach my $srv ( @$spec ) 660 { 661 if ( $srv->{'pid'} ) 662 { 663 # Server has been hard killed, clean it's resources 664 foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'}) 665 { 666 # Know it is dead so should be no race, careful anyway 667 if ( defined $file and -f $file and ! unlink($file) and -f $file ) 668 { 669 $errors++; 670 mtr_warning("couldn't delete $file"); 671 } 672 } 673 674 if ($::glob_win32_perl and $srv->{'real_pid'}) 675 { 676 # Wait for the pseudo pid - if the real_pid was known 677 # the pseudo pid has not been waited for yet, wai blocking 678 # since it's "such a simple program" 679 mtr_verbose("Wait for pseudo process $srv->{'pid'}"); 680 my $ret_pid= waitpid($srv->{'pid'}, 0); 681 mtr_verbose("Pseudo process $ret_pid died"); 682 } 683 684 $srv->{'pid'}= 0; 685 } 686 } 687 if ( $errors ) 688 { 689 # There where errors killing processes 690 # do one last attempt to ping the servers 691 # and if they can't be pinged, assume they are dead 692 if ( ! mtr_ping_with_timeout( \@$spec ) ) 693 { 694 mtr_error("we could not kill or clean up all processes"); 695 } 696 else 697 { 698 mtr_verbose("All ports were free, continuing"); 699 } 700 } 701 } 702} 703 704 705# Wait for all the process in the list to terminate 706sub mtr_wait_blocking($) { 707 my $admin_pids= shift; 708 709 710 # Return if no processes defined 711 return if ! %$admin_pids; 712 713 mtr_verbose("mtr_wait_blocking"); 714 715 # Wait for all the started processes to exit 716 # As mysqladmin is such a simple program, we trust it to terminate itself. 717 # I.e. we wait blocking, and wait for them all before we go on. 718 foreach my $pid (keys %{$admin_pids}) 719 { 720 my $ret_pid= waitpid($pid,0); 721 722 } 723} 724 725# Start "mysqladmin <command>" for a specific mysqld 726sub mtr_mysqladmin_start($$$) { 727 my $srv= shift; 728 my $command= shift; 729 my $adm_shutdown_tmo= shift; 730 731 my $args; 732 mtr_init_args(\$args); 733 734 mtr_add_arg($args, "--no-defaults"); 735 mtr_add_arg($args, "--user=%s", $::opt_user); 736 mtr_add_arg($args, "--password="); 737 mtr_add_arg($args, "--silent"); 738 if ( -e $srv->{'path_sock'} ) 739 { 740 mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'}); 741 } 742 if ( $srv->{'port'} ) 743 { 744 mtr_add_arg($args, "--port=%s", $srv->{'port'}); 745 } 746 if ( $srv->{'port'} and ! -e $srv->{'path_sock'} ) 747 { 748 mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket 749 } 750 mtr_add_arg($args, "--connect_timeout=5"); 751 752 # Shutdown time must be high as slave may be in reconnect 753 mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo"); 754 mtr_add_arg($args, "$command"); 755 my $pid= mtr_spawn($::exe_mysqladmin, $args, 756 "", "", "", "", 757 { append_log_file => 1 }); 758 mtr_verbose("mtr_mysqladmin_start, pid: $pid"); 759 return $pid; 760 761} 762 763# Start "ndb_mgm shutdown" for a specific cluster, it will 764# shutdown all data nodes and leave the ndb_mgmd running 765sub mtr_ndbmgm_start($$) { 766 my $cluster= shift; 767 my $command= shift; 768 769 my $args; 770 771 mtr_init_args(\$args); 772 773 mtr_add_arg($args, "--no-defaults"); 774 mtr_add_arg($args, "--core"); 775 mtr_add_arg($args, "--try-reconnect=1"); 776 mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'}); 777 mtr_add_arg($args, "-e"); 778 mtr_add_arg($args, "$command"); 779 780 my $pid= mtr_spawn($::exe_ndb_mgm, $args, 781 "", "/dev/null", "/dev/null", "", 782 {}); 783 mtr_verbose("mtr_ndbmgm_start, pid: $pid"); 784 return $pid; 785 786} 787 788 789# Ping all servers in list, exit when none of them answers 790# or when timeout has passed 791sub mtr_ping_with_timeout($) { 792 my $spec= shift; 793 my $timeout= 200; # 20 seconds max 794 my $res= 1; # If we just fall through, we are done 795 # in the sense that the servers don't 796 # listen to their ports any longer 797 798 mtr_debug("Waiting for mysqld servers to stop..."); 799 800 TIME: 801 while ( $timeout-- ) 802 { 803 foreach my $srv ( @$spec ) 804 { 805 $res= 1; # We are optimistic 806 if ( $srv->{'pid'} and defined $srv->{'port'} ) 807 { 808 if ( mtr_ping_port($srv->{'port'}) ) 809 { 810 mtr_verbose("waiting for process $srv->{'pid'} to stop ". 811 "using port $srv->{'port'}"); 812 813 # Millisceond sleep emulated with select 814 select(undef, undef, undef, (0.1)); 815 $res= 0; 816 next TIME; 817 } 818 else 819 { 820 # Process was not using port 821 } 822 } 823 } 824 last; # If we got here, we are done 825 } 826 827 if ($res) 828 { 829 mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down."); 830 } 831 else 832 { 833 mtr_report("mtr_ping_with_timeout(): At least one server is alive."); 834 } 835 836 return $res; 837} 838 839 840# 841# Loop through our list of processes and look for and entry 842# with the provided pid 843# Set the pid of that process to 0 if found 844# 845sub mark_process_dead($) 846{ 847 my $ret_pid= shift; 848 849 foreach my $mysqld (@{$::master}, @{$::slave}) 850 { 851 if ( $mysqld->{'pid'} eq $ret_pid ) 852 { 853 mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); 854 $mysqld->{'pid'}= 0; 855 return; 856 } 857 } 858 859 foreach my $cluster (@{$::clusters}) 860 { 861 if ( $cluster->{'pid'} eq $ret_pid ) 862 { 863 mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid"); 864 $cluster->{'pid'}= 0; 865 return; 866 } 867 868 foreach my $ndbd (@{$cluster->{'ndbds'}}) 869 { 870 if ( $ndbd->{'pid'} eq $ret_pid ) 871 { 872 mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid"); 873 $ndbd->{'pid'}= 0; 874 return; 875 } 876 } 877 } 878 mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid"); 879 880} 881 882# 883# Loop through our list of processes and look for and entry 884# with the provided pid, if found check for the file indicating 885# expected crash and restart it. 886# 887sub check_expected_crash_and_restart($) 888{ 889 my $ret_pid= shift; 890 891 foreach my $mysqld (@{$::master}, @{$::slave}) 892 { 893 if ( $mysqld->{'pid'} eq $ret_pid ) 894 { 895 mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); 896 $mysqld->{'pid'}= 0; 897 898 # Check if crash expected and restart if it was 899 my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" . 900 "$mysqld->{'idx'}" . ".expect"; 901 if ( -f $expect_file ) 902 { 903 mtr_verbose("Crash was expected, file $expect_file exists"); 904 mysqld_start($mysqld, $mysqld->{'start_opts'}, 905 $mysqld->{'start_slave_master_info'}); 906 unlink($expect_file); 907 } 908 909 return; 910 } 911 } 912 913 foreach my $cluster (@{$::clusters}) 914 { 915 if ( $cluster->{'pid'} eq $ret_pid ) 916 { 917 mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid"); 918 $cluster->{'pid'}= 0; 919 920 # Check if crash expected and restart if it was 921 my $expect_file= "$::opt_vardir/tmp/ndb_mgmd_" . "$cluster->{'type'}" . 922 ".expect"; 923 if ( -f $expect_file ) 924 { 925 mtr_verbose("Crash was expected, file $expect_file exists"); 926 ndbmgmd_start($cluster); 927 unlink($expect_file); 928 } 929 return; 930 } 931 932 foreach my $ndbd (@{$cluster->{'ndbds'}}) 933 { 934 if ( $ndbd->{'pid'} eq $ret_pid ) 935 { 936 mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid"); 937 $ndbd->{'pid'}= 0; 938 939 # Check if crash expected and restart if it was 940 my $expect_file= "$::opt_vardir/tmp/ndbd_" . "$cluster->{'type'}" . 941 "$ndbd->{'idx'}" . ".expect"; 942 if ( -f $expect_file ) 943 { 944 mtr_verbose("Crash was expected, file $expect_file exists"); 945 ndbd_start($cluster, $ndbd->{'idx'}, 946 $ndbd->{'start_extra_args'}); 947 unlink($expect_file); 948 } 949 return; 950 } 951 } 952 } 953 954 if ($::instance_manager->{'spawner_pid'} eq $ret_pid) 955 { 956 return; 957 } 958 959 mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid"); 960 961} 962 963############################################################################## 964# 965# The operating system will keep information about dead children, 966# we read this information here, and if we have records the process 967# is alive, we mark it as dead. 968# 969############################################################################## 970 971sub mtr_record_dead_children () { 972 973 my $process_died= 0; 974 my $ret_pid; 975 976 # Wait without blockinng to see if any processes had died 977 # -1 or 0 means there are no more procesess to wait for 978 while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1) 979 { 980 mtr_warning("mtr_record_dead_children: $ret_pid"); 981 mark_process_dead($ret_pid); 982 $process_died= 1; 983 } 984 return $process_died; 985} 986 987sub start_reap_all { 988 # This causes terminating processes to not become zombies, avoiding 989 # the need for (or possibility of) explicit waitpid(). 990 $SIG{CHLD}= 'IGNORE'; 991 992 # On some platforms (Linux, QNX, OSX, ...) there is potential race 993 # here. If a process terminated before setting $SIG{CHLD} (but after 994 # any attempt to waitpid() it), it will still be a zombie. So we 995 # have to handle any such process here. 996 my $pid; 997 while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1) 998 { 999 mtr_warning("start_reap_all pid: $pid"); 1000 mark_process_dead($pid); 1001 }; 1002} 1003 1004sub stop_reap_all { 1005 $SIG{CHLD}= 'DEFAULT'; 1006} 1007 1008 1009sub mtr_ping_port ($) { 1010 my $port= shift; 1011 1012 mtr_verbose("mtr_ping_port: $port"); 1013 1014 my $remote= "localhost"; 1015 my $iaddr= inet_aton($remote); 1016 if ( ! $iaddr ) 1017 { 1018 mtr_error("can't find IP number for $remote"); 1019 } 1020 my $paddr= sockaddr_in($port, $iaddr); 1021 my $proto= getprotobyname('tcp'); 1022 if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) ) 1023 { 1024 mtr_error("can't create socket: $!"); 1025 } 1026 1027 mtr_debug("Pinging server (port: $port)..."); 1028 1029 if ( connect(SOCK, $paddr) ) 1030 { 1031 close(SOCK); # FIXME check error? 1032 mtr_verbose("USED"); 1033 return 1; 1034 } 1035 else 1036 { 1037 mtr_verbose("FREE"); 1038 return 0; 1039 } 1040} 1041 1042############################################################################## 1043# 1044# Wait for a file to be created 1045# 1046############################################################################## 1047 1048# FIXME check that the pidfile contains the expected pid! 1049 1050sub sleep_until_file_created ($$$) { 1051 my $pidfile= shift; 1052 my $timeout= shift; 1053 my $pid= shift; 1054 my $sleeptime= 100; # Milliseconds 1055 my $loops= ($timeout * 1000) / $sleeptime; 1056 1057 for ( my $loop= 1; $loop <= $loops; $loop++ ) 1058 { 1059 if ( -r $pidfile ) 1060 { 1061 return 1; 1062 } 1063 1064 # Check if it died after the fork() was successful 1065 if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid ) 1066 { 1067 mtr_warning("Process $pid died"); 1068 return 0; 1069 } 1070 1071 mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile"); 1072 1073 # Print extra message every 60 seconds 1074 my $seconds= ($loop * $sleeptime) / 1000; 1075 if ( $seconds > 1 and int($seconds * 10) % 600 == 0 ) 1076 { 1077 my $left= $timeout - $seconds; 1078 mtr_warning("Waited $seconds seconds for $pidfile to be created, " . 1079 "still waiting for $left seconds..."); 1080 } 1081 1082 # Millisceond sleep emulated with select 1083 select(undef, undef, undef, ($sleeptime/1000)); 1084 } 1085 1086 return 0; 1087} 1088 1089 1090sub mtr_kill_processes ($) { 1091 my $pids = shift; 1092 1093 mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")"); 1094 1095 foreach my $pid (keys %{$pids}) 1096 { 1097 1098 if ($pid <= 0) 1099 { 1100 mtr_warning("Trying to kill illegal pid: $pid"); 1101 next; 1102 } 1103 1104 my $signaled_procs= kill(9, $pid); 1105 if ($signaled_procs == 0) 1106 { 1107 # No such process existed, assume it's killed 1108 mtr_verbose("killed $pid(no such process)"); 1109 } 1110 else 1111 { 1112 my $ret_pid= waitpid($pid,0); 1113 if ($ret_pid == $pid) 1114 { 1115 mtr_verbose("killed $pid(got the pid)"); 1116 } 1117 elsif ($ret_pid == -1) 1118 { 1119 mtr_verbose("killed $pid(got -1)"); 1120 } 1121 } 1122 } 1123 mtr_verbose("done killing processes"); 1124} 1125 1126 1127############################################################################## 1128# 1129# When we exit, we kill off all children 1130# 1131############################################################################## 1132 1133sub mtr_exit ($) { 1134 my $code= shift; 1135 mtr_timer_stop_all($::glob_timers); 1136 local $SIG{HUP} = 'IGNORE'; 1137 # ToDo: Signalling -$$ will only work if we are the process group 1138 # leader (in fact on QNX it will signal our session group leader, 1139 # which might be Do-compile or Pushbuild, causing tests to be 1140 # aborted). So we only do it if we are the group leader. We might 1141 # set ourselves as the group leader at startup (with 1142 # POSIX::setpgrp(0,0)), but then care must be needed to always do 1143 # proper child process cleanup. 1144 POSIX::kill(SIGHUP, -$$) if !$::glob_win32_perl and $$ == getpgrp(); 1145 1146 exit($code); 1147} 1148 1149########################################################################### 1150 11511; 1152