1# -*- cperl -*- 2# Copyright (c) 2004-2006 MySQL AB, 2008 Sun Microsystems, Inc. 3# Use is subject to license terms. 4# 5# This program is free software; you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation; version 2 of the License. 8# 9# This program is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# GNU General Public License for more details. 13# 14# You should have received a copy of the GNU General Public License 15# along with this program; if not, write to the Free Software 16# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA 17 18# This is a library file used by the Perl version of mysql-test-run, 19# and is part of the translation of the Bourne shell script with the 20# same name. 21 22use Socket; 23use Errno; 24use strict; 25 26use POSIX qw(WNOHANG SIGHUP); 27 28sub mtr_run ($$$$$$;$); 29sub mtr_spawn ($$$$$$;$); 30sub mtr_check_stop_servers ($); 31sub mtr_kill_leftovers (); 32sub mtr_wait_blocking ($); 33sub mtr_record_dead_children (); 34sub mtr_mysqladmin_start($$$); 35sub mtr_exit ($); 36sub sleep_until_file_created ($$$); 37sub mtr_kill_processes ($); 38sub mtr_ping_with_timeout($); 39sub mtr_ping_port ($); 40 41# Local function 42sub spawn_impl ($$$$$$$); 43 44############################################################################## 45# 46# Execute an external command 47# 48############################################################################## 49 50sub mtr_run ($$$$$$;$) { 51 my $path= shift; 52 my $arg_list_t= shift; 53 my $input= shift; 54 my $output= shift; 55 my $error= shift; 56 my $pid_file= shift; # Not used 57 my $spawn_opts= shift; 58 59 return spawn_impl($path,$arg_list_t,'run',$input,$output,$error, 60 $spawn_opts); 61} 62 63sub mtr_run_test ($$$$$$;$) { 64 my $path= shift; 65 my $arg_list_t= shift; 66 my $input= shift; 67 my $output= shift; 68 my $error= shift; 69 my $pid_file= shift; # Not used 70 my $spawn_opts= shift; 71 72 return spawn_impl($path,$arg_list_t,'test',$input,$output,$error, 73 $spawn_opts); 74} 75 76sub mtr_spawn ($$$$$$;$) { 77 my $path= shift; 78 my $arg_list_t= shift; 79 my $input= shift; 80 my $output= shift; 81 my $error= shift; 82 my $pid_file= shift; # Not used 83 my $spawn_opts= shift; 84 85 return spawn_impl($path,$arg_list_t,'spawn',$input,$output,$error, 86 $spawn_opts); 87} 88 89 90 91sub spawn_impl ($$$$$$$) { 92 my $path= shift; 93 my $arg_list_t= shift; 94 my $mode= shift; 95 my $input= shift; 96 my $output= shift; 97 my $error= shift; 98 my $spawn_opts= shift; 99 100 if ( $::opt_script_debug ) 101 { 102 mtr_report(""); 103 mtr_debug("-" x 73); 104 mtr_debug("STDIN $input") if $input; 105 mtr_debug("STDOUT $output") if $output; 106 mtr_debug("STDERR $error") if $error; 107 mtr_debug("$mode: $path ", join(" ",@$arg_list_t)); 108 mtr_debug("spawn options:"); 109 if ($spawn_opts) 110 { 111 foreach my $key (sort keys %{$spawn_opts}) 112 { 113 mtr_debug(" - $key: $spawn_opts->{$key}"); 114 } 115 } 116 else 117 { 118 mtr_debug(" none"); 119 } 120 mtr_debug("-" x 73); 121 mtr_report(""); 122 } 123 124 mtr_error("Can't spawn with empty \"path\"") unless defined $path; 125 126 127 FORK: 128 { 129 my $pid= fork(); 130 131 if ( ! defined $pid ) 132 { 133 if ( $! == $!{EAGAIN} ) # See "perldoc Errno" 134 { 135 mtr_warning("Got EAGAIN from fork(), sleep 1 second and redo"); 136 sleep(1); 137 redo FORK; 138 } 139 140 mtr_error("$path ($pid) can't be forked, error: $!"); 141 142 } 143 144 if ( $pid ) 145 { 146 select(STDOUT) if $::glob_win32_perl; 147 return spawn_parent_impl($pid,$mode,$path); 148 } 149 else 150 { 151 # Child, redirect output and exec 152 153 $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't 154 155 my $log_file_open_mode = '>'; 156 157 if ($spawn_opts and $spawn_opts->{'append_log_file'}) 158 { 159 $log_file_open_mode = '>>'; 160 } 161 162 if ( $output ) 163 { 164 if ( $::glob_win32_perl ) 165 { 166 # Don't redirect stdout on ActiveState perl since this is 167 # just another thread in the same process. 168 } 169 elsif ( ! open(STDOUT,$log_file_open_mode,$output) ) 170 { 171 mtr_child_error("can't redirect STDOUT to \"$output\": $!"); 172 } 173 } 174 175 if ( $error ) 176 { 177 if ( !$::glob_win32_perl and $output eq $error ) 178 { 179 if ( ! open(STDERR,">&STDOUT") ) 180 { 181 mtr_child_error("can't dup STDOUT: $!"); 182 } 183 } 184 else 185 { 186 if ( ! open(STDERR,$log_file_open_mode,$error) ) 187 { 188 mtr_child_error("can't redirect STDERR to \"$error\": $!"); 189 } 190 } 191 } 192 193 if ( $input ) 194 { 195 if ( ! open(STDIN,"<",$input) ) 196 { 197 mtr_child_error("can't redirect STDIN to \"$input\": $!"); 198 } 199 } 200 201 if ( ! exec($path,@$arg_list_t) ) 202 { 203 mtr_child_error("failed to execute \"$path\": $!"); 204 } 205 mtr_error("Should never come here 1!"); 206 } 207 mtr_error("Should never come here 2!"); 208 } 209 mtr_error("Should never come here 3!"); 210} 211 212 213sub spawn_parent_impl { 214 my $pid= shift; 215 my $mode= shift; 216 my $path= shift; 217 218 if ( $mode eq 'run' or $mode eq 'test' ) 219 { 220 if ( $mode eq 'run' ) 221 { 222 # Simple run of command, wait blocking for it to return 223 my $ret_pid= waitpid($pid,0); 224 if ( $ret_pid != $pid ) 225 { 226 # The "simple" waitpid has failed, print debug info 227 # and try to handle the error 228 mtr_warning("waitpid($pid, 0) returned $ret_pid " . 229 "when waiting for '$path', error: '$!'"); 230 if ( $ret_pid == -1 ) 231 { 232 # waitpid returned -1, that would indicate the process 233 # no longer exist and waitpid couldn't wait for it. 234 return 1; 235 } 236 mtr_error("Error handling failed"); 237 } 238 239 return mtr_process_exit_status($?); 240 } 241 else 242 { 243 # We run mysqltest and wait for it to return. But we try to 244 # catch dying mysqld processes as well. 245 # 246 # We do blocking waitpid() until we get the return from the 247 # "mysqltest" call. But if a mysqld process dies that we 248 # started, we take this as an error, and kill mysqltest. 249 250 251 my $exit_value= -1; 252 my $saved_exit_value; 253 my $ret_pid; # What waitpid() returns 254 255 while ( ($ret_pid= waitpid(-1,0)) != -1 ) 256 { 257 # Someone terminated, don't know who. Collect 258 # status info first before $? is lost, 259 # but not $exit_value, this is flagged from 260 261 my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid); 262 if ( $timer_name ) 263 { 264 if ( $timer_name eq "suite" ) 265 { 266 # We give up here 267 print STDERR "\n"; 268 kill(9, $pid); # Kill mysqltest 269 mtr_kill_leftovers(); # Kill servers the hard way 270 mtr_error("Test suite timeout"); 271 } 272 elsif ( $timer_name eq "testcase" ) 273 { 274 $saved_exit_value= 63; # Mark as timeout 275 kill(9, $pid); # Kill mysqltest 276 next; # Go on and catch the termination 277 } 278 } 279 280 if ( $ret_pid == $pid ) 281 { 282 # We got termination of mysqltest, we are done 283 $exit_value= mtr_process_exit_status($?); 284 last; 285 } 286 287 # One of the child processes died, unless this was expected 288 # mysqltest should be killed and test aborted 289 290 check_expected_crash_and_restart($ret_pid); 291 } 292 293 if ( $ret_pid != $pid ) 294 { 295 # We terminated the waiting because a "mysqld" process died. 296 # Kill the mysqltest process. 297 mtr_verbose("Kill mysqltest because another process died"); 298 kill(9,$pid); 299 300 $ret_pid= waitpid($pid,0); 301 302 if ( $ret_pid != $pid ) 303 { 304 mtr_error("$path ($pid) got lost somehow"); 305 } 306 } 307 308 return $saved_exit_value || $exit_value; 309 } 310 } 311 else 312 { 313 # We spawned a process we don't wait for 314 return $pid; 315 } 316} 317 318 319# ---------------------------------------------------------------------- 320# We try to emulate how an Unix shell calculates the exit code 321# ---------------------------------------------------------------------- 322 323sub mtr_process_exit_status { 324 my $raw_status= shift; 325 326 if ( $raw_status & 127 ) 327 { 328 return ($raw_status & 127) + 128; # Signal num + 128 329 } 330 else 331 { 332 return $raw_status >> 8; # Exit code 333 } 334} 335 336 337############################################################################## 338# 339# Kill processes left from previous runs 340# 341############################################################################## 342 343 344# Kill all processes that would conflict with this run 345# Make sure to remove the PID file, if any. 346sub mtr_kill_leftovers () { 347 348 mtr_report("Killing Possible Leftover Processes"); 349 mtr_debug("mtr_kill_leftovers(): started."); 350 351 my @kill_pids; 352 my %admin_pids; 353 354 foreach my $srv (@{$::master}, @{$::slave}) 355 { 356 mtr_debug(" - mysqld " . 357 "(pid: $srv->{pid}; " . 358 "pid file: '$srv->{path_pid}'; " . 359 "socket: '$srv->{path_sock}'; ". 360 "port: $srv->{port})"); 361 362 my $pid= mtr_mysqladmin_start($srv, "shutdown", 20); 363 364 # Save the pid of the mysqladmin process 365 $admin_pids{$pid}= 1; 366 367 push(@kill_pids,{ 368 pid => $srv->{'pid'}, 369 pidfile => $srv->{'path_pid'}, 370 sockfile => $srv->{'path_sock'}, 371 port => $srv->{'port'}, 372 }); 373 $srv->{'pid'}= 0; # Assume we are done with it 374 } 375 376 # Wait for all the admin processes to complete 377 mtr_wait_blocking(\%admin_pids); 378 379 # If we trusted "mysqladmin --shutdown_timeout= ..." we could just 380 # terminate now, but we don't (FIXME should be debugged). 381 # So we try again to ping and at least wait the same amount of time 382 # mysqladmin would for all to die. 383 384 mtr_ping_with_timeout(\@kill_pids); 385 386 # We now have tried to terminate nice. We have waited for the listen 387 # port to be free, but can't really tell if the mysqld process died 388 # or not. We now try to find the process PID from the PID file, and 389 # send a kill to that process. Note that Perl let kill(0,@pids) be 390 # a way to just return the numer of processes the kernel can send 391 # signals to. So this can be used (except on Cygwin) to determine 392 # if there are processes left running that we cound out might exists. 393 # 394 # But still after all this work, all we know is that we have 395 # the ports free. 396 397 # We scan the "var/run/" directory for other process id's to kill 398 399 my $rundir= "$::opt_vardir/run"; 400 401 mtr_debug("Processing PID files in directory '$rundir'..."); 402 403 if ( -d $rundir ) 404 { 405 opendir(RUNDIR, $rundir) 406 or mtr_error("can't open directory \"$rundir\": $!"); 407 408 my @pids; 409 410 while ( my $elem= readdir(RUNDIR) ) 411 { 412 # Only read pid from files that end with .pid 413 if ( $elem =~ /.*[.]pid$/) 414 { 415 my $pidfile= "$rundir/$elem"; 416 417 if ( -f $pidfile ) 418 { 419 mtr_debug("Processing PID file: '$pidfile'..."); 420 421 my $pid= mtr_get_pid_from_file($pidfile); 422 423 mtr_debug("Got pid: $pid from file '$pidfile'"); 424 425 if ( $::glob_cygwin_perl or kill(0, $pid) ) 426 { 427 mtr_debug("There is process with pid $pid -- scheduling for kill."); 428 push(@pids, $pid); # We know (cygwin guess) it exists 429 } 430 else 431 { 432 mtr_debug("There is no process with pid $pid -- skipping."); 433 } 434 } 435 } 436 } 437 closedir(RUNDIR); 438 439 if ( @pids ) 440 { 441 mtr_debug("Killing the following processes with PID files: " . 442 join(' ', @pids) . "..."); 443 444 start_reap_all(); 445 446 if ( $::glob_cygwin_perl ) 447 { 448 # We have no (easy) way of knowing the Cygwin controlling 449 # process, in the PID file we only have the Windows process id. 450 system("kill -f " . join(" ",@pids)); # Hope for the best.... 451 mtr_debug("Sleep 5 seconds waiting for processes to die"); 452 sleep(5); 453 } 454 else 455 { 456 my $retries= 10; # 10 seconds 457 do 458 { 459 mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids)); 460 kill(9, @pids); 461 mtr_report("Sleep 1 second waiting for processes to die"); 462 sleep(1) # Wait one second 463 } while ( $retries-- and kill(0, @pids) ); 464 465 if ( kill(0, @pids) ) # Check if some left 466 { 467 mtr_warning("can't kill process(es) " . join(" ", @pids)); 468 } 469 } 470 471 stop_reap_all(); 472 } 473 } 474 else 475 { 476 mtr_debug("Directory for PID files ($rundir) does not exist."); 477 } 478 479 # We may have failed everything, but we now check again if we have 480 # the listen ports free to use, and if they are free, just go for it. 481 482 mtr_debug("Checking known mysqld servers..."); 483 484 foreach my $srv ( @kill_pids ) 485 { 486 if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) ) 487 { 488 mtr_warning("can't kill old process holding port $srv->{'port'}"); 489 } 490 } 491 492 mtr_debug("mtr_kill_leftovers(): finished."); 493} 494 495 496# 497# Check that all processes in "spec" are shutdown gracefully 498# else kill them off hard 499# 500sub mtr_check_stop_servers ($) { 501 my $spec= shift; 502 503 # Return if no processes are defined 504 return if ! @$spec; 505 506 mtr_verbose("mtr_check_stop_servers"); 507 508 # ---------------------------------------------------------------------- 509 # Wait until servers in "spec" has stopped listening 510 # to their ports or timeout occurs 511 # ---------------------------------------------------------------------- 512 mtr_ping_with_timeout(\@$spec); 513 514 # ---------------------------------------------------------------------- 515 # Use waitpid() nonblocking for a little while, to see how 516 # many process's will exit sucessfully. 517 # This is the normal case. 518 # ---------------------------------------------------------------------- 519 my $wait_counter= 50; # Max number of times to redo the loop 520 foreach my $srv ( @$spec ) 521 { 522 my $pid= $srv->{'pid'}; 523 my $ret_pid; 524 if ( $pid ) 525 { 526 $ret_pid= waitpid($pid,&WNOHANG); 527 if ($ret_pid == $pid) 528 { 529 mtr_verbose("Caught exit of process $ret_pid"); 530 $srv->{'pid'}= 0; 531 } 532 elsif ($ret_pid == 0) 533 { 534 mtr_verbose("Process $pid is still alive"); 535 if ($wait_counter-- > 0) 536 { 537 # Give the processes more time to exit 538 select(undef, undef, undef, (0.1)); 539 redo; 540 } 541 } 542 else 543 { 544 mtr_warning("caught exit of unknown child $ret_pid"); 545 } 546 } 547 } 548 549 # ---------------------------------------------------------------------- 550 # The processes that haven't yet exited need to 551 # be killed hard, put them in "kill_pids" hash 552 # ---------------------------------------------------------------------- 553 my %kill_pids; 554 foreach my $srv ( @$spec ) 555 { 556 my $pid= $srv->{'pid'}; 557 if ( $pid ) 558 { 559 # Server is still alive, put it in list to be hard killed 560 if ($::glob_win32_perl) 561 { 562 # Kill the real process if it's known 563 $pid= $srv->{'real_pid'} if ($srv->{'real_pid'}); 564 } 565 $kill_pids{$pid}= 1; 566 567 # Write a message to the process's error log (if it has one) 568 # that it's being killed hard. 569 if ( defined $srv->{'errfile'} ) 570 { 571 mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n"); 572 } 573 mtr_warning("Forcing kill of process $pid"); 574 575 } 576 else 577 { 578 # Server is dead, remove the pidfile if it exists 579 # 580 # Race, could have been removed between test with -f 581 # and the unlink() below, so better check again with -f 582 if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and 583 -f $srv->{'pidfile'} ) 584 { 585 mtr_error("can't remove $srv->{'pidfile'}"); 586 } 587 } 588 } 589 590 if ( ! keys %kill_pids ) 591 { 592 # All processes has exited gracefully 593 return; 594 } 595 596 mtr_kill_processes(\%kill_pids); 597 598 # ---------------------------------------------------------------------- 599 # All processes are killed, cleanup leftover files 600 # ---------------------------------------------------------------------- 601 { 602 my $errors= 0; 603 foreach my $srv ( @$spec ) 604 { 605 if ( $srv->{'pid'} ) 606 { 607 # Server has been hard killed, clean it's resources 608 foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'}) 609 { 610 # Know it is dead so should be no race, careful anyway 611 if ( defined $file and -f $file and ! unlink($file) and -f $file ) 612 { 613 $errors++; 614 mtr_warning("couldn't delete $file"); 615 } 616 } 617 618 if ($::glob_win32_perl and $srv->{'real_pid'}) 619 { 620 # Wait for the pseudo pid - if the real_pid was known 621 # the pseudo pid has not been waited for yet, wai blocking 622 # since it's "such a simple program" 623 mtr_verbose("Wait for pseudo process $srv->{'pid'}"); 624 my $ret_pid= waitpid($srv->{'pid'}, 0); 625 mtr_verbose("Pseudo process $ret_pid died"); 626 } 627 628 $srv->{'pid'}= 0; 629 } 630 } 631 if ( $errors ) 632 { 633 # There where errors killing processes 634 # do one last attempt to ping the servers 635 # and if they can't be pinged, assume they are dead 636 if ( ! mtr_ping_with_timeout( \@$spec ) ) 637 { 638 mtr_error("we could not kill or clean up all processes"); 639 } 640 else 641 { 642 mtr_verbose("All ports were free, continuing"); 643 } 644 } 645 } 646} 647 648 649# Wait for all the process in the list to terminate 650sub mtr_wait_blocking($) { 651 my $admin_pids= shift; 652 653 654 # Return if no processes defined 655 return if ! %$admin_pids; 656 657 mtr_verbose("mtr_wait_blocking"); 658 659 # Wait for all the started processes to exit 660 # As mysqladmin is such a simple program, we trust it to terminate itself. 661 # I.e. we wait blocking, and wait for them all before we go on. 662 foreach my $pid (keys %{$admin_pids}) 663 { 664 my $ret_pid= waitpid($pid,0); 665 666 } 667} 668 669# Start "mysqladmin <command>" for a specific mysqld 670sub mtr_mysqladmin_start($$$) { 671 my $srv= shift; 672 my $command= shift; 673 my $adm_shutdown_tmo= shift; 674 675 my $args; 676 mtr_init_args(\$args); 677 678 mtr_add_arg($args, "--no-defaults"); 679 mtr_add_arg($args, "--user=%s", $::opt_user); 680 mtr_add_arg($args, "--password="); 681 mtr_add_arg($args, "--silent"); 682 if ( -e $srv->{'path_sock'} ) 683 { 684 mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'}); 685 } 686 if ( $srv->{'port'} ) 687 { 688 mtr_add_arg($args, "--port=%s", $srv->{'port'}); 689 } 690 if ( $srv->{'port'} and ! -e $srv->{'path_sock'} ) 691 { 692 mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket 693 } 694 mtr_add_arg($args, "--connect_timeout=5"); 695 696 # Shutdown time must be high as slave may be in reconnect 697 mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo"); 698 mtr_add_arg($args, "$command"); 699 my $pid= mtr_spawn($::exe_mysqladmin, $args, 700 "", "", "", "", 701 { append_log_file => 1 }); 702 mtr_verbose("mtr_mysqladmin_start, pid: $pid"); 703 return $pid; 704 705} 706 707# Ping all servers in list, exit when none of them answers 708# or when timeout has passed 709sub mtr_ping_with_timeout($) { 710 my $spec= shift; 711 my $timeout= 200; # 20 seconds max 712 my $res= 1; # If we just fall through, we are done 713 # in the sense that the servers don't 714 # listen to their ports any longer 715 716 mtr_debug("Waiting for mysqld servers to stop..."); 717 718 TIME: 719 while ( $timeout-- ) 720 { 721 foreach my $srv ( @$spec ) 722 { 723 $res= 1; # We are optimistic 724 if ( $srv->{'pid'} and defined $srv->{'port'} ) 725 { 726 if ( mtr_ping_port($srv->{'port'}) ) 727 { 728 mtr_verbose("waiting for process $srv->{'pid'} to stop ". 729 "using port $srv->{'port'}"); 730 731 # Millisceond sleep emulated with select 732 select(undef, undef, undef, (0.1)); 733 $res= 0; 734 next TIME; 735 } 736 else 737 { 738 # Process was not using port 739 } 740 } 741 } 742 last; # If we got here, we are done 743 } 744 745 if ($res) 746 { 747 mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down."); 748 } 749 else 750 { 751 mtr_report("mtr_ping_with_timeout(): At least one server is alive."); 752 } 753 754 return $res; 755} 756 757 758# 759# Loop through our list of processes and look for and entry 760# with the provided pid 761# Set the pid of that process to 0 if found 762# 763sub mark_process_dead($) 764{ 765 my $ret_pid= shift; 766 767 foreach my $mysqld (@{$::master}, @{$::slave}) 768 { 769 if ( $mysqld->{'pid'} eq $ret_pid ) 770 { 771 mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); 772 $mysqld->{'pid'}= 0; 773 return; 774 } 775 } 776 777 mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid"); 778 779} 780 781# 782# Loop through our list of processes and look for and entry 783# with the provided pid, if found check for the file indicating 784# expected crash and restart it. 785# 786sub check_expected_crash_and_restart($) 787{ 788 my $ret_pid= shift; 789 790 foreach my $mysqld (@{$::master}, @{$::slave}) 791 { 792 if ( $mysqld->{'pid'} eq $ret_pid ) 793 { 794 mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); 795 $mysqld->{'pid'}= 0; 796 797 # Check if crash expected, and restart if it was 798 my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" . 799 "$mysqld->{'idx'}" . ".expect"; 800 while ( 1 ) 801 { 802 if ( -f $expect_file ) 803 { 804 mtr_verbose("Crash was expected, file $expect_file exists"); 805 my $expect_file_handler; 806 open($expect_file_handler, "<$expect_file") or die; 807 my @expect_lines= <$expect_file_handler>; 808 close $expect_file_handler; 809 # look at most recent order by the test 810 my $expect_content= pop @expect_lines; 811 chomp $expect_content; 812 if ( $expect_content =~ /^wait/ ) 813 { 814 mtr_verbose("Test asks that we wait before restart"); 815 # Millisceond sleep emulated with select 816 select(undef, undef, undef, (0.1)); 817 next; 818 } 819 unlink($expect_file); 820 mysqld_start($mysqld, $mysqld->{'start_opts'}, 821 $mysqld->{'start_slave_master_info'}); 822 } 823 last; 824 } 825 826 return; 827 } 828 } 829 830 mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid"); 831 832} 833 834############################################################################## 835# 836# The operating system will keep information about dead children, 837# we read this information here, and if we have records the process 838# is alive, we mark it as dead. 839# 840############################################################################## 841 842sub mtr_record_dead_children () { 843 844 my $process_died= 0; 845 my $ret_pid; 846 847 # Wait without blockinng to see if any processes had died 848 # -1 or 0 means there are no more procesess to wait for 849 while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1) 850 { 851 mtr_warning("mtr_record_dead_children: $ret_pid"); 852 mark_process_dead($ret_pid); 853 $process_died= 1; 854 } 855 return $process_died; 856} 857 858sub start_reap_all { 859 # This causes terminating processes to not become zombies, avoiding 860 # the need for (or possibility of) explicit waitpid(). 861 $SIG{CHLD}= 'IGNORE'; 862 863 # On some platforms (Linux, QNX, OSX, ...) there is potential race 864 # here. If a process terminated before setting $SIG{CHLD} (but after 865 # any attempt to waitpid() it), it will still be a zombie. So we 866 # have to handle any such process here. 867 my $pid; 868 while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1) 869 { 870 mtr_warning("start_reap_all pid: $pid"); 871 mark_process_dead($pid); 872 }; 873} 874 875sub stop_reap_all { 876 $SIG{CHLD}= 'DEFAULT'; 877} 878 879 880sub mtr_ping_port ($) { 881 my $port= shift; 882 883 mtr_verbose("mtr_ping_port: $port"); 884 885 my $remote= "localhost"; 886 my $iaddr= inet_aton($remote); 887 if ( ! $iaddr ) 888 { 889 mtr_error("can't find IP number for $remote"); 890 } 891 my $paddr= sockaddr_in($port, $iaddr); 892 my $proto= getprotobyname('tcp'); 893 if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) ) 894 { 895 mtr_error("can't create socket: $!"); 896 } 897 898 mtr_debug("Pinging server (port: $port)..."); 899 900 if ( connect(SOCK, $paddr) ) 901 { 902 close(SOCK); # FIXME check error? 903 mtr_verbose("USED"); 904 return 1; 905 } 906 else 907 { 908 mtr_verbose("FREE"); 909 return 0; 910 } 911} 912 913############################################################################## 914# 915# Wait for a file to be created 916# 917############################################################################## 918 919# FIXME check that the pidfile contains the expected pid! 920 921sub sleep_until_file_created ($$$) { 922 my $pidfile= shift; 923 my $timeout= shift; 924 my $pid= shift; 925 my $sleeptime= 100; # Milliseconds 926 my $loops= ($timeout * 1000) / $sleeptime; 927 928 for ( my $loop= 1; $loop <= $loops; $loop++ ) 929 { 930 if ( -r $pidfile ) 931 { 932 return 1; 933 } 934 935 # Check if it died after the fork() was successful 936 if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid ) 937 { 938 mtr_warning("Process $pid died"); 939 return 0; 940 } 941 942 mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile"); 943 944 # Print extra message every 60 seconds 945 my $seconds= ($loop * $sleeptime) / 1000; 946 if ( $seconds > 1 and int($seconds * 10) % 600 == 0 ) 947 { 948 my $left= $timeout - $seconds; 949 mtr_warning("Waited $seconds seconds for $pidfile to be created, " . 950 "still waiting for $left seconds..."); 951 } 952 953 # Millisceond sleep emulated with select 954 select(undef, undef, undef, ($sleeptime/1000)); 955 } 956 957 return 0; 958} 959 960 961sub mtr_kill_processes ($) { 962 my $pids = shift; 963 964 mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")"); 965 966 foreach my $pid (keys %{$pids}) 967 { 968 969 if ($pid <= 0) 970 { 971 mtr_warning("Trying to kill illegal pid: $pid"); 972 next; 973 } 974 975 my $signaled_procs= kill(9, $pid); 976 if ($signaled_procs == 0) 977 { 978 # No such process existed, assume it's killed 979 mtr_verbose("killed $pid(no such process)"); 980 } 981 else 982 { 983 my $ret_pid= waitpid($pid,0); 984 if ($ret_pid == $pid) 985 { 986 mtr_verbose("killed $pid(got the pid)"); 987 } 988 elsif ($ret_pid == -1) 989 { 990 mtr_verbose("killed $pid(got -1)"); 991 } 992 } 993 } 994 mtr_verbose("done killing processes"); 995} 996 997 998############################################################################## 999# 1000# When we exit, we kill off all children 1001# 1002############################################################################## 1003 1004sub mtr_exit ($) { 1005 my $code= shift; 1006 mtr_timer_stop_all($::glob_timers); 1007 local $SIG{HUP} = 'IGNORE'; 1008 # ToDo: Signalling -$$ will only work if we are the process group 1009 # leader (in fact on QNX it will signal our session group leader, 1010 # which might be Do-compile or Pushbuild, causing tests to be 1011 # aborted). So we only do it if we are the group leader. We might 1012 # set ourselves as the group leader at startup (with 1013 # POSIX::setpgrp(0,0)), but then care must be needed to always do 1014 # proper child process cleanup. 1015 POSIX::kill(SIGHUP, -$$) if !$::glob_win32_perl and $$ == getpgrp(); 1016 1017 exit($code); 1018} 1019 1020########################################################################### 1021 10221; 1023