1# -*- cperl -*-
2# Copyright (c) 2004-2006 MySQL AB, 2008 Sun Microsystems, Inc.
3# Use is subject to license terms.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License, version 2.0,
7# as published by the Free Software Foundation.
8#
9# This program is also distributed with certain software (including
10# but not limited to OpenSSL) that is licensed under separate terms,
11# as designated in a particular file or component or in included license
12# documentation.  The authors of MySQL hereby grant you an additional
13# permission to link the program and your derivative works with the
14# separately licensed software that they have included with MySQL.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License, version 2.0, for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24
25# This is a library file used by the Perl version of mysql-test-run,
26# and is part of the translation of the Bourne shell script with the
27# same name.
28
29use Socket;
30use Errno;
31use strict;
32
33use POSIX qw(WNOHANG SIGHUP);
34
35sub mtr_run ($$$$$$;$);
36sub mtr_spawn ($$$$$$;$);
37sub mtr_check_stop_servers ($);
38sub mtr_kill_leftovers ();
39sub mtr_wait_blocking ($);
40sub mtr_record_dead_children ();
41sub mtr_ndbmgm_start($$);
42sub mtr_mysqladmin_start($$$);
43sub mtr_exit ($);
44sub sleep_until_file_created ($$$);
45sub mtr_kill_processes ($);
46sub mtr_ping_with_timeout($);
47sub mtr_ping_port ($);
48
49# Local function
50sub spawn_impl ($$$$$$$);
51
52##############################################################################
53#
54#  Execute an external command
55#
56##############################################################################
57
58sub mtr_run ($$$$$$;$) {
59  my $path=       shift;
60  my $arg_list_t= shift;
61  my $input=      shift;
62  my $output=     shift;
63  my $error=      shift;
64  my $pid_file=   shift; # Not used
65  my $spawn_opts= shift;
66
67  return spawn_impl($path,$arg_list_t,'run',$input,$output,$error,
68    $spawn_opts);
69}
70
71sub mtr_run_test ($$$$$$;$) {
72  my $path=       shift;
73  my $arg_list_t= shift;
74  my $input=      shift;
75  my $output=     shift;
76  my $error=      shift;
77  my $pid_file=   shift; # Not used
78  my $spawn_opts= shift;
79
80  return spawn_impl($path,$arg_list_t,'test',$input,$output,$error,
81    $spawn_opts);
82}
83
84sub mtr_spawn ($$$$$$;$) {
85  my $path=       shift;
86  my $arg_list_t= shift;
87  my $input=      shift;
88  my $output=     shift;
89  my $error=      shift;
90  my $pid_file=   shift; # Not used
91  my $spawn_opts= shift;
92
93  return spawn_impl($path,$arg_list_t,'spawn',$input,$output,$error,
94    $spawn_opts);
95}
96
97
98
99sub spawn_impl ($$$$$$$) {
100  my $path=       shift;
101  my $arg_list_t= shift;
102  my $mode=       shift;
103  my $input=      shift;
104  my $output=     shift;
105  my $error=      shift;
106  my $spawn_opts= shift;
107
108  if ( $::opt_script_debug )
109  {
110    mtr_report("");
111    mtr_debug("-" x 73);
112    mtr_debug("STDIN  $input") if $input;
113    mtr_debug("STDOUT $output") if $output;
114    mtr_debug("STDERR $error") if $error;
115    mtr_debug("$mode: $path ", join(" ",@$arg_list_t));
116    mtr_debug("spawn options:");
117    if ($spawn_opts)
118    {
119      foreach my $key (sort keys %{$spawn_opts})
120      {
121        mtr_debug("  - $key: $spawn_opts->{$key}");
122      }
123    }
124    else
125    {
126      mtr_debug("  none");
127    }
128    mtr_debug("-" x 73);
129    mtr_report("");
130  }
131
132  mtr_error("Can't spawn with empty \"path\"") unless defined $path;
133
134
135 FORK:
136  {
137    my $pid= fork();
138
139    if ( ! defined $pid )
140    {
141      if ( $! == $!{EAGAIN} )           # See "perldoc Errno"
142      {
143        mtr_warning("Got EAGAIN from fork(), sleep 1 second and redo");
144        sleep(1);
145        redo FORK;
146      }
147
148      mtr_error("$path ($pid) can't be forked, error: $!");
149
150    }
151
152    if ( $pid )
153    {
154      select(STDOUT) if $::glob_win32_perl;
155      return spawn_parent_impl($pid,$mode,$path);
156    }
157    else
158    {
159      # Child, redirect output and exec
160
161      $SIG{INT}= 'DEFAULT';         # Parent do some stuff, we don't
162
163      my $log_file_open_mode = '>';
164
165      if ($spawn_opts and $spawn_opts->{'append_log_file'})
166      {
167        $log_file_open_mode = '>>';
168      }
169
170      if ( $output )
171      {
172	if ( $::glob_win32_perl )
173	{
174	  # Don't redirect stdout on ActiveState perl since this is
175          # just another thread in the same process.
176	}
177        elsif ( ! open(STDOUT,$log_file_open_mode,$output) )
178        {
179          mtr_child_error("can't redirect STDOUT to \"$output\": $!");
180        }
181      }
182
183      if ( $error )
184      {
185        if ( !$::glob_win32_perl and $output eq $error )
186        {
187          if ( ! open(STDERR,">&STDOUT") )
188          {
189            mtr_child_error("can't dup STDOUT: $!");
190          }
191        }
192        else
193        {
194          if ( ! open(STDERR,$log_file_open_mode,$error) )
195          {
196            mtr_child_error("can't redirect STDERR to \"$error\": $!");
197          }
198        }
199      }
200
201      if ( $input )
202      {
203        if ( ! open(STDIN,"<",$input) )
204        {
205          mtr_child_error("can't redirect STDIN to \"$input\": $!");
206        }
207      }
208
209      if ( ! exec($path,@$arg_list_t) )
210      {
211        mtr_child_error("failed to execute \"$path\": $!");
212      }
213      mtr_error("Should never come here 1!");
214    }
215    mtr_error("Should never come here 2!");
216  }
217  mtr_error("Should never come here 3!");
218}
219
220
221sub spawn_parent_impl {
222  my $pid=  shift;
223  my $mode= shift;
224  my $path= shift;
225
226  if ( $mode eq 'run' or $mode eq 'test' )
227  {
228    if ( $mode eq 'run' )
229    {
230      # Simple run of command, wait blocking for it to return
231      my $ret_pid= waitpid($pid,0);
232      if ( $ret_pid != $pid )
233      {
234	# The "simple" waitpid has failed, print debug info
235	# and try to handle the error
236        mtr_warning("waitpid($pid, 0) returned $ret_pid " .
237		    "when waiting for '$path', error: '$!'");
238	if ( $ret_pid == -1 )
239	{
240	  # waitpid returned -1, that would indicate the process
241	  # no longer exist and waitpid couldn't wait for it.
242	  return 1;
243	}
244	mtr_error("Error handling failed");
245      }
246
247      return mtr_process_exit_status($?);
248    }
249    else
250    {
251      # We run mysqltest and wait for it to return. But we try to
252      # catch dying mysqld processes as well.
253      #
254      # We do blocking waitpid() until we get the return from the
255      # "mysqltest" call. But if a mysqld process dies that we
256      # started, we take this as an error, and kill mysqltest.
257
258
259      my $exit_value= -1;
260      my $saved_exit_value;
261      my $ret_pid;                      # What waitpid() returns
262
263      while ( ($ret_pid= waitpid(-1,0)) != -1 )
264      {
265        # Someone terminated, don't know who. Collect
266        # status info first before $? is lost,
267        # but not $exit_value, this is flagged from
268
269        my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid);
270        if ( $timer_name )
271        {
272          if ( $timer_name eq "suite" )
273          {
274            # We give up here
275            print STDERR "\n";
276            kill(9, $pid); # Kill mysqltest
277            mtr_kill_leftovers(); # Kill servers the hard way
278            mtr_error("Test suite timeout");
279          }
280          elsif ( $timer_name eq "testcase" )
281          {
282            $saved_exit_value=  63;       # Mark as timeout
283            kill(9, $pid);                # Kill mysqltest
284            next;                         # Go on and catch the termination
285          }
286        }
287
288        if ( $ret_pid == $pid )
289        {
290          # We got termination of mysqltest, we are done
291          $exit_value= mtr_process_exit_status($?);
292          last;
293        }
294
295        # One of the child processes died, unless this was expected
296	# mysqltest should be killed and test aborted
297
298	check_expected_crash_and_restart($ret_pid);
299      }
300
301      if ( $ret_pid != $pid )
302      {
303        # We terminated the waiting because a "mysqld" process died.
304        # Kill the mysqltest process.
305	mtr_verbose("Kill mysqltest because another process died");
306        kill(9,$pid);
307
308        $ret_pid= waitpid($pid,0);
309
310        if ( $ret_pid != $pid )
311        {
312          mtr_error("$path ($pid) got lost somehow");
313        }
314      }
315
316      return $saved_exit_value || $exit_value;
317    }
318  }
319  else
320  {
321    # We spawned a process we don't wait for
322    return $pid;
323  }
324}
325
326
327# ----------------------------------------------------------------------
328# We try to emulate how an Unix shell calculates the exit code
329# ----------------------------------------------------------------------
330
331sub mtr_process_exit_status {
332  my $raw_status= shift;
333
334  if ( $raw_status & 127 )
335  {
336    return ($raw_status & 127) + 128;  # Signal num + 128
337  }
338  else
339  {
340    return $raw_status >> 8;           # Exit code
341  }
342}
343
344
345##############################################################################
346#
347#  Kill processes left from previous runs
348#
349##############################################################################
350
351
352# Kill all processes(mysqld, ndbd, ndb_mgmd and im) that would conflict with
353# this run
354# Make sure to remove the PID file, if any.
355# kill IM manager first, else it will restart the servers
356sub mtr_kill_leftovers () {
357
358  mtr_report("Killing Possible Leftover Processes");
359  mtr_debug("mtr_kill_leftovers(): started.");
360
361  my @kill_pids;
362  my %admin_pids;
363
364  foreach my $srv (@{$::master}, @{$::slave})
365  {
366    mtr_debug("  - mysqld " .
367              "(pid: $srv->{pid}; " .
368              "pid file: '$srv->{path_pid}'; " .
369              "socket: '$srv->{path_sock}'; ".
370              "port: $srv->{port})");
371
372    my $pid= mtr_mysqladmin_start($srv, "shutdown", 20);
373
374    # Save the pid of the mysqladmin process
375    $admin_pids{$pid}= 1;
376
377    push(@kill_pids,{
378		     pid      => $srv->{'pid'},
379		     pidfile  => $srv->{'path_pid'},
380		     sockfile => $srv->{'path_sock'},
381		     port     => $srv->{'port'},
382		    });
383    $srv->{'pid'}= 0; # Assume we are done with it
384  }
385
386  if ( ! $::opt_skip_ndbcluster )
387  {
388
389    foreach my $cluster (@{$::clusters})
390    {
391
392      # Don't shut down a "running" cluster
393      next if $cluster->{'use_running'};
394
395      mtr_debug("  - cluster " .
396		"(pid: $cluster->{pid}; " .
397		"pid file: '$cluster->{path_pid})");
398
399      my $pid= mtr_ndbmgm_start($cluster, "shutdown");
400
401      # Save the pid of the ndb_mgm process
402      $admin_pids{$pid}= 1;
403
404      push(@kill_pids,{
405		       pid      => $cluster->{'pid'},
406		       pidfile  => $cluster->{'path_pid'}
407		      });
408
409      $cluster->{'pid'}= 0; # Assume we are done with it
410
411      foreach my $ndbd (@{$cluster->{'ndbds'}})
412      {
413	mtr_debug("    - ndbd " .
414		  "(pid: $ndbd->{pid}; " .
415		  "pid file: '$ndbd->{path_pid})");
416
417	push(@kill_pids,{
418			 pid      => $ndbd->{'pid'},
419			 pidfile  => $ndbd->{'path_pid'},
420			});
421	$ndbd->{'pid'}= 0; # Assume we are done with it
422      }
423    }
424  }
425
426  # Wait for all the admin processes to complete
427  mtr_wait_blocking(\%admin_pids);
428
429  # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
430  # terminate now, but we don't (FIXME should be debugged).
431  # So we try again to ping and at least wait the same amount of time
432  # mysqladmin would for all to die.
433
434  mtr_ping_with_timeout(\@kill_pids);
435
436  # We now have tried to terminate nice. We have waited for the listen
437  # port to be free, but can't really tell if the mysqld process died
438  # or not. We now try to find the process PID from the PID file, and
439  # send a kill to that process. Note that Perl let kill(0,@pids) be
440  # a way to just return the numer of processes the kernel can send
441  # signals to. So this can be used (except on Cygwin) to determine
442  # if there are processes left running that we cound out might exists.
443  #
444  # But still after all this work, all we know is that we have
445  # the ports free.
446
447  # We scan the "var/run/" directory for other process id's to kill
448
449  my $rundir= "$::opt_vardir/run";
450
451  mtr_debug("Processing PID files in directory '$rundir'...");
452
453  if ( -d $rundir )
454  {
455    opendir(RUNDIR, $rundir)
456      or mtr_error("can't open directory \"$rundir\": $!");
457
458    my @pids;
459
460    while ( my $elem= readdir(RUNDIR) )
461    {
462      # Only read pid from files that end with .pid
463      if ( $elem =~ /.*[.]pid$/)
464      {
465	my $pidfile= "$rundir/$elem";
466
467	if ( -f $pidfile )
468	{
469	  mtr_debug("Processing PID file: '$pidfile'...");
470
471	  my $pid= mtr_get_pid_from_file($pidfile);
472
473	  mtr_debug("Got pid: $pid from file '$pidfile'");
474
475	  if ( $::glob_cygwin_perl or kill(0, $pid) )
476	  {
477	    mtr_debug("There is process with pid $pid -- scheduling for kill.");
478	    push(@pids, $pid);            # We know (cygwin guess) it exists
479	  }
480	  else
481	  {
482	    mtr_debug("There is no process with pid $pid -- skipping.");
483	  }
484	}
485      }
486      else
487      {
488	mtr_warning("Found non pid file $elem in $rundir")
489	  if -f "$rundir/$elem";
490	next;
491      }
492    }
493    closedir(RUNDIR);
494
495    if ( @pids )
496    {
497      mtr_debug("Killing the following processes with PID files: " .
498                join(' ', @pids) . "...");
499
500      start_reap_all();
501
502      if ( $::glob_cygwin_perl )
503      {
504        # We have no (easy) way of knowing the Cygwin controlling
505        # process, in the PID file we only have the Windows process id.
506        system("kill -f " . join(" ",@pids)); # Hope for the best....
507        mtr_debug("Sleep 5 seconds waiting for processes to die");
508        sleep(5);
509      }
510      else
511      {
512        my $retries= 10;                    # 10 seconds
513        do
514        {
515          mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids));
516          kill(9, @pids);
517          mtr_report("Sleep 1 second waiting for processes to die");
518          sleep(1)                      # Wait one second
519        } while ( $retries-- and  kill(0, @pids) );
520
521        if ( kill(0, @pids) )           # Check if some left
522        {
523          mtr_warning("can't kill process(es) " . join(" ", @pids));
524        }
525      }
526
527      stop_reap_all();
528    }
529  }
530  else
531  {
532    mtr_debug("Directory for PID files ($rundir) does not exist.");
533  }
534
535  # We may have failed everything, but we now check again if we have
536  # the listen ports free to use, and if they are free, just go for it.
537
538  mtr_debug("Checking known mysqld servers...");
539
540  foreach my $srv ( @kill_pids )
541  {
542    if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) )
543    {
544      mtr_warning("can't kill old process holding port $srv->{'port'}");
545    }
546  }
547
548  mtr_debug("mtr_kill_leftovers(): finished.");
549}
550
551
552#
553# Check that all processes in "spec" are shutdown gracefully
554# else kill them off hard
555#
556sub mtr_check_stop_servers ($) {
557  my $spec=  shift;
558
559  # Return if no processes are defined
560  return if ! @$spec;
561
562  mtr_verbose("mtr_check_stop_servers");
563
564  # ----------------------------------------------------------------------
565  # Wait until servers in "spec" has stopped listening
566  # to their ports or timeout occurs
567  # ----------------------------------------------------------------------
568  mtr_ping_with_timeout(\@$spec);
569
570  # ----------------------------------------------------------------------
571  # Use waitpid() nonblocking for a little while, to see how
572  # many process's will exit sucessfully.
573  # This is the normal case.
574  # ----------------------------------------------------------------------
575  my $wait_counter= 50; # Max number of times to redo the loop
576  foreach my $srv ( @$spec )
577  {
578    my $pid= $srv->{'pid'};
579    my $ret_pid;
580    if ( $pid )
581    {
582      $ret_pid= waitpid($pid,&WNOHANG);
583      if ($ret_pid == $pid)
584      {
585	mtr_verbose("Caught exit of process $ret_pid");
586	$srv->{'pid'}= 0;
587      }
588      elsif ($ret_pid == 0)
589      {
590	mtr_verbose("Process $pid is still alive");
591	if ($wait_counter-- > 0)
592	{
593	  # Give the processes more time to exit
594	  select(undef, undef, undef, (0.1));
595	  redo;
596	}
597      }
598      else
599      {
600	mtr_warning("caught exit of unknown child $ret_pid");
601      }
602    }
603  }
604
605  # ----------------------------------------------------------------------
606  # The processes that haven't yet exited need to
607  # be killed hard, put them in "kill_pids" hash
608  # ----------------------------------------------------------------------
609  my %kill_pids;
610  foreach my $srv ( @$spec )
611  {
612    my $pid= $srv->{'pid'};
613    if ( $pid )
614    {
615      # Server is still alive, put it in list to be hard killed
616      if ($::glob_win32_perl)
617      {
618	# Kill the real process if it's known
619	$pid= $srv->{'real_pid'} if ($srv->{'real_pid'});
620      }
621      $kill_pids{$pid}= 1;
622
623      # Write a message to the process's error log (if it has one)
624      # that it's being killed hard.
625      if ( defined $srv->{'errfile'} )
626      {
627	mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n");
628      }
629      mtr_warning("Forcing kill of process $pid");
630
631    }
632    else
633    {
634      # Server is dead, remove the pidfile if it exists
635      #
636      # Race, could have been removed between test with -f
637      # and the unlink() below, so better check again with -f
638      if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and
639           -f $srv->{'pidfile'} )
640      {
641        mtr_error("can't remove $srv->{'pidfile'}");
642      }
643    }
644  }
645
646  if ( ! keys %kill_pids )
647  {
648    # All processes has exited gracefully
649    return;
650  }
651
652  mtr_kill_processes(\%kill_pids);
653
654  # ----------------------------------------------------------------------
655  # All processes are killed, cleanup leftover files
656  # ----------------------------------------------------------------------
657  {
658    my $errors= 0;
659    foreach my $srv ( @$spec )
660    {
661      if ( $srv->{'pid'} )
662      {
663	# Server has been hard killed, clean it's resources
664	foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
665        {
666	  # Know it is dead so should be no race, careful anyway
667	  if ( defined $file and -f $file and ! unlink($file) and -f $file )
668          {
669	    $errors++;
670	    mtr_warning("couldn't delete $file");
671	  }
672	}
673
674	if ($::glob_win32_perl and $srv->{'real_pid'})
675	{
676	  # Wait for the pseudo pid - if the real_pid was known
677	  # the pseudo pid has not been waited for yet, wai blocking
678	  # since it's "such a simple program"
679	  mtr_verbose("Wait for pseudo process $srv->{'pid'}");
680	  my $ret_pid= waitpid($srv->{'pid'}, 0);
681	  mtr_verbose("Pseudo process $ret_pid died");
682	}
683
684	$srv->{'pid'}= 0;
685      }
686    }
687    if ( $errors )
688    {
689      # There where errors killing processes
690      # do one last attempt to ping the servers
691      # and if they can't be pinged, assume they are dead
692      if ( ! mtr_ping_with_timeout( \@$spec ) )
693      {
694	mtr_error("we could not kill or clean up all processes");
695      }
696      else
697      {
698	mtr_verbose("All ports were free, continuing");
699      }
700    }
701  }
702}
703
704
705# Wait for all the process in the list to terminate
706sub mtr_wait_blocking($) {
707  my $admin_pids= shift;
708
709
710  # Return if no processes defined
711  return if ! %$admin_pids;
712
713  mtr_verbose("mtr_wait_blocking");
714
715  # Wait for all the started processes to exit
716  # As mysqladmin is such a simple program, we trust it to terminate itself.
717  # I.e. we wait blocking, and wait for them all before we go on.
718  foreach my $pid (keys %{$admin_pids})
719  {
720    my $ret_pid= waitpid($pid,0);
721
722  }
723}
724
725# Start "mysqladmin <command>" for a specific mysqld
726sub mtr_mysqladmin_start($$$) {
727  my $srv= shift;
728  my $command= shift;
729  my $adm_shutdown_tmo= shift;
730
731  my $args;
732  mtr_init_args(\$args);
733
734  mtr_add_arg($args, "--no-defaults");
735  mtr_add_arg($args, "--user=%s", $::opt_user);
736  mtr_add_arg($args, "--password=");
737  mtr_add_arg($args, "--silent");
738  if ( -e $srv->{'path_sock'} )
739  {
740    mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'});
741  }
742  if ( $srv->{'port'} )
743  {
744    mtr_add_arg($args, "--port=%s", $srv->{'port'});
745  }
746  if ( $srv->{'port'} and ! -e $srv->{'path_sock'} )
747  {
748    mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket
749  }
750  mtr_add_arg($args, "--connect_timeout=5");
751
752  # Shutdown time must be high as slave may be in reconnect
753  mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
754  mtr_add_arg($args, "$command");
755  my $pid= mtr_spawn($::exe_mysqladmin, $args,
756		     "", "", "", "",
757		     { append_log_file => 1 });
758  mtr_verbose("mtr_mysqladmin_start, pid: $pid");
759  return $pid;
760
761}
762
763# Start "ndb_mgm shutdown" for a specific cluster, it will
764# shutdown all data nodes and leave the ndb_mgmd running
765sub mtr_ndbmgm_start($$) {
766  my $cluster= shift;
767  my $command= shift;
768
769  my $args;
770
771  mtr_init_args(\$args);
772
773  mtr_add_arg($args, "--no-defaults");
774  mtr_add_arg($args, "--core");
775  mtr_add_arg($args, "--try-reconnect=1");
776  mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'});
777  mtr_add_arg($args, "-e");
778  mtr_add_arg($args, "$command");
779
780  my $pid= mtr_spawn($::exe_ndb_mgm, $args,
781		     "", "/dev/null", "/dev/null", "",
782		     {});
783  mtr_verbose("mtr_ndbmgm_start, pid: $pid");
784  return $pid;
785
786}
787
788
789# Ping all servers in list, exit when none of them answers
790# or when timeout has passed
791sub mtr_ping_with_timeout($) {
792  my $spec= shift;
793  my $timeout= 200;                     # 20 seconds max
794  my $res= 1;                           # If we just fall through, we are done
795                                        # in the sense that the servers don't
796                                        # listen to their ports any longer
797
798  mtr_debug("Waiting for mysqld servers to stop...");
799
800 TIME:
801  while ( $timeout-- )
802  {
803    foreach my $srv ( @$spec )
804    {
805      $res= 1;                          # We are optimistic
806      if ( $srv->{'pid'} and defined $srv->{'port'} )
807      {
808	if ( mtr_ping_port($srv->{'port'}) )
809	{
810	  mtr_verbose("waiting for process $srv->{'pid'} to stop ".
811		      "using port $srv->{'port'}");
812
813	  # Millisceond sleep emulated with select
814	  select(undef, undef, undef, (0.1));
815	  $res= 0;
816	  next TIME;
817	}
818	else
819	{
820	  # Process was not using port
821	}
822      }
823    }
824    last;                               # If we got here, we are done
825  }
826
827  if ($res)
828  {
829    mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down.");
830  }
831  else
832  {
833    mtr_report("mtr_ping_with_timeout(): At least one server is alive.");
834  }
835
836  return $res;
837}
838
839
840#
841# Loop through our list of processes and look for and entry
842# with the provided pid
843# Set the pid of that process to 0 if found
844#
845sub mark_process_dead($)
846{
847  my $ret_pid= shift;
848
849  foreach my $mysqld (@{$::master}, @{$::slave})
850  {
851    if ( $mysqld->{'pid'} eq $ret_pid )
852    {
853      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
854      $mysqld->{'pid'}= 0;
855      return;
856    }
857  }
858
859  foreach my $cluster (@{$::clusters})
860  {
861    if ( $cluster->{'pid'} eq $ret_pid )
862    {
863      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
864      $cluster->{'pid'}= 0;
865      return;
866    }
867
868    foreach my $ndbd (@{$cluster->{'ndbds'}})
869    {
870      if ( $ndbd->{'pid'} eq $ret_pid )
871      {
872	mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
873	$ndbd->{'pid'}= 0;
874	return;
875      }
876    }
877  }
878  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");
879
880}
881
882#
883# Loop through our list of processes and look for and entry
884# with the provided pid, if found check for the file indicating
885# expected crash and restart it.
886#
887sub check_expected_crash_and_restart($)
888{
889  my $ret_pid= shift;
890
891  foreach my $mysqld (@{$::master}, @{$::slave})
892  {
893    if ( $mysqld->{'pid'} eq $ret_pid )
894    {
895      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
896      $mysqld->{'pid'}= 0;
897
898      # Check if crash expected and restart if it was
899      my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" .
900	"$mysqld->{'idx'}" . ".expect";
901      if ( -f $expect_file )
902      {
903	mtr_verbose("Crash was expected, file $expect_file exists");
904	mysqld_start($mysqld, $mysqld->{'start_opts'},
905		     $mysqld->{'start_slave_master_info'});
906	unlink($expect_file);
907      }
908
909      return;
910    }
911  }
912
913  foreach my $cluster (@{$::clusters})
914  {
915    if ( $cluster->{'pid'} eq $ret_pid )
916    {
917      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
918      $cluster->{'pid'}= 0;
919
920      # Check if crash expected and restart if it was
921      my $expect_file= "$::opt_vardir/tmp/ndb_mgmd_" . "$cluster->{'type'}" .
922	".expect";
923      if ( -f $expect_file )
924      {
925	mtr_verbose("Crash was expected, file $expect_file exists");
926	ndbmgmd_start($cluster);
927	unlink($expect_file);
928      }
929      return;
930    }
931
932    foreach my $ndbd (@{$cluster->{'ndbds'}})
933    {
934      if ( $ndbd->{'pid'} eq $ret_pid )
935      {
936	mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
937	$ndbd->{'pid'}= 0;
938
939	# Check if crash expected and restart if it was
940	my $expect_file= "$::opt_vardir/tmp/ndbd_" . "$cluster->{'type'}" .
941	  "$ndbd->{'idx'}" . ".expect";
942	if ( -f $expect_file )
943	{
944	  mtr_verbose("Crash was expected, file $expect_file exists");
945	  ndbd_start($cluster, $ndbd->{'idx'},
946		     $ndbd->{'start_extra_args'});
947	  unlink($expect_file);
948	}
949	return;
950      }
951    }
952  }
953
954  if ($::instance_manager->{'spawner_pid'} eq $ret_pid)
955  {
956    return;
957  }
958
959  mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid");
960
961}
962
963##############################################################################
964#
965#  The operating system will keep information about dead children,
966#  we read this information here, and if we have records the process
967#  is alive, we mark it as dead.
968#
969##############################################################################
970
971sub mtr_record_dead_children () {
972
973  my $process_died= 0;
974  my $ret_pid;
975
976  # Wait without blockinng to see if any processes had died
977  # -1 or 0 means there are no more procesess to wait for
978  while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1)
979  {
980    mtr_warning("mtr_record_dead_children: $ret_pid");
981    mark_process_dead($ret_pid);
982    $process_died= 1;
983  }
984  return $process_died;
985}
986
987sub start_reap_all {
988  # This causes terminating processes to not become zombies, avoiding
989  # the need for (or possibility of) explicit waitpid().
990  $SIG{CHLD}= 'IGNORE';
991
992  # On some platforms (Linux, QNX, OSX, ...) there is potential race
993  # here. If a process terminated before setting $SIG{CHLD} (but after
994  # any attempt to waitpid() it), it will still be a zombie. So we
995  # have to handle any such process here.
996  my $pid;
997  while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1)
998  {
999    mtr_warning("start_reap_all pid: $pid");
1000    mark_process_dead($pid);
1001  };
1002}
1003
1004sub stop_reap_all {
1005  $SIG{CHLD}= 'DEFAULT';
1006}
1007
1008
1009sub mtr_ping_port ($) {
1010  my $port= shift;
1011
1012  mtr_verbose("mtr_ping_port: $port");
1013
1014  my $remote= "localhost";
1015  my $iaddr=  inet_aton($remote);
1016  if ( ! $iaddr )
1017  {
1018    mtr_error("can't find IP number for $remote");
1019  }
1020  my $paddr=  sockaddr_in($port, $iaddr);
1021  my $proto=  getprotobyname('tcp');
1022  if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) )
1023  {
1024    mtr_error("can't create socket: $!");
1025  }
1026
1027  mtr_debug("Pinging server (port: $port)...");
1028
1029  if ( connect(SOCK, $paddr) )
1030  {
1031    close(SOCK);                        # FIXME check error?
1032    mtr_verbose("USED");
1033    return 1;
1034  }
1035  else
1036  {
1037    mtr_verbose("FREE");
1038    return 0;
1039  }
1040}
1041
1042##############################################################################
1043#
1044#  Wait for a file to be created
1045#
1046##############################################################################
1047
1048# FIXME check that the pidfile contains the expected pid!
1049
1050sub sleep_until_file_created ($$$) {
1051  my $pidfile= shift;
1052  my $timeout= shift;
1053  my $pid=     shift;
1054  my $sleeptime= 100; # Milliseconds
1055  my $loops= ($timeout * 1000) / $sleeptime;
1056
1057  for ( my $loop= 1; $loop <= $loops; $loop++ )
1058  {
1059    if ( -r $pidfile )
1060    {
1061      return 1;
1062    }
1063
1064    # Check if it died after the fork() was successful
1065    if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid )
1066    {
1067      mtr_warning("Process $pid died");
1068      return 0;
1069    }
1070
1071    mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile");
1072
1073    # Print extra message every 60 seconds
1074    my $seconds= ($loop * $sleeptime) / 1000;
1075    if ( $seconds > 1 and int($seconds * 10) % 600 == 0 )
1076    {
1077      my $left= $timeout - $seconds;
1078      mtr_warning("Waited $seconds seconds for $pidfile to be created, " .
1079                  "still waiting for $left seconds...");
1080    }
1081
1082    # Millisceond sleep emulated with select
1083    select(undef, undef, undef, ($sleeptime/1000));
1084  }
1085
1086  return 0;
1087}
1088
1089
1090sub mtr_kill_processes ($) {
1091  my $pids = shift;
1092
1093  mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")");
1094
1095  foreach my $pid (keys %{$pids})
1096  {
1097
1098    if ($pid <= 0)
1099    {
1100      mtr_warning("Trying to kill illegal pid: $pid");
1101      next;
1102    }
1103
1104    my $signaled_procs= kill(9, $pid);
1105    if ($signaled_procs == 0)
1106    {
1107      # No such process existed, assume it's killed
1108      mtr_verbose("killed $pid(no such process)");
1109    }
1110    else
1111    {
1112      my $ret_pid= waitpid($pid,0);
1113      if ($ret_pid == $pid)
1114      {
1115	mtr_verbose("killed $pid(got the pid)");
1116      }
1117      elsif ($ret_pid == -1)
1118      {
1119	mtr_verbose("killed $pid(got -1)");
1120      }
1121    }
1122  }
1123  mtr_verbose("done killing processes");
1124}
1125
1126
1127##############################################################################
1128#
1129#  When we exit, we kill off all children
1130#
1131##############################################################################
1132
1133sub mtr_exit ($) {
1134  my $code= shift;
1135  mtr_timer_stop_all($::glob_timers);
1136  local $SIG{HUP} = 'IGNORE';
1137  # ToDo: Signalling -$$ will only work if we are the process group
1138  # leader (in fact on QNX it will signal our session group leader,
1139  # which might be Do-compile or Pushbuild, causing tests to be
1140  # aborted). So we only do it if we are the group leader. We might
1141  # set ourselves as the group leader at startup (with
1142  # POSIX::setpgrp(0,0)), but then care must be needed to always do
1143  # proper child process cleanup.
1144  POSIX::kill(SIGHUP, -$$) if !$::glob_win32_perl and $$ == getpgrp();
1145
1146  exit($code);
1147}
1148
1149###########################################################################
1150
11511;
1152