1"""
2Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3See https://llvm.org/LICENSE.txt for license information.
4SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5
Provides process-management support for the LLDB test running
infrastructure: helpers for creating piped child processes, parsing
timeout specifications, and terminating processes (softly or forcefully)
in a platform-specific way.
12"""
13
14# System imports
15import os
16import re
17import signal
18import subprocess
19import sys
20import threading
21
22
class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread blocks in process.communicate(), stores whatever that call
    returns in self.output, and sets the supplied event when run() is
    finished, whether or not communicate() raised.
    """

    def __init__(self, process, event, output_file):
        """Initializes the communicator thread.

        @param process the subprocess.Popen-like object to communicate with.
        It must provide a pid attribute and a communicate() method.

        @param event the threading.Event-like object that is set once
        run() completes.

        @param output_file the sink used to report an exception raised by
        communicate().  May be an object with a write() method, a plain
        callable taking the message string, or None/falsy to report nothing.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        self.output = None

    def _report(self, message):
        """Writes message to the configured sink, if there is one."""
        sink = self.output_file
        if not sink:
            return
        # Prefer a file-like write(); fall back to calling the sink directly
        # so that a bound write method can be passed in as well.
        writer = getattr(sink, "write", None)
        if writer is None and callable(sink):
            writer = sink
        if writer is not None:
            writer(message)

    def run(self):
        """Pumps the child's I/O until it terminates, then sets the event."""
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            # Best effort: report the failure, but never let it keep the
            # completion event from being set below.
            self._report(
                "exception while using communicate() for pid {}: {}\n".format(
                    self.pid, exception))
        finally:
            # Signal that the thread's run is complete.
            self.event.set()
49
50
# Provides a regular expression for matching timeout/gtimeout-style
# durations: a non-negative decimal number (integer or fractional),
# optionally followed by a unit suffix.  Group 1 is the number and group 2
# is the unit (None when no suffix is given).
TIMEOUT_REGEX = re.compile(r"^(\d+(?:\.\d*)?|\.\d+)([smhd])?$")

# Number of seconds represented by each supported unit suffix.  A missing
# suffix (None) means seconds.
_TIMEOUT_UNIT_SECONDS = {
    None: 1.0,
    's': 1.0,
    'm': 60.0,
    'h': 60.0 * 60.0,
    'd': 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout a timeout string such as "90", "30s", "5m", "1.5h" or
    "2d", where the optional suffix selects seconds, minutes, hours or
    days (seconds when omitted).  May be None.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if timeout is not None and does not match the
    expected format.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if match is None:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(timeout))

    # The regex only admits the suffixes present in the table, so the lookup
    # cannot fail.
    value = float(match.group(1))
    return value * _TIMEOUT_UNIT_SECONDS[match.group(2)]
90
91
class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    See ProcessHelper.process_helper().
    """

    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.

        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """

        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()
        else:
            # For all POSIX-like systems.
            return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.

        @raise NotImplementedError always; derived classes must override.
        """
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        # pylint: disable=no-self-use
        # As expected.  We want derived classes to implement this.
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        The base implementation simply calls popen_process.terminate() and
        ignores log_file and want_core.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        The base implementation simply calls popen_process.kill() and
        ignores log_file.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but did not override this method.
        """
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False

        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.

        @raise NotImplementedError always; derived classes must override.
        """
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        # pylint: disable=no-self-use
        """Retrieve signal numbers that can be sent to soft terminate.

        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        return None

    def is_exceptional_exit(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @param popen_status the return code from a Popen-like object.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.
        """
        return False

    def exceptional_exit_details(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @param popen_status the return code from a Popen-like object.

        @return (normalized exception code, symbolic exception description)

        @raise NotImplementedError always in this base implementation.
        """
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class")
277
278
class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """

    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a subprocess.Popen with stdin/stdout/stderr piped.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group when True, the child puts itself into a
        new process group (via a preexec function) before the exec.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        if new_process_group:
            preexec_func = self._create_new_process_group
        else:
            preexec_func = None

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            close_fds=True,
            preexec_fn=preexec_func)

        # Remember whether we're using process groups for this
        # process.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        """Returns True: POSIX supports a soft terminate."""
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _uses_process_groups(cls, popen_process):
        """Returns whether popen_process was created in its own group.

        Processes that do not carry the using_process_groups attribute set
        by create_piped_process() are treated as not using process groups.
        """
        return getattr(popen_process, "using_process_groups", False)

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Checks whether it makes sense to signal popen_process.

        @param popen_process the subprocess.Popen-like object to check.

        @param log_file file-like object used to explain why termination
        is being skipped.  May be None.

        @return True if the caller should go ahead and send the signal;
        False if there is nothing to terminate.

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                log_file.write("skipping soft_terminate(): no process id\n")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if not cls._uses_process_groups(popen_process):
            # Don't kill if it's already dead.
            popen_process.poll()
            if popen_process.returncode is not None:
                # It has a returncode.  It has already stopped.
                if log_file:
                    log_file.write(
                        "requested to terminate pid {} but it has already "
                        "terminated, returncode {}\n".format(
                            popen_process.pid, popen_process.returncode))
                # Move along...
                return False

        # Good to go.
        return True

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its process group.

        A missing target (ESRCH) is ignored; any other OSError propagates.

        @param popen_process the subprocess.Popen-like object to signal.

        @param log_file file-like object for diagnostics.  May be None.

        @param signum the signal number to send.
        """
        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        try:
            if self._uses_process_groups(popen_process):
                # The group id equals the child's pid because the child
                # made itself the group leader at creation time.
                os.killpg(popen_process.pid, signum)
            else:
                os.kill(popen_process.pid, signum)
        except OSError as error:
            import errno
            if error.errno == errno.ESRCH:
                # This is okay - failed to find the process.  It may be
                # that the timeout pre-kill hook eliminated the process.
                # We'll ignore.
                pass
            else:
                raise

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        """Sends SIGQUIT (core wanted) or SIGTERM (no core) to the process.

        @param popen_process the subprocess.Popen-like object to signal.

        @param log_file file-like object for diagnostics.  May be None.

        @param want_core True to send SIGQUIT, False to send SIGTERM.
        """
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        """Sends SIGKILL to the process (or its process group)."""
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        """Returns whether returncode is -SIGQUIT (with_core) or -SIGTERM."""
        if with_core:
            return returncode == -signal.SIGQUIT
        else:
            return returncode == -signal.SIGTERM

    def was_hard_terminate(self, returncode):
        """Returns whether returncode is -SIGKILL."""
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        """Returns the signals used for soft termination."""
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        """Returns True when popen_status is negative."""
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Builds a mapping from signal number to signal name.

        Only names in the signal module that start with 'SIG' but not
        'SIG_' are considered.  Names are visited in reverse alphabetical
        order so that, when several names share a number, the
        alphabetically first name is the one kept.
        """
        return dict(
            (k, v) for v, k in reversed(sorted(signal.__dict__.items()))
            if v.startswith('SIG') and not v.startswith('SIG_'))

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a negative status.

        @param popen_status the return code from a Popen-like object; it
        is negated to obtain the signal number.

        @return a (signo, name) tuple; name is "" when the number is not
        found among the signal module's names.
        """
        signo = -popen_status
        signal_names_by_number = self._signal_names_by_number()
        signal_name = signal_names_by_number.get(signo, "")
        return (signo, signal_name)
426
427
class WindowsProcessHelper(ProcessHelper):
    """ProcessHelper implementation used on Windows."""

    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a subprocess.Popen with stdin/stdout/stderr piped.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group when True, the process is created with
        the CREATE_NEW_PROCESS_GROUP creation flag; otherwise no creation
        flags are passed.

        @return the subprocess.Popen object.
        """
        # We need the new-process-group flag if we want os.kill() to work
        # on the subprocess.
        flags = (
            subprocess.CREATE_NEW_PROCESS_GROUP if new_process_group else 0)

        popen_options = dict(
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Elicits automatic byte -> string decoding in Py3
            universal_newlines=True,
            creationflags=flags)
        return subprocess.Popen(command, **popen_options)

    def was_hard_terminate(self, returncode):
        """Returns True for any non-zero returncode."""
        return returncode != 0
451
452
class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.
    """

    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver.

        @param soft_terminate_timeout number of seconds to wait for a soft
        terminate to take effect before trying a hard terminate.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Called right after the child process has been created."""

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Called once the driver is done with the child process.

        @param command the command that was run.

        @param output the io thread's output attribute, i.e. whatever
        communicate() returned, or None if it did not return.

        @param was_timeout True if the timeout elapsed before the child
        finished; False otherwise.

        @param exit_status the return code collected for the process; may
        be None if it could not be collected.
        """

    def on_timeout_pre_kill(self):
        """Called after the timeout interval elapses but before killing it.

        This method is added to enable derived classes the ability to do
        something to the process prior to it being killed.  For example,
        this would be a good spot to run a program that samples the process
        to see what it was doing (or not doing).

        Do not attempt to reap the process (i.e. use wait()) in this method.
        That will interfere with the kill mechanism and return code processing.
        """

    def write(self, content):
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        """Writes content to sys.stdout.

        This method lets the driver itself be used as a log file.
        """
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion, without any timeout.

        @param command the command line list to run.

        @raise Exception if no exit status is available after the child's
        I/O has completed.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(
                    self.process.pid))

        # Notify of non-timeout exit.
        self.on_process_exited(
            command,
            self.io_thread.output,
            False,
            self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it outlives timeout.

        @param command the command line list to run.

        @param timeout a timeout spec as accepted by timeout_to_seconds(),
        or None for no timeout.

        @param want_core True if a core dump (or analogous crash report)
        should be requested when soft-terminating the process.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        """Creates the child process and starts its communication thread."""
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # Hand the driver itself to the thread as its log file: the thread
        # reports errors by calling write() on that object, so it needs
        # something with a write() method rather than the bound method.
        self.io_thread = CommunicatorThread(
            self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Soft-terminates the process and waits for the io thread.

        @param want_core passed through to the helper's soft_terminate().

        @return (terminated, done_trying): (True, None) if the io thread
        finished within soft_terminate_timeout, else (False, False).
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process,
            want_core=want_core,
            log_file=self)

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if not self.io_thread.is_alive():
            # stdout/stderr were closed on the child process side. We
            # should be able to wait and reap the child process here.
            self.returncode = self.process.wait()
            # We terminated, and the done_trying result is n/a
            terminated = True
            done_trying = None
        else:
            self.write("soft kill attempt of process {} timed out "
                       "after {} seconds\n".format(
                           self.process.pid, self.soft_terminate_timeout))
            terminated = False
            done_trying = False
        return terminated, done_trying

    def _attempt_hard_kill(self):
        """Hard-terminates the process and reaps it.

        @return (terminated, done_trying) where terminated tells whether a
        return code was collected and done_trying is always True.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(
            self.process,
            log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout))

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        done_trying = True

        return terminated, done_trying

    def _attempt_termination(self, attempt_count, want_core):
        """Performs the attempt_count-th (1-based) termination attempt.

        With soft terminate support the order is soft kill, then hard
        kill; without it only a hard kill is tried.  Any later attempt
        just reports the current state.

        @param attempt_count the 1-based number of this attempt.

        @param want_core passed through to the soft kill attempt.

        @return (terminated, done_trying) as returned by the attempt made,
        or (returncode is not None, True) when nothing is left to try.
        """
        if self.process_helper.supports_soft_terminate():
            # When soft termination is supported, we first try to stop
            # the process with a soft terminate.  Failing that, we try
            # the hard terminate option.
            if attempt_count == 1:
                return self._attempt_soft_kill(want_core)
            elif attempt_count == 2:
                return self._attempt_hard_kill()
        else:
            # We only try the hard terminate option when there
            # is no soft terminate available.
            if attempt_count == 1:
                return self._attempt_hard_kill()

        # We don't have anything else to try.
        terminated = self.returncode is not None
        done_trying = True
        return terminated, done_trying

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the child to finish, terminating it on timeout.

        @param timeout_seconds the number of seconds to wait for the io
        thread to signal completion, or None to wait indefinitely.

        @param command the command being run; forwarded to
        on_process_exited().

        @param want_core passed through to the termination attempts.
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:

            # Allow derived classes to do some work after we detected
            # a timeout but before we touch the timed-out process.
            self.on_timeout_pre_kill()

            # Prepare to stop the process
            process_terminated = False
            terminate_attempt_count = 0

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core)
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command,
            self.io_thread.output,
            not completed_normally,
            self.returncode)
693
694
def patched_init(self, *args, **kwargs):
    """Replacement initializer that adds a wait_condition attribute.

    Forwards all arguments to self.original_init() and, once that has
    returned, attaches a fresh threading.Condition as self.wait_condition.
    """
    self.original_init(*args, **kwargs)
    # This condition variable is what protects wait()/poll().
    condition = threading.Condition()
    self.wait_condition = condition
699
700
def patched_wait(self, *args, **kwargs):
    """Replacement wait() that runs while holding self.wait_condition.

    Forwards all arguments to self.original_wait() and returns its result.
    The condition is notified after the original call returns and is
    released on every exit path.
    """
    with self.wait_condition:
        status = self.original_wait(*args, **kwargs)
        # The process finished.  Signal the condition.
        self.wait_condition.notify_all()
        return status
710
711
def patched_poll(self, *args, **kwargs):
    """Replacement poll() that runs while holding self.wait_condition.

    Forwards all arguments to self.original_poll() and returns its result.
    If self.returncode is set after the call, the condition is notified.
    The condition is released on every exit path.
    """
    with self.wait_condition:
        status = self.original_poll(*args, **kwargs)
        has_exited = self.returncode is not None
        if has_exited:
            # We did complete, and we have the return value.
            # Signal the condition to indicate we're done.
            self.wait_condition.notify_all()
        return status
723
724
def patch_up_subprocess_popen():
    """Installs the patched __init__/wait/poll on subprocess.Popen.

    The original methods are kept on the class as original_init,
    original_wait and original_poll, which is where the patched versions
    look them up.

    Calling this more than once is harmless: if subprocess.Popen already
    carries an original_init attribute, nothing is changed.  Re-patching
    would store the patched methods as the "originals" and make them call
    themselves recursively.
    """
    popen_class = subprocess.Popen
    if getattr(popen_class, "original_init", None) is not None:
        # Already patched (e.g. this module was loaded a second time).
        return

    popen_class.original_init = popen_class.__init__
    popen_class.__init__ = patched_init

    popen_class.original_wait = popen_class.wait
    popen_class.wait = patched_wait

    popen_class.original_poll = popen_class.poll
    popen_class.poll = patched_poll

# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.
patch_up_subprocess_popen()
738