1from __future__ import absolute_import
2import errno
3import io
4import itertools
5import getopt
6import os, signal, subprocess, sys
7import re
8import stat
9import platform
10import shutil
11import tempfile
12import threading
13
14import io
15try:
16    from StringIO import StringIO
17except ImportError:
18    from io import StringIO
19
20from lit.ShCommands import GlobItem, Command
21import lit.ShUtil as ShUtil
22import lit.Test as Test
23import lit.util
24from lit.util import to_bytes, to_string, to_unicode
25from lit.BooleanExpression import BooleanExpression
26
class InternalShellError(Exception):
    """Error raised by lit's internal shell while running a command.

    Attributes:
        command: the command object the error is about.
        message: human-readable description of what went wrong.
    """

    def __init__(self, command, message):
        self.command, self.message = command, message
31
# True when lit itself is running on Windows (per platform.system()).
kIsWindows = platform.system() == 'Windows'

# Don't use close_fds on Windows.
kUseCloseFDs = not kIsWindows

# Use temporary files to replace /dev/null on Windows.
kAvoidDevNull = kIsWindows
# The path that is treated as the null device in redirects and in command
# arguments.
kDevNull = "/dev/null"

# A regex that matches %dbg(ARG), which lit inserts at the beginning of each
# run command pipeline such that ARG specifies the pipeline's source line
# number.  lit later expands each %dbg(ARG) to a command that behaves as a null
# command in the target shell so that the line number is seen in lit's verbose
# mode.
#
# This regex captures ARG.  ARG must not contain a right parenthesis, which
# terminates %dbg.  ARG must not contain quotes, because ARG might be enclosed
# in quotes during expansion.
kPdbgRegex = '%dbg\\(([^)\'"]*)\\)'
51
class ShellEnvironment(object):

    """Mutable state of the emulated shell.

    Holds the current working directory and a private copy of the
    environment-variable mapping, so that changes made through this object
    do not leak back into the mapping it was built from.
    """

    def __init__(self, cwd, env):
        # Copy first so the caller's mapping is never aliased.
        self.env = dict(env)
        self.cwd = cwd
62
class TimeoutHelper(object):
    """
        Helper that enforces a timeout for _executeShCmd(). It is handed
        down through the recursive calls so that every process that gets
        launched can be registered here and killed once the timeout fires.

        A timeout that is not greater than zero makes the helper inactive;
        every operation is then a no-op.
    """
    def __init__(self, timeout):
        self.timeout = timeout
        self._procs = []
        self._timeoutReached = False
        self._doneKillPass = False
        # Both of these are created lazily by startTimer(). The lock guards
        # concurrent access to _procs and _doneKillPass.
        self._lock = None
        self._timer = None

    def cancel(self):
        """Stop the pending timer (no-op when the helper is inactive)."""
        if self.active():
            self._timer.cancel()

    def active(self):
        """Return True when a positive timeout was requested."""
        return self.timeout > 0

    def addProcess(self, proc):
        """Register a launched process so it can be killed on timeout."""
        if not self.active():
            return
        with self._lock:
            self._procs.append(proc)
            # Only record here whether a kill pass already happened; _kill()
            # takes the same (non-reentrant) lock, so it has to be called
            # after the lock has been released.
            killPassAlreadyRan = self._doneKillPass

        if not killPassAlreadyRan:
            return
        # The timer thread already ran _kill() before this process was
        # registered, so run it again from this thread; otherwise the process
        # would keep running although the timeout was already hit.
        assert self.timeoutReached()
        self._kill()

    def startTimer(self):
        """Create the lock and start the timer (no-op when inactive)."""
        if self.active():
            # Late initialisation that is only needed when a timeout is set.
            self._lock = threading.Lock()
            self._timer = threading.Timer(self.timeout,
                                          self._handleTimeoutReached)
            self._timer.start()

    def _handleTimeoutReached(self):
        """Timer callback: flag the timeout and kill registered processes."""
        self._timeoutReached = True
        self._kill()

    def timeoutReached(self):
        """Return True once the timer has fired."""
        return self._timeoutReached

    def _kill(self):
        """
            Kill every registered process and forget about it.

            This may run more than once: _executeShCmd() could be in the
            middle of creating a process that is not yet in ``self._procs``
            when the timer fires. Because both this method and addProcess()
            take the lock, such late processes are still killed.
        """
        with self._lock:
            for proc in self._procs:
                lit.util.killProcessAndChildren(proc.pid)
            # Reset the list (Python2 has no list.clear()) and remember that
            # a pass over the list has been done.
            self._procs = []
            self._doneKillPass = True
138
class ShellCommandResult(object):
    """Record of what a single command produced when it was run.

    Stores the command, its captured stdout and stderr, its exit code,
    whether the timeout was hit, and a private copy of the list of output
    files.
    """

    def __init__(self, command, stdout, stderr, exitCode, timeoutReached,
                 outputFiles = []):
        # Copy so neither the caller's list nor the shared default is aliased.
        self.outputFiles = list(outputFiles)
        self.command, self.exitCode = command, exitCode
        self.stdout, self.stderr = stdout, stderr
        self.timeoutReached = timeoutReached
150
def executeShCmd(cmd, shenv, results, timeout=0):
    """
        Wrapper around _executeShCmd that handles the timeout.

        cmd, shenv and results are passed straight through to
        _executeShCmd(). A timeout greater than zero arms a timer that kills
        the launched processes when it fires.

        Returns a tuple (finalExitCode, timeoutInfo) where timeoutInfo is
        None unless the timeout was reached, in which case it is a message
        describing the timeout.
    """
    # Use the helper even when no timeout is required to make
    # other code simpler (i.e. avoid bunch of ``!= None`` checks)
    timeoutHelper = TimeoutHelper(timeout)
    if timeout > 0:
        timeoutHelper.startTimer()
    try:
        finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
    finally:
        # Always disarm the timer, including when _executeShCmd() raises.
        # Otherwise the timer thread would stay alive and later try to kill
        # the processes recorded for a command that is already over.
        timeoutHelper.cancel()
    timeoutInfo = None
    if timeoutHelper.timeoutReached():
        timeoutInfo = 'Reached timeout of {} seconds'.format(timeout)

    return (finalExitCode, timeoutInfo)
168
def expand_glob(arg, cwd):
    """Return the list of arguments that ``arg`` stands for.

    A GlobItem is resolved against ``cwd`` and its matches are returned in
    sorted order; anything else is returned unchanged as a one-element list.
    """
    if not isinstance(arg, GlobItem):
        return [arg]
    matches = list(arg.resolve(cwd))
    matches.sort()
    return matches
173
def expand_glob_expressions(args, cwd):
    """Expand the glob items in ``args[1:]`` relative to ``cwd``.

    The first element (the command itself) is copied over untouched; every
    later element is replaced by whatever expand_glob() returns for it.
    """
    command = args[0]
    expanded_tail = [item
                     for arg in args[1:]
                     for item in expand_glob(arg, cwd)]
    return [command] + expanded_tail
179
def quote_windows_command(seq):
    r"""
    Join the arguments in ``seq`` into one Windows command line string.

    This reimplements Python's private subprocess.list2cmdline for MSys
    compatibility. It is based on the CPython implementation here:
      https://hg.python.org/cpython/file/849826a900d2/Lib/subprocess.py#l422

    Some core util distributions (MSys) don't tokenize command line arguments
    the same way the MSVC CRT does, so lit rolls its own quoting logic to
    paper over those differences. The algorithm is the MSDN one that CPython
    uses (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), except that
    more arguments get quoted: besides empty arguments and arguments
    containing a space or a tab, arguments containing a double quote or an
    opening square bracket are quoted too.

    For MSys based tools this is very brittle though, because quoting an
    argument makes the MSys based tool unescape backslashes where it
    shouldn't (e.g. "a\b\\c\\\\d" becomes "a\b\c\\d" where it should stay as
    it was, according to regular win32 command line parsing rules).
    """
    def quote_one(arg):
        # This condition differs from upstream list2cmdline.
        must_quote = not arg or any(ch in arg for ch in (' ', '\t', '"', '['))
        pieces = []
        # Number of consecutive backslashes seen but not yet emitted; whether
        # they must be doubled is only known once the next character is seen.
        pending = 0
        for ch in arg:
            if ch == '\\':
                pending += 1
                continue
            if ch == '"':
                # Backslashes in front of a quote are doubled and the quote
                # itself is escaped.
                pieces.append('\\' * (pending * 2))
                pieces.append('\\"')
            else:
                # Backslashes in front of a normal character stay as they are.
                pieces.append('\\' * pending)
                pieces.append(ch)
            pending = 0

        trailing = '\\' * pending
        body = ''.join(pieces)
        if must_quote:
            # Trailing backslashes precede the closing quote, so double them.
            return '"' + body + trailing + trailing + '"'
        return body + trailing

    return ' '.join(quote_one(arg) for arg in seq)
241
# Handles the arguments of an 'export' or 'env' command.
# args[0] is the command itself and is skipped. The following arguments are
# consumed as long as they are '-u NAME' pairs or KEY=VALUE assignments, and
# env.env is modified accordingly.
# Returns the part of args that starts at the first argument that is neither
# (an empty list if every argument was consumed).
def updateEnv(env, args):
    rest_start = len(args)
    expect_unset_name = False
    cursor = 1
    while cursor < len(args):
        token = args[cursor]
        cursor += 1
        if token == '-u':
            # Support for the -u flag (unsetting) for env command
            # e.g., env -u FOO -u BAR will remove both FOO and BAR
            # from the environment.
            expect_unset_name = True
        elif expect_unset_name:
            expect_unset_name = False
            env.env.pop(token, None)
        else:
            # Split the string into KEY=VALUE.
            name, separator, value = token.partition('=')
            if not separator:
                # No equals sign: this is where the real command begins.
                rest_start = cursor - 1
                break
            env.env[name] = value
    return args[rest_start:]
270
def executeBuiltinCd(cmd, shenv):
    """executeBuiltinCd - Change the current directory.

    cmd.args must be ['cd', DIR]; otherwise InternalShellError is raised.
    An absolute DIR is stored in shenv.cwd as given; a relative DIR is joined
    onto the current shenv.cwd and passed through os.path.realpath().

    Returns a ShellCommandResult with exit code 0 and empty output.
    """
    if len(cmd.args) != 2:
        # InternalShellError takes (command, message); leaving out the
        # command would raise a TypeError instead of the intended error.
        raise InternalShellError(cmd, "'cd' supports only one argument")
    newdir = cmd.args[1]
    # Update the cwd in the parent environment.
    if os.path.isabs(newdir):
        shenv.cwd = newdir
    else:
        shenv.cwd = os.path.realpath(os.path.join(shenv.cwd, newdir))
    # The cd builtin always succeeds. If the directory does not exist, the
    # following Popen calls will fail instead.
    return ShellCommandResult(cmd, "", "", 0, False)
284
def executeBuiltinExport(cmd, shenv):
    """executeBuiltinExport - Set an environment variable.

    cmd.args must be ['export', ARG]; otherwise InternalShellError is raised.
    The argument is handed to updateEnv(), which modifies shenv.env.

    Returns a ShellCommandResult with exit code 0 and empty output.
    """
    if len(cmd.args) != 2:
        # InternalShellError takes (command, message); leaving out the
        # command would raise a TypeError instead of the intended error.
        raise InternalShellError(cmd, "'export' supports only one argument")
    updateEnv(shenv, cmd.args)
    return ShellCommandResult(cmd, "", "", 0, False)
291
def executeBuiltinEcho(cmd, shenv):
    """Run an echo command in-process, honouring a stdout redirect.

    Raises InternalShellError if stdin or stderr is redirected. When stdout
    is not redirected the text is buffered and returned in the result's
    stdout; otherwise it is written to the redirect target and the result's
    stdout is empty. The exit code is always 0.
    """
    opened_files = []
    stdin, stdout, stderr = processRedirects(cmd, subprocess.PIPE, shenv,
                                             opened_files)
    if not (stdin == subprocess.PIPE and stderr == subprocess.PIPE):
        raise InternalShellError(
                cmd, "stdin and stderr redirects not supported for echo")

    # Some tests have un-redirected echo commands to help debug test failures.
    # In that case the output is buffered and handed back to the caller.
    capture_output = (stdout == subprocess.PIPE)
    if capture_output:
        encode = lambda text : text
        stdout = StringIO()
    elif kIsWindows:
        # Reopen stdout in binary mode to avoid CRLF translation. The versions
        # of echo we are replacing on Windows all emit plain LF, and the LLVM
        # tests now depend on this.
        # Writing to a binary file means 'bytes' objects have to be written
        # instead of 'str' objects.
        encode = lit.util.to_bytes
        stdout = open(stdout.name, stdout.mode + 'b')
        opened_files.append((None, None, stdout, None))
    else:
        encode = lambda text : text

    # Implement echo flags. We only support -e and -n, and not yet in
    # combination. We have to ignore unknown flags, because `echo "-D FOO"`
    # prints the dash.
    words = cmd.args[1:]
    interpret_escapes = False
    write_newline = True
    flag_count = 0
    while flag_count < len(words) and words[flag_count] in ('-e', '-n'):
        flag = words[flag_count]
        flag_count += 1
        if flag == '-e':
            interpret_escapes = True
        elif flag == '-n':
            write_newline = False
    words = words[flag_count:]

    def maybeUnescape(arg):
        if interpret_escapes:
            if sys.version_info < (3,0):
                codec = 'string_escape'
            else:
                codec = 'unicode_escape'
            return lit.util.to_bytes(arg).decode(codec)
        return arg

    # Words are separated by single spaces, with no space after the last one.
    for position, word in enumerate(words):
        if position:
            stdout.write(encode(' '))
        stdout.write(encode(maybeUnescape(word)))
    if write_newline:
        stdout.write(encode('\n'))

    for (name, mode, handle, path) in opened_files:
        handle.close()

    output = stdout.getvalue() if capture_output else ""
    return ShellCommandResult(cmd, output, "", 0, False)
353
def executeBuiltinMkdir(cmd, cmd_shenv):
    """executeBuiltinMkdir - Create new directories.

    Supports the -p option. Raises InternalShellError for any other option
    or when no directory is given. Relative directories are resolved
    against cmd_shenv.cwd. Without -p, an OSError from creating a directory
    is reported in the result's stderr and makes the exit code 1.
    """
    operands = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, operands = getopt.gnu_getopt(operands, 'p')
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'mkdir':  %s" % str(err))

    parent = False
    for flag, _value in opts:
        if flag == "-p":
            parent = True
        else:
            assert False, "unhandled option"

    if not len(operands):
        raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand")

    # Paths are handled as unicode on Windows and as bytes elsewhere.
    convert = to_unicode if kIsWindows else to_bytes
    stderr = StringIO()
    exitCode = 0
    for operand in operands:
        target = convert(operand)
        base = convert(cmd_shenv.cwd)
        if not os.path.isabs(target):
            target = os.path.realpath(os.path.join(base, target))
        if parent:
            lit.util.mkdir_p(target)
            continue
        try:
            lit.util.mkdir(target)
        except OSError as err:
            stderr.write("Error: 'mkdir' command failed, %s\n" % str(err))
            exitCode = 1
    return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
389
def executeBuiltinRm(cmd, cmd_shenv):
    """executeBuiltinRm - Removes (deletes) files or directories.

    Supported options are -f (force) and -r / -R / --recursive. Any other
    option, or a missing operand, raises InternalShellError. Relative paths
    are resolved against cmd_shenv.cwd.

    With -f, paths that do not exist are skipped silently and read-only
    entries are made writable before they are removed. A directory is only
    removed when a recursive option is given; otherwise an error is written
    to the result's stderr and the directory is left alone.

    Returns a ShellCommandResult whose exit code is 1 if any operand could
    not be removed and 0 otherwise.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        # getopt expects long option names without the leading '--'; it
        # still reports the option as '--recursive' in opts.
        opts, args = getopt.gnu_getopt(args, "frR", ["recursive"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'rm':  %s" % str(err))

    force = False
    recursive = False
    for o, a in opts:
        if o == "-f":
            force = True
        elif o in ("-r", "-R", "--recursive"):
            recursive = True
        else:
            assert False, "unhandled option"

    if len(args) == 0:
        raise InternalShellError(cmd, "Error: 'rm' is missing an operand")

    def on_rm_error(func, path, exc_info):
        # path contains the path of the file that couldn't be removed
        # let's just assume that it's read-only and remove it.
        os.chmod(path, stat.S_IMODE( os.stat(path).st_mode) | stat.S_IWRITE)
        os.remove(path)

    stderr = StringIO()
    exitCode = 0
    for path in args:
        cwd = cmd_shenv.cwd
        # Paths are handled as unicode on Windows and as bytes elsewhere.
        path = to_unicode(path) if kIsWindows else to_bytes(path)
        cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
        if not os.path.isabs(path):
            path = os.path.realpath(os.path.join(cwd, path))
        if force and not os.path.exists(path):
            continue
        try:
            if os.path.isdir(path):
                if not recursive:
                    stderr.write("Error: %s is a directory\n" % path)
                    exitCode = 1
                    # Report the error and leave the directory in place
                    # instead of going on to delete it anyway.
                    continue
                if platform.system() == 'Windows':
                    # NOTE: use ctypes to access `SHFileOperationsW` on Windows to
                    # use the NT style path to get access to long file paths which
                    # cannot be removed otherwise.
                    from ctypes.wintypes import BOOL, HWND, LPCWSTR, UINT, WORD
                    from ctypes import addressof, byref, c_void_p, create_unicode_buffer
                    from ctypes import Structure
                    from ctypes import windll, WinError, POINTER

                    class SHFILEOPSTRUCTW(Structure):
                        _fields_ = [
                                ('hWnd', HWND),
                                ('wFunc', UINT),
                                ('pFrom', LPCWSTR),
                                ('pTo', LPCWSTR),
                                ('fFlags', WORD),
                                ('fAnyOperationsAborted', BOOL),
                                ('hNameMappings', c_void_p),
                                ('lpszProgressTitle', LPCWSTR),
                        ]

                    FO_MOVE, FO_COPY, FO_DELETE, FO_RENAME = range(1, 5)

                    FOF_SILENT = 4
                    FOF_NOCONFIRMATION = 16
                    FOF_NOCONFIRMMKDIR = 512
                    FOF_NOERRORUI = 1024

                    FOF_NO_UI = FOF_SILENT | FOF_NOCONFIRMATION | FOF_NOERRORUI | FOF_NOCONFIRMMKDIR

                    SHFileOperationW = windll.shell32.SHFileOperationW
                    SHFileOperationW.argtypes = [POINTER(SHFILEOPSTRUCTW)]

                    path = os.path.abspath(path)

                    # pFrom is given two terminating NUL characters.
                    pFrom = create_unicode_buffer(path, len(path) + 2)
                    pFrom[len(path)] = pFrom[len(path) + 1] = '\0'
                    operation = SHFILEOPSTRUCTW(wFunc=UINT(FO_DELETE),
                                                pFrom=LPCWSTR(addressof(pFrom)),
                                                fFlags=FOF_NO_UI)
                    result = SHFileOperationW(byref(operation))
                    if result:
                        raise WinError(result)
                else:
                    shutil.rmtree(path, onerror = on_rm_error if force else None)
            else:
                if force and not os.access(path, os.W_OK):
                    os.chmod(path,
                             stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE)
                os.remove(path)
        except OSError as err:
            # Newline-terminated so that several failures do not run together
            # on one line.
            stderr.write("Error: 'rm' command failed, %s\n" % str(err))
            exitCode = 1
    return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
486
def executeBuiltinColon(cmd, cmd_shenv):
    """executeBuiltinColon - The ':' builtin.

    Ignores its arguments and the environment and always reports success
    (exit code 0) with empty stdout and stderr.
    """
    return ShellCommandResult(command=cmd, stdout="", stderr="", exitCode=0,
                              timeoutReached=False)
490
def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
    """Return the standard fds for cmd after applying redirects

    Returns the three standard file descriptors for the new child process.  Each
    fd may be an open, writable file object or a sentinel value from the
    subprocess module.

    cmd.redirects is iterated as (op, filename) pairs. stdin_source is what
    stdin becomes when it is not redirected. Relative redirect targets are
    resolved against cmd_shenv.cwd. Every file opened here is appended to
    opened_files as a (filename, mode, file-object, path) tuple, where path
    is None for the /dev/null and /dev/tty substitutes.

    Raises InternalShellError for an unsupported redirect or when a glob in a
    redirect target does not expand to exactly one file.
    """

    # Apply the redirections, we use (N,) as a sentinel to indicate stdin,
    # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
    # from a file are represented with a list [file, mode, file-object]
    # where file-object is initially None.
    redirects = [(0,), (1,), (2,)]
    for (op, filename) in cmd.redirects:
        if op == ('>',2):
            redirects[2] = [filename, 'w', None]
        elif op == ('>>',2):
            redirects[2] = [filename, 'a', None]
        # Compare against the three valid fd names explicitly. A substring
        # test against '012' would also accept '' and '01', and would raise a
        # TypeError for a filename that is not a string.
        elif op == ('>&',2) and filename in ('0', '1', '2'):
            redirects[2] = redirects[int(filename)]
        elif op == ('>&',) or op == ('&>',):
            # Both entries share one list so the file is opened only once.
            redirects[1] = redirects[2] = [filename, 'w', None]
        elif op == ('>',):
            redirects[1] = [filename, 'w', None]
        elif op == ('>>',):
            redirects[1] = [filename, 'a', None]
        elif op == ('<',):
            redirects[0] = [filename, 'r', None]
        else:
            raise InternalShellError(cmd, "Unsupported redirect: %r" % ((op, filename),))

    # Open file descriptors in a second pass.
    std_fds = [None, None, None]
    for (index, r) in enumerate(redirects):
        # Handle the sentinel values for defaults up front.
        if isinstance(r, tuple):
            if r == (0,):
                fd = stdin_source
            elif r == (1,):
                if index == 0:
                    raise InternalShellError(cmd, "Unsupported redirect for stdin")
                elif index == 1:
                    fd = subprocess.PIPE
                else:
                    # stderr was pointed at stdout (2>&1).
                    fd = subprocess.STDOUT
            elif r == (2,):
                if index != 2:
                    raise InternalShellError(cmd, "Unsupported redirect on stdout")
                fd = subprocess.PIPE
            else:
                raise InternalShellError(cmd, "Bad redirect")
            std_fds[index] = fd
            continue

        (filename, mode, fd) = r

        # Check if we already have an open fd. This can happen if stdout and
        # stderr go to the same place.
        if fd is not None:
            std_fds[index] = fd
            continue

        redir_filename = None
        name = expand_glob(filename, cmd_shenv.cwd)
        if len(name) != 1:
            raise InternalShellError(cmd, "Unsupported: glob in "
                                     "redirect expanded to multiple files")
        name = name[0]
        if kAvoidDevNull and name == kDevNull:
            fd = tempfile.TemporaryFile(mode=mode)
        elif kIsWindows and name == '/dev/tty':
            # Simulate /dev/tty on Windows.
            # "CON" is a special filename for the console.
            fd = open("CON", mode)
        else:
            # Make sure relative paths are relative to the cwd.
            redir_filename = os.path.join(cmd_shenv.cwd, name)
            redir_filename = to_unicode(redir_filename) \
                    if kIsWindows else to_bytes(redir_filename)
            fd = open(redir_filename, mode)
        # Workaround a Win32 and/or subprocess bug when appending.
        #
        # FIXME: Actually, this is probably an instance of PR6753.
        if mode == 'a':
            fd.seek(0, 2)
        # Mutate the underlying redirect list so that we can redirect stdout
        # and stderr to the same place without opening the file twice.
        r[2] = fd
        opened_files.append((filename, mode, fd) + (redir_filename,))
        std_fds[index] = fd

    return std_fds
583
584def _executeShCmd(cmd, shenv, results, timeoutHelper):
585    if timeoutHelper.timeoutReached():
586        # Prevent further recursion if the timeout has been hit
587        # as we should try avoid launching more processes.
588        return None
589
590    if isinstance(cmd, ShUtil.Seq):
591        if cmd.op == ';':
592            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
593            return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
594
595        if cmd.op == '&':
596            raise InternalShellError(cmd,"unsupported shell operator: '&'")
597
598        if cmd.op == '||':
599            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
600            if res != 0:
601                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
602            return res
603
604        if cmd.op == '&&':
605            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
606            if res is None:
607                return res
608
609            if res == 0:
610                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
611            return res
612
613        raise ValueError('Unknown shell command: %r' % cmd.op)
614    assert isinstance(cmd, ShUtil.Pipeline)
615
616    procs = []
617    proc_not_counts = []
618    default_stdin = subprocess.PIPE
619    stderrTempFiles = []
620    opened_files = []
621    named_temp_files = []
622    builtin_commands = set(['cat', 'diff'])
623    builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
624    inproc_builtins = {'cd': executeBuiltinCd,
625                       'export': executeBuiltinExport,
626                       'echo': executeBuiltinEcho,
627                       'mkdir': executeBuiltinMkdir,
628                       'rm': executeBuiltinRm,
629                       ':': executeBuiltinColon}
630    # To avoid deadlock, we use a single stderr stream for piped
631    # output. This is null until we have seen some output using
632    # stderr.
633    for i,j in enumerate(cmd.commands):
634        # Reference the global environment by default.
635        cmd_shenv = shenv
636        args = list(j.args)
637        not_args = []
638        not_count = 0
639        not_crash = False
640        while True:
641            if args[0] == 'env':
642                # Create a copy of the global environment and modify it for
643                # this one command. There might be multiple envs in a pipeline,
644                # and there might be multiple envs in a command (usually when
645                # one comes from a substitution):
646                #   env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s
647                #   env FOO=1 %{another_env_plus_cmd} | FileCheck %s
648                if cmd_shenv is shenv:
649                    cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env)
650                args = updateEnv(cmd_shenv, args)
651                if not args:
652                    raise InternalShellError(j, "Error: 'env' requires a"
653                                                " subcommand")
654            elif args[0] == 'not':
655                not_args.append(args.pop(0))
656                not_count += 1
657                if args and args[0] == '--crash':
658                    not_args.append(args.pop(0))
659                    not_crash = True
660                if not args:
661                    raise InternalShellError(j, "Error: 'not' requires a"
662                                                " subcommand")
663            elif args[0] == '!':
664                not_args.append(args.pop(0))
665                not_count += 1
666                if not args:
667                    raise InternalShellError(j, "Error: '!' requires a"
668                                                " subcommand")
669            else:
670                break
671
672        # Handle in-process builtins.
673        #
674        # Handle "echo" as a builtin if it is not part of a pipeline. This
675        # greatly speeds up tests that construct input files by repeatedly
676        # echo-appending to a file.
677        # FIXME: Standardize on the builtin echo implementation. We can use a
678        # temporary file to sidestep blocking pipe write issues.
679        inproc_builtin = inproc_builtins.get(args[0], None)
680        if inproc_builtin and (args[0] != 'echo' or len(cmd.commands) == 1):
681            # env calling an in-process builtin is useless, so we take the safe
682            # approach of complaining.
683            if not cmd_shenv is shenv:
684                raise InternalShellError(j, "Error: 'env' cannot call '{}'"
685                                            .format(args[0]))
686            if not_crash:
687                raise InternalShellError(j, "Error: 'not --crash' cannot call"
688                                            " '{}'".format(args[0]))
689            if len(cmd.commands) != 1:
690                raise InternalShellError(j, "Unsupported: '{}' cannot be part"
691                                            " of a pipeline".format(args[0]))
692            result = inproc_builtin(Command(args, j.redirects), cmd_shenv)
693            if not_count % 2:
694                result.exitCode = int(not result.exitCode)
695            result.command.args = j.args;
696            results.append(result)
697            return result.exitCode
698
699        # Resolve any out-of-process builtin command before adding back 'not'
700        # commands.
701        if args[0] in builtin_commands:
702            args.insert(0, sys.executable)
703            cmd_shenv.env['PYTHONPATH'] = \
704                os.path.dirname(os.path.abspath(__file__))
705            args[1] = os.path.join(builtin_commands_dir, args[1] + ".py")
706
707        # We had to search through the 'not' commands to find all the 'env'
708        # commands and any other in-process builtin command.  We don't want to
709        # reimplement 'not' and its '--crash' here, so just push all 'not'
710        # commands back to be called as external commands.  Because this
711        # approach effectively moves all 'env' commands up front, it relies on
712        # the assumptions that (1) environment variables are not intended to be
713        # relevant to 'not' commands and (2) the 'env' command should always
714        # blindly pass along the status it receives from any command it calls.
715
716        # For plain negations, either 'not' without '--crash', or the shell
717        # operator '!', leave them out from the command to execute and
718        # invert the result code afterwards.
719        if not_crash:
720            args = not_args + args
721            not_count = 0
722        else:
723            not_args = []
724
725        stdin, stdout, stderr = processRedirects(j, default_stdin, cmd_shenv,
726                                                 opened_files)
727
728        # If stderr wants to come from stdout, but stdout isn't a pipe, then put
729        # stderr on a pipe and treat it as stdout.
730        if (stderr == subprocess.STDOUT and stdout != subprocess.PIPE):
731            stderr = subprocess.PIPE
732            stderrIsStdout = True
733        else:
734            stderrIsStdout = False
735
736            # Don't allow stderr on a PIPE except for the last
737            # process, this could deadlock.
738            #
739            # FIXME: This is slow, but so is deadlock.
740            if stderr == subprocess.PIPE and j != cmd.commands[-1]:
741                stderr = tempfile.TemporaryFile(mode='w+b')
742                stderrTempFiles.append((i, stderr))
743
744        # Resolve the executable path ourselves.
745        executable = None
746        # For paths relative to cwd, use the cwd of the shell environment.
747        if args[0].startswith('.'):
748            exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
749            if os.path.isfile(exe_in_cwd):
750                executable = exe_in_cwd
751        if not executable:
752            executable = lit.util.which(args[0], cmd_shenv.env['PATH'])
753        if not executable:
754            raise InternalShellError(j, '%r: command not found' % args[0])
755
756        # Replace uses of /dev/null with temporary files.
757        if kAvoidDevNull:
758            # In Python 2.x, basestring is the base class for all string (including unicode)
759            # In Python 3.x, basestring no longer exist and str is always unicode
760            try:
761                str_type = basestring
762            except NameError:
763                str_type = str
764            for i,arg in enumerate(args):
765                if isinstance(arg, str_type) and kDevNull in arg:
766                    f = tempfile.NamedTemporaryFile(delete=False)
767                    f.close()
768                    named_temp_files.append(f.name)
769                    args[i] = arg.replace(kDevNull, f.name)
770
771        # Expand all glob expressions
772        args = expand_glob_expressions(args, cmd_shenv.cwd)
773
774        # On Windows, do our own command line quoting for better compatibility
775        # with some core utility distributions.
776        if kIsWindows:
777            args = quote_windows_command(args)
778
779        try:
780            procs.append(subprocess.Popen(args, cwd=cmd_shenv.cwd,
781                                          executable = executable,
782                                          stdin = stdin,
783                                          stdout = stdout,
784                                          stderr = stderr,
785                                          env = cmd_shenv.env,
786                                          close_fds = kUseCloseFDs))
787            proc_not_counts.append(not_count)
788            # Let the helper know about this process
789            timeoutHelper.addProcess(procs[-1])
790        except OSError as e:
791            raise InternalShellError(j, 'Could not create process ({}) due to {}'.format(executable, e))
792
793        # Immediately close stdin for any process taking stdin from us.
794        if stdin == subprocess.PIPE:
795            procs[-1].stdin.close()
796            procs[-1].stdin = None
797
798        # Update the current stdin source.
799        if stdout == subprocess.PIPE:
800            default_stdin = procs[-1].stdout
801        elif stderrIsStdout:
802            default_stdin = procs[-1].stderr
803        else:
804            default_stdin = subprocess.PIPE
805
806    # Explicitly close any redirected files. We need to do this now because we
807    # need to release any handles we may have on the temporary files (important
808    # on Win32, for example). Since we have already spawned the subprocess, our
809    # handles have already been transferred so we do not need them anymore.
810    for (name, mode, f, path) in opened_files:
811        f.close()
812
813    # FIXME: There is probably still deadlock potential here. Yawn.
814    procData = [None] * len(procs)
815    procData[-1] = procs[-1].communicate()
816
817    for i in range(len(procs) - 1):
818        if procs[i].stdout is not None:
819            out = procs[i].stdout.read()
820        else:
821            out = ''
822        if procs[i].stderr is not None:
823            err = procs[i].stderr.read()
824        else:
825            err = ''
826        procData[i] = (out,err)
827
828    # Read stderr out of the temp files.
829    for i,f in stderrTempFiles:
830        f.seek(0, 0)
831        procData[i] = (procData[i][0], f.read())
832        f.close()
833
834    exitCode = None
835    for i,(out,err) in enumerate(procData):
836        res = procs[i].wait()
837        # Detect Ctrl-C in subprocess.
838        if res == -signal.SIGINT:
839            raise KeyboardInterrupt
840        if proc_not_counts[i] % 2:
841            res = not res
842        elif proc_not_counts[i] > 1:
843            res = 1 if res != 0 else 0
844
845        # Ensure the resulting output is always of string type.
846        try:
847            if out is None:
848                out = ''
849            else:
850                out = to_string(out.decode('utf-8', errors='replace'))
851        except:
852            out = str(out)
853        try:
854            if err is None:
855                err = ''
856            else:
857                err = to_string(err.decode('utf-8', errors='replace'))
858        except:
859            err = str(err)
860
861        # Gather the redirected output files for failed commands.
862        output_files = []
863        if res != 0:
864            for (name, mode, f, path) in sorted(opened_files):
865                if path is not None and mode in ('w', 'a'):
866                    try:
867                        with open(path, 'rb') as f:
868                            data = f.read()
869                    except:
870                        data = None
871                    if data is not None:
872                        output_files.append((name, path, data))
873
874        results.append(ShellCommandResult(
875            cmd.commands[i], out, err, res, timeoutHelper.timeoutReached(),
876            output_files))
877        if cmd.pipe_err:
878            # Take the last failing exit code from the pipeline.
879            if not exitCode or res != 0:
880                exitCode = res
881        else:
882            exitCode = res
883
884    # Remove any named temporary files we created.
885    for f in named_temp_files:
886        try:
887            os.remove(f)
888        except OSError:
889            pass
890
891    if cmd.negate:
892        exitCode = not exitCode
893
894    return exitCode
895
def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
    """Run the test script with lit's internal shell and format a transcript.

    Every %dbg(ARG) marker in `commands` is rewritten in place to the null
    command ": 'ARG'; ", each line is parsed with ShUtil.ShParser, and the
    parsed commands are chained with '&&' and run through executeShCmd().

    Returns a lit.Test.Result with code FAIL when a line cannot be parsed.
    Otherwise returns a tuple (out, err, exitCode, timeoutInfo), where `out`
    is the formatted transcript and `err` is always the empty string.
    `tmpBase` is not used by this function.
    """
    cmds = []
    for i, ln in enumerate(commands):
        ln = commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
        try:
            cmds.append(ShUtil.ShParser(ln, litConfig.isWindows,
                                        test.config.pipefail).parse())
        except Exception:
            # Only real parser failures are reported as a test failure;
            # KeyboardInterrupt and SystemExit are left to propagate.
            return lit.Test.Result(Test.FAIL, "shell parser error on: %r" % ln)

    # Chain the individual lines so that a failing line stops the script.
    cmd = cmds[0]
    for c in cmds[1:]:
        cmd = ShUtil.Seq(cmd, '&&', c)

    results = []
    timeoutInfo = None
    try:
        shenv = ShellEnvironment(cwd, test.config.environment)
        exitCode, timeoutInfo = executeShCmd(cmd, shenv, results, timeout=litConfig.maxIndividualTestTime)
    except InternalShellError as e:
        exitCode = 127
        results.append(
            ShellCommandResult(e.command, '', e.message, exitCode, False))

    # Collect the transcript pieces and join them once at the end.
    log = []
    for result in results:
        # Write the command line run.
        log.append('$ %s\n' % (' '.join('"%s"' % s
                                        for s in result.command.args),))

        # If nothing interesting happened, move on.
        if litConfig.maxIndividualTestTime == 0 and \
               result.exitCode == 0 and \
               not result.stdout.strip() and not result.stderr.strip():
            continue

        # Otherwise, something failed or was printed, show it.

        # Add the command output, if redirected.
        for (name, path, data) in result.outputFiles:
            if data.strip():
                log.append("# redirected output from %r:\n" % (name,))
                data = to_string(data.decode('utf-8', errors='replace'))
                if len(data) > 1024:
                    log.append(data[:1024] + "\n...\n")
                    log.append("note: data was truncated\n")
                else:
                    log.append(data)
                log.append("\n")

        if result.stdout.strip():
            log.append('# command output:\n%s\n' % (result.stdout,))
        if result.stderr.strip():
            log.append('# command stderr:\n%s\n' % (result.stderr,))
        if not result.stdout.strip() and not result.stderr.strip():
            log.append("note: command had no output on stdout or stderr\n")

        # Show the error conditions:
        if result.exitCode != 0:
            # On Windows, a negative exit code indicates a signal, and those are
            # easier to recognize or look up if we print them in hex.
            if litConfig.isWindows and result.exitCode < 0:
                codeStr = hex(int(result.exitCode & 0xFFFFFFFF)).rstrip("L")
            else:
                codeStr = str(result.exitCode)
            log.append("error: command failed with exit status: %s\n" % (
                codeStr,))
        if litConfig.maxIndividualTestTime > 0 and result.timeoutReached:
            log.append('error: command reached timeout: %s\n' % (
                str(result.timeoutReached),))

    out = ''.join(log)
    err = ''
    return out, err, exitCode, timeoutInfo
969
def executeScript(test, litConfig, tmpBase, commands, cwd):
    """Write the test commands to a script file and run it in an external shell.

    The script is written to tmpBase + '.script', with an extra '.bat'
    suffix when it will be run by cmd.exe (Windows with no bash available).
    Every %dbg(ARG) marker in `commands` is rewritten in place to a command
    form for the target shell before the script is written.

    Returns a tuple (out, err, exitCode, timeoutInfo).  timeoutInfo is None
    unless lit.util.executeCommand() raised ExecuteCommandTimeoutException,
    in which case the tuple is built from that exception's out, err,
    exitCode and msg attributes.
    """
    bashPath = litConfig.getBashPath()
    isWin32CMDEXE = (litConfig.isWindows and not bashPath)
    script = tmpBase + '.script'
    if isWin32CMDEXE:
        script += '.bat'

    # Write script file
    mode = 'w'
    open_kwargs = {}
    if litConfig.isWindows and not isWin32CMDEXE:
        mode += 'b'  # Avoid CRLFs when writing bash scripts.
    elif sys.version_info > (3,0):
        open_kwargs['encoding'] = 'utf-8'
    # The context manager closes the script even if one of the writes below
    # raises, so the handle is never leaked.
    with open(script, mode, **open_kwargs) as f:
        if isWin32CMDEXE:
            for i, ln in enumerate(commands):
                commands[i] = re.sub(kPdbgRegex, "echo '\\1' > nul && ", ln)
            if litConfig.echo_all_commands:
                f.write('@echo on\n')
            else:
                f.write('@echo off\n')
            # Stop the batch file as soon as one command fails.
            f.write('\n@if %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
        else:
            for i, ln in enumerate(commands):
                commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
            if test.config.pipefail:
                f.write(b'set -o pipefail;' if mode == 'wb' else 'set -o pipefail;')
            if litConfig.echo_all_commands:
                f.write(b'set -x;' if mode == 'wb' else 'set -x;')
            # A binary-mode file needs bytes on Python 3.
            if sys.version_info > (3,0) and mode == 'wb':
                f.write(bytes('{ ' + '; } &&\n{ '.join(commands) + '; }', 'utf-8'))
            else:
                f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
        f.write(b'\n' if mode == 'wb' else '\n')

    if isWin32CMDEXE:
        command = ['cmd','/c', script]
    else:
        if bashPath:
            command = [bashPath, script]
        else:
            command = ['/bin/sh', script]
        if litConfig.useValgrind:
            # FIXME: Running valgrind on sh is overkill. We probably could just
            # run on clang with no real loss.
            command = litConfig.valgrindArgs + command

    try:
        out, err, exitCode = lit.util.executeCommand(command, cwd=cwd,
                                       env=test.config.environment,
                                       timeout=litConfig.maxIndividualTestTime)
        return (out, err, exitCode, None)
    except lit.util.ExecuteCommandTimeoutException as e:
        return (e.out, e.err, e.exitCode, e.msg)
1026
def parseIntegratedTestScriptCommands(source_path, keywords):
    """
    parseIntegratedTestScriptCommands(source_path, keywords) -> commands

    Parse the commands in an integrated test script file into a sequence of
    (line_number, command_type, line) tuples, yielded lazily.

    source_path: path of the file to scan.
    keywords: iterable of keyword strings to search for; each is matched
        literally, and the rest of its line becomes `line`.
    """

    # This code is written to work on both Python 2 and Python 3 without
    # requiring input files to always have valid codings. The trick we use is
    # to open the file in binary mode and use the regular expression library
    # to find the commands, with it scanning strings in Python2 and bytes in
    # Python3.
    #
    # Once we find a match, we do require each script line to be decodable to
    # UTF-8, so we convert the outputs to UTF-8 before returning. This way the
    # remaining code can work with "strings" agnostic of the executing Python
    # version.

    keywords_re = re.compile(
        to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),)))

    # Read the entire file contents and close the file right away, so that no
    # handle is held open while this generator is suspended (a consumer may
    # stop iterating early and never resume it).
    with open(source_path, 'rb') as f:
        data = f.read()

    # Ensure the data ends with a newline.
    newline = to_bytes('\n')
    if not data.endswith(newline):
        data = data + newline

    # Iterate over the matches.
    line_number = 1
    last_match_position = 0
    for match in keywords_re.finditer(data):
        # Compute the updated line number by counting the intervening
        # newlines.
        match_position = match.start()
        line_number += data.count(newline, last_match_position,
                                  match_position)
        last_match_position = match_position

        # Convert the keyword and line to UTF-8 strings and yield the
        # command. Note that we take care to return regular strings in
        # Python 2, to avoid other code having to differentiate between the
        # str and unicode types.
        #
        # Opening the file in binary mode prevented Windows \r newline
        # characters from being converted to Unix \n newlines, so manually
        # strip those from the yielded lines.
        keyword,ln = match.groups()
        yield (line_number, to_string(keyword.decode('utf-8')),
               to_string(ln.decode('utf-8').rstrip('\r')))
1082
def getTempPaths(test):
    """Return (tmpDir, tmpBase) derived from the test's exec path.

    tmpDir is an 'Output' directory placed next to test.getExecPath(), and
    tmpBase is the exec path's final component inside that directory.
    """
    exec_dir, exec_name = os.path.split(test.getExecPath())
    output_dir = os.path.join(exec_dir, 'Output')
    return output_dir, os.path.join(output_dir, exec_name)
1091
def colonNormalizePath(path):
    """Return `path` without a drive colon and without a leading slash.

    When kIsWindows is set, backslashes become forward slashes and the colon
    after a leading single character is dropped.  Otherwise the path must
    start with '/', and that leading slash is removed.
    """
    if not kIsWindows:
        assert path[0] == '/'
        return path[1:]
    forward_slashed = path.replace('\\', '/')
    return re.sub(r'^(.):', r'\1', forward_slashed)
1098
def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
    """Build the list of (pattern, replacement) substitutions for a test.

    The list starts with test.config.substitutions and is followed, in this
    order, by the plain path substitutions (%s, %S, %p, %{pathsep}, %t,
    %basename_t, %T), their forward-slash "%/x" forms, the
    "%{/x:regex_replacement}" forms, and the "%:x" forms.

    If normalize_slashes is true, backslashes in the source path, source
    directory, tmpDir and tmpBase are replaced by forward slashes first.
    """
    def forward(path):
        return path.replace('\\', '/')

    def regex_escape(s):
        # Escape '@' and '&' for the replacement part of an s@@@ regex.
        return s.replace('@', r'\@').replace('&', r'\&')

    sourcepath = test.getSourcePath()
    sourcedir = os.path.dirname(sourcepath)

    # Normalize slashes, if requested.
    if normalize_slashes:
        sourcepath = forward(sourcepath)
        sourcedir = forward(sourcedir)
        tmpDir = forward(tmpDir)
        tmpBase = forward(tmpBase)

    tmpName = tmpBase + '.tmp'
    baseName = os.path.basename(tmpBase)

    substitutions = list(test.config.substitutions)
    substitutions += [
        ('%s', sourcepath),
        ('%S', sourcedir),
        ('%p', sourcedir),
        ('%{pathsep}', os.pathsep),
        ('%t', tmpName),
        ('%basename_t', baseName),
        ('%T', tmpDir),
    ]

    # "%/[STpst]" should be normalized.
    substitutions += [
        ('%/s', forward(sourcepath)),
        ('%/S', forward(sourcedir)),
        ('%/p', forward(sourcedir)),
        ('%/t', forward(tmpBase) + '.tmp'),
        ('%/T', forward(tmpDir)),
    ]

    # "%{/[STpst]:regex_replacement}" should be normalized like "%/[STpst]"
    # but we're also in a regex replacement context of a s@@@ regex.
    substitutions += [
        ('%{/s:regex_replacement}', regex_escape(forward(sourcepath))),
        ('%{/S:regex_replacement}', regex_escape(forward(sourcedir))),
        ('%{/p:regex_replacement}', regex_escape(forward(sourcedir))),
        ('%{/t:regex_replacement}', regex_escape(forward(tmpBase)) + '.tmp'),
        ('%{/T:regex_replacement}', regex_escape(forward(tmpDir))),
    ]

    # "%:[STpst]" are normalized paths without colons and without a leading
    # slash.
    substitutions += [
        ('%:s', colonNormalizePath(sourcepath)),
        ('%:S', colonNormalizePath(sourcedir)),
        ('%:p', colonNormalizePath(sourcedir)),
        ('%:t', colonNormalizePath(tmpBase + '.tmp')),
        ('%:T', colonNormalizePath(tmpDir)),
    ]
    return substitutions
1160
def _memoize(f):
    """Wrap the one-argument function `f` so each argument is computed once.

    Results are kept in a dict keyed by the argument, so arguments must be
    hashable.  The cache is never pruned.
    """
    cache = {}  # Intentionally unbounded, see applySubstitutions()
    missing = object()  # Sentinel: distinguishes "absent" from any cached value.

    def memoized(x):
        hit = cache.get(x, missing)
        if hit is missing:
            hit = cache[x] = f(x)
        return hit
    return memoized
1168
@_memoize
def _caching_re_compile(pattern):
    """Return re.compile(pattern); _memoize reuses the result per pattern."""
    compiled = re.compile(pattern)
    return compiled
1172
def applySubstitutions(script, substitutions, recursion_limit=None):
    """
    Apply substitutions to every line of the script and return the new lines.

    Each substitution is a pair (a, b): every match of regular expression a
    in a line is replaced by b.  Full regular expression syntax is allowed.
    A literal '%%' is hidden from the substitutions and comes out as '%'.

    If `recursion_limit` is `None` (the default), each line gets a single
    pass over the substitutions.  Otherwise a line is processed repeatedly
    until it stops changing; if it is still changing after `recursion_limit`
    additional passes, ValueError is raised.
    """

    # We use #_MARKER_# to hide %% while we do the other substitutions.
    marker = '#_MARKER_#'
    percent_pair_re = _caching_re_compile('%%')
    marker_re = _caching_re_compile(marker)

    def expand_once(text):
        for pattern, replacement in substitutions:
            if kIsWindows:
                replacement = replacement.replace("\\","\\\\")
            # re.compile() has a built-in LRU cache with 512 entries. In some
            # test suites lit ends up thrashing that cache, which made e.g.
            # check-llvm run 50% slower.  Use an explicit, unbounded cache
            # to prevent that from happening.  Since lit is fairly
            # short-lived, since the set of substitutions is fairly small, and
            # since thrashing has such bad consequences, not bounding the cache
            # seems reasonable.
            compiled = _caching_re_compile(pattern)
            hidden = percent_pair_re.sub(marker, text)
            text = compiled.sub(str(replacement), hidden)

        # Strip the trailing newline and any extra whitespace.
        return text.strip()

    def expand_to_fixed_point(text):
        assert isinstance(recursion_limit, int) and recursion_limit >= 0
        original = text
        current = text
        expanded = expand_once(current)
        for _ in range(recursion_limit):
            if expanded == current:
                break
            current = expanded
            expanded = expand_once(current)

        if expanded != current:
            raise ValueError("Recursive substitution of '%s' did not complete "
                             "in the provided recursion limit (%s)" % \
                             (original, recursion_limit))

        return expanded

    if recursion_limit is None:
        expand = expand_once
    else:
        expand = expand_to_fixed_point

    return [marker_re.sub('%', expand(line)) for line in script]
1230
1231
class ParserKind(object):
    """
    An enumeration representing the style of an integrated test keyword or
    command.

    TAG: A keyword taking no value. Ex 'END.'
    COMMAND: A keyword taking a list of shell commands. Ex 'RUN:'
    LIST: A keyword taking a comma-separated list of values.
    BOOLEAN_EXPR: A keyword taking a comma-separated list of
        boolean expressions. Ex 'XFAIL:'
    INTEGER: A keyword taking a single integer. Ex 'ALLOW_RETRIES:'
    CUSTOM: A keyword with custom parsing semantics.
    """
    TAG = 0
    COMMAND = 1
    LIST = 2
    BOOLEAN_EXPR = 3
    INTEGER = 4
    CUSTOM = 5

    # Suffix characters a keyword of each kind may end with.
    _SUFFIXES = {
        TAG: ('.',),
        COMMAND: (':',),
        LIST: (':',),
        BOOLEAN_EXPR: (':',),
        INTEGER: (':',),
        CUSTOM: (':', '.'),
    }

    # Printable name of each kind.
    _NAMES = {
        TAG: 'TAG',
        COMMAND: 'COMMAND',
        LIST: 'LIST',
        BOOLEAN_EXPR: 'BOOLEAN_EXPR',
        INTEGER: 'INTEGER',
        CUSTOM: 'CUSTOM',
    }

    @staticmethod
    def allowedKeywordSuffixes(value):
        """Return a new list of the suffixes allowed for kind `value`.

        Raises KeyError if `value` is not one of the kinds above.
        """
        return list(ParserKind._SUFFIXES[value])

    @staticmethod
    def str(value):
        """Return the name of kind `value`; raises KeyError if unknown."""
        return ParserKind._NAMES[value]
1271
1272
class IntegratedTestKeywordParser(object):
    """A parser for LLVM/Clang style integrated test scripts.

    keyword: The keyword to parse for. It must end in either '.' or ':'.
    kind: A value of ParserKind.
    parser: A custom parser. This value may only be specified with
            ParserKind.CUSTOM.
    initial_value: The value passed to the parser as `output` for the first
            parsed line (None by default).
    """
    def __init__(self, keyword, kind, parser=None, initial_value=None):
        """Validate the keyword/kind pair and select the line handler.

        Raises ValueError if the keyword does not end in a suffix allowed for
        `kind`, if a custom parser is given for a non-CUSTOM kind, if CUSTOM
        is used without a parser, or if `kind` has no handler.
        """
        allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind)
        if len(keyword) == 0 or keyword[-1] not in allowedSuffixes:
            if len(allowedSuffixes) == 1:
                raise ValueError("Keyword '%s' of kind '%s' must end in '%s'"
                                 % (keyword, ParserKind.str(kind),
                                    allowedSuffixes[0]))
            else:
                raise ValueError("Keyword '%s' of kind '%s' must end in "
                                 " one of '%s'"
                                 % (keyword, ParserKind.str(kind),
                                    ' '.join(allowedSuffixes)))

        if parser is not None and kind != ParserKind.CUSTOM:
            raise ValueError("custom parsers can only be specified with "
                             "ParserKind.CUSTOM")
        self.keyword = keyword
        self.kind = kind
        # Every (line_number, line) pair handed to parseLine(), in order.
        self.parsed_lines = []
        # Accumulated result; threaded through the handler on each line.
        self.value = initial_value
        self.parser = parser

        if kind == ParserKind.COMMAND:
            self.parser = lambda line_number, line, output: \
                                 self._handleCommand(line_number, line, output,
                                                     self.keyword)
        elif kind == ParserKind.LIST:
            self.parser = self._handleList
        elif kind == ParserKind.BOOLEAN_EXPR:
            self.parser = self._handleBooleanExpr
        elif kind == ParserKind.INTEGER:
            self.parser = self._handleSingleInteger
        elif kind == ParserKind.TAG:
            self.parser = self._handleTag
        elif kind == ParserKind.CUSTOM:
            if parser is None:
                raise ValueError("ParserKind.CUSTOM requires a custom parser")
            self.parser = parser
        else:
            raise ValueError("Unknown kind '%s'" % kind)

    def parseLine(self, line_number, line):
        """Record the line and fold it into the parser's current value.

        A ValueError raised by the handler is re-raised as a ValueError whose
        message also names the directive and the test line number.
        """
        try:
            self.parsed_lines.append((line_number, line))
            self.value = self.parser(line_number, line, self.value)
        except ValueError as e:
            raise ValueError(str(e) + ("\nin %s directive on test line %d" %
                                       (self.keyword, line_number)))

    def getValue(self):
        """Return the value accumulated so far."""
        return self.value

    @staticmethod
    def _handleTag(line_number, line, output):
        """A helper for parsing TAG type keywords.

        Returns True when nothing but whitespace follows the tag; otherwise
        returns the previous `output` unchanged.
        """
        return (not line.strip() or output)

    @staticmethod
    def _handleCommand(line_number, line, output, keyword):
        """A helper for parsing COMMAND type keywords.

        Expands %(line), %(line+N) and %(line-N), then either appends the
        line to a previous entry that ended in a backslash or starts a new
        entry prefixed with a %dbg(...) marker naming the keyword and line.
        Returns the (possibly newly created) list of commands.
        """
        # Trim trailing whitespace.
        line = line.rstrip()
        # Substitute line number expressions
        line = re.sub(r'%\(line\)', str(line_number), line)

        def replace_line_number(match):
            if match.group(1) == '+':
                return str(line_number + int(match.group(2)))
            if match.group(1) == '-':
                return str(line_number - int(match.group(2)))
        line = re.sub(r'%\(line *([\+-]) *(\d+)\)', replace_line_number, line)
        # Collapse lines with trailing '\\'.
        if output and output[-1].endswith('\\'):
            output[-1] = output[-1][:-1] + line
        else:
            if output is None:
                output = []
            pdbg = "%dbg({keyword} at line {line_number})".format(
                keyword=keyword,
                line_number=line_number)
            assert re.match(kPdbgRegex + "$", pdbg), \
                   "kPdbgRegex expected to match actual %dbg usage"
            line = "{pdbg} {real_command}".format(
                pdbg=pdbg,
                real_command=line)
            output.append(line)
        return output

    @staticmethod
    def _handleList(line_number, line, output):
        """A parser for LIST type keywords.

        Splits the line on commas, strips each item, and extends `output`
        (created if None) with the items.
        """
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(',')])
        return output

    @staticmethod
    def _handleSingleInteger(line_number, line, output):
        """A parser for INTEGER type keywords.

        Appends int(line) to `output` (created if None); raises ValueError if
        the line is not an integer.
        """
        if output is None:
            output = []
        try:
            n = int(line)
        except ValueError:
            raise ValueError("INTEGER parser requires the input to be an integer (got {})".format(line))
        output.append(n)
        return output

    @staticmethod
    def _handleBooleanExpr(line_number, line, output):
        """A parser for BOOLEAN_EXPR type keywords.

        Splits the line on commas into non-empty, stripped expressions.  If
        the previous entry ended in a backslash, the first expression of this
        line continues it.  Every complete expression collected so far is
        evaluated to check its syntax, so a malformed one raises ValueError.
        """
        parts = [s.strip() for s in line.split(',') if s.strip() != '']
        if output and output[-1].endswith('\\'):
            # A continuation line may carry no expression at all.  Treat it
            # as empty text (as _handleCommand does for RUN lines) instead of
            # failing with an IndexError on parts[0].
            continuation = parts.pop(0) if parts else ''
            output[-1] = output[-1][:-1] + continuation
        if output is None:
            output = []
        output.extend(parts)
        # Evaluate each expression to verify syntax.
        # We don't want any results, just the raised ValueError.
        for s in output:
            if s != '*' and not s.endswith('\\'):
                BooleanExpression.evaluate(s, [])
        return output
1405
1406
def _parseKeywords(sourcepath, additional_parsers=[],
                   require_script=True):
    """_parseKeywords

    Scan an LLVM/Clang style integrated test script and feed every line that
    starts with a known keyword to that keyword's parser.  The built-in
    keywords are 'RUN:', 'XFAIL:', 'REQUIRES:', 'UNSUPPORTED:',
    'ALLOW_RETRIES:' and 'END.'; parsers passed in `additional_parsers` are
    installed as well.  Scanning stops at an 'END.' tag whose value is True.

    Returns a dictionary mapping each parser's keyword to its value after
    parsing the test.  Raises ValueError for an invalid or duplicate
    additional parser, a missing 'RUN:' line (when `require_script` is
    true), an unterminated backslash continuation, or more than one
    ALLOW_RETRIES line.
    """
    # Install the built-in keyword parsers.  `script` is shared with the
    # 'RUN:' parser, which appends to it in place.
    script = []
    keyword_parsers = {}
    for builtin in (
        IntegratedTestKeywordParser('RUN:', ParserKind.COMMAND, initial_value=script),
        IntegratedTestKeywordParser('XFAIL:', ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser('REQUIRES:', ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser('UNSUPPORTED:', ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser('ALLOW_RETRIES:', ParserKind.INTEGER),
        IntegratedTestKeywordParser('END.', ParserKind.TAG),
    ):
        keyword_parsers[builtin.keyword] = builtin

    # Install user-defined additional parsers.
    for extra in additional_parsers:
        if not isinstance(extra, IntegratedTestKeywordParser):
            raise ValueError('Additional parser must be an instance of '
                             'IntegratedTestKeywordParser')
        if extra.keyword in keyword_parsers:
            raise ValueError("Parser for keyword '%s' already exists"
                             % extra.keyword)
        keyword_parsers[extra.keyword] = extra

    # Collect the test lines from the script.
    found = parseIntegratedTestScriptCommands(sourcepath,
                                              keyword_parsers.keys())
    for line_number, command_type, ln in found:
        handler = keyword_parsers[command_type]
        handler.parseLine(line_number, ln)
        if command_type == 'END.' and handler.getValue() is True:
            break

    # Verify the script contains a run line.
    if require_script and not script:
        raise ValueError("Test has no 'RUN:' line")

    # Check for unterminated run lines.
    if script and script[-1][-1] == '\\':
        raise ValueError("Test has unterminated 'RUN:' lines (with '\\')")

    # Check boolean expressions for unterminated lines.
    for key, kp in keyword_parsers.items():
        if kp.kind == ParserKind.BOOLEAN_EXPR:
            value = kp.getValue()
            if value and value[-1][-1] == '\\':
                raise ValueError("Test has unterminated '{key}' lines (with '\\')"
                                 .format(key=key))

    # Make sure there's at most one ALLOW_RETRIES: line
    allowed_retries = keyword_parsers['ALLOW_RETRIES:'].getValue()
    if allowed_retries and len(allowed_retries) > 1:
        raise ValueError("Test has more than one ALLOW_RETRIES lines")

    return dict((p.keyword, p.getValue()) for p in keyword_parsers.values())
1474
1475
def parseIntegratedTestScript(test, additional_parsers=[],
                              require_script=True):
    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
    script and extract the lines to 'RUN' as well as 'XFAIL', 'REQUIRES',
    'UNSUPPORTED' and 'ALLOW_RETRIES' information into the given test.

    If additional parsers are specified then the test is also scanned for the
    keywords they specify and all matches are passed to the custom parser.

    If 'require_script' is False an empty script
    may be returned. This can be used for test formats where the actual script
    is optional or ignored.

    Returns the list of script lines, or a lit.Test.Result: UNRESOLVED when
    parsing raises ValueError, UNSUPPORTED when the test's feature
    requirements rule it out.
    """
    def unsupported(reason):
        return lit.Test.Result(Test.UNSUPPORTED, reason)

    # Parse the test sources and extract test properties
    try:
        parsed = _parseKeywords(test.getSourcePath(), additional_parsers,
                                require_script)
    except ValueError as e:
        return lit.Test.Result(Test.UNRESOLVED, str(e))

    script = parsed['RUN:'] or []
    test.xfails += parsed['XFAIL:'] or []
    test.requires += parsed['REQUIRES:'] or []
    test.unsupported += parsed['UNSUPPORTED:'] or []
    retries = parsed['ALLOW_RETRIES:']
    if retries:
        test.allowed_retries = retries[0]

    # Enforce REQUIRES:
    missing_required_features = test.getMissingRequiredFeatures()
    if missing_required_features:
        return unsupported(
            "Test requires the following unavailable features: %s"
            % ', '.join(missing_required_features))

    # Enforce UNSUPPORTED:
    unsupported_features = test.getUnsupportedFeatures()
    if unsupported_features:
        return unsupported(
            "Test does not support the following features and/or targets: %s"
            % ', '.join(unsupported_features))

    # Enforce limit_to_features.
    if not test.isWithinFeatureLimits():
        return unsupported(
            "Test does not require any of the features specified in "
            "limit_to_features: %s"
            % ', '.join(test.config.limit_to_features))

    return script
1527
1528
def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
    """Execute 'script' for 'test' and return a lit.Test.Result.

    The script is run through executeScript when 'useExternalSh' is true and
    through executeScriptInternal otherwise. A run whose status is FAIL is
    repeated, for at most test.allowed_retries + 1 runs in total. If the
    executor itself hands back a lit.Test.Result, that result is returned
    unchanged. Otherwise the result carries the final status and a log with
    the script, the exit code, any timeout information and the captured
    stdout/stderr.
    """
    def attempt(execdir):
        # Run the script once; classify the outcome unless the executor
        # already produced a finished Result.
        run = executeScript if useExternalSh else executeScriptInternal
        outcome = run(test, litConfig, tmpBase, script, execdir)
        if isinstance(outcome, lit.Test.Result):
            return outcome

        out, err, exitCode, timeoutInfo = outcome
        if exitCode == 0:
            status = Test.PASS
        elif timeoutInfo is None:
            status = Test.FAIL
        else:
            status = Test.TIMEOUT
        return out, err, exitCode, timeoutInfo, status

    # Create the output directory if it does not already exist.
    lit.util.mkdir_p(os.path.dirname(tmpBase))

    # Re-run failed tests up to test.allowed_retries times.
    execdir = os.path.dirname(test.getExecPath())
    max_runs = test.allowed_retries + 1
    for run_index in range(max_runs):
        outcome = attempt(execdir)
        if isinstance(outcome, lit.Test.Result):
            return outcome

        out, err, exitCode, timeoutInfo, status = outcome
        if status != Test.FAIL:
            break

    # A pass that needed more than one run is reported as a flaky pass, so
    # that it is distinguishable from a first-run pass.
    if run_index > 0 and status == Test.PASS:
        status = Test.FLAKYPASS

    # Form the output log piece by piece.
    pieces = ["""Script:\n--\n%s\n--\nExit Code: %d\n""" % (
        '\n'.join(script), exitCode)]
    if timeoutInfo is not None:
        pieces.append("""Timeout: %s\n""" % (timeoutInfo,))
    pieces.append("\n")

    # Append the outputs, if present.
    if out:
        pieces.append("""Command Output (stdout):\n--\n%s\n--\n""" % (out,))
    if err:
        pieces.append("""Command Output (stderr):\n--\n%s\n--\n""" % (err,))

    return lit.Test.Result(status, ''.join(pieces))
1583
1584
def executeShTest(test, litConfig, useExternalSh,
                  extra_substitutions=[],
                  preamble_commands=[]):
    """Parse, expand and run the shell script of an integrated test.

    Returns a lit.Test.Result. The result is UNSUPPORTED when
    test.config.unsupported is set, and PASS (without running anything) when
    litConfig.noExecute is set. If parseIntegratedTestScript returns a
    Result, it is returned as-is. Otherwise the commands in
    'preamble_commands' followed by the parsed script are expanded with
    'extra_substitutions' followed by the default substitutions, and the
    expanded script is run by _runShTest.
    """
    if test.config.unsupported:
        return lit.Test.Result(Test.UNSUPPORTED, 'Test is unsupported')

    # Start from a copy of the preamble; a script in the test file is only
    # required when the preamble is empty.
    commands = list(preamble_commands)
    parsed = parseIntegratedTestScript(test, require_script=not commands)
    if isinstance(parsed, lit.Test.Result):
        return parsed
    commands.extend(parsed)

    if litConfig.noExecute:
        return lit.Test.Result(Test.PASS)

    tmpDir, tmpBase = getTempPaths(test)

    # Caller-supplied substitutions come before the default ones.
    all_substitutions = list(extra_substitutions)
    all_substitutions.extend(
        getDefaultSubstitutions(test, tmpDir, tmpBase,
                                normalize_slashes=useExternalSh))

    expanded = applySubstitutions(
        commands, all_substitutions,
        recursion_limit=test.config.recursiveExpansionLimit)

    return _runShTest(test, litConfig, useExternalSh, expanded, tmpBase)
1608