1#!/usr/bin/env python
2
3"""Updates FileCheck checks in MIR tests.
4
5This script is a utility to update MIR based tests with new FileCheck
6patterns.
7
8The checks added by this script will cover the entire body of each
9function it handles. Virtual registers used are given names via
10FileCheck patterns, so if you do want to check a subset of the body it
11should be straightforward to trim out the irrelevant parts. None of
12the YAML metadata will be checked, other than function names.
13
14If there are multiple llc commands in a test, the full set of checks
15will be repeated for each different check pattern. Checks for patterns
16that are common between different commands will be left as-is by
17default, or removed if the --remove-common-prefixes flag is provided.
18"""
19
20from __future__ import print_function
21
22import argparse
23import collections
24import os
25import re
26import subprocess
27import sys
28
29from UpdateTestChecks import common
30
# Matches the `name:` field of a MIR function and captures the function name.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the `body: |` line that opens a MIR function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line such as `bb.0:` or `bb.1.entry:`.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register (`%<number>`), optionally followed by a
# `:suffix` and/or a parenthesized annotation. Group 1 is the bare register.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Matches an instruction line that defines one or more comma-separated
# virtual registers, capturing the defined registers (`vregs`) and the
# opcode (`opcode`). Opcodes start with an uppercase letter or `t`
# (presumably for Thumb-style opcode names -- TODO confirm).
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Matches lines that may precede the first instruction inside a MIR body:
# `;` comments, a basic block label, lowercase `key:` lines, and blank lines.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches the `define` line of an LLVM IR function and captures its name.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches `;` comment lines and blank lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one MIR YAML document, from the `---` line to the closing `...`
# line, capturing the function name (`func`) and the text of its `body: |`
# block (`body`). re.M makes ^/$ match per line; re.S lets `.` span newlines.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
53
54
class LLC:
    """Callable wrapper around an ``llc`` executable.

    An instance is invoked as ``llc(args, ir)`` and returns the tool's
    standard output as text with Unix (LF) line endings.
    """

    def __init__(self, bin):
        # `bin` is the name or path of the executable to run. The parameter
        # name is kept as-is for backward compatibility.
        self.bin = bin

    def __call__(self, args, ir):
        """Run the tool with `args`, feeding the file `ir` on stdin.

        Args:
            args: Command-line arguments as a single, unsplit string.
            ir: Path of the input file. A `.mir` suffix appends `-x mir`
                to the arguments.

        Returns:
            The captured stdout as text, with CRLF normalized to LF.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        if ir.endswith('.mir'):
            args = '{} -x mir'.format(args)
        with open(ir) as ir_file:
            # NOTE: the command goes through the shell because `args` is a
            # single unsplit string; only use this with trusted arguments.
            stdout = subprocess.check_output('{} {}'.format(self.bin, args),
                                             shell=True, stdin=ir_file)
        # check_output returns bytes on Python 3; decode before doing any
        # text manipulation (on Python 2 the result is already a str).
        if not isinstance(stdout, str):
            stdout = stdout.decode('utf-8')
        # Fix line endings to unix LF style.
        return stdout.replace('\r\n', '\n')
68
69
class Run:
    """One llc RUN line: its check prefixes, llc arguments and triple.

    Indexing yields the fields in the order (prefixes, cmd_args, triple),
    which also allows an instance to be unpacked like a 3-element sequence.
    """

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes, self.cmd_args, self.triple = prefixes, cmd_args, triple

    def __getitem__(self, index):
        # Build the field list on every access so that later reassignment of
        # an attribute (e.g. `run.prefixes = ...`) is reflected.
        fields = [self.prefixes, self.cmd_args, self.triple]
        return fields[index]
78
79
def log(msg, verbose=True):
    """Print `msg` to stderr unless `verbose` is false."""
    if not verbose:
        return
    print(msg, file=sys.stderr)
83
84
def warn(msg, test_file=None):
    """Print a `WARNING:` line to stderr.

    When `test_file` is given (and non-empty) the message is prefixed with
    it, separated by a colon.
    """
    text = '{}: {}'.format(test_file, msg) if test_file else msg
    print('WARNING: {}'.format(text), file=sys.stderr)
89
90
def find_triple_in_ir(lines, verbose=False):
    """Return group 1 of the first line matching common.TRIPLE_IR_RE.

    Returns None when no line matches. `verbose` is accepted but unused.
    """
    matches = (common.TRIPLE_IR_RE.match(line) for line in lines)
    return next((m.group(1) for m in matches if m), None)
97
98
def find_run_lines(test, lines, verbose=False):
    """Collect the RUN commands found in `lines`.

    Each line matching common.RUN_LINE_RE contributes its first group. When
    the previously collected command ends with a backslash, the new text is
    joined onto it (backslashes stripped, separated by one space) instead of
    starting a new command. With `verbose`, the result is logged.
    """
    run_lines = []
    for line in lines:
        matched = common.RUN_LINE_RE.match(line)
        if not matched:
            continue
        text = matched.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            # Continuation of the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + text
        else:
            run_lines.append(text)

    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for run_line in run_lines:
            log('  RUN: {}'.format(run_line))
    return run_lines
113
114
def build_run_list(test, run_lines, verbose=False):
    """Turn RUN lines into Run objects.

    Only lines of the form `llc ... | FileCheck ...` are kept; others are
    skipped with a warning. For each kept line the triple is taken from
    -mtriple, or derived from -march as `<march>--` when no triple was found.
    Check prefixes default to ['CHECK'].

    Returns:
        (run_list, common_prefixes), where common_prefixes is the set of
        prefixes that occur more than once across all kept RUN lines. Those
        prefixes are removed from every Run's `prefixes`.
    """
    run_list = []
    seen_prefixes = []
    for run_line in run_lines:
        llc_cmd, _, filecheck_cmd = run_line.partition('|')
        llc_cmd = llc_cmd.strip()
        filecheck_cmd = filecheck_cmd.strip()

        if not llc_cmd.startswith('llc '):
            warn('Skipping non-llc RUN line: {}'.format(run_line),
                 test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            warn('Skipping non-FileChecked RUN line: {}'.format(run_line),
                 test_file=test)
            continue

        triple_match = common.TRIPLE_ARG_RE.search(llc_cmd)
        triple = triple_match.group(1) if triple_match else None
        # If we find -march but not -mtriple, use that.
        march_match = common.MARCH_ARG_RE.search(llc_cmd)
        if march_match and not triple:
            triple = '{}--'.format(march_match.group(1))

        # Drop the tool name and the input-file placeholders.
        cmd_args = llc_cmd[len('llc'):].strip()
        cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = []
        for prefix_match in common.CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(prefix_match.group(1).split(','))
        if not check_prefixes:
            check_prefixes = ['CHECK']
        seen_prefixes.extend(check_prefixes)

        run_list.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    occurrences = collections.Counter(seen_prefixes)
    common_prefixes = set(p for p in seen_prefixes if occurrences[p] > 1)
    for run in run_list:
        run.prefixes = [p for p in run.prefixes if p not in common_prefixes]

    return run_list, common_prefixes
160
161
def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of functions that contain exactly one basic block.

    A line matching MIR_FUNC_NAME_RE starts a new function; every line
    matching MIR_BASIC_BLOCK_RE counts as one block of the current function.
    Block labels seen before any function name are attributed to None.
    `verbose` is accepted but unused.
    """
    # Each entry is [function name, number of block labels seen so far].
    tallies = [[None, 0]]
    for line in lines:
        name_match = MIR_FUNC_NAME_RE.match(line)
        if name_match:
            tallies.append([name_match.group('func'), 0])
        if MIR_BASIC_BLOCK_RE.match(line):
            tallies[-1][1] += 1
    return [name for name, blocks in tallies if blocks == 1]
179
180
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record the body of every function in `raw_tool_output`.

    For each MIR_FUNC_RE match, the body is stored in
    func_dict[prefix][func] for every prefix in `prefixes`. If a different
    body was already stored for that prefix and function, a warning is
    emitted and the stored body is overwritten. `triple` is unused here.
    """
    for func_match in MIR_FUNC_RE.finditer(raw_tool_output):
        name = func_match.group('func')
        text = func_match.group('body')
        if verbose:
            log('Processing function: {}'.format(name))
            for body_line in text.splitlines():
                log('  {}'.format(body_line))
        for prefix in prefixes:
            bodies = func_dict[prefix]
            if name in bodies and bodies[name] != text:
                warn('Found conflicting asm for prefix: {}'.format(prefix),
                     test_file=test)
            bodies[name] = text
195
196
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            single_bb, verbose=False):
    """Append check lines for `func_name` to `output_lines`.

    For every run, the first of its prefixes that has not been printed yet
    and has a non-empty recorded body for `func_name` is used to emit checks;
    the remaining prefixes of that run are then skipped.

    Args:
        test: Path of the test file (used in warnings).
        output_lines: List of output lines, extended in place.
        run_list: Objects exposing a `prefixes` list.
        func_dict: Mapping prefix -> {function name -> body text}.
        func_name: Name of the function to emit checks for.
        single_bb: Whether the function has a single basic block.
        verbose: Whether to log progress.

    Returns:
        `output_lines` (the same list object).
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            # A function may be missing from the tool output for this prefix;
            # treat that like an empty body instead of raising KeyError.
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines())
            break
    return output_lines
215
216
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body):
    """Append FileCheck lines covering `func_body` to `output_lines`.

    Emits a `<prefix>-LABEL: name: <func_name>` line followed by one
    `<prefix>:` line per non-blank body line. Virtual registers defined in
    the body are replaced by named FileCheck variables.

    Args:
        test: Path of the test file (used in warnings).
        output_lines: List of output lines, extended in place.
        prefix: The FileCheck prefix to emit.
        func_name: Name of the function being checked.
        single_bb: If true, the first body line is dropped before emitting.
        func_body: List of body lines; modified in place when `single_bb`.
    """
    if single_bb:
        # Don't bother checking the basic block label for a single BB
        func_body.pop(0)

    if not func_body:
        warn('Function has no instructions to check: {}'.format(func_name),
             test_file=test)
        return

    # The indentation of the first body line is used for every check line.
    first_line = func_body[0]
    indent = len(first_line) - len(first_line.lstrip(' '))
    # A check comment, indented the appropriate amount
    check = '{:>{}}; {}'.format('', indent, prefix)

    output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

    # Maps a vreg as written in the body (e.g. `%0`) to its FileCheck
    # variable name.
    vreg_map = {}
    for func_line in func_body:
        if not func_line.strip():
            continue
        m = VREG_DEF_RE.match(func_line)
        if m:
            for vreg in VREG_RE.finditer(m.group('vregs')):
                # Name the register after the defining opcode and replace its
                # first occurrence with a variable definition.
                name = mangle_vreg(m.group('opcode'), vreg_map.values())
                vreg_map[vreg.group(1)] = name
                func_line = func_line.replace(
                    vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
        # Replace remaining occurrences of known vregs with variable uses.
        # The trailing \b keeps e.g. `%1` from matching the start of `%10`.
        for number, name in vreg_map.items():
            func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
                               func_line)
        check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
        output_lines.append(check_line)
251
252
# Short names for some common opcodes with long-ish names (looked up after
# the `G_` prefix and `_PSEUDO` suffix have been removed).
_OPCODE_ABBREVIATIONS = {
    'IMPLICIT_DEF': 'DEF',
    'GLOBAL_VALUE': 'GV',
    'CONSTANT': 'C',
    'FCONSTANT': 'C',
    'MERGE_VALUES': 'MV',
    'UNMERGE_VALUES': 'UV',
    'INTRINSIC': 'INT',
    'INTRINSIC_W_SIDE_EFFECTS': 'INT',
    'INSERT_VECTOR_ELT': 'IVEC',
    'EXTRACT_VECTOR_ELT': 'EVEC',
    'SHUFFLE_VECTOR': 'SHUF',
}


def mangle_vreg(opcode, current_names):
    """Derive a FileCheck variable name for a vreg defined by `opcode`.

    Args:
        opcode: The opcode of the defining instruction.
        current_names: Iterable of variable names already in use.

    Returns:
        The simplified opcode name, followed by a counter when names with
        the same base are already present in `current_names`.
    """
    base = opcode
    # Simplify some common prefixes and suffixes
    if opcode.startswith('G_'):
        base = base[len('G_'):]
    if opcode.endswith('_PSEUDO'):
        # Strip the suffix (a positive slice bound here would instead keep
        # only the first len('_PSEUDO') characters).
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = _OPCODE_ABBREVIATIONS.get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if len(base.rstrip('0123456789')) < len(base):
        base += '_'

    # Count existing names that share this base (ignoring their counters).
    i = sum(1 for name in current_names
            if name.rstrip('0123456789') == base)
    if i:
        return '{}{}'.format(base, i)
    return base
283
284
def should_add_line_to_output(input_line, prefix_set):
    """Return False for check lines whose prefix is in `prefix_set`.

    Such lines are regenerated by this script, so they are dropped from the
    output; every other line is kept.
    """
    check_match = common.CHECK_RE.match(input_line)
    handled = check_match and check_match.group(1) in prefix_set
    return not handled
291
292
def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
    """Regenerate the FileCheck assertions of the test file `test` in place.

    The file's RUN lines are parsed, `llc` is invoked once per usable RUN
    line, and the file is rewritten with fresh check lines for every function
    while existing check lines for the handled prefixes are dropped.

    Args:
        llc: Callable taking (args, path) and returning the tool output text.
        test: Path of the test file to update.
        remove_common_prefixes: If true, existing check lines whose prefixes
            are shared between several RUN lines are removed as well;
            otherwise they are left alone and a warning is printed.
        verbose: Whether to log progress to stderr.

    Returns:
        None. The file is left untouched when no triple can be determined.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    func_dict = {prefix: {} for run in run_list for prefix in run.prefixes}
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        # Check for a triple before running the tool so that a file we are
        # going to skip anyway does not cost a tool invocation.
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        raw_tool_output = llc(llc_args, test)
        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = {prefix for run in run_list for prefix in run.prefixes}
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char,
                                            os.path.basename(__file__)))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Line-by-line state machine over the input. States:
    #   toplevel               outside any function or MIR document
    #   document               inside a MIR document, before `name:`
    #   mir function metadata  after `name:`, before `body: |`
    #   mir function prefix    single-block body, before the first instruction
    #   mir function body      rest of a MIR body, until `...`
    #   ir function prefix     after an IR `define`, skipping comments/blanks
    #   ir function body       rest of an IR function, until `}`
    for input_line in input_lines:
        if input_line == autogenerated_note:
            # The note is re-added at the top; don't duplicate it.
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)
        elif state == 'mir function prefix':
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First real instruction: emit the checks just before it.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # The file is written in binary mode so that '\n' is never translated to
    # a platform-specific line ending. On Python 3 the text therefore has to
    # be encoded first; on Python 2 a str already is a byte string.
    output_text = ''.join(l + '\n' for l in output_lines)
    if not isinstance(output_text, bytes):
        output_text = output_text.encode('utf-8')
    with open(test, 'wb') as fd:
        fd.write(output_text)
412
413
def main():
    """Parse the command line and update every test file given on it.

    If updating a file raises, a warning naming the file is printed and the
    exception is re-raised.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='Show verbose output')
    parser.add_argument(
        '--llc-binary', dest='llc', default='llc', type=LLC,
        help='The "llc" binary to generate the test case with')
    parser.add_argument(
        '--remove-common-prefixes', action='store_true',
        help='Remove existing check lines whose prefixes are '
             'shared between multiple commands')
    parser.add_argument('tests', nargs='+')
    options = parser.parse_args()

    for test_path in options.tests:
        try:
            update_test_file(options.llc, test_path,
                             options.remove_common_prefixes,
                             verbose=options.verbose)
        except Exception:
            # Name the offending file, then let the error propagate.
            warn('Error processing file', test_file=test_path)
            raise


if __name__ == '__main__':
    main()
438