xref: /openbsd/gnu/llvm/llvm/utils/abtest.py (revision 73471bf0)
1#!/usr/bin/env python
2#
3# Given a previous good compile narrow down miscompiles.
4# Expects two directories named "before" and "after" each containing a set of
5# assembly or object files where the "after" version is assumed to be broken.
6# You also have to provide a script called "link_test". It is called with a
7# list of files which should be linked together and result tested. "link_test"
8# should returns with exitcode 0 if the linking and testing succeeded.
9#
10# If a response file is provided, only the object files that are listed in the
11# file are inspected. In addition, the "link_test" is called with a temporary
12# response file representing one iteration of bisection.
13#
14# abtest.py operates by taking all files from the "before" directory and
15# in each step replacing one of them with a file from the "bad" directory.
16#
17# Additionally you can perform the same steps with a single .s file. In this
18# mode functions are identified by " -- Begin function FunctionName" and
19# " -- End function" markers. The abtest.py then takes all
20# function from the file in the "before" directory and replaces one function
21# with the corresponding function from the "bad" file in each step.
22#
23# Example usage to identify miscompiled files:
24#    1. Create a link_test script, make it executable. Simple Example:
25#          clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
26#    2. Run the script to figure out which files are miscompiled:
27#       > ./abtest.py
28#       somefile.s: ok
29#       someotherfile.s: skipped: same content
30#       anotherfile.s: failed: './link_test' exitcode != 0
31#       ...
32# Example usage to identify miscompiled functions inside a file:
33#    3. Run the tests on a single file (assuming before/file.s and
34#       after/file.s exist)
35#       > ./abtest.py file.s
36#       funcname1 [0/XX]: ok
37#       funcname2 [1/XX]: ok
38#       funcname3 [2/XX]: skipped: same content
39#       funcname4 [3/XX]: failed: './link_test' exitcode != 0
40#       ...
41from fnmatch import filter
42from sys import stderr
43import argparse
44import filecmp
45import os
46import subprocess
47import sys
48import tempfile
49
50# Specify LINKTEST via `--test`. Default value is './link_test'.
51LINKTEST = ""
52ESCAPE = "\033[%sm"
53BOLD = ESCAPE % "1"
54RED = ESCAPE % "31"
55NORMAL = ESCAPE % "0"
56FAILED = RED + "failed" + NORMAL
57
58
59def find(dir, file_filter=None):
60    files = [
61        walkdir[0]+"/"+file
62        for walkdir in os.walk(dir)
63        for file in walkdir[2]
64    ]
65    if file_filter is not None:
66        files = filter(files, file_filter)
67    return sorted(files)
68
69
70def error(message):
71    stderr.write("Error: %s\n" % (message,))
72
73
74def warn(message):
75    stderr.write("Warning: %s\n" % (message,))
76
77
78def info(message):
79    stderr.write("Info: %s\n" % (message,))
80
81
82def announce_test(name):
83    stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
84    stderr.flush()
85
86
87def announce_result(result):
88    stderr.write(result)
89    stderr.write("\n")
90    stderr.flush()
91
92
93def format_namelist(l):
94    result = ", ".join(l[0:3])
95    if len(l) > 3:
96        result += "... (%d total)" % len(l)
97    return result
98
99
100def check_sanity(choices, perform_test):
101    announce_test("sanity check A")
102    all_a = {name: a_b[0] for name, a_b in choices}
103    res_a = perform_test(all_a)
104    if res_a is not True:
105        error("Picking all choices from A failed to pass the test")
106        sys.exit(1)
107
108    announce_test("sanity check B (expecting failure)")
109    all_b = {name: a_b[1] for name, a_b in choices}
110    res_b = perform_test(all_b)
111    if res_b is not False:
112        error("Picking all choices from B did unexpectedly pass the test")
113        sys.exit(1)
114
115
116def check_sequentially(choices, perform_test):
117    known_good = set()
118    all_a = {name: a_b[0] for name, a_b in choices}
119    n = 1
120    for name, a_b in sorted(choices):
121        picks = dict(all_a)
122        picks[name] = a_b[1]
123        announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
124        n += 1
125        res = perform_test(picks)
126        if res is True:
127            known_good.add(name)
128    return known_good
129
130
131def check_bisect(choices, perform_test):
132    known_good = set()
133    if len(choices) == 0:
134        return known_good
135
136    choice_map = dict(choices)
137    all_a = {name: a_b[0] for name, a_b in choices}
138
139    def test_partition(partition, upcoming_partition):
140        # Compute the maximum number of checks we have to do in the worst case.
141        max_remaining_steps = len(partition) * 2 - 1
142        if upcoming_partition is not None:
143            max_remaining_steps += len(upcoming_partition) * 2 - 1
144        for x in partitions_to_split:
145            max_remaining_steps += (len(x) - 1) * 2
146
147        picks = dict(all_a)
148        for x in partition:
149            picks[x] = choice_map[x][1]
150        announce_test("checking %s [<=%d remaining]" %
151                      (format_namelist(partition), max_remaining_steps))
152        res = perform_test(picks)
153        if res is True:
154            known_good.update(partition)
155        elif len(partition) > 1:
156            partitions_to_split.insert(0, partition)
157
158    # TODO:
159    # - We could optimize based on the knowledge that when splitting a failed
160    #   partition into two and one side checks out okay then we can deduce that
161    #   the other partition must be a failure.
162    all_choice_names = [name for name, _ in choices]
163    partitions_to_split = [all_choice_names]
164    while len(partitions_to_split) > 0:
165        partition = partitions_to_split.pop()
166
167        middle = len(partition) // 2
168        left = partition[0:middle]
169        right = partition[middle:]
170
171        if len(left) > 0:
172            test_partition(left, right)
173        assert len(right) > 0
174        test_partition(right, None)
175
176    return known_good
177
178
179def extract_functions(file):
180    functions = []
181    in_function = None
182    for line in open(file):
183        marker = line.find(" -- Begin function ")
184        if marker != -1:
185            if in_function is not None:
186                warn("Missing end of function %s" % (in_function,))
187            funcname = line[marker + 19:-1]
188            in_function = funcname
189            text = line
190            continue
191
192        marker = line.find(" -- End function")
193        if marker != -1:
194            text += line
195            functions.append((in_function, text))
196            in_function = None
197            continue
198
199        if in_function is not None:
200            text += line
201    return functions
202
203
204def replace_functions(source, dest, replacements):
205    out = open(dest, "w")
206    skip = False
207    in_function = None
208    for line in open(source):
209        marker = line.find(" -- Begin function ")
210        if marker != -1:
211            if in_function is not None:
212                warn("Missing end of function %s" % (in_function,))
213            funcname = line[marker + 19:-1]
214            in_function = funcname
215            replacement = replacements.get(in_function)
216            if replacement is not None:
217                out.write(replacement)
218                skip = True
219        else:
220            marker = line.find(" -- End function")
221            if marker != -1:
222                in_function = None
223                if skip:
224                    skip = False
225                    continue
226
227        if not skip:
228            out.write(line)
229
230
231def testrun(files):
232    linkline = "%s %s" % (LINKTEST, " ".join(files),)
233    res = subprocess.call(linkline, shell=True)
234    if res != 0:
235        announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
236        return False
237    else:
238        announce_result("ok")
239        return True
240
241
242def prepare_files(gooddir, baddir, rspfile):
243    files_a = []
244    files_b = []
245
246    if rspfile is not None:
247        def get_basename(name):
248            # remove prefix
249            if name.startswith(gooddir):
250                return name[len(gooddir):]
251            if name.startswith(baddir):
252                return name[len(baddir):]
253            assert False, ""
254
255        with open(rspfile, "r") as rf:
256            for line in rf.read().splitlines():
257                for obj in line.split():
258                    assert not os.path.isabs(obj), "TODO: support abs path"
259                    files_a.append(gooddir + "/" + obj)
260                    files_b.append(baddir + "/" + obj)
261    else:
262        get_basename = lambda name: os.path.basename(name)
263        files_a = find(gooddir, "*")
264        files_b = find(baddir, "*")
265
266    basenames_a = set(map(get_basename, files_a))
267    basenames_b = set(map(get_basename, files_b))
268
269    for name in files_b:
270        basename = get_basename(name)
271        if basename not in basenames_a:
272            warn("There is no corresponding file to '%s' in %s" %
273                 (name, gooddir))
274    choices = []
275    skipped = []
276    for name in files_a:
277        basename = get_basename(name)
278        if basename not in basenames_b:
279            warn("There is no corresponding file to '%s' in %s" %
280                 (name, baddir))
281
282        file_a = gooddir + "/" + basename
283        file_b = baddir + "/" + basename
284        if filecmp.cmp(file_a, file_b):
285            skipped.append(basename)
286            continue
287
288        choice = (basename, (file_a, file_b))
289        choices.append(choice)
290
291    if len(skipped) > 0:
292        info("Skipped (same content): %s" % format_namelist(skipped))
293
294    def perform_test(picks):
295        files = []
296        # Note that we iterate over files_a so we don't change the order
297        # (cannot use `picks` as it is a dictionary without order)
298        for x in files_a:
299            basename = get_basename(x)
300            picked = picks.get(basename)
301            if picked is None:
302                assert basename in skipped
303                files.append(x)
304            else:
305                files.append(picked)
306
307        # If response file is used, create a temporary response file for the
308        # picked files.
309        if rspfile is not None:
310            with tempfile.NamedTemporaryFile('w', suffix='.rsp',
311                                             delete=False) as tf:
312                tf.write(" ".join(files))
313                tf.flush()
314            ret = testrun([tf.name])
315            os.remove(tf.name)
316            return ret
317
318        return testrun(files)
319
320    return perform_test, choices
321
322
323def prepare_functions(to_check, gooddir, goodfile, badfile):
324    files_good = find(gooddir, "*")
325
326    functions_a = extract_functions(goodfile)
327    functions_a_map = dict(functions_a)
328    functions_b_map = dict(extract_functions(badfile))
329
330    for name in functions_b_map.keys():
331        if name not in functions_a_map:
332            warn("Function '%s' missing from good file" % name)
333    choices = []
334    skipped = []
335    for name, candidate_a in functions_a:
336        candidate_b = functions_b_map.get(name)
337        if candidate_b is None:
338            warn("Function '%s' missing from bad file" % name)
339            continue
340        if candidate_a == candidate_b:
341            skipped.append(name)
342            continue
343        choice = name, (candidate_a, candidate_b)
344        choices.append(choice)
345
346    if len(skipped) > 0:
347        info("Skipped (same content): %s" % format_namelist(skipped))
348
349    combined_file = '/tmp/combined2.s'
350    files = []
351    found_good_file = False
352    for c in files_good:
353        if os.path.basename(c) == to_check:
354            found_good_file = True
355            files.append(combined_file)
356            continue
357        files.append(c)
358    assert found_good_file
359
360    def perform_test(picks):
361        for name, x in picks.items():
362            assert x == functions_a_map[name] or x == functions_b_map[name]
363        replace_functions(goodfile, combined_file, picks)
364        return testrun(files)
365    return perform_test, choices
366
367
368def main():
369    parser = argparse.ArgumentParser()
370    parser.add_argument('--a', dest='dir_a', default='before')
371    parser.add_argument('--b', dest='dir_b', default='after')
372    parser.add_argument('--rsp', default=None)
373    parser.add_argument('--test', default='./link_test')
374    parser.add_argument('--insane', help='Skip sanity check',
375                        action='store_true')
376    parser.add_argument('--seq',
377                        help='Check sequentially instead of bisection',
378                        action='store_true')
379    parser.add_argument('file', metavar='file', nargs='?')
380    config = parser.parse_args()
381
382    gooddir = config.dir_a
383    baddir = config.dir_b
384    rspfile = config.rsp
385    global LINKTEST
386    LINKTEST = config.test
387
388    # Preparation phase: Creates a dictionary mapping names to a list of two
389    # choices each. The bisection algorithm will pick one choice for each name
390    # and then run the perform_test function on it.
391    if config.file is not None:
392        goodfile = gooddir + "/" + config.file
393        badfile = baddir + "/" + config.file
394        perform_test, choices = prepare_functions(config.file, gooddir,
395                                                  goodfile, badfile)
396    else:
397        perform_test, choices = prepare_files(gooddir, baddir, rspfile)
398
399    info("%d bisection choices" % len(choices))
400
401    # "Checking whether build environment is sane ..."
402    if not config.insane:
403        if not os.access(LINKTEST, os.X_OK):
404            error("Expect '%s' to be present and executable" % (LINKTEST,))
405            exit(1)
406
407        check_sanity(choices, perform_test)
408
409    if config.seq:
410        known_good = check_sequentially(choices, perform_test)
411    else:
412        known_good = check_bisect(choices, perform_test)
413
414    stderr.write("")
415    if len(known_good) != len(choices):
416        stderr.write("== Failing ==\n")
417        for name, _ in choices:
418            if name not in known_good:
419                stderr.write("%s\n" % name)
420    else:
421        # This shouldn't happen when the sanity check works...
422        # Maybe link_test isn't deterministic?
423        stderr.write("Could not identify failing parts?!?")
424
425
426if __name__ == '__main__':
427    main()
428