1#!/usr/bin/env python3
2
3#
4# This Source Code Form is subject to the terms of the Mozilla Public
5# License, v. 2.0. If a copy of the MPL was not distributed with this
6# file, You can obtain one at http://mozilla.org/MPL/2.0/.
7
8"""
9Runs the static rooting analysis
10"""
11
12from subprocess import Popen
13import argparse
14import os
15import subprocess
16import sys
17
18try:
19    from shlex import quote
20except ImportError:
21    from pipes import quote
22
23
def execfile(thefile, globals):
    """Execute the Python source in `thefile` using `globals` as its
    namespace (a Python 3 stand-in for the removed execfile builtin)."""
    # Read via a context manager so the file handle is closed promptly
    # instead of leaking until garbage collection.
    with open(thefile) as f:
        source = f.read()
    exec(compile(source, filename=thefile, mode="exec"), globals)
26
27
class Output(str):
    """A string tagged as naming an output file in a JOBS command."""
31
32
class MultiInput(str):
    """A string tagged as a pattern that expands to one input filename per
    parallel job."""
36
37
def env(config):
    """Return a copy of os.environ augmented for the analysis tools:
    gcc/sixgill binaries prepended to PATH, plus XDB, SOURCE, and
    ANALYZED_OBJDIR taken from `config`."""
    result = dict(os.environ)
    path_parts = (config.get("gcc_bin"), config.get("sixgill_bin"), result["PATH"])
    # Skip unset (None/empty) path components.
    result["PATH"] = ":".join(part for part in path_parts if part)
    result["XDB"] = "%(sixgill_bin)s/xdb.so" % config
    result["SOURCE"] = config["source"]
    result["ANALYZED_OBJDIR"] = config["objdir"]
    return result
47
48
def fill(command, config):
    """Substitute {var} placeholders in each command fragment from `config`.

    Output fragments keep their Output tag after substitution; MultiInput
    fragments expand into one entry per parallel job, substituting {i}
    (1-based job index) and {n} (job count). Returns a tuple, or raises if
    any placeholder is missing from `config`."""
    parts = []
    for fragment in command:
        try:
            substituted = fragment.format(**config)
        except KeyError:
            print("Substitution failed: %s" % fragment)
            parts = None
            break

        if isinstance(fragment, Output):
            parts.append(Output(substituted))
        elif isinstance(fragment, MultiInput):
            njobs = int(config["jobs"])
            parts.extend(
                substituted.format(i=idx, n=njobs) for idx in range(1, njobs + 1)
            )
        else:
            parts.append(substituted)

    if parts is None:
        raise Exception("substitution failure")

    return tuple(parts)
72
73
def print_command(command, outfile=None, env=None):
    """Print a copy-pasteable shell equivalent of `command`.

    If `outfile` is given, append a stdout redirection. If `env` is given,
    prefix VAR=value assignments for every variable that differs from the
    current os.environ, abbreviating values that merely extend an existing
    variable (e.g. PATH="/extra${PATH}")."""
    line = " ".join(quote(word) for word in command)
    if outfile:
        line += " > " + outfile
    if env:
        changed = {
            key: val
            for key, val in env.items()
            if key not in os.environ or os.environ[key] != val
        }
        if changed:
            assignments = []
            for key, val in changed.items():
                old = os.environ.get(key)
                if old is not None and old in val:
                    # The new value embeds the old one: show it as an
                    # extension, e.g. PATH="/foo${PATH}".
                    start = val.index(old)
                    end = start + len(old)
                    assignments.append(
                        '%s="%s${%s}%s"' % (key, val[:start], key, val[end:])
                    )
                else:
                    assignments.append("%s='%s'" % (key, val))
            line = " ".join(assignments) + " " + line

    print(line)
98
99
# Table of analysis pipeline steps. Each entry may contain:
#   "command"         - argv template; "{var}" placeholders are filled from
#                       the config dict by fill(). Output(...) marks an
#                       argument naming a file the command creates;
#                       MultiInput(...) expands to one argument per job.
#   "outputs"         - final filenames for the Output(...) arguments,
#                       mapped positionally (first Output -> first entry).
#   "redirect-output" - filename that receives the command's stdout.
#   "multi-output"    - run config["jobs"] parallel instances, substituting
#                       {i} (instance number) and {n} (instance count).
JOBS = {
    "dbs": {
        "command": [
            "{analysis_scriptdir}/run_complete",
            "--foreground",
            "--no-logs",
            "--build-root={objdir}",
            "--wrap-dir={sixgill}/scripts/wrap_gcc",
            "--work-dir=work",
            "-b",
            "{sixgill_bin}",
            "--buildcommand={buildcommand}",
            ".",
        ],
        "outputs": [],
    },
    "list-dbs": {"command": ["ls", "-l"]},
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("rawcalls"),
            Output("rawEdges"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        # Positional mapping: rawcalls -> rawcalls.{i}.of.{n},
        # rawEdges -> gcEdges.{i}.of.{n}.
        "outputs": ["rawcalls.{i}.of.{n}", "gcEdges.{i}.of.{n}"],
    },
    "mergeJSON": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{rawEdges}"),
            Output("gcEdges"),
        ],
        "outputs": ["gcEdges.json"],
    },
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("callgraph"),
            Output("gcFunctions"),
            Output("gcFunctions_list"),
            Output("limitedFunctions_list"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("gcTypes"),
            Output("typeInfo"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{gcEdges}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    "gather-hazards": {
        "command": ["cat", MultiInput("{hazards}")],
        "redirect-output": "rootingHazards.txt",
    },
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{gather-hazards}",
            "{gcFunctions}",
            Output("explained_hazards"),
            Output("unnecessary"),
            Output("refs"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt"],
    },
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}
207
208
# Yield an (i, j, item) tuple for every Output fragment in `command`:
#  - i counts the Output fragments seen so far (a la enumerate())
#  - j is the fragment's position within `command`
#  - item is command[j] itself
def out_indexes(command):
    seen = 0
    for position, fragment in enumerate(command):
        if not isinstance(fragment, Output):
            continue
        yield (seen, position, fragment)
        seen += 1
219
220
def run_job(name, config):
    """Run the JOBS entry called `name`: spawn one process (or
    config["jobs"] parallel processes for multi-output jobs), wait for all
    of them, rename their temporary output files to the final names, and
    raise if any process exited with a nonzero status."""
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    # A job may supply a Python callable instead of a command line.
    # (No entry in the JOBS table above uses this — presumably a hook for
    # callers that extend the table; verify before removing.)
    if "function" in job:
        job["function"](config, job["redirect-output"])
        return

    # Multi-output jobs run N parallel instances numbered 1..N; the current
    # instance and total are exposed to substitutions as {i} and {n}.
    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}  # pid -> info dict from spawn_command()
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        jobs[info["proc"].pid] = info

    # Reap children as they finish, remembering the first nonzero status.
    # NOTE(review): os.wait() reaps *any* child of this process; a child not
    # in `jobs` would raise KeyError here — confirm no other children exist.
    final_status = 0
    while jobs:
        pid, status = os.wait()
        final_status = final_status or status
        info = jobs[pid]
        del jobs[pid]
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        # NOTE(review): the rename happens even for a process that failed;
        # confirm that partial outputs are intended to be kept.
        for (temp, final) in info["rename_map"].items():
            try:
                if config["verbose"]:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception("job {} returned status {}".format(name, final_status))
259
260
def spawn_command(cmdspec, job, name, config):
    """Start one process for `job` (step `name`) and return an info dict
    holding the Popen object, a temp-file -> final-file rename map, and,
    for redirected jobs, the open stdout file object."""
    rename_map = {}

    if "redirect-output" in job:
        # Stdout is captured into a temp file and renamed on success.
        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
        if config["verbose"]:
            print_command(cmdspec, outfile=final_outfile, env=env(config))
    else:
        stdout_filename = None
        outfiles = fill(job["outputs"], config)

        # Show the command as though it wrote the final files directly (the
        # real command writes temp files that are renamed afterwards).
        if config["verbose"]:
            display = list(cmdspec)
            for idx, pos, _ in out_indexes(cmdspec):
                display[pos] = outfiles[idx]
            print_command(display, env=env(config))

        # Swap each Output for a temporary filename and remember the final
        # name it should get if the command succeeds.
        command = list(cmdspec)
        for idx, pos, fragment in out_indexes(cmdspec):
            tmpname = "{}.tmp{}".format(fragment, config.get("i", ""))
            command[pos] = tmpname
            rename_map[tmpname] = outfiles[idx]

    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"]:
        print("Spawned process {}".format(info["proc"].pid))

    return info
305
306
# Default to conservatively assuming 4GB/job.
def max_parallel_jobs(job_size=4 * 2 ** 30):
    """Return the max number of parallel jobs we can run without overfilling
    memory, assuming heavyweight jobs of `job_size` bytes each.

    Always returns at least 1."""
    # Leave one core free for the rest of the system.
    from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    from_mem = round(mem_bytes / job_size)
    # Clamp to >= 1: on a machine with less than job_size/2 of memory,
    # from_mem rounds to 0, which would disable the analysis entirely (and
    # produce a bogus "make -j0" default build command).
    return max(1, min(from_cores, from_mem))
315
316
# Seed the configuration with the directory holding this script, then look
# for a defaults.py both next to the script and in the current directory.
config = {"analysis_scriptdir": os.path.dirname(__file__)}

defaults = [
    "%s/defaults.py" % d for d in (config["analysis_scriptdir"], os.getcwd())
]
323
# Command-line interface. Parsed values override anything loaded from
# defaults.py (None values are skipped when merging into `data` below).
parser = argparse.ArgumentParser(
    description="Statically analyze build tree for rooting hazards."
)
parser.add_argument(
    "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
)
parser.add_argument(
    "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
)
parser.add_argument(
    "--objdir",
    metavar="DIR",
    type=str,
    nargs="?",
    help="object directory of compiled files",
)
parser.add_argument(
    "--js",
    metavar="JSSHELL",
    type=str,
    nargs="?",
    help="full path to ctypes-capable JS shell",
)
parser.add_argument(
    "--first",
    metavar="STEP",
    type=str,
    nargs="?",
    help="execute all jobs starting with STEP",
)
parser.add_argument(
    "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
)
parser.add_argument(
    "--jobs",
    "-j",
    default=None,
    metavar="JOBS",
    type=int,
    help="number of simultaneous analyzeRoots.js jobs",
)
parser.add_argument(
    "--list", const=True, nargs="?", type=bool, help="display available steps"
)
parser.add_argument(
    "--buildcommand",
    "--build",
    "-b",
    type=str,
    nargs="?",
    help="command to build the tree being analyzed",
)
parser.add_argument(
    "--tag",
    "-t",
    type=str,
    nargs="?",
    help='name of job, also sets build command to "build.<tag>"',
)
parser.add_argument(
    "--expect-file",
    type=str,
    nargs="?",
    help="deprecated option, temporarily still present for backwards " "compatibility",
)
# Verbosity defaults to 1 so commands are echoed unless --quiet is given.
parser.add_argument(
    "--verbose",
    "-v",
    action="count",
    default=1,
    help="Display cut & paste commands to run individual steps",
)
parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output")
397
args = parser.parse_args()
# Each --quiet cancels one --verbose; never drop below zero.
args.verbose = max(0, args.verbose - args.quiet)

# Load any defaults.py files into `config`. A missing file is expected and
# ignored; note this broad except also silently swallows errors *inside* a
# defaults.py that does exist.
for default in defaults:
    try:
        execfile(default, config)
        if args.verbose:
            print("Loaded %s" % default)
    except Exception:
        pass

# Effective settings: defaults first, then non-None command-line values.
data = config.copy()

for k, v in vars(args).items():
    if v is not None:
        data[k] = v

# A --tag implies a build command of "build.<tag>" unless one was given
# explicitly (read back via args.buildcommand below).
if args.tag and not args.buildcommand:
    args.buildcommand = "build.%s" % args.tag

# Job count: explicit -j wins; otherwise fall back to an estimate based on
# core count and physical memory.
if args.jobs is not None:
    data["jobs"] = args.jobs
if not data.get("jobs"):
    data["jobs"] = max_parallel_jobs()

# Build command priority: command line, then $BUILD, then a default make.
if args.buildcommand:
    data["buildcommand"] = args.buildcommand
elif "BUILD" in os.environ:
    data["buildcommand"] = os.environ["BUILD"]
else:
    data["buildcommand"] = "make -j{} -s".format(data["jobs"])

# Environment variables override the objdir and source settings; $SOURCE
# beats $GECKO_PATH when both are set.
if "ANALYZED_OBJDIR" in os.environ:
    data["objdir"] = os.environ["ANALYZED_OBJDIR"]

if "GECKO_PATH" in os.environ:
    data["source"] = os.environ["GECKO_PATH"]
if "SOURCE" in os.environ:
    data["source"] = os.environ["SOURCE"]
437
# The full pipeline, in execution order. (Note "list-dbs" exists in JOBS
# but is not part of the default pipeline.)
steps = [
    "dbs",
    "gcTypes",
    "rawcalls",
    "gcFunctions",
    "mergeJSON",
    "allFunctions",
    "hazards",
    "gather-hazards",
    "explain",
    "heapwrites",
]
450
# --list: print each step with its output files ("*" marks multi-output
# steps, i.e. those that run once per parallel job), then exit.
if args.list:
    for step in steps:
        job = JOBS[step]
        outfiles = job.get("outputs") or job.get("redirect-output")
        if outfiles:
            print(
                "%s\n    ->%s %s"
                % (step, "*" if job.get("multi-output") else "", outfiles)
            )
        else:
            print(step)
    sys.exit(0)
463
# Record every step's output filenames in `data`, keyed by the Output name
# (or the step name for redirected output), so later steps can refer to
# them via "{...}" substitutions in their commands.
for step in steps:
    job = JOBS[step]
    if "redirect-output" in job:
        data[step] = job["redirect-output"]
    elif "outputs" in job and "command" in job:
        outfiles = job["outputs"]
        num_outputs = len(list(out_indexes(job["command"])))
        # Validate the table before indexing into it, so a short outputs
        # list reports this error instead of an IndexError. (The %d
        # arguments were previously swapped relative to their labels.)
        assert (
            len(outfiles) == num_outputs
        ), 'step "%s": mismatched number of output files (%d) and params (%d)' % (
            step,
            len(outfiles),
            num_outputs,
        )
        for (i, j, name) in out_indexes(job["command"]):
            data[name] = outfiles[i]
480
# Select which steps to run: a single named step, or a contiguous slice of
# the pipeline bounded by --first/--last (both inclusive).
if args.step:
    if args.first or args.last:
        raise Exception(
            "--first and --last cannot be used when a step argument is given"
        )
    steps = [args.step]
else:
    if args.first:
        steps = steps[steps.index(args.first) :]
    if args.last:
        steps = steps[: steps.index(args.last) + 1]

for step in steps:
    run_job(step, data)
495