#!/usr/bin/env python3

#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Runs the static rooting analysis
"""

from subprocess import Popen
import argparse
import os
import subprocess
import sys

# shlex.quote is Python 3; pipes.quote is the Python 2 fallback.
try:
    from shlex import quote
except ImportError:
    from pipes import quote


def execfile(thefile, globals):
    """Execute the Python file `thefile` with `globals` as its namespace.

    Replacement for the Python 2 builtin; used to load defaults.py config
    files into the `config` dict.
    """
    # Use a context manager so the file handle is closed promptly instead of
    # leaking until garbage collection.
    with open(thefile) as fh:
        exec(compile(fh.read(), filename=thefile, mode="exec"), globals)


# Label a string as an output.
class Output(str):
    pass


# Label a string as a pattern for multiple inputs.
class MultiInput(str):
    pass


def env(config):
    """Return a copy of os.environ augmented with the settings the analysis
    tools expect (PATH prefixed with the gcc/sixgill bin dirs, XDB, SOURCE,
    ANALYZED_OBJDIR)."""
    e = dict(os.environ)
    # Prepend gcc_bin and sixgill_bin when configured; skip empty entries.
    e["PATH"] = ":".join(
        p for p in (config.get("gcc_bin"), config.get("sixgill_bin"), e["PATH"]) if p
    )
    e["XDB"] = "%(sixgill_bin)s/xdb.so" % config
    e["SOURCE"] = config["source"]
    e["ANALYZED_OBJDIR"] = config["objdir"]
    return e


def fill(command, config):
    """Substitute config values into every fragment of `command`.

    Output fragments stay tagged as Output after substitution. A MultiInput
    fragment expands into `jobs` fragments, one per parallel job, by
    formatting its {i}/{n} placeholders. Raises if any substitution key is
    missing.
    """
    filled = []
    for s in command:
        try:
            rep = s.format(**config)
        except KeyError:
            print("Substitution failed: %s" % s)
            filled = None
            break

        if isinstance(s, Output):
            filled.append(Output(rep))
        elif isinstance(s, MultiInput):
            # Expand to one input filename per parallel job (1-based).
            N = int(config["jobs"])
            for i in range(1, N + 1):
                filled.append(rep.format(i=i, n=N))
        else:
            filled.append(rep)

    if filled is None:
        raise Exception("substitution failure")

    return tuple(filled)


def print_command(command, outfile=None, env=None):
    """Print a shell-quoted, cut & paste-able version of `command`.

    If `outfile` is given, show a `> outfile` redirection. If `env` is given,
    prefix the command with VAR=value settings for every variable that
    differs from the current environment; when the current value is embedded
    in the new one (e.g. a PATH prefix), render it as "prefix${VAR}suffix".
    """
    output = " ".join(quote(s) for s in command)
    if outfile:
        output += " > " + outfile
    if env:
        changed = {}
        e = os.environ
        for key, value in env.items():
            if (key not in e) or (e[key] != value):
                changed[key] = value
        if changed:
            outputs = []
            for key, value in changed.items():
                if key in e and e[key] in value:
                    # The old value survives inside the new one; show it as a
                    # ${VAR} expansion for readability.
                    start = value.index(e[key])
                    end = start + len(e[key])
                    outputs.append(
                        '%s="%s${%s}%s"' % (key, value[:start], key, value[end:])
                    )
                else:
                    outputs.append("%s='%s'" % (key, value))
            output = " ".join(outputs) + " " + output

    print(output)


# Table of analysis steps. Each entry may have:
#   command         - argv fragments; {placeholders} filled from config,
#                     Output() marks output filenames, MultiInput() expands
#                     to one fragment per parallel job.
#   outputs         - final names for the Output() fragments, in order.
#   redirect-output - filename to redirect the command's stdout into.
#   multi-output    - run `jobs` parallel instances, substituting {i}/{n}.
JOBS = {
    "dbs": {
        "command": [
            "{analysis_scriptdir}/run_complete",
            "--foreground",
            "--no-logs",
            "--build-root={objdir}",
            "--wrap-dir={sixgill}/scripts/wrap_gcc",
            "--work-dir=work",
            "-b",
            "{sixgill_bin}",
            "--buildcommand={buildcommand}",
            ".",
        ],
        "outputs": [],
    },
    "list-dbs": {"command": ["ls", "-l"]},
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("rawcalls"),
            Output("rawEdges"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        "outputs": ["rawcalls.{i}.of.{n}", "gcEdges.{i}.of.{n}"],
    },
    "mergeJSON": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{rawEdges}"),
            Output("gcEdges"),
        ],
        "outputs": ["gcEdges.json"],
    },
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("callgraph"),
            Output("gcFunctions"),
            Output("gcFunctions_list"),
            Output("limitedFunctions_list"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("gcTypes"),
            Output("typeInfo"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{gcEdges}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    "gather-hazards": {
        "command": ["cat", MultiInput("{hazards}")],
        "redirect-output": "rootingHazards.txt",
    },
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{gather-hazards}",
            "{gcFunctions}",
            Output("explained_hazards"),
            Output("unnecessary"),
            Output("refs"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt"],
    },
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}


# Generator of (i, j, item) tuples:
#  - i is just the index of the yielded tuple (a la enumerate())
#  - j is the index of the item in the command list
#  - item is command[j]
def out_indexes(command):
    i = 0
    for (j, fragment) in enumerate(command):
        if isinstance(fragment, Output):
            yield (i, j, fragment)
            i += 1


def run_job(name, config):
    """Run the analysis step `name`, spawning `config['jobs']` parallel
    processes for multi-output steps, waiting for them all, and renaming
    their temporary output files to the final names on success.

    Raises if any child exits with nonzero status or substitution fails.
    """
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    if "function" in job:
        # Step implemented as a Python callable rather than a command.
        job["function"](config, job["redirect-output"])
        return

    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        jobs[info["proc"].pid] = info

    # Reap children in whatever order they finish; remember the first
    # (nonzero) failure but keep waiting so no zombies are left behind.
    final_status = 0
    while jobs:
        pid, status = os.wait()
        final_status = final_status or status
        info = jobs[pid]
        del jobs[pid]
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        for (temp, final) in info["rename_map"].items():
            try:
                if config["verbose"]:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception("job {} returned status {}".format(name, final_status))


def spawn_command(cmdspec, job, name, config):
    """Start one child process for step `name` with the filled command
    `cmdspec`.

    Output fragments (and redirected stdout) are pointed at "<x>.tmp<i>"
    temporary files so a failed run never clobbers good results; the mapping
    from temp names to final names is returned so run_job can rename them
    after a successful exit. Returns a dict with keys 'proc', 'rename_map',
    and (for redirected steps) 'redirect'.
    """
    rename_map = {}

    if "redirect-output" in job:
        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
        if config["verbose"]:
            print_command(cmdspec, outfile=final_outfile, env=env(config))
    else:
        outfiles = job["outputs"]
        outfiles = fill(outfiles, config)
        stdout_filename = None

        # To print the supposedly-executed command, replace the Outputs in the
        # command with final output file names. (The actual command will be
        # using temporary files that get renamed at the end.)
        if config["verbose"]:
            pc = list(cmdspec)
            # Note: use a local that does not shadow the `name` parameter.
            for (i, j, _fragment) in out_indexes(cmdspec):
                pc[j] = outfiles[i]
            print_command(pc, env=env(config))

        # Replace the Outputs with temporary filenames, and record a mapping
        # from those temp names to their actual final names that will be used
        # if the command succeeds.
        command = list(cmdspec)
        for (i, j, fragment) in out_indexes(cmdspec):
            command[j] = "{}.tmp{}".format(fragment, config.get("i", ""))
            rename_map[command[j]] = outfiles[i]

    # Flush so our own output is ordered before the child's.
    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"]:
        print("Spawned process {}".format(info["proc"].pid))

    return info


# Default to conservatively assuming 4GB/job.
def max_parallel_jobs(job_size=4 * 2 ** 30):
    """Return the max number of parallel jobs we can run without overfilling
    memory, assuming heavyweight jobs."""
    from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    from_mem = round(mem_bytes / job_size)
    return min(from_cores, from_mem)


config = {"analysis_scriptdir": os.path.dirname(__file__)}

# Optional config files: one next to this script, one in the cwd.
defaults = [
    "%s/defaults.py" % config["analysis_scriptdir"],
    "%s/defaults.py" % os.getcwd(),
]

parser = argparse.ArgumentParser(
    description="Statically analyze build tree for rooting hazards."
)
parser.add_argument(
    "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
)
parser.add_argument(
    "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
)
parser.add_argument(
    "--objdir",
    metavar="DIR",
    type=str,
    nargs="?",
    help="object directory of compiled files",
)
parser.add_argument(
    "--js",
    metavar="JSSHELL",
    type=str,
    nargs="?",
    help="full path to ctypes-capable JS shell",
)
parser.add_argument(
    "--first",
    metavar="STEP",
    type=str,
    nargs="?",
    help="execute all jobs starting with STEP",
)
parser.add_argument(
    "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
)
parser.add_argument(
    "--jobs",
    "-j",
    default=None,
    metavar="JOBS",
    type=int,
    help="number of simultaneous analyzeRoots.js jobs",
)
parser.add_argument(
    "--list", const=True, nargs="?", type=bool, help="display available steps"
)
parser.add_argument(
    "--buildcommand",
    "--build",
    "-b",
    type=str,
    nargs="?",
    help="command to build the tree being analyzed",
)
parser.add_argument(
    "--tag",
    "-t",
    type=str,
    nargs="?",
    help='name of job, also sets build command to "build.<tag>"',
)
parser.add_argument(
    "--expect-file",
    type=str,
    nargs="?",
    help="deprecated option, temporarily still present for backwards " "compatibility",
)
parser.add_argument(
    "--verbose",
    "-v",
    action="count",
    default=1,
    help="Display cut & paste commands to run individual steps",
)
parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output")

args = parser.parse_args()
args.verbose = max(0, args.verbose - args.quiet)

# Load defaults.py files if present; missing or broken files are deliberately
# ignored (the analysis can be fully configured on the command line).
for default in defaults:
    try:
        execfile(default, config)
        if args.verbose:
            print("Loaded %s" % default)
    except Exception:
        pass

data = config.copy()

# Command-line settings override defaults.py settings.
for k, v in vars(args).items():
    if v is not None:
        data[k] = v

if args.tag and not args.buildcommand:
    args.buildcommand = "build.%s" % args.tag

if args.jobs is not None:
    data["jobs"] = args.jobs
if not data.get("jobs"):
    data["jobs"] = max_parallel_jobs()

if args.buildcommand:
    data["buildcommand"] = args.buildcommand
elif "BUILD" in os.environ:
    data["buildcommand"] = os.environ["BUILD"]
else:
    data["buildcommand"] = "make -j{} -s".format(data["jobs"])

if "ANALYZED_OBJDIR" in os.environ:
    data["objdir"] = os.environ["ANALYZED_OBJDIR"]

# SOURCE takes precedence over GECKO_PATH if both are set.
if "GECKO_PATH" in os.environ:
    data["source"] = os.environ["GECKO_PATH"]
if "SOURCE" in os.environ:
    data["source"] = os.environ["SOURCE"]

# Steps in dependency order.
steps = [
    "dbs",
    "gcTypes",
    "rawcalls",
    "gcFunctions",
    "mergeJSON",
    "allFunctions",
    "hazards",
    "gather-hazards",
    "explain",
    "heapwrites",
]

if args.list:
    for step in steps:
        job = JOBS[step]
        outfiles = job.get("outputs") or job.get("redirect-output")
        if outfiles:
            print(
                "%s\n ->%s %s"
                % (step, "*" if job.get("multi-output") else "", outfiles)
            )
        else:
            print(step)
    sys.exit(0)

# Make each step's output filenames available as substitution keys for later
# steps (e.g. "{gcFunctions_list}" in the hazards command).
for step in steps:
    job = JOBS[step]
    if "redirect-output" in job:
        data[step] = job["redirect-output"]
    elif "outputs" in job and "command" in job:
        outfiles = job["outputs"]
        for (i, j, name) in out_indexes(job["command"]):
            data[name] = outfiles[i]
        num_outputs = len(list(out_indexes(job["command"])))
        assert (
            len(outfiles) == num_outputs
        ), 'step "%s": mismatched number of output files (%d) and params (%d)' % (
            step,
            num_outputs,
            len(outfiles),
        )  # NOQA: E501

if args.step:
    if args.first or args.last:
        raise Exception(
            "--first and --last cannot be used when a step argument is given"
        )
    steps = [args.step]
else:
    if args.first:
        steps = steps[steps.index(args.first) :]
    if args.last:
        steps = steps[: steps.index(args.last) + 1]

for step in steps:
    run_job(step, data)