1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 2# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt 3 4"""Code parsing for coverage.py.""" 5 6import ast 7import collections 8import os 9import re 10import token 11import tokenize 12 13from coverage import env 14from coverage.backward import range # pylint: disable=redefined-builtin 15from coverage.backward import bytes_to_ints, string_class 16from coverage.bytecode import code_objects 17from coverage.debug import short_stack 18from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of 19from coverage.misc import NoSource, NotPython, StopEverything 20from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration 21 22 23class PythonParser(object): 24 """Parse code to find executable lines, excluded lines, etc. 25 26 This information is all based on static analysis: no code execution is 27 involved. 28 29 """ 30 @contract(text='unicode|None') 31 def __init__(self, text=None, filename=None, exclude=None): 32 """ 33 Source can be provided as `text`, the text itself, or `filename`, from 34 which the text will be read. Excluded lines are those that match 35 `exclude`, a regex. 36 37 """ 38 assert text or filename, "PythonParser needs either text or filename" 39 self.filename = filename or "<code>" 40 self.text = text 41 if not self.text: 42 from coverage.python import get_python_source 43 try: 44 self.text = get_python_source(self.filename) 45 except IOError as err: 46 raise NoSource( 47 "No source for code: '%s': %s" % (self.filename, err) 48 ) 49 50 self.exclude = exclude 51 52 # The text lines of the parsed code. 53 self.lines = self.text.split('\n') 54 55 # The normalized line numbers of the statements in the code. Exclusions 56 # are taken into account, and statements are adjusted to their first 57 # lines. 58 self.statements = set() 59 60 # The normalized line numbers of the excluded lines in the code, 61 # adjusted to their first lines. 62 self.excluded = set() 63 64 # The raw_* attributes are only used in this class, and in 65 # lab/parser.py to show how this class is working. 66 67 # The line numbers that start statements, as reported by the line 68 # number table in the bytecode. 69 self.raw_statements = set() 70 71 # The raw line numbers of excluded lines of code, as marked by pragmas. 72 self.raw_excluded = set() 73 74 # The line numbers of class and function definitions. 75 self.raw_classdefs = set() 76 77 # The line numbers of docstring lines. 78 self.raw_docstrings = set() 79 80 # Internal detail, used by lab/parser.py. 81 self.show_tokens = False 82 83 # A dict mapping line numbers to lexical statement starts for 84 # multi-line statements. 85 self._multiline = {} 86 87 # Lazily-created ByteParser, arc data, and missing arc descriptions. 88 self._byte_parser = None 89 self._all_arcs = None 90 self._missing_arc_fragments = None 91 92 @property 93 def byte_parser(self): 94 """Create a ByteParser on demand.""" 95 if not self._byte_parser: 96 self._byte_parser = ByteParser(self.text, filename=self.filename) 97 return self._byte_parser 98 99 def lines_matching(self, *regexes): 100 """Find the lines matching one of a list of regexes. 101 102 Returns a set of line numbers, the lines that contain a match for one 103 of the regexes in `regexes`. The entire line needn't match, just a 104 part of it. 105 106 """ 107 combined = join_regex(regexes) 108 if env.PY2: 109 combined = combined.decode("utf8") 110 regex_c = re.compile(combined) 111 matches = set() 112 for i, ltext in enumerate(self.lines, start=1): 113 if regex_c.search(ltext): 114 matches.add(i) 115 return matches 116 117 def _raw_parse(self): 118 """Parse the source to find the interesting facts about its lines. 119 120 A handful of attributes are updated. 121 122 """ 123 # Find lines which match an exclusion pattern. 124 if self.exclude: 125 self.raw_excluded = self.lines_matching(self.exclude) 126 127 # Tokenize, to find excluded suites, to find docstrings, and to find 128 # multi-line statements. 129 indent = 0 130 exclude_indent = 0 131 excluding = False 132 excluding_decorators = False 133 prev_toktype = token.INDENT 134 first_line = None 135 empty = True 136 first_on_line = True 137 138 tokgen = generate_tokens(self.text) 139 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: 140 if self.show_tokens: # pragma: debugging 141 print("%10s %5s %-20r %r" % ( 142 tokenize.tok_name.get(toktype, toktype), 143 nice_pair((slineno, elineno)), ttext, ltext 144 )) 145 if toktype == token.INDENT: 146 indent += 1 147 elif toktype == token.DEDENT: 148 indent -= 1 149 elif toktype == token.NAME: 150 if ttext == 'class': 151 # Class definitions look like branches in the bytecode, so 152 # we need to exclude them. The simplest way is to note the 153 # lines with the 'class' keyword. 154 self.raw_classdefs.add(slineno) 155 elif toktype == token.OP: 156 if ttext == ':': 157 should_exclude = (elineno in self.raw_excluded) or excluding_decorators 158 if not excluding and should_exclude: 159 # Start excluding a suite. We trigger off of the colon 160 # token so that the #pragma comment will be recognized on 161 # the same line as the colon. 162 self.raw_excluded.add(elineno) 163 exclude_indent = indent 164 excluding = True 165 excluding_decorators = False 166 elif ttext == '@' and first_on_line: 167 # A decorator. 168 if elineno in self.raw_excluded: 169 excluding_decorators = True 170 if excluding_decorators: 171 self.raw_excluded.add(elineno) 172 elif toktype == token.STRING and prev_toktype == token.INDENT: 173 # Strings that are first on an indented line are docstrings. 174 # (a trick from trace.py in the stdlib.) This works for 175 # 99.9999% of cases. For the rest (!) see: 176 # http://stackoverflow.com/questions/1769332/x/1769794#1769794 177 self.raw_docstrings.update(range(slineno, elineno+1)) 178 elif toktype == token.NEWLINE: 179 if first_line is not None and elineno != first_line: 180 # We're at the end of a line, and we've ended on a 181 # different line than the first line of the statement, 182 # so record a multi-line range. 183 for l in range(first_line, elineno+1): 184 self._multiline[l] = first_line 185 first_line = None 186 first_on_line = True 187 188 if ttext.strip() and toktype != tokenize.COMMENT: 189 # A non-whitespace token. 190 empty = False 191 if first_line is None: 192 # The token is not whitespace, and is the first in a 193 # statement. 194 first_line = slineno 195 # Check whether to end an excluded suite. 196 if excluding and indent <= exclude_indent: 197 excluding = False 198 if excluding: 199 self.raw_excluded.add(elineno) 200 first_on_line = False 201 202 prev_toktype = toktype 203 204 # Find the starts of the executable statements. 205 if not empty: 206 self.raw_statements.update(self.byte_parser._find_statements()) 207 208 def first_line(self, line): 209 """Return the first line number of the statement including `line`.""" 210 if line < 0: 211 line = -self._multiline.get(-line, -line) 212 else: 213 line = self._multiline.get(line, line) 214 return line 215 216 def first_lines(self, lines): 217 """Map the line numbers in `lines` to the correct first line of the 218 statement. 219 220 Returns a set of the first lines. 221 222 """ 223 return set(self.first_line(l) for l in lines) 224 225 def translate_lines(self, lines): 226 """Implement `FileReporter.translate_lines`.""" 227 return self.first_lines(lines) 228 229 def translate_arcs(self, arcs): 230 """Implement `FileReporter.translate_arcs`.""" 231 return [(self.first_line(a), self.first_line(b)) for (a, b) in arcs] 232 233 def parse_source(self): 234 """Parse source text to find executable lines, excluded lines, etc. 235 236 Sets the .excluded and .statements attributes, normalized to the first 237 line of multi-line statements. 238 239 """ 240 try: 241 self._raw_parse() 242 except (tokenize.TokenError, IndentationError) as err: 243 if hasattr(err, "lineno"): 244 lineno = err.lineno # IndentationError 245 else: 246 lineno = err.args[1][0] # TokenError 247 raise NotPython( 248 u"Couldn't parse '%s' as Python source: '%s' at line %d" % ( 249 self.filename, err.args[0], lineno 250 ) 251 ) 252 253 self.excluded = self.first_lines(self.raw_excluded) 254 255 ignore = self.excluded | self.raw_docstrings 256 starts = self.raw_statements - ignore 257 self.statements = self.first_lines(starts) - ignore 258 259 def arcs(self): 260 """Get information about the arcs available in the code. 261 262 Returns a set of line number pairs. Line numbers have been normalized 263 to the first line of multi-line statements. 264 265 """ 266 if self._all_arcs is None: 267 self._analyze_ast() 268 return self._all_arcs 269 270 def _analyze_ast(self): 271 """Run the AstArcAnalyzer and save its results. 272 273 `_all_arcs` is the set of arcs in the code. 274 275 """ 276 aaa = AstArcAnalyzer(self.text, self.raw_statements, self._multiline) 277 aaa.analyze() 278 279 self._all_arcs = set() 280 for l1, l2 in aaa.arcs: 281 fl1 = self.first_line(l1) 282 fl2 = self.first_line(l2) 283 if fl1 != fl2: 284 self._all_arcs.add((fl1, fl2)) 285 286 self._missing_arc_fragments = aaa.missing_arc_fragments 287 288 def exit_counts(self): 289 """Get a count of exits from that each line. 290 291 Excluded lines are excluded. 292 293 """ 294 exit_counts = collections.defaultdict(int) 295 for l1, l2 in self.arcs(): 296 if l1 < 0: 297 # Don't ever report -1 as a line number 298 continue 299 if l1 in self.excluded: 300 # Don't report excluded lines as line numbers. 301 continue 302 if l2 in self.excluded: 303 # Arcs to excluded lines shouldn't count. 304 continue 305 exit_counts[l1] += 1 306 307 # Class definitions have one extra exit, so remove one for each: 308 for l in self.raw_classdefs: 309 # Ensure key is there: class definitions can include excluded lines. 310 if l in exit_counts: 311 exit_counts[l] -= 1 312 313 return exit_counts 314 315 def missing_arc_description(self, start, end, executed_arcs=None): 316 """Provide an English sentence describing a missing arc.""" 317 if self._missing_arc_fragments is None: 318 self._analyze_ast() 319 320 actual_start = start 321 322 if ( 323 executed_arcs and 324 end < 0 and end == -start and 325 (end, start) not in executed_arcs and 326 (end, start) in self._missing_arc_fragments 327 ): 328 # It's a one-line callable, and we never even started it, 329 # and we have a message about not starting it. 330 start, end = end, start 331 332 fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)]) 333 334 msgs = [] 335 for fragment_pair in fragment_pairs: 336 smsg, emsg = fragment_pair 337 338 if emsg is None: 339 if end < 0: 340 # Hmm, maybe we have a one-line callable, let's check. 341 if (-end, end) in self._missing_arc_fragments: 342 return self.missing_arc_description(-end, end) 343 emsg = "didn't jump to the function exit" 344 else: 345 emsg = "didn't jump to line {lineno}" 346 emsg = emsg.format(lineno=end) 347 348 msg = "line {start} {emsg}".format(start=actual_start, emsg=emsg) 349 if smsg is not None: 350 msg += ", because {smsg}".format(smsg=smsg.format(lineno=actual_start)) 351 352 msgs.append(msg) 353 354 return " or ".join(msgs) 355 356 357class ByteParser(object): 358 """Parse bytecode to understand the structure of code.""" 359 360 @contract(text='unicode') 361 def __init__(self, text, code=None, filename=None): 362 self.text = text 363 if code: 364 self.code = code 365 else: 366 try: 367 self.code = compile_unicode(text, filename, "exec") 368 except SyntaxError as synerr: 369 raise NotPython( 370 u"Couldn't parse '%s' as Python source: '%s' at line %d" % ( 371 filename, synerr.msg, synerr.lineno 372 ) 373 ) 374 375 # Alternative Python implementations don't always provide all the 376 # attributes on code objects that we need to do the analysis. 377 for attr in ['co_lnotab', 'co_firstlineno']: 378 if not hasattr(self.code, attr): 379 raise StopEverything( # pragma: only jython 380 "This implementation of Python doesn't support code analysis.\n" 381 "Run coverage.py under another Python for this command." 382 ) 383 384 def child_parsers(self): 385 """Iterate over all the code objects nested within this one. 386 387 The iteration includes `self` as its first value. 388 389 """ 390 return (ByteParser(self.text, code=c) for c in code_objects(self.code)) 391 392 def _bytes_lines(self): 393 """Map byte offsets to line numbers in `code`. 394 395 Uses co_lnotab described in Python/compile.c to map byte offsets to 396 line numbers. Produces a sequence: (b0, l0), (b1, l1), ... 397 398 Only byte offsets that correspond to line numbers are included in the 399 results. 400 401 """ 402 # Adapted from dis.py in the standard library. 403 byte_increments = bytes_to_ints(self.code.co_lnotab[0::2]) 404 line_increments = bytes_to_ints(self.code.co_lnotab[1::2]) 405 406 last_line_num = None 407 line_num = self.code.co_firstlineno 408 byte_num = 0 409 for byte_incr, line_incr in zip(byte_increments, line_increments): 410 if byte_incr: 411 if line_num != last_line_num: 412 yield (byte_num, line_num) 413 last_line_num = line_num 414 byte_num += byte_incr 415 if env.PYBEHAVIOR.negative_lnotab and line_incr >= 0x80: 416 line_incr -= 0x100 417 line_num += line_incr 418 if line_num != last_line_num: 419 yield (byte_num, line_num) 420 421 def _find_statements(self): 422 """Find the statements in `self.code`. 423 424 Produce a sequence of line numbers that start statements. Recurses 425 into all code objects reachable from `self.code`. 426 427 """ 428 for bp in self.child_parsers(): 429 # Get all of the lineno information from this code. 430 for _, l in bp._bytes_lines(): 431 yield l 432 433 434# 435# AST analysis 436# 437 438class LoopBlock(object): 439 """A block on the block stack representing a `for` or `while` loop.""" 440 @contract(start=int) 441 def __init__(self, start): 442 # The line number where the loop starts. 443 self.start = start 444 # A set of ArcStarts, the arcs from break statements exiting this loop. 445 self.break_exits = set() 446 447 448class FunctionBlock(object): 449 """A block on the block stack representing a function definition.""" 450 @contract(start=int, name=str) 451 def __init__(self, start, name): 452 # The line number where the function starts. 453 self.start = start 454 # The name of the function. 455 self.name = name 456 457 458class TryBlock(object): 459 """A block on the block stack representing a `try` block.""" 460 @contract(handler_start='int|None', final_start='int|None') 461 def __init__(self, handler_start, final_start): 462 # The line number of the first "except" handler, if any. 463 self.handler_start = handler_start 464 # The line number of the "finally:" clause, if any. 465 self.final_start = final_start 466 467 # The ArcStarts for breaks/continues/returns/raises inside the "try:" 468 # that need to route through the "finally:" clause. 469 self.break_from = set() 470 self.continue_from = set() 471 self.return_from = set() 472 self.raise_from = set() 473 474 475class ArcStart(collections.namedtuple("Arc", "lineno, cause")): 476 """The information needed to start an arc. 477 478 `lineno` is the line number the arc starts from. 479 480 `cause` is an English text fragment used as the `startmsg` for 481 AstArcAnalyzer.missing_arc_fragments. It will be used to describe why an 482 arc wasn't executed, so should fit well into a sentence of the form, 483 "Line 17 didn't run because {cause}." The fragment can include "{lineno}" 484 to have `lineno` interpolated into it. 485 486 """ 487 def __new__(cls, lineno, cause=None): 488 return super(ArcStart, cls).__new__(cls, lineno, cause) 489 490 491# Define contract words that PyContract doesn't have. 492# ArcStarts is for a list or set of ArcStart's. 493new_contract('ArcStarts', lambda seq: all(isinstance(x, ArcStart) for x in seq)) 494 495 496# Turn on AST dumps with an environment variable. 497# $set_env.py: COVERAGE_AST_DUMP - Dump the AST nodes when parsing code. 498AST_DUMP = bool(int(os.environ.get("COVERAGE_AST_DUMP", 0))) 499 500class NodeList(object): 501 """A synthetic fictitious node, containing a sequence of nodes. 502 503 This is used when collapsing optimized if-statements, to represent the 504 unconditional execution of one of the clauses. 505 506 """ 507 def __init__(self, body): 508 self.body = body 509 self.lineno = body[0].lineno 510 511 512# TODO: some add_arcs methods here don't add arcs, they return them. Rename them. 513# TODO: the cause messages have too many commas. 514# TODO: Shouldn't the cause messages join with "and" instead of "or"? 515 516class AstArcAnalyzer(object): 517 """Analyze source text with an AST to find executable code paths.""" 518 519 @contract(text='unicode', statements=set) 520 def __init__(self, text, statements, multiline): 521 self.root_node = ast.parse(neuter_encoding_declaration(text)) 522 # TODO: I think this is happening in too many places. 523 self.statements = set(multiline.get(l, l) for l in statements) 524 self.multiline = multiline 525 526 if AST_DUMP: # pragma: debugging 527 # Dump the AST so that failing tests have helpful output. 528 print("Statements: {}".format(self.statements)) 529 print("Multiline map: {}".format(self.multiline)) 530 ast_dump(self.root_node) 531 532 self.arcs = set() 533 534 # A map from arc pairs to a list of pairs of sentence fragments: 535 # { (start, end): [(startmsg, endmsg), ...], } 536 # 537 # For an arc from line 17, they should be usable like: 538 # "Line 17 {endmsg}, because {startmsg}" 539 self.missing_arc_fragments = collections.defaultdict(list) 540 self.block_stack = [] 541 542 # $set_env.py: COVERAGE_TRACK_ARCS - Trace every arc added while parsing code. 543 self.debug = bool(int(os.environ.get("COVERAGE_TRACK_ARCS", 0))) 544 545 def analyze(self): 546 """Examine the AST tree from `root_node` to determine possible arcs. 547 548 This sets the `arcs` attribute to be a set of (from, to) line number 549 pairs. 550 551 """ 552 for node in ast.walk(self.root_node): 553 node_name = node.__class__.__name__ 554 code_object_handler = getattr(self, "_code_object__" + node_name, None) 555 if code_object_handler is not None: 556 code_object_handler(node) 557 558 @contract(start=int, end=int) 559 def add_arc(self, start, end, smsg=None, emsg=None): 560 """Add an arc, including message fragments to use if it is missing.""" 561 if self.debug: # pragma: debugging 562 print("\nAdding arc: ({}, {}): {!r}, {!r}".format(start, end, smsg, emsg)) 563 print(short_stack(limit=6)) 564 self.arcs.add((start, end)) 565 566 if smsg is not None or emsg is not None: 567 self.missing_arc_fragments[(start, end)].append((smsg, emsg)) 568 569 def nearest_blocks(self): 570 """Yield the blocks in nearest-to-farthest order.""" 571 return reversed(self.block_stack) 572 573 @contract(returns=int) 574 def line_for_node(self, node): 575 """What is the right line number to use for this node? 576 577 This dispatches to _line__Node functions where needed. 578 579 """ 580 node_name = node.__class__.__name__ 581 handler = getattr(self, "_line__" + node_name, None) 582 if handler is not None: 583 return handler(node) 584 else: 585 return node.lineno 586 587 def _line_decorated(self, node): 588 """Compute first line number for things that can be decorated (classes and functions).""" 589 lineno = node.lineno 590 if env.PYBEHAVIOR.trace_decorated_def: 591 if node.decorator_list: 592 lineno = node.decorator_list[0].lineno 593 return lineno 594 595 def _line__Assign(self, node): 596 return self.line_for_node(node.value) 597 598 _line__ClassDef = _line_decorated 599 600 def _line__Dict(self, node): 601 # Python 3.5 changed how dict literals are made. 602 if env.PYVERSION >= (3, 5) and node.keys: 603 if node.keys[0] is not None: 604 return node.keys[0].lineno 605 else: 606 # Unpacked dict literals `{**{'a':1}}` have None as the key, 607 # use the value in that case. 608 return node.values[0].lineno 609 else: 610 return node.lineno 611 612 _line__FunctionDef = _line_decorated 613 _line__AsyncFunctionDef = _line_decorated 614 615 def _line__List(self, node): 616 if node.elts: 617 return self.line_for_node(node.elts[0]) 618 else: 619 return node.lineno 620 621 def _line__Module(self, node): 622 if node.body: 623 return self.line_for_node(node.body[0]) 624 else: 625 # Empty modules have no line number, they always start at 1. 626 return 1 627 628 # The node types that just flow to the next node with no complications. 629 OK_TO_DEFAULT = set([ 630 "Assign", "Assert", "AugAssign", "Delete", "Exec", "Expr", "Global", 631 "Import", "ImportFrom", "Nonlocal", "Pass", "Print", 632 ]) 633 634 @contract(returns='ArcStarts') 635 def add_arcs(self, node): 636 """Add the arcs for `node`. 637 638 Return a set of ArcStarts, exits from this node to the next. Because a 639 node represents an entire sub-tree (including its children), the exits 640 from a node can be arbitrarily complex:: 641 642 if something(1): 643 if other(2): 644 doit(3) 645 else: 646 doit(5) 647 648 There are two exits from line 1: they start at line 3 and line 5. 649 650 """ 651 node_name = node.__class__.__name__ 652 handler = getattr(self, "_handle__" + node_name, None) 653 if handler is not None: 654 return handler(node) 655 else: 656 # No handler: either it's something that's ok to default (a simple 657 # statement), or it's something we overlooked. Change this 0 to 1 658 # to see if it's overlooked. 659 if 0: 660 if node_name not in self.OK_TO_DEFAULT: 661 print("*** Unhandled: {}".format(node)) 662 663 # Default for simple statements: one exit from this node. 664 return set([ArcStart(self.line_for_node(node))]) 665 666 @one_of("from_start, prev_starts") 667 @contract(returns='ArcStarts') 668 def add_body_arcs(self, body, from_start=None, prev_starts=None): 669 """Add arcs for the body of a compound statement. 670 671 `body` is the body node. `from_start` is a single `ArcStart` that can 672 be the previous line in flow before this body. `prev_starts` is a set 673 of ArcStarts that can be the previous line. Only one of them should be 674 given. 675 676 Returns a set of ArcStarts, the exits from this body. 677 678 """ 679 if prev_starts is None: 680 prev_starts = set([from_start]) 681 for body_node in body: 682 lineno = self.line_for_node(body_node) 683 first_line = self.multiline.get(lineno, lineno) 684 if first_line not in self.statements: 685 body_node = self.find_non_missing_node(body_node) 686 if body_node is None: 687 continue 688 lineno = self.line_for_node(body_node) 689 for prev_start in prev_starts: 690 self.add_arc(prev_start.lineno, lineno, prev_start.cause) 691 prev_starts = self.add_arcs(body_node) 692 return prev_starts 693 694 def find_non_missing_node(self, node): 695 """Search `node` looking for a child that has not been optimized away. 696 697 This might return the node you started with, or it will work recursively 698 to find a child node in self.statements. 699 700 Returns a node, or None if none of the node remains. 701 702 """ 703 # This repeats work just done in add_body_arcs, but this duplication 704 # means we can avoid a function call in the 99.9999% case of not 705 # optimizing away statements. 706 lineno = self.line_for_node(node) 707 first_line = self.multiline.get(lineno, lineno) 708 if first_line in self.statements: 709 return node 710 711 missing_fn = getattr(self, "_missing__" + node.__class__.__name__, None) 712 if missing_fn: 713 node = missing_fn(node) 714 else: 715 node = None 716 return node 717 718 # Missing nodes: _missing__* 719 # 720 # Entire statements can be optimized away by Python. They will appear in 721 # the AST, but not the bytecode. These functions are called (by 722 # find_non_missing_node) to find a node to use instead of the missing 723 # node. They can return None if the node should truly be gone. 724 725 def _missing__If(self, node): 726 # If the if-node is missing, then one of its children might still be 727 # here, but not both. So return the first of the two that isn't missing. 728 # Use a NodeList to hold the clauses as a single node. 729 non_missing = self.find_non_missing_node(NodeList(node.body)) 730 if non_missing: 731 return non_missing 732 if node.orelse: 733 return self.find_non_missing_node(NodeList(node.orelse)) 734 return None 735 736 def _missing__NodeList(self, node): 737 # A NodeList might be a mixture of missing and present nodes. Find the 738 # ones that are present. 739 non_missing_children = [] 740 for child in node.body: 741 child = self.find_non_missing_node(child) 742 if child is not None: 743 non_missing_children.append(child) 744 745 # Return the simplest representation of the present children. 746 if not non_missing_children: 747 return None 748 if len(non_missing_children) == 1: 749 return non_missing_children[0] 750 return NodeList(non_missing_children) 751 752 def _missing__While(self, node): 753 body_nodes = self.find_non_missing_node(NodeList(node.body)) 754 if not body_nodes: 755 return None 756 # Make a synthetic While-true node. 757 new_while = ast.While() 758 new_while.lineno = body_nodes.lineno 759 new_while.test = ast.Name() 760 new_while.test.lineno = body_nodes.lineno 761 new_while.test.id = "True" 762 new_while.body = body_nodes.body 763 new_while.orelse = None 764 return new_while 765 766 def is_constant_expr(self, node): 767 """Is this a compile-time constant?""" 768 node_name = node.__class__.__name__ 769 if node_name in ["Constant", "NameConstant", "Num"]: 770 return "Num" 771 elif node_name == "Name": 772 if node.id in ["True", "False", "None", "__debug__"]: 773 return "Name" 774 return None 775 776 # In the fullness of time, these might be good tests to write: 777 # while EXPR: 778 # while False: 779 # listcomps hidden deep in other expressions 780 # listcomps hidden in lists: x = [[i for i in range(10)]] 781 # nested function definitions 782 783 784 # Exit processing: process_*_exits 785 # 786 # These functions process the four kinds of jump exits: break, continue, 787 # raise, and return. To figure out where an exit goes, we have to look at 788 # the block stack context. For example, a break will jump to the nearest 789 # enclosing loop block, or the nearest enclosing finally block, whichever 790 # is nearer. 791 792 @contract(exits='ArcStarts') 793 def process_break_exits(self, exits): 794 """Add arcs due to jumps from `exits` being breaks.""" 795 for block in self.nearest_blocks(): 796 if isinstance(block, LoopBlock): 797 block.break_exits.update(exits) 798 break 799 elif isinstance(block, TryBlock) and block.final_start is not None: 800 block.break_from.update(exits) 801 break 802 803 @contract(exits='ArcStarts') 804 def process_continue_exits(self, exits): 805 """Add arcs due to jumps from `exits` being continues.""" 806 for block in self.nearest_blocks(): 807 if isinstance(block, LoopBlock): 808 for xit in exits: 809 self.add_arc(xit.lineno, block.start, xit.cause) 810 break 811 elif isinstance(block, TryBlock) and block.final_start is not None: 812 block.continue_from.update(exits) 813 break 814 815 @contract(exits='ArcStarts') 816 def process_raise_exits(self, exits): 817 """Add arcs due to jumps from `exits` being raises.""" 818 for block in self.nearest_blocks(): 819 if isinstance(block, TryBlock): 820 if block.handler_start is not None: 821 for xit in exits: 822 self.add_arc(xit.lineno, block.handler_start, xit.cause) 823 break 824 elif block.final_start is not None: 825 block.raise_from.update(exits) 826 break 827 elif isinstance(block, FunctionBlock): 828 for xit in exits: 829 self.add_arc( 830 xit.lineno, -block.start, xit.cause, 831 "didn't except from function {!r}".format(block.name), 832 ) 833 break 834 835 @contract(exits='ArcStarts') 836 def process_return_exits(self, exits): 837 """Add arcs due to jumps from `exits` being returns.""" 838 for block in self.nearest_blocks(): 839 if isinstance(block, TryBlock) and block.final_start is not None: 840 block.return_from.update(exits) 841 break 842 elif isinstance(block, FunctionBlock): 843 for xit in exits: 844 self.add_arc( 845 xit.lineno, -block.start, xit.cause, 846 "didn't return from function {!r}".format(block.name), 847 ) 848 break 849 850 851 # Handlers: _handle__* 852 # 853 # Each handler deals with a specific AST node type, dispatched from 854 # add_arcs. Handlers return the set of exits from that node, and can 855 # also call self.add_arc to record arcs they find. These functions mirror 856 # the Python semantics of each syntactic construct. See the docstring 857 # for add_arcs to understand the concept of exits from a node. 858 859 @contract(returns='ArcStarts') 860 def _handle__Break(self, node): 861 here = self.line_for_node(node) 862 break_start = ArcStart(here, cause="the break on line {lineno} wasn't executed") 863 self.process_break_exits([break_start]) 864 return set() 865 866 @contract(returns='ArcStarts') 867 def _handle_decorated(self, node): 868 """Add arcs for things that can be decorated (classes and functions).""" 869 main_line = last = node.lineno 870 if node.decorator_list: 871 if env.PYBEHAVIOR.trace_decorated_def: 872 last = None 873 for dec_node in node.decorator_list: 874 dec_start = self.line_for_node(dec_node) 875 if last is not None and dec_start != last: 876 self.add_arc(last, dec_start) 877 last = dec_start 878 if env.PYBEHAVIOR.trace_decorated_def: 879 self.add_arc(last, main_line) 880 last = main_line 881 # The definition line may have been missed, but we should have it 882 # in `self.statements`. For some constructs, `line_for_node` is 883 # not what we'd think of as the first line in the statement, so map 884 # it to the first one. 885 if node.body: 886 body_start = self.line_for_node(node.body[0]) 887 body_start = self.multiline.get(body_start, body_start) 888 for lineno in range(last+1, body_start): 889 if lineno in self.statements: 890 self.add_arc(last, lineno) 891 last = lineno 892 # The body is handled in collect_arcs. 893 return set([ArcStart(last)]) 894 895 _handle__ClassDef = _handle_decorated 896 897 @contract(returns='ArcStarts') 898 def _handle__Continue(self, node): 899 here = self.line_for_node(node) 900 continue_start = ArcStart(here, cause="the continue on line {lineno} wasn't executed") 901 self.process_continue_exits([continue_start]) 902 return set() 903 904 @contract(returns='ArcStarts') 905 def _handle__For(self, node): 906 start = self.line_for_node(node.iter) 907 self.block_stack.append(LoopBlock(start=start)) 908 from_start = ArcStart(start, cause="the loop on line {lineno} never started") 909 exits = self.add_body_arcs(node.body, from_start=from_start) 910 # Any exit from the body will go back to the top of the loop. 911 for xit in exits: 912 self.add_arc(xit.lineno, start, xit.cause) 913 my_block = self.block_stack.pop() 914 exits = my_block.break_exits 915 from_start = ArcStart(start, cause="the loop on line {lineno} didn't complete") 916 if node.orelse: 917 else_exits = self.add_body_arcs(node.orelse, from_start=from_start) 918 exits |= else_exits 919 else: 920 # No else clause: exit from the for line. 921 exits.add(from_start) 922 return exits 923 924 _handle__AsyncFor = _handle__For 925 926 _handle__FunctionDef = _handle_decorated 927 _handle__AsyncFunctionDef = _handle_decorated 928 929 @contract(returns='ArcStarts') 930 def _handle__If(self, node): 931 start = self.line_for_node(node.test) 932 from_start = ArcStart(start, cause="the condition on line {lineno} was never true") 933 exits = self.add_body_arcs(node.body, from_start=from_start) 934 from_start = ArcStart(start, cause="the condition on line {lineno} was never false") 935 exits |= self.add_body_arcs(node.orelse, from_start=from_start) 936 return exits 937 938 @contract(returns='ArcStarts') 939 def _handle__NodeList(self, node): 940 start = self.line_for_node(node) 941 exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) 942 return exits 943 944 @contract(returns='ArcStarts') 945 def _handle__Raise(self, node): 946 here = self.line_for_node(node) 947 raise_start = ArcStart(here, cause="the raise on line {lineno} wasn't executed") 948 self.process_raise_exits([raise_start]) 949 # `raise` statement jumps away, no exits from here. 950 return set() 951 952 @contract(returns='ArcStarts') 953 def _handle__Return(self, node): 954 here = self.line_for_node(node) 955 return_start = ArcStart(here, cause="the return on line {lineno} wasn't executed") 956 self.process_return_exits([return_start]) 957 # `return` statement jumps away, no exits from here. 958 return set() 959 960 @contract(returns='ArcStarts') 961 def _handle__Try(self, node): 962 if node.handlers: 963 handler_start = self.line_for_node(node.handlers[0]) 964 else: 965 handler_start = None 966 967 if node.finalbody: 968 final_start = self.line_for_node(node.finalbody[0]) 969 else: 970 final_start = None 971 972 try_block = TryBlock(handler_start, final_start) 973 self.block_stack.append(try_block) 974 975 start = self.line_for_node(node) 976 exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) 977 978 # We're done with the `try` body, so this block no longer handles 979 # exceptions. We keep the block so the `finally` clause can pick up 980 # flows from the handlers and `else` clause. 981 if node.finalbody: 982 try_block.handler_start = None 983 if node.handlers: 984 # If there are `except` clauses, then raises in the try body 985 # will already jump to them. Start this set over for raises in 986 # `except` and `else`. 987 try_block.raise_from = set([]) 988 else: 989 self.block_stack.pop() 990 991 handler_exits = set() 992 993 if node.handlers: 994 last_handler_start = None 995 for handler_node in node.handlers: 996 handler_start = self.line_for_node(handler_node) 997 if last_handler_start is not None: 998 self.add_arc(last_handler_start, handler_start) 999 last_handler_start = handler_start 1000 from_cause = "the exception caught by line {lineno} didn't happen" 1001 from_start = ArcStart(handler_start, cause=from_cause) 1002 handler_exits |= self.add_body_arcs(handler_node.body, from_start=from_start) 1003 1004 if node.orelse: 1005 exits = self.add_body_arcs(node.orelse, prev_starts=exits) 1006 1007 exits |= handler_exits 1008 1009 if node.finalbody: 1010 self.block_stack.pop() 1011 final_from = ( # You can get to the `finally` clause from: 1012 exits | # the exits of the body or `else` clause, 1013 try_block.break_from | # or a `break`, 1014 try_block.continue_from | # or a `continue`, 1015 try_block.raise_from | # or a `raise`, 1016 try_block.return_from # or a `return`. 1017 ) 1018 1019 final_exits = self.add_body_arcs(node.finalbody, prev_starts=final_from) 1020 1021 if try_block.break_from: 1022 if env.PYBEHAVIOR.finally_jumps_back: 1023 for break_line in try_block.break_from: 1024 lineno = break_line.lineno 1025 cause = break_line.cause.format(lineno=lineno) 1026 for final_exit in final_exits: 1027 self.add_arc(final_exit.lineno, lineno, cause) 1028 breaks = try_block.break_from 1029 else: 1030 breaks = self._combine_finally_starts(try_block.break_from, final_exits) 1031 self.process_break_exits(breaks) 1032 1033 if try_block.continue_from: 1034 if env.PYBEHAVIOR.finally_jumps_back: 1035 for continue_line in try_block.continue_from: 1036 lineno = continue_line.lineno 1037 cause = continue_line.cause.format(lineno=lineno) 1038 for final_exit in final_exits: 1039 self.add_arc(final_exit.lineno, lineno, cause) 1040 continues = try_block.continue_from 1041 else: 1042 continues = self._combine_finally_starts(try_block.continue_from, final_exits) 1043 self.process_continue_exits(continues) 1044 1045 if try_block.raise_from: 1046 self.process_raise_exits( 1047 self._combine_finally_starts(try_block.raise_from, final_exits) 1048 ) 1049 1050 if try_block.return_from: 1051 if env.PYBEHAVIOR.finally_jumps_back: 1052 for return_line in try_block.return_from: 1053 lineno = return_line.lineno 1054 cause = return_line.cause.format(lineno=lineno) 1055 for final_exit in final_exits: 1056 self.add_arc(final_exit.lineno, lineno, cause) 1057 returns = try_block.return_from 1058 else: 1059 returns = self._combine_finally_starts(try_block.return_from, final_exits) 1060 self.process_return_exits(returns) 1061 1062 if exits: 1063 # The finally clause's exits are only exits for the try block 1064 # as a whole if the try block had some exits to begin with. 1065 exits = final_exits 1066 1067 return exits 1068 1069 @contract(starts='ArcStarts', exits='ArcStarts', returns='ArcStarts') 1070 def _combine_finally_starts(self, starts, exits): 1071 """Helper for building the cause of `finally` branches. 1072 1073 "finally" clauses might not execute their exits, and the causes could 1074 be due to a failure to execute any of the exits in the try block. So 1075 we use the causes from `starts` as the causes for `exits`. 1076 """ 1077 causes = [] 1078 for start in sorted(starts): 1079 if start.cause is not None: 1080 causes.append(start.cause.format(lineno=start.lineno)) 1081 cause = " or ".join(causes) 1082 exits = set(ArcStart(xit.lineno, cause) for xit in exits) 1083 return exits 1084 1085 @contract(returns='ArcStarts') 1086 def _handle__TryExcept(self, node): 1087 # Python 2.7 uses separate TryExcept and TryFinally nodes. If we get 1088 # TryExcept, it means there was no finally, so fake it, and treat as 1089 # a general Try node. 1090 node.finalbody = [] 1091 return self._handle__Try(node) 1092 1093 @contract(returns='ArcStarts') 1094 def _handle__TryFinally(self, node): 1095 # Python 2.7 uses separate TryExcept and TryFinally nodes. If we get 1096 # TryFinally, see if there's a TryExcept nested inside. If so, merge 1097 # them. Otherwise, fake fields to complete a Try node. 1098 node.handlers = [] 1099 node.orelse = [] 1100 1101 first = node.body[0] 1102 if first.__class__.__name__ == "TryExcept" and node.lineno == first.lineno: 1103 assert len(node.body) == 1 1104 node.body = first.body 1105 node.handlers = first.handlers 1106 node.orelse = first.orelse 1107 1108 return self._handle__Try(node) 1109 1110 @contract(returns='ArcStarts') 1111 def _handle__While(self, node): 1112 constant_test = self.is_constant_expr(node.test) 1113 start = to_top = self.line_for_node(node.test) 1114 if constant_test and (env.PY3 or constant_test == "Num"): 1115 to_top = self.line_for_node(node.body[0]) 1116 self.block_stack.append(LoopBlock(start=to_top)) 1117 from_start = ArcStart(start, cause="the condition on line {lineno} was never true") 1118 exits = self.add_body_arcs(node.body, from_start=from_start) 1119 for xit in exits: 1120 self.add_arc(xit.lineno, to_top, xit.cause) 1121 exits = set() 1122 my_block = self.block_stack.pop() 1123 exits.update(my_block.break_exits) 1124 from_start = ArcStart(start, cause="the condition on line {lineno} was never false") 1125 if node.orelse: 1126 else_exits = self.add_body_arcs(node.orelse, from_start=from_start) 1127 exits |= else_exits 1128 else: 1129 # No `else` clause: you can exit from the start. 1130 if not constant_test: 1131 exits.add(from_start) 1132 return exits 1133 1134 @contract(returns='ArcStarts') 1135 def _handle__With(self, node): 1136 start = self.line_for_node(node) 1137 exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) 1138 return exits 1139 1140 _handle__AsyncWith = _handle__With 1141 1142 def _code_object__Module(self, node): 1143 start = self.line_for_node(node) 1144 if node.body: 1145 exits = self.add_body_arcs(node.body, from_start=ArcStart(-start)) 1146 for xit in exits: 1147 self.add_arc(xit.lineno, -start, xit.cause, "didn't exit the module") 1148 else: 1149 # Empty module. 1150 self.add_arc(-start, start) 1151 self.add_arc(start, -start) 1152 1153 def _code_object__FunctionDef(self, node): 1154 start = self.line_for_node(node) 1155 self.block_stack.append(FunctionBlock(start=start, name=node.name)) 1156 exits = self.add_body_arcs(node.body, from_start=ArcStart(-start)) 1157 self.process_return_exits(exits) 1158 self.block_stack.pop() 1159 1160 _code_object__AsyncFunctionDef = _code_object__FunctionDef 1161 1162 def _code_object__ClassDef(self, node): 1163 start = self.line_for_node(node) 1164 self.add_arc(-start, start) 1165 exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) 1166 for xit in exits: 1167 self.add_arc( 1168 xit.lineno, -start, xit.cause, 1169 "didn't exit the body of class {!r}".format(node.name), 1170 ) 1171 1172 def _make_oneline_code_method(noun): # pylint: disable=no-self-argument 1173 """A function to make methods for online callable _code_object__ methods.""" 1174 def _code_object__oneline_callable(self, node): 1175 start = self.line_for_node(node) 1176 self.add_arc(-start, start, None, "didn't run the {} on line {}".format(noun, start)) 1177 self.add_arc( 1178 start, -start, None, 1179 "didn't finish the {} on line {}".format(noun, start), 1180 ) 1181 return _code_object__oneline_callable 1182 1183 _code_object__Lambda = _make_oneline_code_method("lambda") 1184 _code_object__GeneratorExp = _make_oneline_code_method("generator expression") 1185 _code_object__DictComp = _make_oneline_code_method("dictionary comprehension") 1186 _code_object__SetComp = _make_oneline_code_method("set comprehension") 1187 if env.PY3: 1188 _code_object__ListComp = _make_oneline_code_method("list comprehension") 1189 1190 1191if AST_DUMP: # pragma: debugging 1192 # Code only used when dumping the AST for debugging. 1193 1194 SKIP_DUMP_FIELDS = ["ctx"] 1195 1196 def _is_simple_value(value): 1197 """Is `value` simple enough to be displayed on a single line?""" 1198 return ( 1199 value in [None, [], (), {}, set()] or 1200 isinstance(value, (string_class, int, float)) 1201 ) 1202 1203 def ast_dump(node, depth=0): 1204 """Dump the AST for `node`. 1205 1206 This recursively walks the AST, printing a readable version. 1207 1208 """ 1209 indent = " " * depth 1210 if not isinstance(node, ast.AST): 1211 print("{}<{} {!r}>".format(indent, node.__class__.__name__, node)) 1212 return 1213 1214 lineno = getattr(node, "lineno", None) 1215 if lineno is not None: 1216 linemark = " @ {}".format(node.lineno) 1217 else: 1218 linemark = "" 1219 head = "{}<{}{}".format(indent, node.__class__.__name__, linemark) 1220 1221 named_fields = [ 1222 (name, value) 1223 for name, value in ast.iter_fields(node) 1224 if name not in SKIP_DUMP_FIELDS 1225 ] 1226 if not named_fields: 1227 print("{}>".format(head)) 1228 elif len(named_fields) == 1 and _is_simple_value(named_fields[0][1]): 1229 field_name, value = named_fields[0] 1230 print("{} {}: {!r}>".format(head, field_name, value)) 1231 else: 1232 print(head) 1233 if 0: 1234 print("{}# mro: {}".format( 1235 indent, ", ".join(c.__name__ for c in node.__class__.__mro__[1:]), 1236 )) 1237 next_indent = indent + " " 1238 for field_name, value in named_fields: 1239 prefix = "{}{}:".format(next_indent, field_name) 1240 if _is_simple_value(value): 1241 print("{} {!r}".format(prefix, value)) 1242 elif isinstance(value, list): 1243 print("{} [".format(prefix)) 1244 for n in value: 1245 ast_dump(n, depth + 8) 1246 print("{}]".format(next_indent)) 1247 else: 1248 print(prefix) 1249 ast_dump(value, depth + 8) 1250 1251 print("{}>".format(indent)) 1252