import ast
from fnmatch import fnmatch, fnmatchcase
from pathlib import Path
import pkgutil
import re
import string
import sys

from vulture import lines
from vulture import noqa
from vulture import utils
from vulture.config import make_config


# Confidence (in percent) assigned to an item when the caller gives none.
DEFAULT_CONFIDENCE = 60

# Names that are never reported as unused variables nor recorded as used.
IGNORED_VARIABLE_NAMES = {"object", "self"}

# Maps each item type to the code that is passed to noqa.ignore_line().
ERROR_CODES = {
    "attribute": "V101",
    "class": "V102",
    "function": "V103",
    "import": "V104",
    "method": "V105",
    "property": "V106",
    "variable": "V107",
    "unreachable_code": "V201",
}


def _get_unused_items(defined_items, used_names):
    """
    Return the distinct items whose name is not in used_names, sorted
    case-insensitively by name.
    """
    unused_items = [
        item for item in set(defined_items) if item.name not in used_names
    ]
    unused_items.sort(key=lambda item: item.name.lower())
    return unused_items


def _is_special_name(name):
    """Return True for "dunder" names such as __init__."""
    return name.startswith("__") and name.endswith("__")


def _match(name, patterns, case=True):
    """
    Return True if name matches any of the given shell-style patterns.

    With case=True, matching is always case-sensitive (fnmatchcase).
    Otherwise fnmatch is used, which normalizes case according to the
    conventions of the operating system.
    """
    func = fnmatchcase if case else fnmatch
    return any(func(name, pattern) for pattern in patterns)


def _is_test_file(filename):
    """Return True if the resolved path looks like a test file."""
    return _match(
        filename.resolve(),
        ["*/test/*", "*/tests/*", "*/test*.py", "*[-_]test.py"],
        case=False,
    )


def _ignore_class(filename, class_name):
    """Ignore classes in test files whose name contains "Test"."""
    return _is_test_file(filename) and "Test" in class_name


def _ignore_import(filename, import_name):
    """
    Ignore star-imported names since we can't detect whether they are used.
    Ignore imports from __init__.py files since they're commonly used to
    collect objects from a package.
    """
    return filename.name == "__init__.py" or import_name == "*"


def _ignore_function(filename, function_name):
    """Ignore test_* functions in test files."""
    return function_name.startswith("test_") and _is_test_file(filename)


def _ignore_method(filename, method_name):
    """Ignore special (__x__) methods and test_* methods in test files."""
    return _is_special_name(method_name) or (
        method_name.startswith("test_") and _is_test_file(filename)
    )


def _ignore_variable(filename, varname):
    """
    Ignore _ (Python idiom), _x (pylint convention) and
    __x__ (special variable or method), but not __x.
    """
    return (
        varname in IGNORED_VARIABLE_NAMES
        or (varname.startswith("_") and not varname.startswith("__"))
        or _is_special_name(varname)
    )


def _get_str_value(node):
    """
    Return the value of a string-literal AST node, or None if the node is
    not a string literal.

    String literals are ast.Constant nodes since Python 3.8. The old
    ast.Str alias is deprecated and removed in Python 3.14, so it is only
    consulted on interpreters whose parser still produces it.
    """
    if isinstance(node, ast.Constant) and isinstance(node.value, str):
        return node.value
    if sys.version_info < (3, 8) and isinstance(node, ast.Str):
        return node.s
    return None


class Item:
    """
    Hold the name, type and location of defined code.
    """

    __slots__ = (
        "name",
        "typ",
        "filename",
        "first_lineno",
        "last_lineno",
        "message",
        "confidence",
    )

    def __init__(
        self,
        name,
        typ,
        filename,
        first_lineno,
        last_lineno,
        message="",
        confidence=DEFAULT_CONFIDENCE,
    ):
        self.name = name
        self.typ = typ
        self.filename = filename
        self.first_lineno = first_lineno
        self.last_lineno = last_lineno
        # Fall back to a generic message when none (or an empty one) is given.
        self.message = message or f"unused {typ} '{name}'"
        self.confidence = confidence

    @property
    def size(self):
        """Number of lines spanned by the item (inclusive on both ends)."""
        assert self.last_lineno >= self.first_lineno
        return self.last_lineno - self.first_lineno + 1

    def get_report(self, add_size=False):
        """
        Return the one-line report for this item, optionally including the
        number of lines it spans.
        """
        if add_size:
            line_format = "line" if self.size == 1 else "lines"
            size_report = f", {self.size:d} {line_format}"
        else:
            size_report = ""
        return "{}:{:d}: {} ({}% confidence{})".format(
            utils.format_path(self.filename),
            self.first_lineno,
            self.message,
            self.confidence,
            size_report,
        )

    def get_whitelist_string(self):
        """
        Return the line to emit for this item when generating a whitelist.

        Unreachable code yields a pure comment. Attributes, methods and
        properties are prefixed with "_." so that the resulting line is an
        attribute access.
        """
        filename = utils.format_path(self.filename)
        if self.typ == "unreachable_code":
            return f"# {self.message} ({filename}:{self.first_lineno})"
        else:
            prefix = ""
            if self.typ in ["attribute", "method", "property"]:
                prefix = "_."
            return "{}{}  # unused {} ({}:{:d})".format(
                prefix, self.name, self.typ, filename, self.first_lineno
            )

    def _tuple(self):
        """Return the identity key used for equality and hashing."""
        return (self.filename, self.first_lineno, self.name)

    def __repr__(self):
        return repr(self.name)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on other._tuple().
        if not isinstance(other, Item):
            return NotImplemented
        return self._tuple() == other._tuple()

    def __hash__(self):
        return hash(self._tuple())


class Vulture(ast.NodeVisitor):
    """Find dead code."""

    def __init__(
        self, verbose=False, ignore_names=None, ignore_decorators=None
    ):
        """
        Create an empty scanner.

        verbose: print progress and AST details to stdout.
        ignore_names: shell-style patterns of names that are never reported.
        ignore_decorators: shell-style patterns of decorator names; classes
            and functions carrying a matching decorator are never reported.
        """
        self.verbose = verbose

        def get_list(typ):
            return utils.LoggingList(typ, self.verbose)

        self.defined_attrs = get_list("attribute")
        self.defined_classes = get_list("class")
        self.defined_funcs = get_list("function")
        self.defined_imports = get_list("import")
        self.defined_methods = get_list("method")
        self.defined_props = get_list("property")
        self.defined_vars = get_list("variable")
        self.unreachable_code = get_list("unreachable_code")

        self.used_names = utils.LoggingSet("name", self.verbose)

        self.ignore_names = ignore_names or []
        self.ignore_decorators = ignore_decorators or []

        self.filename = Path()
        self.code = []
        self.found_dead_code_or_error = False

    def scan(self, code, filename=""):
        """
        Parse the given source code and record its definitions and usages.

        Syntax errors and invalid source are reported on stderr and flagged
        in found_dead_code_or_error instead of being raised.
        """
        filename = Path(filename)
        self.code = code.splitlines()
        self.noqa_lines = noqa.parse_noqa(self.code)
        self.filename = filename

        def handle_syntax_error(e):
            text = f' at "{e.text.strip()}"' if e.text else ""
            print(
                f"{utils.format_path(filename)}:{e.lineno}: {e.msg}{text}",
                file=sys.stderr,
            )
            self.found_dead_code_or_error = True

        try:
            node = (
                ast.parse(
                    code, filename=str(self.filename), type_comments=True
                )
                if sys.version_info >= (3, 8)  # type_comments requires 3.8+
                else ast.parse(code, filename=str(self.filename))
            )
        except SyntaxError as err:
            handle_syntax_error(err)
        except ValueError as err:
            # ValueError is raised if source contains null bytes.
            print(
                f'{utils.format_path(filename)}: invalid source code "{err}"',
                file=sys.stderr,
            )
            self.found_dead_code_or_error = True
        else:
            # When parsing type comments, visiting can throw SyntaxError.
            try:
                self.visit(node)
            except SyntaxError as err:
                handle_syntax_error(err)

    def scavenge(self, paths, exclude=None):
        """
        Scan all modules found under the given paths, skipping those that
        match an exclude pattern, then scan the bundled whitelists of all
        imported modules.
        """

        def prepare_pattern(pattern):
            # A pattern without wildcards matches anywhere in the path.
            if not any(char in pattern for char in "*?["):
                pattern = f"*{pattern}*"
            return pattern

        exclude = [prepare_pattern(pattern) for pattern in (exclude or [])]

        def exclude_path(path):
            return _match(path, exclude, case=False)

        paths = [Path(path) for path in paths]

        for module in utils.get_modules(paths):
            if exclude_path(module):
                self._log("Excluded:", module)
                continue

            self._log("Scanning:", module)
            try:
                module_string = utils.read_file(module)
            except utils.VultureInputException as err:  # noqa: F841
                print(
                    f"Error: Could not read file {module} - {err}\n"
                    f"Try to change the encoding to UTF-8.",
                    file=sys.stderr,
                )
                self.found_dead_code_or_error = True
            else:
                self.scan(module_string, filename=module)

        unique_imports = {item.name for item in self.defined_imports}
        for import_name in unique_imports:
            path = Path("whitelists") / (import_name + "_whitelist.py")
            if exclude_path(path):
                self._log("Excluded whitelist:", path)
            else:
                try:
                    module_data = pkgutil.get_data("vulture", str(path))
                except OSError:
                    # Most imported modules don't have a whitelist.
                    continue
                if module_data is None:
                    # get_data() returns None if the package can't be
                    # located or its loader doesn't support get_data().
                    continue
                self._log("Included whitelist:", path)
                module_string = module_data.decode("utf-8")
                self.scan(module_string, filename=path)

    def get_unused_code(self, min_confidence=0, sort_by_size=False):
        """
        Return ordered list of unused Item objects.

        Items are ordered by (lower-cased filename, first line number); with
        sort_by_size=True the item size takes precedence. Raise ValueError
        if min_confidence is outside of [0, 100].
        """
        if not 0 <= min_confidence <= 100:
            raise ValueError("min_confidence must be between 0 and 100.")

        def by_name(item):
            return (str(item.filename).lower(), item.first_lineno)

        def by_size(item):
            return (item.size,) + by_name(item)

        unused_code = (
            self.unused_attrs
            + self.unused_classes
            + self.unused_funcs
            + self.unused_imports
            + self.unused_methods
            + self.unused_props
            + self.unused_vars
            + self.unreachable_code
        )

        confidently_unused = [
            obj for obj in unused_code if obj.confidence >= min_confidence
        ]

        return sorted(
            confidently_unused, key=by_size if sort_by_size else by_name
        )

    def report(
        self, min_confidence=0, sort_by_size=False, make_whitelist=False
    ):
        """
        Print ordered list of Item objects to stdout.

        Return True if any item was printed or an error occurred while
        scanning, False otherwise.
        """
        for item in self.get_unused_code(
            min_confidence=min_confidence, sort_by_size=sort_by_size
        ):
            print(
                item.get_whitelist_string()
                if make_whitelist
                else item.get_report(add_size=sort_by_size)
            )
            self.found_dead_code_or_error = True
        return self.found_dead_code_or_error

    @property
    def unused_classes(self):
        return _get_unused_items(self.defined_classes, self.used_names)

    @property
    def unused_funcs(self):
        return _get_unused_items(self.defined_funcs, self.used_names)

    @property
    def unused_imports(self):
        return _get_unused_items(self.defined_imports, self.used_names)

    @property
    def unused_methods(self):
        return _get_unused_items(self.defined_methods, self.used_names)

    @property
    def unused_props(self):
        return _get_unused_items(self.defined_props, self.used_names)

    @property
    def unused_vars(self):
        return _get_unused_items(self.defined_vars, self.used_names)

    @property
    def unused_attrs(self):
        return _get_unused_items(self.defined_attrs, self.used_names)

    def _log(self, *args):
        """Print the arguments, but only in verbose mode."""
        if self.verbose:
            print(*args)

    def _add_aliases(self, node):
        """
        We delegate to this method instead of using visit_alias() to have
        access to line numbers and to filter imports from __future__.
        """
        assert isinstance(node, (ast.Import, ast.ImportFrom))
        for name_and_alias in node.names:
            # Store only top-level module name ("os.path" -> "os").
            # We can't easily detect when "os.path" is used.
            name = name_and_alias.name.partition(".")[0]
            alias = name_and_alias.asname
            self._define(
                self.defined_imports,
                alias or name,
                node,
                confidence=90,
                ignore=_ignore_import,
            )
            if alias is not None:
                # The original name counts as used once it has an alias.
                self.used_names.add(name_and_alias.name)

    def _handle_conditional_node(self, node, name):
        """
        Record unreachable code for if/while/ternary nodes whose condition
        is always false or always true.
        """
        if utils.condition_is_always_false(node.test):
            self._define(
                self.unreachable_code,
                name,
                node,
                # The body of a ternary is a single node, not a list.
                last_node=node.body
                if isinstance(node, ast.IfExp)
                else node.body[-1],
                message=f"unsatisfiable '{name}' condition",
                confidence=100,
            )
        elif utils.condition_is_always_true(node.test):
            else_body = node.orelse
            if name == "ternary":
                self._define(
                    self.unreachable_code,
                    name,
                    else_body,
                    message="unreachable 'else' expression",
                    confidence=100,
                )
            elif else_body:
                self._define(
                    self.unreachable_code,
                    "else",
                    else_body[0],
                    last_node=else_body[-1],
                    message="unreachable 'else' block",
                    confidence=100,
                )
            elif name == "if":
                # Redundant if-condition without else block.
                self._define(
                    self.unreachable_code,
                    name,
                    node,
                    message="redundant if-condition",
                    confidence=100,
                )

    def _define(
        self,
        collection,
        name,
        first_node,
        last_node=None,
        message="",
        confidence=DEFAULT_CONFIDENCE,
        ignore=None,
    ):
        """
        Append an Item for the given name to collection unless it is ignored.

        An item is ignored if the optional ignore(filename, name) callable
        returns a true value, if the name matches one of self.ignore_names
        or if noqa.ignore_line() says so for the item's first line.
        """

        def ignored(lineno):
            return (
                (ignore and ignore(self.filename, name))
                or _match(name, self.ignore_names)
                or noqa.ignore_line(self.noqa_lines, lineno, ERROR_CODES[typ])
            )

        last_node = last_node or first_node
        typ = collection.typ
        first_lineno = lines.get_first_line_number(first_node)

        if ignored(first_lineno):
            self._log(f'Ignoring {typ} "{name}"')
        else:
            last_lineno = lines.get_last_line_number(last_node)
            collection.append(
                Item(
                    name,
                    typ,
                    self.filename,
                    first_lineno,
                    last_lineno,
                    message=message,
                    confidence=confidence,
                )
            )

    def _define_variable(self, name, node, confidence=DEFAULT_CONFIDENCE):
        """Record a variable definition, applying the variable ignore rules."""
        self._define(
            self.defined_vars,
            name,
            node,
            confidence=confidence,
            ignore=_ignore_variable,
        )

    def visit_arg(self, node):
        """Function argument"""
        self._define_variable(node.arg, node, confidence=100)

    def visit_AsyncFunctionDef(self, node):
        return self.visit_FunctionDef(node)

    def visit_Attribute(self, node):
        """Record attribute stores as definitions and loads as usages."""
        if isinstance(node.ctx, ast.Store):
            self._define(self.defined_attrs, node.attr, node)
        elif isinstance(node.ctx, ast.Load):
            self.used_names.add(node.attr)

    def visit_BinOp(self, node):
        """
        Parse variable names in old format strings:

        "%(my_var)s" % locals()
        """
        if isinstance(node.op, ast.Mod) and self._is_locals_call(node.right):
            format_string = _get_str_value(node.left)
            if format_string is not None:
                self.used_names |= set(
                    re.findall(r"%\((\w+)\)", format_string)
                )

    def visit_Call(self, node):
        """
        Record names used via getattr()/hasattr() with a literal attribute
        name and via "...".format(**locals()).
        """
        # Count getattr/hasattr(x, "some_attr", ...) as usage of some_attr.
        if isinstance(node.func, ast.Name) and (
            (node.func.id == "getattr" and 2 <= len(node.args) <= 3)
            or (node.func.id == "hasattr" and len(node.args) == 2)
        ):
            attr_name = _get_str_value(node.args[1])
            if attr_name is not None:
                self.used_names.add(attr_name)

        # Parse variable names in new format strings:
        # "{my_var}".format(**locals())
        if (
            isinstance(node.func, ast.Attribute)
            and node.func.attr == "format"
            and any(
                kw.arg is None and self._is_locals_call(kw.value)
                for kw in node.keywords
            )
        ):
            format_string = _get_str_value(node.func.value)
            if format_string is not None:
                self._handle_new_format_string(format_string)

    def _handle_new_format_string(self, s):
        """Mark all names referenced by the fields of format string s as used."""
        parser = string.Formatter()
        try:
            names = [name for _, name, _, _ in parser.parse(s) if name]
        except ValueError:
            # Invalid format string.
            names = []

        for field_name in names:
            # Remove brackets and their contents: "a[0][b].c[d].e" -> "a.c.e",
            # then split the resulting string: "a.b.c" -> ["a", "b", "c"]
            parts = re.sub(r"\[\w*\]", "", field_name).split(".")
            for part in parts:
                # Skip positional fields ("0") and other non-identifiers.
                if part.isidentifier():
                    self.used_names.add(part)

    @staticmethod
    def _is_locals_call(node):
        """Return True if the node is `locals()`."""
        return (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id == "locals"
            and not node.args
            and not node.keywords
        )

    def visit_ClassDef(self, node):
        """Record the class unless one of its decorators is whitelisted."""
        for decorator in node.decorator_list:
            if _match(
                utils.get_decorator_name(decorator), self.ignore_decorators
            ):
                self._log(
                    f'Ignoring class "{node.name}" (decorator whitelisted)'
                )
                break
        else:
            self._define(
                self.defined_classes, node.name, node, ignore=_ignore_class
            )

    def visit_FunctionDef(self, node):
        """
        Record the definition as property, method or function unless one of
        its decorators is whitelisted.
        """
        decorator_names = [
            utils.get_decorator_name(decorator)
            for decorator in node.decorator_list
        ]

        first_arg = node.args.args[0].arg if node.args.args else None

        if "@property" in decorator_names:
            typ = "property"
        elif (
            "@staticmethod" in decorator_names
            or "@classmethod" in decorator_names
            or first_arg == "self"
        ):
            typ = "method"
        else:
            typ = "function"

        if any(
            _match(name, self.ignore_decorators) for name in decorator_names
        ):
            self._log(f'Ignoring {typ} "{node.name}" (decorator whitelisted)')
        elif typ == "property":
            self._define(self.defined_props, node.name, node)
        elif typ == "method":
            self._define(
                self.defined_methods, node.name, node, ignore=_ignore_method
            )
        else:
            self._define(
                self.defined_funcs, node.name, node, ignore=_ignore_function
            )

    def visit_If(self, node):
        self._handle_conditional_node(node, "if")

    def visit_IfExp(self, node):
        self._handle_conditional_node(node, "ternary")

    def visit_Import(self, node):
        self._add_aliases(node)

    def visit_ImportFrom(self, node):
        if node.module != "__future__":
            self._add_aliases(node)

    def visit_Name(self, node):
        """Record name loads as usages and name stores as definitions."""
        if (
            isinstance(node.ctx, ast.Load)
            and node.id not in IGNORED_VARIABLE_NAMES
        ):
            self.used_names.add(node.id)
        elif isinstance(node.ctx, (ast.Param, ast.Store)):
            self._define_variable(node.id, node)

    def visit_While(self, node):
        self._handle_conditional_node(node, "while")

    def visit(self, node):
        """
        Dispatch to the matching visit_* method (if any), visit the node's
        type comment (if any) and then always descend into its children.
        """
        method = "visit_" + node.__class__.__name__
        visitor = getattr(self, method, None)
        if self.verbose:
            lineno = getattr(node, "lineno", 1)
            line = self.code[lineno - 1] if self.code else ""
            self._log(lineno, ast.dump(node), line)
        if visitor:
            visitor(node)

        # There isn't a clean subset of node types that might have type
        # comments, so just check all of them.
        type_comment = getattr(node, "type_comment", None)
        if type_comment is not None:
            mode = (
                "func_type"
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
                else "eval"
            )
            self.visit(
                ast.parse(type_comment, filename="<type_comment>", mode=mode)
            )

        return self.generic_visit(node)

    def _handle_ast_list(self, ast_list):
        """
        Find unreachable nodes in the given sequence of ast nodes.
        """
        for index, node in enumerate(ast_list):
            if isinstance(
                node, (ast.Break, ast.Continue, ast.Raise, ast.Return)
            ):
                try:
                    first_unreachable_node = ast_list[index + 1]
                except IndexError:
                    # The statement is the last one: nothing follows it.
                    continue
                class_name = node.__class__.__name__.lower()
                self._define(
                    self.unreachable_code,
                    class_name,
                    first_unreachable_node,
                    last_node=ast_list[-1],
                    message=f"unreachable code after '{class_name}'",
                    confidence=100,
                )
                # Everything up to the end of the list is already covered.
                return

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        for _, value in ast.iter_fields(node):
            if isinstance(value, list):
                self._handle_ast_list(value)
                for item in value:
                    if isinstance(item, ast.AST):
                        self.visit(item)
            elif isinstance(value, ast.AST):
                self.visit(value)


def main():
    """
    Scan the configured paths, print the report and exit with the value
    returned by Vulture.report().
    """
    config = make_config()
    vulture = Vulture(
        verbose=config["verbose"],
        ignore_names=config["ignore_names"],
        ignore_decorators=config["ignore_decorators"],
    )
    vulture.scavenge(config["paths"], exclude=config["exclude"])
    sys.exit(
        vulture.report(
            min_confidence=config["min_confidence"],
            sort_by_size=config["sort_by_size"],
            make_whitelist=config["make_whitelist"],
        )
    )