1import ast
2from fnmatch import fnmatch, fnmatchcase
3from pathlib import Path
4import pkgutil
5import re
6import string
7import sys
8
9from vulture import lines
10from vulture import noqa
11from vulture import utils
12from vulture.config import make_config
13
14
# Confidence (in percent) given to an Item when the caller passes none.
DEFAULT_CONFIDENCE = 60

# Variable names that are neither reported as unused variables nor
# recorded as used names.
IGNORED_VARIABLE_NAMES = {"object", "self"}

# Maps an item type to the error code that is handed to noqa.ignore_line()
# when deciding whether a definition's line suppresses the report.
ERROR_CODES = {
    "attribute": "V101",
    "class": "V102",
    "function": "V103",
    "import": "V104",
    "method": "V105",
    "property": "V106",
    "variable": "V107",
    "unreachable_code": "V201",
}
29
30
31def _get_unused_items(defined_items, used_names):
32    unused_items = [
33        item for item in set(defined_items) if item.name not in used_names
34    ]
35    unused_items.sort(key=lambda item: item.name.lower())
36    return unused_items
37
38
39def _is_special_name(name):
40    return name.startswith("__") and name.endswith("__")
41
42
43def _match(name, patterns, case=True):
44    func = fnmatchcase if case else fnmatch
45    return any(func(name, pattern) for pattern in patterns)
46
47
def _is_test_file(filename):
    """
    Return True if the resolved path of filename matches one of the
    test patterns (a test/tests directory or a test-like file name).
    """
    test_patterns = ["*/test/*", "*/tests/*", "*/test*.py", "*[-_]test.py"]
    resolved_path = filename.resolve()
    return _match(resolved_path, test_patterns, case=False)
54
55
def _ignore_class(filename, class_name):
    """
    Ignore classes in test files whose name contains "Test".
    """
    if not _is_test_file(filename):
        return False
    return "Test" in class_name
58
59
60def _ignore_import(filename, import_name):
61    """
62    Ignore star-imported names since we can't detect whether they are used.
63    Ignore imports from __init__.py files since they're commonly used to
64    collect objects from a package.
65    """
66    return filename.name == "__init__.py" or import_name == "*"
67
68
69def _ignore_function(filename, function_name):
70    return function_name.startswith("test_") and _is_test_file(filename)
71
72
def _ignore_method(filename, method_name):
    """
    Ignore special (__x__) methods everywhere and "test_*" methods in
    test files.
    """
    if _is_special_name(method_name):
        return True
    return method_name.startswith("test_") and _is_test_file(filename)
77
78
def _ignore_variable(filename, varname):
    """
    Return True for variable names that should never be reported.

    This covers the names in IGNORED_VARIABLE_NAMES, _ (Python idiom),
    _x (pylint convention) and __x__ (special variable or method),
    but not __x. The filename argument is not consulted.
    """
    if varname in IGNORED_VARIABLE_NAMES:
        return True
    if varname.startswith("_") and not varname.startswith("__"):
        return True
    return _is_special_name(varname)
89
90
class Item:
    """
    Hold the name, type and location of defined code.

    Two items are equal (and hash alike) when they share the same
    filename, first line number and name; the remaining attributes do
    not take part in comparisons.
    """

    __slots__ = (
        "name",
        "typ",
        "filename",
        "first_lineno",
        "last_lineno",
        "message",
        "confidence",
    )

    def __init__(
        self,
        name,
        typ,
        filename,
        first_lineno,
        last_lineno,
        message="",
        confidence=DEFAULT_CONFIDENCE,
    ):
        """
        Store the given values; an empty message is replaced by the
        default text "unused <typ> '<name>'".
        """
        self.name = name
        self.typ = typ
        self.filename = filename
        self.first_lineno = first_lineno
        self.last_lineno = last_lineno
        self.message = message or f"unused {typ} '{name}'"
        self.confidence = confidence

    @property
    def size(self):
        """
        Number of lines spanned by the item, counting both end lines.
        """
        assert self.last_lineno >= self.first_lineno
        return self.last_lineno - self.first_lineno + 1

    def get_report(self, add_size=False):
        """
        Return the one-line report "<path>:<line>: <message> (<n>%
        confidence)", with the size in lines appended if add_size is set.
        """
        if add_size:
            line_format = "line" if self.size == 1 else "lines"
            size_report = f", {self.size:d} {line_format}"
        else:
            size_report = ""
        location = utils.format_path(self.filename)
        return (
            f"{location}:{self.first_lineno:d}: {self.message} "
            f"({self.confidence}% confidence{size_report})"
        )

    def get_whitelist_string(self):
        """
        Return the line representing this item in a generated whitelist.

        Unreachable code becomes a pure comment. Attributes, methods and
        properties are prefixed with "_." so that the line is an
        attribute access on a placeholder object.
        """
        filename = utils.format_path(self.filename)
        if self.typ == "unreachable_code":
            return f"# {self.message} ({filename}:{self.first_lineno})"

        prefix = "_." if self.typ in ("attribute", "method", "property") else ""
        return (
            f"{prefix}{self.name}  # unused {self.typ} "
            f"({filename}:{self.first_lineno:d})"
        )

    def _tuple(self):
        """Return the identity key used for equality and hashing."""
        return (self.filename, self.first_lineno, self.name)

    def __repr__(self):
        return repr(self.name)

    def __eq__(self, other):
        # Comparing against a foreign type must not raise AttributeError;
        # returning NotImplemented lets Python fall back to its default
        # handling (which yields False for ==).
        if not isinstance(other, Item):
            return NotImplemented
        return self._tuple() == other._tuple()

    def __hash__(self):
        return hash(self._tuple())
166
167
168class Vulture(ast.NodeVisitor):
169    """Find dead code."""
170
171    def __init__(
172        self, verbose=False, ignore_names=None, ignore_decorators=None
173    ):
174        self.verbose = verbose
175
176        def get_list(typ):
177            return utils.LoggingList(typ, self.verbose)
178
179        self.defined_attrs = get_list("attribute")
180        self.defined_classes = get_list("class")
181        self.defined_funcs = get_list("function")
182        self.defined_imports = get_list("import")
183        self.defined_methods = get_list("method")
184        self.defined_props = get_list("property")
185        self.defined_vars = get_list("variable")
186        self.unreachable_code = get_list("unreachable_code")
187
188        self.used_names = utils.LoggingSet("name", self.verbose)
189
190        self.ignore_names = ignore_names or []
191        self.ignore_decorators = ignore_decorators or []
192
193        self.filename = Path()
194        self.code = []
195        self.found_dead_code_or_error = False
196
197    def scan(self, code, filename=""):
198        filename = Path(filename)
199        self.code = code.splitlines()
200        self.noqa_lines = noqa.parse_noqa(self.code)
201        self.filename = filename
202
203        def handle_syntax_error(e):
204            text = f' at "{e.text.strip()}"' if e.text else ""
205            print(
206                f"{utils.format_path(filename)}:{e.lineno}: {e.msg}{text}",
207                file=sys.stderr,
208            )
209            self.found_dead_code_or_error = True
210
211        try:
212            node = (
213                ast.parse(
214                    code, filename=str(self.filename), type_comments=True
215                )
216                if sys.version_info >= (3, 8)  # type_comments requires 3.8+
217                else ast.parse(code, filename=str(self.filename))
218            )
219        except SyntaxError as err:
220            handle_syntax_error(err)
221        except ValueError as err:
222            # ValueError is raised if source contains null bytes.
223            print(
224                f'{utils.format_path(filename)}: invalid source code "{err}"',
225                file=sys.stderr,
226            )
227            self.found_dead_code_or_error = True
228        else:
229            # When parsing type comments, visiting can throw SyntaxError.
230            try:
231                self.visit(node)
232            except SyntaxError as err:
233                handle_syntax_error(err)
234
235    def scavenge(self, paths, exclude=None):
236        def prepare_pattern(pattern):
237            if not any(char in pattern for char in "*?["):
238                pattern = f"*{pattern}*"
239            return pattern
240
241        exclude = [prepare_pattern(pattern) for pattern in (exclude or [])]
242
243        def exclude_path(path):
244            return _match(path, exclude, case=False)
245
246        paths = [Path(path) for path in paths]
247
248        for module in utils.get_modules(paths):
249            if exclude_path(module):
250                self._log("Excluded:", module)
251                continue
252
253            self._log("Scanning:", module)
254            try:
255                module_string = utils.read_file(module)
256            except utils.VultureInputException as err:  # noqa: F841
257                print(
258                    f"Error: Could not read file {module} - {err}\n"
259                    f"Try to change the encoding to UTF-8.",
260                    file=sys.stderr,
261                )
262                self.found_dead_code_or_error = True
263            else:
264                self.scan(module_string, filename=module)
265
266        unique_imports = {item.name for item in self.defined_imports}
267        for import_name in unique_imports:
268            path = Path("whitelists") / (import_name + "_whitelist.py")
269            if exclude_path(path):
270                self._log("Excluded whitelist:", path)
271            else:
272                try:
273                    module_data = pkgutil.get_data("vulture", str(path))
274                    self._log("Included whitelist:", path)
275                except OSError:
276                    # Most imported modules don't have a whitelist.
277                    continue
278                module_string = module_data.decode("utf-8")
279                self.scan(module_string, filename=path)
280
281    def get_unused_code(self, min_confidence=0, sort_by_size=False):
282        """
283        Return ordered list of unused Item objects.
284        """
285        if not 0 <= min_confidence <= 100:
286            raise ValueError("min_confidence must be between 0 and 100.")
287
288        def by_name(item):
289            return (str(item.filename).lower(), item.first_lineno)
290
291        def by_size(item):
292            return (item.size,) + by_name(item)
293
294        unused_code = (
295            self.unused_attrs
296            + self.unused_classes
297            + self.unused_funcs
298            + self.unused_imports
299            + self.unused_methods
300            + self.unused_props
301            + self.unused_vars
302            + self.unreachable_code
303        )
304
305        confidently_unused = [
306            obj for obj in unused_code if obj.confidence >= min_confidence
307        ]
308
309        return sorted(
310            confidently_unused, key=by_size if sort_by_size else by_name
311        )
312
313    def report(
314        self, min_confidence=0, sort_by_size=False, make_whitelist=False
315    ):
316        """
317        Print ordered list of Item objects to stdout.
318        """
319        for item in self.get_unused_code(
320            min_confidence=min_confidence, sort_by_size=sort_by_size
321        ):
322            print(
323                item.get_whitelist_string()
324                if make_whitelist
325                else item.get_report(add_size=sort_by_size)
326            )
327            self.found_dead_code_or_error = True
328        return self.found_dead_code_or_error
329
330    @property
331    def unused_classes(self):
332        return _get_unused_items(self.defined_classes, self.used_names)
333
334    @property
335    def unused_funcs(self):
336        return _get_unused_items(self.defined_funcs, self.used_names)
337
338    @property
339    def unused_imports(self):
340        return _get_unused_items(self.defined_imports, self.used_names)
341
342    @property
343    def unused_methods(self):
344        return _get_unused_items(self.defined_methods, self.used_names)
345
346    @property
347    def unused_props(self):
348        return _get_unused_items(self.defined_props, self.used_names)
349
350    @property
351    def unused_vars(self):
352        return _get_unused_items(self.defined_vars, self.used_names)
353
354    @property
355    def unused_attrs(self):
356        return _get_unused_items(self.defined_attrs, self.used_names)
357
358    def _log(self, *args):
359        if self.verbose:
360            print(*args)
361
362    def _add_aliases(self, node):
363        """
364        We delegate to this method instead of using visit_alias() to have
365        access to line numbers and to filter imports from __future__.
366        """
367        assert isinstance(node, (ast.Import, ast.ImportFrom))
368        for name_and_alias in node.names:
369            # Store only top-level module name ("os.path" -> "os").
370            # We can't easily detect when "os.path" is used.
371            name = name_and_alias.name.partition(".")[0]
372            alias = name_and_alias.asname
373            self._define(
374                self.defined_imports,
375                alias or name,
376                node,
377                confidence=90,
378                ignore=_ignore_import,
379            )
380            if alias is not None:
381                self.used_names.add(name_and_alias.name)
382
383    def _handle_conditional_node(self, node, name):
384        if utils.condition_is_always_false(node.test):
385            self._define(
386                self.unreachable_code,
387                name,
388                node,
389                last_node=node.body
390                if isinstance(node, ast.IfExp)
391                else node.body[-1],
392                message=f"unsatisfiable '{name}' condition",
393                confidence=100,
394            )
395        elif utils.condition_is_always_true(node.test):
396            else_body = node.orelse
397            if name == "ternary":
398                self._define(
399                    self.unreachable_code,
400                    name,
401                    else_body,
402                    message="unreachable 'else' expression",
403                    confidence=100,
404                )
405            elif else_body:
406                self._define(
407                    self.unreachable_code,
408                    "else",
409                    else_body[0],
410                    last_node=else_body[-1],
411                    message="unreachable 'else' block",
412                    confidence=100,
413                )
414            elif name == "if":
415                # Redundant if-condition without else block.
416                self._define(
417                    self.unreachable_code,
418                    name,
419                    node,
420                    message="redundant if-condition",
421                    confidence=100,
422                )
423
424    def _define(
425        self,
426        collection,
427        name,
428        first_node,
429        last_node=None,
430        message="",
431        confidence=DEFAULT_CONFIDENCE,
432        ignore=None,
433    ):
434        def ignored(lineno):
435            return (
436                (ignore and ignore(self.filename, name))
437                or _match(name, self.ignore_names)
438                or noqa.ignore_line(self.noqa_lines, lineno, ERROR_CODES[typ])
439            )
440
441        last_node = last_node or first_node
442        typ = collection.typ
443        first_lineno = lines.get_first_line_number(first_node)
444
445        if ignored(first_lineno):
446            self._log(f'Ignoring {typ} "{name}"')
447        else:
448            last_lineno = lines.get_last_line_number(last_node)
449            collection.append(
450                Item(
451                    name,
452                    typ,
453                    self.filename,
454                    first_lineno,
455                    last_lineno,
456                    message=message,
457                    confidence=confidence,
458                )
459            )
460
461    def _define_variable(self, name, node, confidence=DEFAULT_CONFIDENCE):
462        self._define(
463            self.defined_vars,
464            name,
465            node,
466            confidence=confidence,
467            ignore=_ignore_variable,
468        )
469
470    def visit_arg(self, node):
471        """Function argument"""
472        self._define_variable(node.arg, node, confidence=100)
473
474    def visit_AsyncFunctionDef(self, node):
475        return self.visit_FunctionDef(node)
476
477    def visit_Attribute(self, node):
478        if isinstance(node.ctx, ast.Store):
479            self._define(self.defined_attrs, node.attr, node)
480        elif isinstance(node.ctx, ast.Load):
481            self.used_names.add(node.attr)
482
483    def visit_BinOp(self, node):
484        """
485        Parse variable names in old format strings:
486
487        "%(my_var)s" % locals()
488        """
489        if (
490            isinstance(node.left, ast.Str)
491            and isinstance(node.op, ast.Mod)
492            and self._is_locals_call(node.right)
493        ):
494            self.used_names |= set(re.findall(r"%\((\w+)\)", node.left.s))
495
496    def visit_Call(self, node):
497        # Count getattr/hasattr(x, "some_attr", ...) as usage of some_attr.
498        if isinstance(node.func, ast.Name) and (
499            (node.func.id == "getattr" and 2 <= len(node.args) <= 3)
500            or (node.func.id == "hasattr" and len(node.args) == 2)
501        ):
502            attr_name_arg = node.args[1]
503            if isinstance(attr_name_arg, ast.Str):
504                self.used_names.add(attr_name_arg.s)
505
506        # Parse variable names in new format strings:
507        # "{my_var}".format(**locals())
508        if (
509            isinstance(node.func, ast.Attribute)
510            and isinstance(node.func.value, ast.Str)
511            and node.func.attr == "format"
512            and any(
513                kw.arg is None and self._is_locals_call(kw.value)
514                for kw in node.keywords
515            )
516        ):
517            self._handle_new_format_string(node.func.value.s)
518
519    def _handle_new_format_string(self, s):
520        def is_identifier(name):
521            return bool(re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", name))
522
523        parser = string.Formatter()
524        try:
525            names = [name for _, name, _, _ in parser.parse(s) if name]
526        except ValueError:
527            # Invalid format string.
528            names = []
529
530        for field_name in names:
531            # Remove brackets and their contents: "a[0][b].c[d].e" -> "a.c.e",
532            # then split the resulting string: "a.b.c" -> ["a", "b", "c"]
533            vars = re.sub(r"\[\w*\]", "", field_name).split(".")
534            for var in vars:
535                if is_identifier(var):
536                    self.used_names.add(var)
537
538    @staticmethod
539    def _is_locals_call(node):
540        """Return True if the node is `locals()`."""
541        return (
542            isinstance(node, ast.Call)
543            and isinstance(node.func, ast.Name)
544            and node.func.id == "locals"
545            and not node.args
546            and not node.keywords
547        )
548
549    def visit_ClassDef(self, node):
550        for decorator in node.decorator_list:
551            if _match(
552                utils.get_decorator_name(decorator), self.ignore_decorators
553            ):
554                self._log(
555                    f'Ignoring class "{node.name}" (decorator whitelisted)'
556                )
557                break
558        else:
559            self._define(
560                self.defined_classes, node.name, node, ignore=_ignore_class
561            )
562
563    def visit_FunctionDef(self, node):
564        decorator_names = [
565            utils.get_decorator_name(decorator)
566            for decorator in node.decorator_list
567        ]
568
569        first_arg = node.args.args[0].arg if node.args.args else None
570
571        if "@property" in decorator_names:
572            typ = "property"
573        elif (
574            "@staticmethod" in decorator_names
575            or "@classmethod" in decorator_names
576            or first_arg == "self"
577        ):
578            typ = "method"
579        else:
580            typ = "function"
581
582        if any(
583            _match(name, self.ignore_decorators) for name in decorator_names
584        ):
585            self._log(f'Ignoring {typ} "{node.name}" (decorator whitelisted)')
586        elif typ == "property":
587            self._define(self.defined_props, node.name, node)
588        elif typ == "method":
589            self._define(
590                self.defined_methods, node.name, node, ignore=_ignore_method
591            )
592        else:
593            self._define(
594                self.defined_funcs, node.name, node, ignore=_ignore_function
595            )
596
597    def visit_If(self, node):
598        self._handle_conditional_node(node, "if")
599
600    def visit_IfExp(self, node):
601        self._handle_conditional_node(node, "ternary")
602
603    def visit_Import(self, node):
604        self._add_aliases(node)
605
606    def visit_ImportFrom(self, node):
607        if node.module != "__future__":
608            self._add_aliases(node)
609
610    def visit_Name(self, node):
611        if (
612            isinstance(node.ctx, ast.Load)
613            and node.id not in IGNORED_VARIABLE_NAMES
614        ):
615            self.used_names.add(node.id)
616        elif isinstance(node.ctx, (ast.Param, ast.Store)):
617            self._define_variable(node.id, node)
618
619    def visit_While(self, node):
620        self._handle_conditional_node(node, "while")
621
622    def visit(self, node):
623        method = "visit_" + node.__class__.__name__
624        visitor = getattr(self, method, None)
625        if self.verbose:
626            lineno = getattr(node, "lineno", 1)
627            line = self.code[lineno - 1] if self.code else ""
628            self._log(lineno, ast.dump(node), line)
629        if visitor:
630            visitor(node)
631
632        # There isn't a clean subset of node types that might have type
633        # comments, so just check all of them.
634        type_comment = getattr(node, "type_comment", None)
635        if type_comment is not None:
636            mode = (
637                "func_type"
638                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
639                else "eval"
640            )
641            self.visit(
642                ast.parse(type_comment, filename="<type_comment>", mode=mode)
643            )
644
645        return self.generic_visit(node)
646
647    def _handle_ast_list(self, ast_list):
648        """
649        Find unreachable nodes in the given sequence of ast nodes.
650        """
651        for index, node in enumerate(ast_list):
652            if isinstance(
653                node, (ast.Break, ast.Continue, ast.Raise, ast.Return)
654            ):
655                try:
656                    first_unreachable_node = ast_list[index + 1]
657                except IndexError:
658                    continue
659                class_name = node.__class__.__name__.lower()
660                self._define(
661                    self.unreachable_code,
662                    class_name,
663                    first_unreachable_node,
664                    last_node=ast_list[-1],
665                    message=f"unreachable code after '{class_name}'",
666                    confidence=100,
667                )
668                return
669
670    def generic_visit(self, node):
671        """Called if no explicit visitor function exists for a node."""
672        for _, value in ast.iter_fields(node):
673            if isinstance(value, list):
674                self._handle_ast_list(value)
675                for item in value:
676                    if isinstance(item, ast.AST):
677                        self.visit(item)
678            elif isinstance(value, ast.AST):
679                self.visit(value)
680
681
def main():
    """
    Run vulture with the settings from make_config() and exit.

    The exit status is the return value of Vulture.report().
    """
    config = make_config()

    vulture = Vulture(
        verbose=config["verbose"],
        ignore_names=config["ignore_names"],
        ignore_decorators=config["ignore_decorators"],
    )
    vulture.scavenge(config["paths"], exclude=config["exclude"])

    found_dead_code_or_error = vulture.report(
        min_confidence=config["min_confidence"],
        sort_by_size=config["sort_by_size"],
        make_whitelist=config["make_whitelist"],
    )
    sys.exit(found_dead_code_or_error)
697