1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4"""Determining whether files are being measured/reported or not."""
5
6# For finding the stdlib
7import atexit
8import inspect
9import itertools
10import os
11import platform
12import re
13import sys
14import traceback
15
16from coverage import env
17from coverage.backward import code_object
18from coverage.disposition import FileDisposition, disposition_init
19from coverage.files import TreeMatcher, FnmatchMatcher, ModuleMatcher
20from coverage.files import prep_patterns, find_python_files, canonical_filename
21from coverage.misc import CoverageException
22from coverage.python import source_for_file, source_for_morf
23
24
# Pypy has some unusual stuff in the "stdlib".  Consider those locations
# when deciding where the stdlib is.  These modules are not used for anything,
# they are modules importable from the pypy lib directories, so that we can
# find those directories.
#
# Both names are always bound at module level: they start as None and are only
# rebound to the real module when `env.PYPY` is true and the import succeeds.
# Code below can therefore test them against None without a NameError.
_structseq = _pypy_irc_topic = None
if env.PYPY:
    try:
        import _structseq
    except ImportError:
        # Module not importable here; `_structseq` stays None.
        pass

    try:
        import _pypy_irc_topic
    except ImportError:
        # Module not importable here; `_pypy_irc_topic` stays None.
        pass
40
41
def canonical_path(morf, directory=False):
    """Return the canonical path for `morf`, a module object or a file name.

    The source file for `morf` is located and canonicalized.  The result is
    that file's path, except in two cases where the containing directory is
    returned instead: when the path ends with "__init__.py" (a package), or
    when the caller passes a true `directory`.

    """
    source_path = canonical_filename(source_for_morf(morf))
    is_package_init = source_path.endswith("__init__.py")
    if is_package_init or directory:
        return os.path.dirname(source_path)
    return source_path
54
55
def name_for_module(filename, frame):
    """Work out a module name from `filename` and the executing `frame`.

    The candidates are tried in this order:

    1. The frame's global ``__name__``, when it is a string other than
       ``"__main__"``.  This is the ordinary imported-module case.
    2. The ``fullname`` or ``name`` attribute of the frame's global
       ``__loader__``, when it is a string other than ``"__main__"``.  This
       lets a module run with ``-m`` be matched by its importable name.
    3. The name `inspect.getmodulename` derives from `filename`.
    4. Whatever ``__name__`` was found in step 1, which may be None.

    `frame` may be None, in which case only steps 3 and 4 can produce a
    result.

    """
    namespace = {} if frame is None else frame.f_globals
    if namespace is None:          # pragma: only ironpython
        # IronPython doesn't provide globals: https://github.com/IronLanguages/main/issues/1296
        namespace = {}

    dunder_name = namespace.get('__name__')
    if isinstance(dunder_name, str) and dunder_name != '__main__':
        # The common situation: a normally imported module.
        return dunder_name

    loader = namespace.get('__loader__')
    for attrname in ('fullname', 'name'):   # attribute renamed in py3.2
        if not hasattr(loader, attrname):
            continue
        candidate = getattr(loader, attrname)
        if isinstance(candidate, str) and candidate != '__main__':
            # Module loaded via: runpy -m
            return candidate

    # A script given as the first argument on the Python command line:
    # fall back to the file name, and failing that to the raw __name__.
    from_filename = inspect.getmodulename(filename)
    if from_filename is None:
        return dunder_name
    return from_filename
96
97
def module_is_namespace(mod):
    """Report whether module object `mod` looks like a PEP420 namespace package.

    That means it has a ``__path__`` attribute, and its ``__file__`` is
    either missing or None.

    """
    if not hasattr(mod, '__path__'):
        return False
    return getattr(mod, '__file__', None) is None
101
102
def module_has_file(mod):
    """Report whether module object `mod` has a ``__file__`` that exists on disk.

    A missing or None ``__file__`` counts as "no file".

    """
    file_path = getattr(mod, '__file__', None)
    return file_path is not None and os.path.exists(file_path)
109
110
111class InOrOut(object):
112    """Machinery for determining what files to measure."""
113
114    def __init__(self, warn, debug):
115        self.warn = warn
116        self.debug = debug
117
118        # The matchers for should_trace.
119        self.source_match = None
120        self.source_pkgs_match = None
121        self.pylib_paths = self.cover_paths = None
122        self.pylib_match = self.cover_match = None
123        self.include_match = self.omit_match = None
124        self.plugins = []
125        self.disp_class = FileDisposition
126
127        # The source argument can be directories or package names.
128        self.source = []
129        self.source_pkgs = []
130        self.source_pkgs_unmatched = []
131        self.omit = self.include = None
132
133    def configure(self, config):
134        """Apply the configuration to get ready for decision-time."""
135        self.source_pkgs.extend(config.source_pkgs)
136        for src in config.source or []:
137            if os.path.isdir(src):
138                self.source.append(canonical_filename(src))
139            else:
140                self.source_pkgs.append(src)
141        self.source_pkgs_unmatched = self.source_pkgs[:]
142
143        self.omit = prep_patterns(config.run_omit)
144        self.include = prep_patterns(config.run_include)
145
146        # The directories for files considered "installed with the interpreter".
147        self.pylib_paths = set()
148        if not config.cover_pylib:
149            # Look at where some standard modules are located. That's the
150            # indication for "installed with the interpreter". In some
151            # environments (virtualenv, for example), these modules may be
152            # spread across a few locations. Look at all the candidate modules
153            # we've imported, and take all the different ones.
154            for m in (atexit, inspect, os, platform, _pypy_irc_topic, re, _structseq, traceback):
155                if m is not None and hasattr(m, "__file__"):
156                    self.pylib_paths.add(canonical_path(m, directory=True))
157
158            if _structseq and not hasattr(_structseq, '__file__'):
159                # PyPy 2.4 has no __file__ in the builtin modules, but the code
160                # objects still have the file names.  So dig into one to find
161                # the path to exclude.  The "filename" might be synthetic,
162                # don't be fooled by those.
163                structseq_file = code_object(_structseq.structseq_new).co_filename
164                if not structseq_file.startswith("<"):
165                    self.pylib_paths.add(canonical_path(structseq_file))
166
167        # To avoid tracing the coverage.py code itself, we skip anything
168        # located where we are.
169        self.cover_paths = [canonical_path(__file__, directory=True)]
170        if env.TESTING:
171            # Don't include our own test code.
172            self.cover_paths.append(os.path.join(self.cover_paths[0], "tests"))
173
174            # When testing, we use PyContracts, which should be considered
175            # part of coverage.py, and it uses six. Exclude those directories
176            # just as we exclude ourselves.
177            import contracts
178            import six
179            for mod in [contracts, six]:
180                self.cover_paths.append(canonical_path(mod))
181
182        def debug(msg):
183            if self.debug:
184                self.debug.write(msg)
185
186        # Create the matchers we need for should_trace
187        if self.source or self.source_pkgs:
188            against = []
189            if self.source:
190                self.source_match = TreeMatcher(self.source)
191                against.append("trees {!r}".format(self.source_match))
192            if self.source_pkgs:
193                self.source_pkgs_match = ModuleMatcher(self.source_pkgs)
194                against.append("modules {!r}".format(self.source_pkgs_match))
195            debug("Source matching against " + " and ".join(against))
196        else:
197            if self.cover_paths:
198                self.cover_match = TreeMatcher(self.cover_paths)
199                debug("Coverage code matching: {!r}".format(self.cover_match))
200            if self.pylib_paths:
201                self.pylib_match = TreeMatcher(self.pylib_paths)
202                debug("Python stdlib matching: {!r}".format(self.pylib_match))
203        if self.include:
204            self.include_match = FnmatchMatcher(self.include)
205            debug("Include matching: {!r}".format(self.include_match))
206        if self.omit:
207            self.omit_match = FnmatchMatcher(self.omit)
208            debug("Omit matching: {!r}".format(self.omit_match))
209
210    def should_trace(self, filename, frame=None):
211        """Decide whether to trace execution in `filename`, with a reason.
212
213        This function is called from the trace function.  As each new file name
214        is encountered, this function determines whether it is traced or not.
215
216        Returns a FileDisposition object.
217
218        """
219        original_filename = filename
220        disp = disposition_init(self.disp_class, filename)
221
222        def nope(disp, reason):
223            """Simple helper to make it easy to return NO."""
224            disp.trace = False
225            disp.reason = reason
226            return disp
227
228        if frame is not None:
229            # Compiled Python files have two file names: frame.f_code.co_filename is
230            # the file name at the time the .pyc was compiled.  The second name is
231            # __file__, which is where the .pyc was actually loaded from.  Since
232            # .pyc files can be moved after compilation (for example, by being
233            # installed), we look for __file__ in the frame and prefer it to the
234            # co_filename value.
235            dunder_file = frame.f_globals and frame.f_globals.get('__file__')
236            if dunder_file:
237                filename = source_for_file(dunder_file)
238                if original_filename and not original_filename.startswith('<'):
239                    orig = os.path.basename(original_filename)
240                    if orig != os.path.basename(filename):
241                        # Files shouldn't be renamed when moved. This happens when
242                        # exec'ing code.  If it seems like something is wrong with
243                        # the frame's file name, then just use the original.
244                        filename = original_filename
245
246        if not filename:
247            # Empty string is pretty useless.
248            return nope(disp, "empty string isn't a file name")
249
250        if filename.startswith('memory:'):
251            return nope(disp, "memory isn't traceable")
252
253        if filename.startswith('<'):
254            # Lots of non-file execution is represented with artificial
255            # file names like "<string>", "<doctest readme.txt[0]>", or
256            # "<exec_function>".  Don't ever trace these executions, since we
257            # can't do anything with the data later anyway.
258            return nope(disp, "not a real file name")
259
260        # pyexpat does a dumb thing, calling the trace function explicitly from
261        # C code with a C file name.
262        if re.search(r"[/\\]Modules[/\\]pyexpat.c", filename):
263            return nope(disp, "pyexpat lies about itself")
264
265        # Jython reports the .class file to the tracer, use the source file.
266        if filename.endswith("$py.class"):
267            filename = filename[:-9] + ".py"
268
269        canonical = canonical_filename(filename)
270        disp.canonical_filename = canonical
271
272        # Try the plugins, see if they have an opinion about the file.
273        plugin = None
274        for plugin in self.plugins.file_tracers:
275            if not plugin._coverage_enabled:
276                continue
277
278            try:
279                file_tracer = plugin.file_tracer(canonical)
280                if file_tracer is not None:
281                    file_tracer._coverage_plugin = plugin
282                    disp.trace = True
283                    disp.file_tracer = file_tracer
284                    if file_tracer.has_dynamic_source_filename():
285                        disp.has_dynamic_filename = True
286                    else:
287                        disp.source_filename = canonical_filename(
288                            file_tracer.source_filename()
289                        )
290                    break
291            except Exception:
292                self.warn(
293                    "Disabling plug-in %r due to an exception:" % (plugin._coverage_plugin_name)
294                )
295                traceback.print_exc()
296                plugin._coverage_enabled = False
297                continue
298        else:
299            # No plugin wanted it: it's Python.
300            disp.trace = True
301            disp.source_filename = canonical
302
303        if not disp.has_dynamic_filename:
304            if not disp.source_filename:
305                raise CoverageException(
306                    "Plugin %r didn't set source_filename for %r" %
307                    (plugin, disp.original_filename)
308                )
309            reason = self.check_include_omit_etc(disp.source_filename, frame)
310            if reason:
311                nope(disp, reason)
312
313        return disp
314
315    def check_include_omit_etc(self, filename, frame):
316        """Check a file name against the include, omit, etc, rules.
317
318        Returns a string or None.  String means, don't trace, and is the reason
319        why.  None means no reason found to not trace.
320
321        """
322        modulename = name_for_module(filename, frame)
323
324        # If the user specified source or include, then that's authoritative
325        # about the outer bound of what to measure and we don't have to apply
326        # any canned exclusions. If they didn't, then we have to exclude the
327        # stdlib and coverage.py directories.
328        if self.source_match or self.source_pkgs_match:
329            extra = ""
330            ok = False
331            if self.source_pkgs_match:
332                if self.source_pkgs_match.match(modulename):
333                    ok = True
334                    if modulename in self.source_pkgs_unmatched:
335                        self.source_pkgs_unmatched.remove(modulename)
336                else:
337                    extra = "module {!r} ".format(modulename)
338            if not ok and self.source_match:
339                if self.source_match.match(filename):
340                    ok = True
341            if not ok:
342                return extra + "falls outside the --source spec"
343        elif self.include_match:
344            if not self.include_match.match(filename):
345                return "falls outside the --include trees"
346        else:
347            # If we aren't supposed to trace installed code, then check if this
348            # is near the Python standard library and skip it if so.
349            if self.pylib_match and self.pylib_match.match(filename):
350                return "is in the stdlib"
351
352            # We exclude the coverage.py code itself, since a little of it
353            # will be measured otherwise.
354            if self.cover_match and self.cover_match.match(filename):
355                return "is part of coverage.py"
356
357        # Check the file against the omit pattern.
358        if self.omit_match and self.omit_match.match(filename):
359            return "is inside an --omit pattern"
360
361        # No point tracing a file we can't later write to SQLite.
362        try:
363            filename.encode("utf8")
364        except UnicodeEncodeError:
365            return "non-encodable filename"
366
367        # No reason found to skip this file.
368        return None
369
370    def warn_conflicting_settings(self):
371        """Warn if there are settings that conflict."""
372        if self.include:
373            if self.source or self.source_pkgs:
374                self.warn("--include is ignored because --source is set", slug="include-ignored")
375
376    def warn_already_imported_files(self):
377        """Warn if files have already been imported that we will be measuring."""
378        if self.include or self.source or self.source_pkgs:
379            warned = set()
380            for mod in list(sys.modules.values()):
381                filename = getattr(mod, "__file__", None)
382                if filename is None:
383                    continue
384                if filename in warned:
385                    continue
386
387                disp = self.should_trace(filename)
388                if disp.trace:
389                    msg = "Already imported a file that will be measured: {}".format(filename)
390                    self.warn(msg, slug="already-imported")
391                    warned.add(filename)
392
393    def warn_unimported_source(self):
394        """Warn about source packages that were of interest, but never traced."""
395        for pkg in self.source_pkgs_unmatched:
396            self._warn_about_unmeasured_code(pkg)
397
398    def _warn_about_unmeasured_code(self, pkg):
399        """Warn about a package or module that we never traced.
400
401        `pkg` is a string, the name of the package or module.
402
403        """
404        mod = sys.modules.get(pkg)
405        if mod is None:
406            self.warn("Module %s was never imported." % pkg, slug="module-not-imported")
407            return
408
409        if module_is_namespace(mod):
410            # A namespace package. It's OK for this not to have been traced,
411            # since there is no code directly in it.
412            return
413
414        if not module_has_file(mod):
415            self.warn("Module %s has no Python source." % pkg, slug="module-not-python")
416            return
417
418        # The module was in sys.modules, and seems like a module with code, but
419        # we never measured it. I guess that means it was imported before
420        # coverage even started.
421        self.warn(
422            "Module %s was previously imported, but not measured" % pkg,
423            slug="module-not-measured",
424        )
425
426    def find_possibly_unexecuted_files(self):
427        """Find files in the areas of interest that might be untraced.
428
429        Yields pairs: file path, and responsible plug-in name.
430        """
431        for pkg in self.source_pkgs:
432            if (not pkg in sys.modules or
433                not module_has_file(sys.modules[pkg])):
434                continue
435            pkg_file = source_for_file(sys.modules[pkg].__file__)
436            for ret in self._find_executable_files(canonical_path(pkg_file)):
437                yield ret
438
439        for src in self.source:
440            for ret in self._find_executable_files(src):
441                yield ret
442
443    def _find_plugin_files(self, src_dir):
444        """Get executable files from the plugins."""
445        for plugin in self.plugins.file_tracers:
446            for x_file in plugin.find_executable_files(src_dir):
447                yield x_file, plugin._coverage_plugin_name
448
449    def _find_executable_files(self, src_dir):
450        """Find executable files in `src_dir`.
451
452        Search for files in `src_dir` that can be executed because they
453        are probably importable. Don't include ones that have been omitted
454        by the configuration.
455
456        Yield the file path, and the plugin name that handles the file.
457
458        """
459        py_files = ((py_file, None) for py_file in find_python_files(src_dir))
460        plugin_files = self._find_plugin_files(src_dir)
461
462        for file_path, plugin_name in itertools.chain(py_files, plugin_files):
463            file_path = canonical_filename(file_path)
464            if self.omit_match and self.omit_match.match(file_path):
465                # Turns out this file was omitted, so don't pull it back
466                # in as unexecuted.
467                continue
468            yield file_path, plugin_name
469
470    def sys_info(self):
471        """Our information for Coverage.sys_info.
472
473        Returns a list of (key, value) pairs.
474        """
475        info = [
476            ('cover_paths', self.cover_paths),
477            ('pylib_paths', self.pylib_paths),
478        ]
479
480        matcher_names = [
481            'source_match', 'source_pkgs_match',
482            'include_match', 'omit_match',
483            'cover_match', 'pylib_match',
484            ]
485
486        for matcher_name in matcher_names:
487            matcher = getattr(self, matcher_name)
488            if matcher:
489                matcher_info = matcher.info()
490            else:
491                matcher_info = '-none-'
492            info.append((matcher_name, matcher_info))
493
494        return info
495