# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Determining whether files are being measured/reported or not."""

# For finding the stdlib
import atexit
import inspect
import itertools
import os
import platform
import re
import sys
import traceback

from coverage import env
from coverage.backward import code_object
from coverage.disposition import FileDisposition, disposition_init
from coverage.files import TreeMatcher, FnmatchMatcher, ModuleMatcher
from coverage.files import prep_patterns, find_python_files, canonical_filename
from coverage.misc import CoverageException
from coverage.python import source_for_file, source_for_morf


# Pypy has some unusual stuff in the "stdlib". Consider those locations
# when deciding where the stdlib is. These modules are not used for anything,
# they are modules importable from the pypy lib directories, so that we can
# find those directories.
_structseq = _pypy_irc_topic = None
if env.PYPY:
    try:
        import _structseq
    except ImportError:
        pass

    try:
        import _pypy_irc_topic
    except ImportError:
        pass


def canonical_path(morf, directory=False):
    """Return the canonical path of the module or file `morf`.

    If the module is a package, then return its directory. If it is a
    module, then return its file, unless `directory` is True, in which
    case return its enclosing directory.

    """
    morf_path = canonical_filename(source_for_morf(morf))
    if morf_path.endswith("__init__.py") or directory:
        morf_path = os.path.split(morf_path)[0]
    return morf_path


def name_for_module(filename, frame):
    """Get the name of the module for a filename and frame.

    For configurability's sake, we allow __main__ modules to be matched by
    their importable name.

    If loaded via runpy (aka -m), we can usually recover the "original"
    full dotted module name, otherwise, we resort to interpreting the
    file name to get the module's name. In the case that the module name
    can't be determined, None is returned.

    `frame` may be None, in which case only `filename` is used.

    """
    module_globals = frame.f_globals if frame is not None else {}
    if module_globals is None:      # pragma: only ironpython
        # IronPython doesn't provide globals: https://github.com/IronLanguages/main/issues/1296
        module_globals = {}

    dunder_name = module_globals.get('__name__', None)

    if isinstance(dunder_name, str) and dunder_name != '__main__':
        # This is the usual case: an imported module.
        return dunder_name

    loader = module_globals.get('__loader__', None)
    for attrname in ('fullname', 'name'):   # attribute renamed in py3.2
        if not hasattr(loader, attrname):
            continue
        fullname = getattr(loader, attrname)

        if isinstance(fullname, str) and fullname != '__main__':
            # Module loaded via: runpy -m
            return fullname

    # Script as first argument to Python command line.
    inspectedname = inspect.getmodulename(filename)
    if inspectedname is not None:
        return inspectedname
    return dunder_name


def module_is_namespace(mod):
    """Is the module object `mod` a PEP420 namespace module?"""
    return hasattr(mod, '__path__') and getattr(mod, '__file__', None) is None


def module_has_file(mod):
    """Does the module object `mod` have an existing __file__ ?"""
    mod__file__ = getattr(mod, '__file__', None)
    if mod__file__ is None:
        return False
    return os.path.exists(mod__file__)


class InOrOut(object):
    """Machinery for determining what files to measure."""

    def __init__(self, warn, debug):
        """Create an unconfigured decider.

        `warn` is called with a message (and optionally a `slug` keyword) to
        report warnings. `debug` is either falsy, or an object with a `write`
        method that receives debug messages.

        """
        self.warn = warn
        self.debug = debug

        # The matchers for should_trace.
        self.source_match = None
        self.source_pkgs_match = None
        self.pylib_paths = self.cover_paths = None
        self.pylib_match = self.cover_match = None
        self.include_match = self.omit_match = None
        # Replaced by the owner with an object that has a `file_tracers`
        # attribute. Until then, `_file_tracers` treats it as "no plugins".
        self.plugins = []
        self.disp_class = FileDisposition

        # The source argument can be directories or package names.
        self.source = []
        self.source_pkgs = []
        self.source_pkgs_unmatched = []
        self.omit = self.include = None

    def _file_tracers(self):
        """Return the file-tracer plugins, or an empty tuple if there are none.

        `self.plugins` starts out as a plain list, which has no
        `file_tracers` attribute; treat that the same as having no plugins
        rather than failing with AttributeError.

        """
        return getattr(self.plugins, "file_tracers", ())

    def configure(self, config):
        """Apply the configuration to get ready for decision-time.

        Reads `source_pkgs`, `source`, `run_omit`, `run_include` and
        `cover_pylib` from `config`, and builds the matchers used by
        `should_trace`.

        """
        self.source_pkgs.extend(config.source_pkgs)
        for src in config.source or []:
            if os.path.isdir(src):
                self.source.append(canonical_filename(src))
            else:
                self.source_pkgs.append(src)
        self.source_pkgs_unmatched = self.source_pkgs[:]

        self.omit = prep_patterns(config.run_omit)
        self.include = prep_patterns(config.run_include)

        # The directories for files considered "installed with the interpreter".
        self.pylib_paths = set()
        if not config.cover_pylib:
            # Look at where some standard modules are located. That's the
            # indication for "installed with the interpreter". In some
            # environments (virtualenv, for example), these modules may be
            # spread across a few locations. Look at all the candidate modules
            # we've imported, and take all the different ones.
            for m in (atexit, inspect, os, platform, _pypy_irc_topic, re, _structseq, traceback):
                if m is not None and hasattr(m, "__file__"):
                    self.pylib_paths.add(canonical_path(m, directory=True))

            if _structseq and not hasattr(_structseq, '__file__'):
                # PyPy 2.4 has no __file__ in the builtin modules, but the code
                # objects still have the file names. So dig into one to find
                # the path to exclude. The "filename" might be synthetic,
                # don't be fooled by those.
                structseq_file = code_object(_structseq.structseq_new).co_filename
                if not structseq_file.startswith("<"):
                    self.pylib_paths.add(canonical_path(structseq_file))

        # To avoid tracing the coverage.py code itself, we skip anything
        # located where we are.
        self.cover_paths = [canonical_path(__file__, directory=True)]
        if env.TESTING:
            # Don't include our own test code.
            self.cover_paths.append(os.path.join(self.cover_paths[0], "tests"))

            # When testing, we use PyContracts, which should be considered
            # part of coverage.py, and it uses six. Exclude those directories
            # just as we exclude ourselves.
            import contracts
            import six
            for mod in [contracts, six]:
                self.cover_paths.append(canonical_path(mod))

        def debug(msg):
            """Write `msg` to the debug output, if there is one."""
            if self.debug:
                self.debug.write(msg)

        # Create the matchers we need for should_trace
        if self.source or self.source_pkgs:
            against = []
            if self.source:
                self.source_match = TreeMatcher(self.source)
                against.append("trees {!r}".format(self.source_match))
            if self.source_pkgs:
                self.source_pkgs_match = ModuleMatcher(self.source_pkgs)
                against.append("modules {!r}".format(self.source_pkgs_match))
            debug("Source matching against " + " and ".join(against))
        else:
            if self.cover_paths:
                self.cover_match = TreeMatcher(self.cover_paths)
                debug("Coverage code matching: {!r}".format(self.cover_match))
            if self.pylib_paths:
                self.pylib_match = TreeMatcher(self.pylib_paths)
                debug("Python stdlib matching: {!r}".format(self.pylib_match))
        if self.include:
            self.include_match = FnmatchMatcher(self.include)
            debug("Include matching: {!r}".format(self.include_match))
        if self.omit:
            self.omit_match = FnmatchMatcher(self.omit)
            debug("Omit matching: {!r}".format(self.omit_match))

    def should_trace(self, filename, frame=None):
        """Decide whether to trace execution in `filename`, with a reason.

        This function is called from the trace function. As each new file name
        is encountered, this function determines whether it is traced or not.

        `frame`, if provided, is the frame executing the file; its globals are
        consulted for `__file__` and for the module name.

        Returns a FileDisposition object.

        Raises CoverageException if a plugin claims the file but provides
        neither a dynamic nor a static source file name.

        """
        original_filename = filename
        disp = disposition_init(self.disp_class, filename)

        def nope(disp, reason):
            """Simple helper to make it easy to return NO."""
            disp.trace = False
            disp.reason = reason
            return disp

        if frame is not None:
            # Compiled Python files have two file names: frame.f_code.co_filename is
            # the file name at the time the .pyc was compiled. The second name is
            # __file__, which is where the .pyc was actually loaded from. Since
            # .pyc files can be moved after compilation (for example, by being
            # installed), we look for __file__ in the frame and prefer it to the
            # co_filename value.
            dunder_file = frame.f_globals and frame.f_globals.get('__file__')
            if dunder_file:
                filename = source_for_file(dunder_file)
                if original_filename and not original_filename.startswith('<'):
                    orig = os.path.basename(original_filename)
                    if orig != os.path.basename(filename):
                        # Files shouldn't be renamed when moved. This happens when
                        # exec'ing code. If it seems like something is wrong with
                        # the frame's file name, then just use the original.
                        filename = original_filename

        if not filename:
            # Empty string is pretty useless.
            return nope(disp, "empty string isn't a file name")

        if filename.startswith('memory:'):
            return nope(disp, "memory isn't traceable")

        if filename.startswith('<'):
            # Lots of non-file execution is represented with artificial
            # file names like "<string>", "<doctest readme.txt[0]>", or
            # "<exec_function>". Don't ever trace these executions, since we
            # can't do anything with the data later anyway.
            return nope(disp, "not a real file name")

        # pyexpat does a dumb thing, calling the trace function explicitly from
        # C code with a C file name. The dot is escaped so that only a literal
        # "pyexpat.c" matches.
        if re.search(r"[/\\]Modules[/\\]pyexpat\.c", filename):
            return nope(disp, "pyexpat lies about itself")

        # Jython reports the .class file to the tracer, use the source file.
        if filename.endswith("$py.class"):
            filename = filename[:-9] + ".py"

        canonical = canonical_filename(filename)
        disp.canonical_filename = canonical

        # Try the plugins, see if they have an opinion about the file.
        plugin = None
        for plugin in self._file_tracers():
            if not plugin._coverage_enabled:
                continue

            try:
                file_tracer = plugin.file_tracer(canonical)
                if file_tracer is not None:
                    file_tracer._coverage_plugin = plugin
                    disp.trace = True
                    disp.file_tracer = file_tracer
                    if file_tracer.has_dynamic_source_filename():
                        disp.has_dynamic_filename = True
                    else:
                        disp.source_filename = canonical_filename(
                            file_tracer.source_filename()
                        )
                    break
            except Exception:
                # A misbehaving plugin must not break measurement: report it,
                # disable it, and move on to the next one.
                self.warn(
                    "Disabling plug-in %r due to an exception:" % (plugin._coverage_plugin_name)
                )
                traceback.print_exc()
                plugin._coverage_enabled = False
                continue
        else:
            # No plugin wanted it: it's Python.
            disp.trace = True
            disp.source_filename = canonical

        if not disp.has_dynamic_filename:
            if not disp.source_filename:
                raise CoverageException(
                    "Plugin %r didn't set source_filename for %r" %
                    (plugin, disp.original_filename)
                )
            reason = self.check_include_omit_etc(disp.source_filename, frame)
            if reason:
                nope(disp, reason)

        return disp

    def check_include_omit_etc(self, filename, frame):
        """Check a file name against the include, omit, etc, rules.

        Returns a string or None. String means, don't trace, and is the reason
        why. None means no reason found to not trace.

        """
        modulename = name_for_module(filename, frame)

        # If the user specified source or include, then that's authoritative
        # about the outer bound of what to measure and we don't have to apply
        # any canned exclusions. If they didn't, then we have to exclude the
        # stdlib and coverage.py directories.
        if self.source_match or self.source_pkgs_match:
            extra = ""
            ok = False
            if self.source_pkgs_match:
                if self.source_pkgs_match.match(modulename):
                    ok = True
                    # This package has now been seen, so it no longer needs
                    # an "unimported source" warning.
                    if modulename in self.source_pkgs_unmatched:
                        self.source_pkgs_unmatched.remove(modulename)
                else:
                    extra = "module {!r} ".format(modulename)
            if not ok and self.source_match:
                if self.source_match.match(filename):
                    ok = True
            if not ok:
                return extra + "falls outside the --source spec"
        elif self.include_match:
            if not self.include_match.match(filename):
                return "falls outside the --include trees"
        else:
            # If we aren't supposed to trace installed code, then check if this
            # is near the Python standard library and skip it if so.
            if self.pylib_match and self.pylib_match.match(filename):
                return "is in the stdlib"

            # We exclude the coverage.py code itself, since a little of it
            # will be measured otherwise.
            if self.cover_match and self.cover_match.match(filename):
                return "is part of coverage.py"

        # Check the file against the omit pattern.
        if self.omit_match and self.omit_match.match(filename):
            return "is inside an --omit pattern"

        # No point tracing a file we can't later write to SQLite.
        try:
            filename.encode("utf8")
        except UnicodeEncodeError:
            return "non-encodable filename"

        # No reason found to skip this file.
        return None

    def warn_conflicting_settings(self):
        """Warn if there are settings that conflict."""
        if self.include:
            if self.source or self.source_pkgs:
                self.warn("--include is ignored because --source is set", slug="include-ignored")

    def warn_already_imported_files(self):
        """Warn if files have already been imported that we will be measuring."""
        if self.include or self.source or self.source_pkgs:
            warned = set()
            # Iterate over a copy: sys.modules can change while we look at it.
            for mod in list(sys.modules.values()):
                filename = getattr(mod, "__file__", None)
                if filename is None:
                    continue
                if filename in warned:
                    continue

                disp = self.should_trace(filename)
                if disp.trace:
                    msg = "Already imported a file that will be measured: {}".format(filename)
                    self.warn(msg, slug="already-imported")
                    warned.add(filename)

    def warn_unimported_source(self):
        """Warn about source packages that were of interest, but never traced."""
        for pkg in self.source_pkgs_unmatched:
            self._warn_about_unmeasured_code(pkg)

    def _warn_about_unmeasured_code(self, pkg):
        """Warn about a package or module that we never traced.

        `pkg` is a string, the name of the package or module.

        """
        mod = sys.modules.get(pkg)
        if mod is None:
            self.warn("Module %s was never imported." % pkg, slug="module-not-imported")
            return

        if module_is_namespace(mod):
            # A namespace package. It's OK for this not to have been traced,
            # since there is no code directly in it.
            return

        if not module_has_file(mod):
            self.warn("Module %s has no Python source." % pkg, slug="module-not-python")
            return

        # The module was in sys.modules, and seems like a module with code, but
        # we never measured it. I guess that means it was imported before
        # coverage even started.
        self.warn(
            "Module %s was previously imported, but not measured" % pkg,
            slug="module-not-measured",
        )

    def find_possibly_unexecuted_files(self):
        """Find files in the areas of interest that might be untraced.

        Yields pairs: file path, and responsible plug-in name.
        """
        for pkg in self.source_pkgs:
            if pkg not in sys.modules or not module_has_file(sys.modules[pkg]):
                continue
            pkg_file = source_for_file(sys.modules[pkg].__file__)
            for ret in self._find_executable_files(canonical_path(pkg_file)):
                yield ret

        for src in self.source:
            for ret in self._find_executable_files(src):
                yield ret

    def _find_plugin_files(self, src_dir):
        """Get executable files from the plugins.

        Yields pairs: file path, and the name of the plugin that found it.
        """
        for plugin in self._file_tracers():
            for x_file in plugin.find_executable_files(src_dir):
                yield x_file, plugin._coverage_plugin_name

    def _find_executable_files(self, src_dir):
        """Find executable files in `src_dir`.

        Search for files in `src_dir` that can be executed because they
        are probably importable. Don't include ones that have been omitted
        by the configuration.

        Yield the file path, and the plugin name that handles the file.

        """
        py_files = ((py_file, None) for py_file in find_python_files(src_dir))
        plugin_files = self._find_plugin_files(src_dir)

        for file_path, plugin_name in itertools.chain(py_files, plugin_files):
            file_path = canonical_filename(file_path)
            if self.omit_match and self.omit_match.match(file_path):
                # Turns out this file was omitted, so don't pull it back
                # in as unexecuted.
                continue
            yield file_path, plugin_name

    def sys_info(self):
        """Our information for Coverage.sys_info.

        Returns a list of (key, value) pairs.
        """
        info = [
            ('cover_paths', self.cover_paths),
            ('pylib_paths', self.pylib_paths),
        ]

        matcher_names = [
            'source_match', 'source_pkgs_match',
            'include_match', 'omit_match',
            'cover_match', 'pylib_match',
        ]

        for matcher_name in matcher_names:
            matcher = getattr(self, matcher_name)
            if matcher:
                matcher_info = matcher.info()
            else:
                matcher_info = '-none-'
            info.append((matcher_name, matcher_info))

        return info