# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Raw data collector for coverage.py."""

import os
import sys

from coverage import env
from coverage.backward import litems, range     # pylint: disable=redefined-builtin
from coverage.debug import short_stack
from coverage.disposition import FileDisposition
from coverage.misc import CoverageException, isolate_module
from coverage.pytracer import PyTracer

os = isolate_module(os)


try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code.  If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    CTracer = None


class Collector(object):
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information.  Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors.  Collectors are added here when started,
    # and popped when stopped.  Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    _collectors = []

    # The concurrency settings we support here.
    SUPPORTED_CONCURRENCIES = {"greenlet", "eventlet", "gevent", "thread"}

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition object`.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.

        `file_mapper` is a function taking a filename, and returning a Unicode
        filename.  The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured.  This involves
        collecting data on which statements followed each other (arcs).  Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use.  Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default).  Of these four values, only one can be supplied.  Other
        values are ignored.

        Raises `CoverageException` if more than one supported concurrency
        value is supplied, or if the module for the chosen concurrency can't
        be imported.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.warn = warn
        self.branch = branch
        self.threading = None
        self.covdata = None

        self.static_context = None

        self.origin = short_stack()

        self.concur_id_func = None
        self.mapped_file_cache = {}

        # We can handle a few concurrency options here, but only one at a time.
        these_concurrencies = self.SUPPORTED_CONCURRENCIES.intersection(concurrency)
        if len(these_concurrencies) > 1:
            # Wrap in a 1-tuple so a tuple-valued `concurrency` is formatted
            # as a whole instead of being unpacked as the %-format arguments.
            raise CoverageException("Conflicting concurrency settings: %s" % (concurrency,))
        self.concurrency = these_concurrencies.pop() if these_concurrencies else ''

        try:
            if self.concurrency == "greenlet":
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif self.concurrency == "eventlet":
                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif self.concurrency == "gevent":
                import gevent                   # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent
            elif self.concurrency == "thread" or not self.concurrency:
                # It's important to import threading only if we need it.  If
                # it's imported early, and the program being measured uses
                # gevent, then gevent's monkey-patching won't work properly.
                import threading
                self.threading = threading
            else:
                # Same 1-tuple wrapping as above, for the same reason.
                raise CoverageException("Don't understand concurrency=%s" % (concurrency,))
        except ImportError:
            raise CoverageException(
                "Couldn't trace with concurrency=%s, the module isn't installed." % (
                    self.concurrency,
                )
            )

        self.reset()

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False

    def __repr__(self):
        """Show the collector's id and the name of its tracer class."""
        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())

    def use_data(self, covdata, context):
        """Use `covdata` for recording data.

        `context` is remembered as the static context, and is set as the
        current context on `covdata`.

        """
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to use self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        self.disabled_plugins = set()

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None.  When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace.  Its .trace attribute indicates whether the
        # file should be traced or not.  If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__                     # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes.  Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers.

        Returns whatever the tracer's `start` method returns.

        Raises `CoverageException` if a concurrency id function is needed but
        the tracer has no `concur_id_func` attribute.

        """
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        elif self.concur_id_func:
            raise CoverageException(
                "Can't support concurrency=%s with %s, only threads are supported" % (
                    self.concurrency, self.tracer_name(),
                )
            )

        # The remaining attributes are optional: only set the ones this
        # tracer class actually has.
        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context
        if hasattr(tracer, 'disable_plugin'):
            tracer.disable_plugin = self.disable_plugin

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins.  Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function.  So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information."""
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            # Deliberately catch everything: whatever went wrong, the
            # collector we just paused must be resumed before re-raising.
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for args in traces0:
            (frame, event, arg), lineno = args
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError:
                raise Exception("fullcoverage must be run with the C trace function.")

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information."""
        assert self._collectors
        if self._collectors[-1] is not self:
            # Dump the whole stack with each collector's origin before the
            # assert below fails, to help find the mismatched start/stop.
            print("self._collectors:")
            for c in self._collectors:
                print(" {!r}\n{}".format(c, c.origin))
        assert self._collectors[-1] is self, (
            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`."""
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in sorted(stats.keys()):
                    print("%20s: %s" % (k, stats[k]))
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            # Re-install the installation tracer that `pause` removed.
            self.threading.settrace(self._installation_trace)
        else:
            # No threading module in use: start a fresh tracer directly.
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Switch to a new dynamic context.

        Collected data is flushed first.  The context set on the data is the
        static context (if any) joined to `new_context` (if any) with "|".

        """
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
        self.covdata.set_context(context)

    def disable_plugin(self, disposition):
        """Disable the plugin mentioned in `disposition`."""
        file_tracer = disposition.file_tracer
        plugin = file_tracer._coverage_plugin
        plugin_name = plugin._coverage_plugin_name
        self.warn("Disabling plug-in {!r} due to previous exception".format(plugin_name))
        plugin._coverage_enabled = False
        disposition.trace = False

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        # The type is part of the key, so equal names of different types get
        # separate cache entries.
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))

    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper.

        Entries with false values are dropped.  Re-raises the last
        `RuntimeError` if three attempts to copy `d`'s items all fail.

        """
        # The call to litems() ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads. We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):
            try:
                items = litems(d)
            except RuntimeError as ex:
                runtime_err = ex
            else:
                break
        else:
            raise runtime_err

        return {self.cached_mapped_file(k): v for k, v in items if v}

    def plugin_was_disabled(self, plugin):
        """Record that `plugin` was disabled during the run."""
        self.disabled_plugins.add(plugin._coverage_plugin_name)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            self.covdata.add_arcs(self.mapped_file_dict(self.data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))

        # Leave out files handled by plugins that were disabled during the run.
        file_tracers = {
            k: v for k, v in self.file_tracers.items()
            if v not in self.disabled_plugins
        }
        self.covdata.add_file_tracers(self.mapped_file_dict(file_tracers))

        self._clear_data()
        return True