# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Raw data collector for coverage.py."""

import os
import sys

from coverage import env
from coverage.backward import litems, range     # pylint: disable=redefined-builtin
from coverage.debug import short_stack
from coverage.disposition import FileDisposition
from coverage.misc import CoverageException, isolate_module
from coverage.pytracer import PyTracer

os = isolate_module(os)


try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code.  If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors.  I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    CTracer = None


class Collector(object):
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information.  Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors.  Collectors are added here when started,
    # and popped when stopped.  Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    _collectors = []

    # The concurrency settings we support here.
    SUPPORTED_CONCURRENCIES = set(["greenlet", "eventlet", "gevent", "thread"])

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition object`.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.

        `file_mapper` is a function taking a filename, and returning a Unicode
        filename.  The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured.  This involves
        collecting data on which statements followed each other (arcs).  Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use.  Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default).  Of these four values, only one can be supplied.  Other
        values are ignored.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.warn = warn
        self.branch = branch
        self.threading = None
        self.covdata = None

        self.static_context = None

        self.origin = short_stack()

        self.concur_id_func = None
        self.mapped_file_cache = {}

        # We can handle a few concurrency options here, but only one at a time.
        these_concurrencies = self.SUPPORTED_CONCURRENCIES.intersection(concurrency)
        if len(these_concurrencies) > 1:
            raise CoverageException("Conflicting concurrency settings: %s" % concurrency)
        self.concurrency = these_concurrencies.pop() if these_concurrencies else ''

        try:
            if self.concurrency == "greenlet":
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif self.concurrency == "eventlet":
                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif self.concurrency == "gevent":
                import gevent                   # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent
            elif self.concurrency == "thread" or not self.concurrency:
                # It's important to import threading only if we need it.  If
                # it's imported early, and the program being measured uses
                # gevent, then gevent's monkey-patching won't work properly.
                import threading
                self.threading = threading
            else:
                raise CoverageException("Don't understand concurrency=%s" % concurrency)
        except ImportError:
            raise CoverageException(
                "Couldn't trace with concurrency=%s, the module isn't installed." % (
                    self.concurrency,
                )
            )

        self.reset()

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False

    def __repr__(self):
        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())

    def use_data(self, covdata, context):
        """Use `covdata` for recording data."""
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to use self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None.  When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace.  Its .trace attribute indicates whether the
        # file should be traced or not.  If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__                     # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes.  Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers."""
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        elif self.concur_id_func:
            raise CoverageException(
                "Can't support concurrency=%s with %s, only threads are supported" % (
                    self.concurrency, self.tracer_name(),
                )
            )

        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins.  Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function.  So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information."""
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for args in traces0:
            (frame, event, arg), lineno = args
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError:
                raise Exception("fullcoverage must be run with the C trace function.")

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information."""
        assert self._collectors
        if self._collectors[-1] is not self:
            print("self._collectors:")
            for c in self._collectors:
                print("  {!r}\n{}".format(c, c.origin))
        assert self._collectors[-1] is self, (
            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`."""
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in sorted(stats.keys()):
                    print("%20s: %s" % (k, stats[k]))
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Switch to a new dynamic context."""
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
        self.covdata.set_context(context)

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))

    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper."""
        # The call to litems() ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads.  We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):
            try:
                items = litems(d)
            except RuntimeError as ex:
                runtime_err = ex
            else:
                break
        else:
            raise runtime_err

        return dict((self.cached_mapped_file(k), v) for k, v in items if v)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            self.covdata.add_arcs(self.mapped_file_dict(self.data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))
        self.covdata.add_file_tracers(self.mapped_file_dict(self.file_tracers))

        self._clear_data()
        return True