1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4"""Raw data collector for coverage.py."""
5
6import os
7import sys
8
9from coverage import env
10from coverage.backward import litems, range     # pylint: disable=redefined-builtin
11from coverage.debug import short_stack
12from coverage.disposition import FileDisposition
13from coverage.misc import CoverageException, isolate_module
14from coverage.pytracer import PyTracer
15
# Rebind `os` to whatever `isolate_module` returns for it (presumably an
# isolated copy of the module -- see coverage.misc to confirm).
os = isolate_module(os)


try:
    # Prefer the C extension implementation, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # The C extension is not importable; maybe it was never built.
    if os.getenv('COVERAGE_TEST_TRACER') != 'c':
        # Normal case: remember that there is no C tracer, so the Python
        # tracer gets used instead.
        CTracer = None
    else:
        # The test suite sets COVERAGE_TEST_TRACER to say which tracer it
        # expects to be exercising.  If it expected the C tracer and the
        # import failed, stop right away with one clear message rather than
        # letting confusing errors trickle out later.  sys.exit is used
        # instead of raising an exception because an exception here causes
        # all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
34
35
class Collector(object):
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information.  Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors.  Collectors are added here when started,
    # and popped when stopped.  Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.  This list is
    # deliberately a class attribute: it is shared by every Collector.
    _collectors = []

    # The concurrency settings we support here.
    SUPPORTED_CONCURRENCIES = {"greenlet", "eventlet", "gevent", "thread"}

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition` object.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.

        `file_mapper` is a function taking a filename, and returning a Unicode
        filename.  The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured.  This involves
        collecting data on which statements followed each other (arcs).

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use.  Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default).  Of these four values, only one can be supplied.  Other
        values are ignored.

        Raises `CoverageException` if more than one supported concurrency
        value is given, or if the module for the chosen concurrency library
        can't be imported.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.warn = warn
        self.branch = branch
        self.threading = None
        self.covdata = None

        self.static_context = None

        # Where this collector was created; printed by `stop` when the
        # collector stack is not in the expected state.
        self.origin = short_stack()

        self.concur_id_func = None
        # Cache for `cached_mapped_file`: (type, filename) -> mapped name.
        self.mapped_file_cache = {}

        # We can handle a few concurrency options here, but only one at a time.
        these_concurrencies = self.SUPPORTED_CONCURRENCIES.intersection(concurrency)
        if len(these_concurrencies) > 1:
            # Wrap in a 1-tuple so that a tuple-valued `concurrency` is shown
            # whole, instead of being taken as the list of % arguments.
            raise CoverageException(
                "Conflicting concurrency settings: %s" % (concurrency,)
            )
        self.concurrency = these_concurrencies.pop() if these_concurrencies else ''

        try:
            if self.concurrency == "greenlet":
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif self.concurrency == "eventlet":
                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif self.concurrency == "gevent":
                import gevent                   # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent
            elif self.concurrency == "thread" or not self.concurrency:
                # It's important to import threading only if we need it.  If
                # it's imported early, and the program being measured uses
                # gevent, then gevent's monkey-patching won't work properly.
                import threading
                self.threading = threading
            else:
                # Defensive: only reachable if SUPPORTED_CONCURRENCIES names
                # a value that has no branch above.
                raise CoverageException(
                    "Don't understand concurrency=%s" % (concurrency,)
                )
        except ImportError:
            raise CoverageException(
                "Couldn't trace with concurrency=%s, the module isn't installed." % (
                    self.concurrency,
                )
            )

        self.reset()

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False

    def __repr__(self):
        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())

    def use_data(self, covdata, context):
        """Use `covdata` for recording data.

        `context` is remembered as the static context, and is immediately
        set as the current context on `covdata`.

        """
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to use self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        # Names of plugins recorded through `plugin_was_disabled`.
        self.disabled_plugins = set()

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None.  When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace.  Its .trace attribute indicates whether the
        # file should be traced or not.  If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__                     # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes.  Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers.

        Returns whatever the tracer's `start` method returns; callers here
        use it as the trace function.

        Raises `CoverageException` if a concurrency id function is needed but
        the tracer class has no `concur_id_func` attribute.

        """
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        elif self.concur_id_func:
            raise CoverageException(
                "Can't support concurrency=%s with %s, only threads are supported" % (
                    self.concurrency, self.tracer_name(),
                )
            )

        # The remaining attributes are optional: only set the ones this
        # tracer class declares.
        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context
        if hasattr(tracer, 'disable_plugin'):
            tracer.disable_plugin = self.disable_plugin

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins.  Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function.  So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information.

        Pauses the collector currently on top of the stack (if any), starts a
        tracer for the current thread, pushes this collector onto the stack,
        replays any events stashed by a previously installed trace function,
        and finally installs the per-thread installation tracer when
        threading is in use.

        """
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            # Deliberately catch everything: whatever went wrong, the
            # collector we paused above must be resumed before the error is
            # re-raised unchanged.
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for args in traces0:
            (frame, event, arg), lineno = args
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError:
                raise Exception("fullcoverage must be run with the C trace function.")

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information.

        This collector must be the top of the collector stack.  It is paused
        and popped, and the collector underneath (if any) is resumed.

        """
        assert self._collectors
        if self._collectors[-1] is not self:
            # Dump the whole stack, with each collector's origin, to help
            # diagnose the assertion failure that follows.
            print("self._collectors:")
            for c in self._collectors:
                print("  {!r}\n{}".format(c, c.origin))
        assert self._collectors[-1] is self, (
            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`.

        Every tracer is stopped.  If a tracer reports stats, they are printed.
        When threading is in use, the threading trace function is removed.

        """
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in sorted(stats.keys()):
                    print("%20s: %s" % (k, stats[k]))
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`.

        Every existing tracer is started again.  When threading is in use the
        installation tracer is re-installed; otherwise a new tracer is
        started with `_start_tracer`.

        """
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Switch to a new dynamic context.

        Pending data is flushed first.  The context set on the data object is
        the static context joined to `new_context` with "|" when both are
        present, else whichever of the two is present.

        """
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
        self.covdata.set_context(context)

    def disable_plugin(self, disposition):
        """Disable the plugin mentioned in `disposition`.

        A warning is issued, the plugin is marked as not enabled, and the
        disposition is marked as not traced.

        """
        file_tracer = disposition.file_tracer
        plugin = file_tracer._coverage_plugin
        plugin_name = plugin._coverage_plugin_name
        self.warn("Disabling plug-in {!r} due to previous exception".format(plugin_name))
        plugin._coverage_enabled = False
        disposition.trace = False

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        # The type is part of the key, so values that compare equal but have
        # different types get separate cache entries.
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))

    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper.

        Entries with a false value are left out.  Raises `RuntimeError` if a
        copy of `d`'s items could not be taken in three attempts.

        """
        # The call to litems() ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads. We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):
            try:
                items = litems(d)
            except RuntimeError as ex:
                runtime_err = ex
            else:
                break
        else:
            # All three attempts failed: re-raise the last error seen.
            raise runtime_err

        return {self.cached_mapped_file(k): v for k, v in items if v}

    def plugin_was_disabled(self, plugin):
        """Record that `plugin` was disabled during the run."""
        self.disabled_plugins.add(plugin._coverage_plugin_name)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            self.covdata.add_arcs(self.mapped_file_dict(self.data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))

        # Leave out files whose tracer plugin was recorded as disabled.
        file_tracers = {
            k: v for k, v in self.file_tracers.items()
            if v not in self.disabled_plugins
        }
        self.covdata.add_file_tracers(self.mapped_file_dict(file_tracers))

        self._clear_data()
        return True
452