1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4"""Raw data collector for coverage.py."""
5
6import os
7import sys
8
9from coverage import env
10from coverage.backward import litems, range     # pylint: disable=redefined-builtin
11from coverage.debug import short_stack
12from coverage.disposition import FileDisposition
13from coverage.misc import CoverageException, isolate_module
14from coverage.pytracer import PyTracer
15
# Protect `os` from monkey-patching by the program under measurement.
os = isolate_module(os)


try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code.  If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    # Signal to Collector.__init__ that only the Python tracer is available.
    # CFileDisposition is deliberately left undefined: it is only referenced
    # when CTracer was imported successfully.
    CTracer = None
34
35
class Collector(object):
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information.  Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors.  Collectors are added here when started,
    # and popped when stopped.  Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    # NOTE: this is a class attribute, shared by all Collector instances.
    _collectors = []

    # The concurrency settings we support here.
    SUPPORTED_CONCURRENCIES = set(["greenlet", "eventlet", "gevent", "thread"])

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition object`.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.

        `file_mapper` is a function taking a filename, and returning a Unicode
        filename.  The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured.  This involves
        collecting data on which statements followed each other (arcs).  Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use.  Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default).  Of these four values, only one can be supplied.  Other
        values are ignored.

        Raises CoverageException if the concurrency settings conflict, or if
        the requested concurrency module can't be imported.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.warn = warn
        self.branch = branch
        # Set lazily below: the threading module, only imported when needed.
        self.threading = None
        # The CoverageData object to record into; set by `use_data`.
        self.covdata = None

        # The fixed context string supplied via `use_data`, combined with
        # dynamic contexts in `switch_context`.
        self.static_context = None

        # Where this collector was created, for the debug output in `stop`.
        self.origin = short_stack()

        # Function returning an id for the current concurrency unit
        # (greenlet/greenthread), or None when using plain threads.
        self.concur_id_func = None
        self.mapped_file_cache = {}

        # We can handle a few concurrency options here, but only one at a time.
        these_concurrencies = self.SUPPORTED_CONCURRENCIES.intersection(concurrency)
        if len(these_concurrencies) > 1:
            raise CoverageException("Conflicting concurrency settings: %s" % concurrency)
        self.concurrency = these_concurrencies.pop() if these_concurrencies else ''

        try:
            if self.concurrency == "greenlet":
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif self.concurrency == "eventlet":
                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif self.concurrency == "gevent":
                import gevent                   # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent
            elif self.concurrency == "thread" or not self.concurrency:
                # It's important to import threading only if we need it.  If
                # it's imported early, and the program being measured uses
                # gevent, then gevent's monkey-patching won't work properly.
                import threading
                self.threading = threading
            else:
                # Unreachable in practice: the intersection with
                # SUPPORTED_CONCURRENCIES above only leaves known values.
                raise CoverageException("Don't understand concurrency=%s" % concurrency)
        except ImportError:
            raise CoverageException(
                "Couldn't trace with concurrency=%s, the module isn't installed." % (
                    self.concurrency,
                )
            )

        self.reset()

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False

    def __repr__(self):
        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())

    def use_data(self, covdata, context):
        """Use `covdata` for recording data."""
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to used self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None.  When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace.  Its .trace attribute indicates whether the
        # file should be traced or not.  If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__                     # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes.  Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers.

        Returns the trace function installed by the tracer, suitable for
        passing to sys.settrace (may be None for the C tracer).
        """
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        # The C and Python tracer classes support different optional features;
        # probe with hasattr and only configure what this tracer provides.
        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        elif self.concur_id_func:
            # A concurrency library was requested but this tracer can't
            # distinguish concurrency units: refuse rather than mis-measure.
            raise CoverageException(
                "Can't support concurrency=%s with %s, only threads are supported" % (
                    self.concurrency, self.tracer_name(),
                )
            )

        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins.  Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function.  So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information."""
        # Only one collector measures at a time; pause whoever was on top.
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            # Deliberately bare: whatever went wrong, restore the previous
            # collector before re-raising.
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for args in traces0:
            (frame, event, arg), lineno = args
            try:
                # Only the C trace function accepts the `lineno` keyword;
                # PyTracer raising TypeError here is how we detect the mismatch.
                fn(frame, event, arg, lineno=lineno)
            except TypeError:
                raise Exception("fullcoverage must be run with the C trace function.")

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information."""
        assert self._collectors
        if self._collectors[-1] is not self:
            # Debugging output for the assert below: show the whole stack and
            # where each collector was created.
            print("self._collectors:")
            for c in self._collectors:
                print("  {!r}\n{}".format(c, c.origin))
        assert self._collectors[-1] is self, (
            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`."""
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                # Stats are only collected when debugging is enabled;
                # dump them when pausing so they aren't lost.
                print("\nCoverage.py tracer stats:")
                for k in sorted(stats.keys()):
                    print("%20s: %s" % (k, stats[k]))
        if self.threading:
            # Stop jump-starting tracers on newly created threads.
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            # NOTE(review): with no threading module this starts an additional
            # tracer for the current thread rather than only restarting the
            # existing ones above -- presumably needed because without
            # threading.settrace there is no other way to reinstall the trace
            # function on this thread; confirm before changing.
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Switch to a new dynamic context."""
        # Save data gathered under the old context before changing it.
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
        self.covdata.set_context(context)

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        # The key includes the type so that a str and a unicode filename with
        # the same value don't collide in the cache.
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))

    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper.

        Entries with falsy values are dropped.
        """
        # The call to litems() ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads. We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):
            try:
                items = litems(d)
            except RuntimeError as ex:
                runtime_err = ex
            else:
                break
        else:
            # All three attempts raced with a mutation; give up and re-raise
            # the last RuntimeError.
            raise runtime_err

        return dict((self.cached_mapped_file(k), v) for k, v in items if v)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            self.covdata.add_arcs(self.mapped_file_dict(self.data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))
        self.covdata.add_file_tracers(self.mapped_file_dict(self.file_tracers))

        self._clear_data()
        return True
430