1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4"""Raw data collector for coverage.py."""
5
6import os
7import sys
8
9from coverage import env
10from coverage.backward import litems, range     # pylint: disable=redefined-builtin
11from coverage.debug import short_stack
12from coverage.files import abs_file
13from coverage.misc import CoverageException, isolate_module
14from coverage.pytracer import PyTracer
15
16os = isolate_module(os)
17
18
# Pick the tracer implementation at import time: prefer the compiled C tracer,
# and fall back to "no C tracer" (CTracer = None) when it cannot be imported.
try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code.  If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    # No C tracer available.  Note that CFileDisposition is left undefined on
    # this path; Collector only reads it when its trace class is CTracer.
    CTracer = None
34
35
class FileDisposition(object):
    """A bare value object used to record what should be done with a file.

    It defines no attributes or behavior of its own; attributes are attached
    to instances by whoever creates them.
    """
39
40
def should_start_context(frame):
    """Who-Tests-What hack: name the who-context that `frame` begins, if any.

    Returns the name of the code object running in `frame` when that name
    starts with "test", otherwise None.
    """
    code_name = frame.f_code.co_name
    return code_name if code_name.startswith("test") else None
47
48
class Collector(object):
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information.  Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors.  Collectors are added here when started,
    # and popped when stopped.  Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    _collectors = []

    # The concurrency settings we support here.
    SUPPORTED_CONCURRENCIES = {"greenlet", "eventlet", "gevent", "thread"}

    def __init__(self, should_trace, check_include, timid, branch, warn, concurrency):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition object`.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        If `timid` is true, then a slower simpler trace function will be
        used.  This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured.  This involves
        collecting data on which statements followed each other (arcs).  Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use.  Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default).  Of these four values, only one can be supplied.  Other
        values are ignored.

        Raises `CoverageException` if more than one supported concurrency value
        is supplied, or if the library for the chosen one can't be imported.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.warn = warn
        self.branch = branch
        self.threading = None

        # Remember where this collector was created; `stop` prints it when the
        # collector stack is not in the expected state.
        self.origin = short_stack()

        self.concur_id_func = None

        # We can handle a few concurrency options here, but only one at a time.
        these_concurrencies = self.SUPPORTED_CONCURRENCIES.intersection(concurrency)
        if len(these_concurrencies) > 1:
            # Wrap in a 1-tuple so that a tuple-valued `concurrency` is
            # formatted as a single value instead of breaking the % operator.
            raise CoverageException(
                "Conflicting concurrency settings: %s" % (concurrency,)
            )
        self.concurrency = these_concurrencies.pop() if these_concurrencies else ''

        try:
            if self.concurrency == "greenlet":
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif self.concurrency == "eventlet":
                import eventlet.greenthread     # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif self.concurrency == "gevent":
                import gevent                   # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent
            elif self.concurrency == "thread" or not self.concurrency:
                # It's important to import threading only if we need it.  If
                # it's imported early, and the program being measured uses
                # gevent, then gevent's monkey-patching won't work properly.
                import threading
                self.threading = threading
            else:
                # Defensive: only reachable if SUPPORTED_CONCURRENCIES gains a
                # value that has no branch above.
                raise CoverageException(
                    "Don't understand concurrency=%s" % (concurrency,)
                )
        except ImportError:
            raise CoverageException(
                "Couldn't trace with concurrency=%s, the module isn't installed." % (
                    self.concurrency,
                )
            )

        # Who-Tests-What is just a hack at the moment, so turn it on with an
        # environment variable.
        self.wtw = int(os.getenv('COVERAGE_WTW', 0))

        self.reset()

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False

    def __repr__(self):
        """Show the collector's id and the name of its tracer class."""
        return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # Clear in place: tracers hold a reference to this same dict.
        self.data.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dict mapping contexts to data dictionaries.
        self.contexts = {}
        self.contexts[None] = self.data

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None.  When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace.  Its .trace attribute indicates whether the
        # file should be traced or not.  If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__                     # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes.  Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers.

        Returns whatever the tracer's `start` method returns.

        Raises `CoverageException` if a concurrency id function is configured
        but the tracer class has no `concur_id_func` attribute.

        """
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        elif self.concur_id_func:
            raise CoverageException(
                "Can't support concurrency=%s with %s, only threads are supported" % (
                    self.concurrency, self.tracer_name(),
                )
            )

        # The remaining settings are optional: only hand them to tracers that
        # declare the corresponding attribute.
        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if self.wtw:
            if hasattr(tracer, 'should_start_context'):
                tracer.should_start_context = should_start_context
            if hasattr(tracer, 'switch_context'):
                tracer.switch_context = self.switch_context

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins.  Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function.  So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information.

        Pauses the collector currently on top of the stack (if any), starts a
        tracer, and pushes this collector onto the stack.  If starting the
        tracer fails, the previously active collector is resumed and the
        error is re-raised.

        """
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except BaseException:
            # Whatever went wrong, put the previous collector back in charge
            # before letting the error propagate.
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for args in traces0:
            (frame, event, arg), lineno = args
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError:
                raise Exception("fullcoverage must be run with the C trace function.")

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information.

        This collector must be the one on top of the collector stack.  It is
        paused and popped, and the collector underneath (if any) is resumed.

        """
        assert self._collectors
        if self._collectors[-1] is not self:
            # Dump the whole stack, with where each collector was created, to
            # help debug the assertion failure that follows.
            print("self._collectors:")
            for c in self._collectors:
                print("  {!r}\n{}".format(c, c.origin))
        assert self._collectors[-1] is self, (
            "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`.

        Stops every tracer, printing any stats a tracer reports, and removes
        the thread-installation trace function if threading is in use.

        """
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in sorted(stats):
                    print("%20s: %s" % (k, stats[k]))
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            # NOTE(review): without threading, this starts an additional
            # tracer on top of the ones restarted above -- confirm intended.
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Who-Tests-What hack: switch to a new who-context."""
        # Make a new data dict, or find the existing one, and switch all the
        # tracers to use it.
        data = self.contexts.setdefault(new_context, {})
        for tracer in self.tracers:
            tracer.data = data

    def save_data(self, covdata):
        """Save the collected data to a `CoverageData`.

        Arcs are added if this collector measures branches, otherwise lines;
        file tracer names are added in both cases.  The collected data is
        cleared afterwards.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        def abs_file_dict(d):
            """Return a dict like d, but with keys modified by `abs_file`."""
            # The call to litems() ensures that the GIL protects the dictionary
            # iterator against concurrent modifications by tracers running
            # in other threads. We try three times in case of concurrent
            # access, hoping to get a clean copy.
            runtime_err = None
            for _ in range(3):
                try:
                    items = litems(d)
                except RuntimeError as ex:
                    runtime_err = ex
                else:
                    break
            else:
                # All three attempts failed: re-raise the last error seen.
                raise runtime_err       # pylint: disable=raising-bad-type

            return {abs_file(k): v for k, v in items}

        if self.branch:
            covdata.add_arcs(abs_file_dict(self.data))
        else:
            covdata.add_lines(abs_file_dict(self.data))
        covdata.add_file_tracers(abs_file_dict(self.file_tracers))

        if self.wtw:
            # Just a hack, so just hack it.
            import pprint
            out_file = "coverage_wtw_{:06}.py".format(os.getpid())
            with open(out_file, "w") as wtw_out:
                pprint.pprint(self.contexts, wtw_out)

        self._clear_data()
        return True
418