1"""
2A Cython plugin for coverage.py
3
4Requires the coverage package at least in version 4.0 (which added the plugin API).
5"""
6
7from __future__ import absolute_import
8
9import re
10import os.path
11import sys
12from collections import defaultdict
13
14from coverage.plugin import CoveragePlugin, FileTracer, FileReporter  # requires coverage.py 4.0+
15from coverage.files import canonical_filename
16
17from .Utils import find_root_package_dir, is_package_dir, open_source_file
18
19
20from . import __version__
21
22
# File name suffixes that are probed when looking for a generated C/C++ file.
C_FILE_EXTENSIONS = ['.c', '.cpp', '.cc', '.cxx']
# Suffixes that are accepted directly as module related files:
# Python/Cython sources plus all of the C/C++ suffixes above.
MODULE_FILE_EXTENSIONS = set(C_FILE_EXTENSIONS).union(('.py', '.pyx', '.pxd'))
25
26
def _find_c_source(base_path):
    """
    Return the first existing file named "base_path + extension", trying the
    entries of C_FILE_EXTENSIONS in order, or None if no such file exists.
    """
    path_exists = os.path.exists
    candidates = (base_path + suffix for suffix in C_FILE_EXTENSIONS)
    return next((path for path in candidates if path_exists(path)), None)
34
35
def _find_dep_file_path(main_file, file_path, relative_path_search=False):
    """
    Resolve 'file_path' to a canonical file name.

    Lookup order:
    1) 'file_path' made absolute as it is,
    2) for '.pxi' files, or if 'relative_path_search' is true: 'file_path'
       relative to the directory of 'main_file',
    3) 'file_path' relative to each entry of sys.path.

    If none of the candidates exists, the canonical form of the absolute
    path from step 1 is returned.
    """
    path_exists = os.path.exists
    resolved = os.path.abspath(file_path)

    search_next_to_main = relative_path_search or file_path.endswith('.pxi')
    if search_next_to_main and not path_exists(resolved):
        # files are looked up relative to the main source file
        sibling = os.path.join(os.path.dirname(main_file), file_path)
        if path_exists(sibling):
            resolved = os.path.abspath(sibling)

    # search sys.path for external locations if a valid file hasn't been found
    if not path_exists(resolved):
        for search_dir in sys.path:
            candidate = os.path.realpath(os.path.join(search_dir, file_path))
            if path_exists(candidate):
                return canonical_filename(candidate)

    return canonical_filename(resolved)
51
52
class Plugin(CoveragePlugin):
    """
    coverage.py plugin that relates traced Cython modules to their sources.

    The plugin looks for the C/C++ file that Cython generated for a module,
    parses the source file references in its comments and passes the result
    on to CythonModuleTracer and CythonModuleReporter objects.  All lookup
    state is created lazily and kept on the plugin instance.
    """
    # map from traced file paths to absolute file paths
    _file_path_map = None
    # map from absolute source file paths to tuples of
    # (C file path, source path as written in the C file, {line number: code line} or None)
    _c_files_map = None
    # map from parsed C files to their content
    _parsed_c_files = None

    def sys_info(self):
        """
        Return the plugin information as a list of (name, value) pairs.
        """
        return [('Cython version', __version__)]

    def file_tracer(self, filename):
        """
        Try to find a C source file for a file path found by the tracer.

        Returns a CythonModuleTracer if a Cython generated C file could be
        found for 'filename', and None otherwise.
        """
        if filename.startswith('<') or filename.startswith('memory:'):
            return None
        c_file = py_file = None
        filename = canonical_filename(os.path.abspath(filename))
        if self._c_files_map and filename in self._c_files_map:
            c_file = self._c_files_map[filename][0]

        if c_file is None:
            c_file, py_file = self._find_source_files(filename)
            if not c_file:
                return None  # unknown file

            # parse all source file paths and lines from C file
            # to learn about all relevant source files right away (pyx/pxi/pxd)
            # FIXME: this might already be too late if the first executed line
            #        is not from the main .pyx file but a file with a different
            #        name than the .c file (which prevents us from finding the
            #        .c file)
            _, code = self._read_source_lines(c_file, filename)
            if code is None:
                return None  # no source found

        if self._file_path_map is None:
            self._file_path_map = {}
        return CythonModuleTracer(filename, py_file, c_file, self._c_files_map, self._file_path_map)

    def file_reporter(self, filename):
        """
        Return a CythonModuleReporter for 'filename', or None if no Cython
        generated C file or no parsed code lines are known for it.
        """
        # TODO: let coverage.py handle .py files itself
        #ext = os.path.splitext(filename)[1].lower()
        #if ext == '.py':
        #    from coverage.python import PythonFileReporter
        #    return PythonFileReporter(filename)

        filename = canonical_filename(os.path.abspath(filename))
        if self._c_files_map and filename in self._c_files_map:
            c_file, rel_file_path, code = self._c_files_map[filename]
        else:
            c_file, _ = self._find_source_files(filename)
            if not c_file:
                return None  # unknown file
            rel_file_path, code = self._read_source_lines(c_file, filename)
        if code is None:
            # Applies to both branches: cached entries can be placeholders
            # without code lines (CythonModuleTracer registers such entries),
            # and a reporter cannot work without them.
            return None  # no source found
        return CythonModuleReporter(c_file, filename, rel_file_path, code)

    def _find_source_files(self, filename):
        """
        Find the generated C file and the '.py' source file for a traced file.

        Returns a tuple (c_file, py_source_file).  Both are None if the file
        is none of our business or if the C file was not generated by Cython.
        'c_file' alone is None if no C file was found or if it could not be
        read.  'py_source_file' is None if there is no '.py' file next to
        the C file.
        """
        basename, ext = os.path.splitext(filename)
        ext = ext.lower()
        if ext in MODULE_FILE_EXTENSIONS:
            pass
        elif ext == '.pyd':
            # Windows extension module
            platform_suffix = re.search(r'[.]cp[0-9]+-win[_a-z0-9]*$', basename, re.I)
            if platform_suffix:
                basename = basename[:platform_suffix.start()]
        elif ext == '.so':
            # Linux/Unix/Mac extension module
            platform_suffix = re.search(r'[.](?:cpython|pypy)-[0-9]+[-_a-z0-9]*$', basename, re.I)
            if platform_suffix:
                basename = basename[:platform_suffix.start()]
        elif ext == '.pxi':
            # if we get here, it means that the first traced line of a Cython module was
            # not in the main module but in an include file, so try a little harder to
            # find the main source file
            self._find_c_source_files(os.path.dirname(filename), filename)
            # The map is only created once a C file has been parsed,
            # so it can still be None if the search did not find any.
            if self._c_files_map and filename in self._c_files_map:
                return self._c_files_map[filename][0], None
        else:
            # none of our business
            return None, None

        c_file = filename if ext in C_FILE_EXTENSIONS else _find_c_source(basename)
        if c_file is None:
            # a module "pkg/mod.so" can have a source file "pkg/pkg.mod.c"
            package_root = find_root_package_dir.uncached(filename)
            package_path = os.path.relpath(basename, package_root).split(os.path.sep)
            if len(package_path) > 1:
                test_basepath = os.path.join(os.path.dirname(filename), '.'.join(package_path))
                c_file = _find_c_source(test_basepath)

        py_source_file = None
        if c_file:
            py_source_file = os.path.splitext(c_file)[0] + '.py'
            if not os.path.exists(py_source_file):
                py_source_file = None

            try:
                with open(c_file, 'rb') as f:
                    # only the first 30 bytes are checked for the Cython marker
                    if b'/* Generated by Cython ' not in f.read(30):
                        return None, None  # not a Cython file
            except (IOError, OSError):
                c_file = None

        return c_file, py_source_file

    def _find_c_source_files(self, dir_path, source_file):
        """
        Desperately parse all C files in the directory or its package parents
        (not re-descending) to find the (included) source file in one of them.

        Nothing is returned.  The results of parsing end up in
        'self._c_files_map', where the caller can look up 'source_file'.
        """
        if not os.path.isdir(dir_path):
            return
        splitext = os.path.splitext
        for filename in os.listdir(dir_path):
            ext = splitext(filename)[1].lower()
            if ext in C_FILE_EXTENSIONS:
                self._read_source_lines(os.path.join(dir_path, filename), source_file)
                if source_file in self._c_files_map:
                    return
        # not found? then try one package up
        if is_package_dir(dir_path):
            self._find_c_source_files(os.path.dirname(dir_path), source_file)

    def _read_source_lines(self, c_file, sourcefile):
        """
        Parse a Cython generated C/C++ source file and find the executable lines.
        Each executable line starts with a comment header that states source file
        and line number, as well as the surrounding range of source code lines.

        Every source file found in 'c_file' is registered in 'self._c_files_map'.
        Returns the tuple (path as written in the C file, {line number: code line})
        that is registered for 'sourcefile', or (None, None) if there is none.
        """
        if self._parsed_c_files is None:
            self._parsed_c_files = {}
        if c_file in self._parsed_c_files:
            code_lines = self._parsed_c_files[c_file]
        else:
            code_lines = self._parse_cfile_lines(c_file)
            self._parsed_c_files[c_file] = code_lines

        if self._c_files_map is None:
            self._c_files_map = {}

        for filename, code in code_lines.items():
            abs_path = _find_dep_file_path(c_file, filename,
                                           relative_path_search=True)
            self._c_files_map[abs_path] = (c_file, filename, code)

        if sourcefile not in self._c_files_map:
            return (None,) * 2  # e.g. shared library file
        return self._c_files_map[sourcefile][1:]

    def _parse_cfile_lines(self, c_file):
        """
        Parse a C file and extract all source file lines that generated executable code.

        Returns a mapping {source file path: {line number: code line}} that only
        contains lines for which a "__Pyx_TraceLine()" call was found.
        """
        match_source_path_line = re.compile(r' */[*] +"(.*)":([0-9]+)$').match
        match_current_code_line = re.compile(r' *[*] (.*) # <<<<<<+$').match
        match_comment_end = re.compile(r' *[*]/$').match
        match_trace_line = re.compile(r' *__Pyx_TraceLine\(([0-9]+),').match
        # matches the header line of a cdef/ctypedef struct/union/enum/class declaration
        not_executable = re.compile(
            r'\s*c(?:type)?def\s+'
            r'(?:(?:public|external)\s+)?'
            r'(?:struct|union|enum|class)'
            r'(\s+[^:]+|)\s*:'
        ).match

        code_lines = defaultdict(dict)
        executable_lines = defaultdict(set)
        current_filename = None

        # NOTE(review): the file is opened with the platform default text encoding,
        # which might not match the encoding of the C file - confirm.
        with open(c_file) as lines:
            # The inner loop below consumes lines from the same iterator,
            # so the outer loop continues after the comment that it read.
            lines = iter(lines)
            for line in lines:
                match = match_source_path_line(line)
                if not match:
                    if '__Pyx_TraceLine(' in line and current_filename is not None:
                        trace_line = match_trace_line(line)
                        if trace_line:
                            executable_lines[current_filename].add(int(trace_line.group(1)))
                    continue
                filename, lineno = match.groups()
                current_filename = filename
                lineno = int(lineno)
                for comment_line in lines:
                    match = match_current_code_line(comment_line)
                    if match:
                        code_line = match.group(1).rstrip()
                        if not_executable(code_line):
                            break
                        code_lines[filename][lineno] = code_line
                        break
                    elif match_comment_end(comment_line):
                        # unexpected comment format - false positive?
                        break

        # Remove lines that generated code but are not traceable.
        for filename, lines in code_lines.items():
            dead_lines = set(lines).difference(executable_lines.get(filename, ()))
            for lineno in dead_lines:
                del lines[lineno]
        return code_lines
257
258
class CythonModuleTracer(FileTracer):
    """
    File tracer that resolves the Python/Cython source file of each frame
    that gets traced in a Cython module.
    """
    def __init__(self, module_file, py_file, c_file, c_files_map, file_path_map):
        super(CythonModuleTracer, self).__init__()
        self.module_file = module_file
        self.py_file = py_file
        self.c_file = c_file
        # both maps are passed in by the caller and updated in place
        self._c_files_map = c_files_map
        self._file_path_map = file_path_map

    def has_dynamic_source_filename(self):
        """
        Always true: the source file is determined separately for each frame.
        """
        return True

    def dynamic_source_filename(self, filename, frame):
        """
        Determine source file path.  Called by the function call tracer.

        The result is cached per code object file name.  For '.py' file names,
        the '.py' file of the module is returned if there is one.  Any other
        file gets resolved with _find_dep_file_path() and is registered in
        the C files map unless it is already known there.
        """
        traced_name = frame.f_code.co_filename
        known_paths = self._file_path_map
        if traced_name in known_paths:
            return known_paths[traced_name]

        resolved_path = _find_dep_file_path(filename, traced_name)

        if self.py_file and traced_name[-3:].lower() == '.py':
            # always let coverage.py handle this case itself
            known_paths[traced_name] = self.py_file
            return self.py_file

        assert self._c_files_map is not None
        # register a placeholder entry without code lines for unknown files
        self._c_files_map.setdefault(resolved_path, (self.c_file, traced_name, None))
        known_paths[traced_name] = resolved_path
        return resolved_path
295
296
class CythonModuleReporter(FileReporter):
    """
    Provide detailed trace information for one source file to coverage.py.

    'code' maps the line numbers of the executable source lines to their
    text, as it was extracted from the generated C file 'c_file'.
    """
    def __init__(self, c_file, source_file, rel_file_path, code):
        super(CythonModuleReporter, self).__init__(source_file)
        self.name = rel_file_path
        self.c_file = c_file
        self._code = code

    def lines(self):
        """
        Return set of line numbers that are possibly executable.
        """
        return set(self._code)

    def _iter_source_tokens(self):
        """
        Yield one token list per source line, starting at line 1 and ending
        at the last known code line.  Lines without known code yield an
        empty list, all others yield [('txt', code_line)].
        """
        current_line = 1
        for line_no, code_line in sorted(self._code.items()):
            while line_no > current_line:
                yield []
                current_line += 1
            yield [('txt', code_line)]
            current_line += 1

    def source(self):
        """
        Return the source code of the file as a string.

        If the source file does not exist, the text is rebuilt from the
        known code lines, with empty lines in place of the unknown ones.
        """
        if os.path.exists(self.filename):
            with open_source_file(self.filename) as f:
                return f.read()
        else:
            return '\n'.join(
                (tokens[0][1] if tokens else '')
                for tokens in self._iter_source_tokens())

    def source_token_lines(self):
        """
        Iterate over the source code tokens.

        Yields one list of (token type, text) pairs per source line.
        """
        if os.path.exists(self.filename):
            with open_source_file(self.filename) as f:
                for line in f:
                    yield [('txt', line.rstrip('\n'))]
        else:
            # _iter_source_tokens() already yields complete token lists,
            # so they must not be wrapped into another ('txt', ...) pair.
            for tokens in self._iter_source_tokens():
                yield tokens
345
346
def coverage_init(reg, options):
    """
    Register a new Plugin instance as file tracer with 'reg'.
    'options' is not used.
    """
    cython_plugin = Plugin()
    reg.add_file_tracer(cython_plugin)
349