1"""Coverage data for Coverage."""
2
3import os
4
5from coverage.backward import iitems, pickle, sorted    # pylint: disable=W0622
6from coverage.files import PathAliases
7from coverage.misc import file_be_gone
8
9
class CoverageData(object):
    """Manages collected coverage data, including file storage.

    On disk, the data is a pickled dict with these keys:

        * collector: a string identifying the collecting software

        * lines: a dict mapping filenames to sorted lists of executed
          line numbers:
            { 'file1': [17,23,45],  'file2': [1,2,3], ... }

        * arcs: a dict mapping filenames to sorted lists of line number pairs:
            { 'file1': [(17,23), (17,25), (25,26)], ... }

    """

    def __init__(self, basename=None, collector=None, debug=None):
        """Create a CoverageData.

        `basename` is the name of the file to use for storing data.

        `collector` is a string describing the coverage measurement software.

        `debug` is a `DebugControl` object for writing debug messages.

        """
        self.collector = collector or 'unknown'
        self.debug = debug

        # Whether the data should be persisted to disk at all.
        self.use_file = True

        # The absolute path of the data file, used if we ever do any
        # file storage.
        self.filename = os.path.abspath(basename or ".coverage")

        # Maps a canonical Python source file name to a dict keyed by
        # every executed line number (values are always None):
        #
        #   {
        #       'filename1.py': { 12: None, 47: None, ... },
        #       ...
        #       }
        #
        self.lines = {}

        # Maps a canonical Python source file name to a dict keyed by
        # each executed (from_line, to_line) arc (values are always None):
        #
        #   {
        #       'filename1.py': { (12,14): None, (47,48): None, ... },
        #       ...
        #       }
        #
        self.arcs = {}

    def usefile(self, use_file=True):
        """Set whether or not to use a disk file for data."""
        self.use_file = use_file

    def read(self):
        """Read coverage data from the coverage data file (if it exists)."""
        if not self.use_file:
            self.lines, self.arcs = {}, {}
        else:
            self.lines, self.arcs = self._read_file(self.filename)

    def write(self, suffix=None):
        """Write the collected coverage data to a file.

        `suffix` is a suffix to append to the base file name. This can be used
        for multiple or parallel execution, so that many coverage data files
        can exist simultaneously.  A dot will be used to join the base name and
        the suffix.

        """
        if not self.use_file:
            return
        out_name = self.filename
        if suffix:
            out_name = out_name + "." + suffix
        self.write_file(out_name)

    def erase(self):
        """Erase the data, both in this object, and from its file storage."""
        if self.use_file and self.filename:
            file_be_gone(self.filename)
        self.lines = {}
        self.arcs = {}

    def line_data(self):
        """Return the map from filenames to lists of line numbers executed."""
        data = {}
        for fname, line_map in iitems(self.lines):
            data[fname] = sorted(line_map.keys())
        return data

    def arc_data(self):
        """Return the map from filenames to lists of line number pairs."""
        data = {}
        for fname, arc_map in iitems(self.arcs):
            data[fname] = sorted(arc_map.keys())
        return data

    def write_file(self, filename):
        """Write the coverage data to `filename` as a pickled dict."""

        # Assemble the dict to be pickled.  'arcs' and 'collector' are
        # only written when non-empty, matching the documented format.
        file_data = {}

        file_data['lines'] = self.line_data()
        arc_info = self.arc_data()
        if arc_info:
            file_data['arcs'] = arc_info

        if self.collector:
            file_data['collector'] = self.collector

        if self.debug and self.debug.should('dataio'):
            self.debug.write("Writing data to %r" % (filename,))

        # Pickle protocol 2, closed via try/finally for old-Python compat.
        pickle_file = open(filename, 'wb')
        try:
            pickle.dump(file_data, pickle_file, 2)
        finally:
            pickle_file.close()

    def read_file(self, filename):
        """Read the coverage data from `filename` into this object."""
        self.lines, self.arcs = self._read_file(filename)

    def raw_data(self, filename):
        """Return the raw pickled data from `filename`."""
        if self.debug and self.debug.should('dataio'):
            self.debug.write("Reading data from %r" % (filename,))
        pickle_file = open(filename, 'rb')
        try:
            data = pickle.load(pickle_file)
        finally:
            pickle_file.close()
        return data

    def _read_file(self, filename):
        """Return the stored coverage data from the given file.

        Returns two values, suitable for assigning to `self.lines` and
        `self.arcs`.

        """
        lines = {}
        arcs = {}
        # Best-effort: any problem reading or unpacking the file yields
        # empty data rather than an error.
        try:
            data = self.raw_data(filename)
            if isinstance(data, dict):
                # Re-key the 'lines' lists as line-number dicts.
                lines = dict(
                    [(fname, dict.fromkeys(nums, None))
                        for fname, nums in iitems(data.get('lines', {}))]
                    )
                # Re-key the 'arcs' lists as arc-pair dicts.
                arcs = dict(
                    [(fname, dict.fromkeys(pairs, None))
                        for fname, pairs in iitems(data.get('arcs', {}))]
                    )
        except Exception:
            pass
        return lines, arcs

    def combine_parallel_data(self, aliases=None):
        """Combine a number of data files together.

        Treat `self.filename` as a file prefix, and combine the data from all
        of the data files starting with that prefix plus a dot.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.

        """
        path_map = aliases or PathAliases()
        data_dir, base = os.path.split(self.filename)
        prefix = base + '.'
        for entry in os.listdir(data_dir or '.'):
            if not entry.startswith(prefix):
                continue
            full_path = os.path.join(data_dir, entry)
            new_lines, new_arcs = self._read_file(full_path)
            for fname, fdata in iitems(new_lines):
                self.lines.setdefault(path_map.map(fname), {}).update(fdata)
            for fname, fdata in iitems(new_arcs):
                self.arcs.setdefault(path_map.map(fname), {}).update(fdata)
            # Remove the combined file, but never the main data file itself.
            if entry != base:
                os.remove(full_path)

    def add_line_data(self, line_data):
        """Add executed line data.

        `line_data` is { filename: { lineno: None, ... }, ...}

        """
        for fname, linenos in iitems(line_data):
            self.lines.setdefault(fname, {}).update(linenos)

    def add_arc_data(self, arc_data):
        """Add measured arc data.

        `arc_data` is { filename: { (l1,l2): None, ... }, ...}

        """
        for fname, arc_map in iitems(arc_data):
            self.arcs.setdefault(fname, {}).update(arc_map)

    def touch_file(self, filename):
        """Ensure that `filename` appears in the data, empty if needed."""
        self.lines.setdefault(filename, {})

    def measured_files(self):
        """A list of all files that had been measured."""
        return list(self.lines.keys())

    def executed_lines(self, filename):
        """A map containing all the line numbers executed in `filename`.

        If `filename` hasn't been collected at all (because it wasn't executed)
        then return an empty map.

        """
        line_map = self.lines.get(filename)
        if line_map:
            return line_map
        return {}

    def executed_arcs(self, filename):
        """A map containing all the arcs executed in `filename`."""
        arc_map = self.arcs.get(filename)
        if arc_map:
            return arc_map
        return {}

    def add_to_hash(self, filename, hasher):
        """Contribute `filename`'s data to the Md5Hash `hasher`."""
        hasher.update(self.executed_lines(filename))
        hasher.update(self.executed_arcs(filename))

    def summary(self, fullpath=False):
        """Return a dict summarizing the coverage data.

        Keys are based on the filenames, and values are the number of executed
        lines.  If `fullpath` is true, then the keys are the full pathnames of
        the files, otherwise they are the basenames of the files.

        """
        summ = {}
        for fname, line_map in iitems(self.lines):
            if fullpath:
                key = fname
            else:
                key = os.path.basename(fname)
            summ[key] = len(line_map)
        return summ

    def has_arcs(self):
        """Does this data have arcs?"""
        return bool(self.arcs)
268
269
if __name__ == '__main__':
    # Ad-hoc: show the raw data in a data file.
    import pprint
    import sys
    covdata = CoverageData()
    if len(sys.argv) > 1:
        fname = sys.argv[1]
    else:
        fname = covdata.filename
    pprint.pprint(covdata.raw_data(fname))
279