1"""A collection of functions to summarize object information.
2
This module provides several functions which will help you to analyze object
information which was gathered. Often it is sufficient to work with aggregated
data instead of handling the entire set of existing objects. For example, a
memory leak can be identified simply based on the number and size of existing
objects.
7
8A summary contains information about objects in a table-like manner.
9Technically, it is a list of lists. Each of these lists represents a row,
10whereas the first column reflects the object type, the second column the number
11of objects, and the third column the size of all these objects. This allows a
simple table-like output like the following:
13
14=============  ============  =============
15       types     # objects     total size
16=============  ============  =============
17<type 'dict'>             2            560
18 <type 'str'>             3            126
19 <type 'int'>             4             96
20<type 'long'>             2             66
21<type 'list'>             1             40
22=============  ============  =============
23
Another advantage of summaries is that they influence the system you analyze
only to a minimum. Working with references to existing objects will keep these
objects alive. Most of the time this is not the desired behavior (as it will
have an impact on the observations). Using summaries reduces this effect
greatly.
28
29output representation
30---------------------
31
32The output representation of types is defined in summary.representations.
33Every type defined in this dictionary will be represented as specified. Each
34definition has a list of different representations. The later a representation
appears in this list, the higher its verbosity level. For types which are not
defined in summary.representations, the default str() representation will be
used.
38
39Per default, summaries will use the verbosity level 1 for any encountered type.
40The reason is that several computations are done with summaries and rows have
to remain comparable. Therefore, information which reflects an object's state,
42e.g. the current line number of a frame, should not be included. You may add
43more detailed information at higher verbosity levels than 1.
44"""
45
46import re
47import sys
48import types
49
50from pympler.util import stringutils
51# default to asizeof if sys.getsizeof is not available (prior to Python 2.6)
52try:
53    from sys import getsizeof as _getsizeof
54except ImportError:
55    from pympler.asizeof import flatsize
56    _getsizeof = flatsize
57
# Registry mapping a type to a list of callables. Each callable takes an
# instance of that type and returns a string; callables later in the list
# are used for higher verbosity levels (see _repr).
representations = {}


def _init_representations():
    """Fill the module-level ``representations`` registry with defaults.

    Entries are registered for frames, dicts, functions, lists, modules and
    sets. On very old interpreters the old-style class related types are
    registered as well. The registry is mutated in place; nothing is
    returned.

    """
    # NOTE(review): this guard only matches interpreters older than 2.4,
    # although types.ClassType/InstanceType exist throughout Python 2.
    # Confirm whether `< 0x3000000` was intended before changing it.
    if sys.hexversion < 0x2040000:
        classobj = [
            lambda c: "classobj(%s)" % repr(c),
        ]
        representations[types.ClassType] = classobj
        instance = [
            lambda f: "instance(%s)" % repr(f.__class__),
        ]
        representations[types.InstanceType] = instance
        instancemethod = [
            lambda i: "instancemethod (%s)" % (repr(i.im_func)),
            lambda i: "instancemethod (%s, %s)" % (repr(i.im_class),
                                                   repr(i.im_func)),
        ]
        representations[types.MethodType] = instancemethod
    # Only co_firstlineno is used for frames (not the current line), so the
    # representation of a given frame does not change while it executes.
    frame = [
        lambda f: "frame (codename: %s)" % (f.f_code.co_name),
        lambda f: "frame (codename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_firstlineno),
        lambda f: "frame (codename: %s, filename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_filename,
                   f.f_code.co_firstlineno),
    ]
    representations[types.FrameType] = frame
    _dict = [
        lambda d: str(type(d)),
        lambda d: "dict, len=%s" % len(d),
    ]
    representations[dict] = _dict
    function = [
        lambda f: "function (%s)" % f.__name__,
        # the attribute is __module__; plain __module does not exist and
        # raised AttributeError whenever this level was requested
        lambda f: "function (%s.%s)" % (f.__module__, f.__name__),
    ]
    representations[types.FunctionType] = function
    _list = [
        lambda lst: str(type(lst)),
        lambda lst: "list, len=%s" % len(lst),
    ]
    representations[list] = _list
    module = [lambda m: "module(%s)" % m.__name__]
    representations[types.ModuleType] = module
    _set = [
        lambda s: str(type(s)),
        lambda s: "set, len=%s" % len(s),
    ]
    representations[set] = _set

_init_representations()
111
112
def summarize(objects):
    """Build a summary for an iterable of objects.

    Objects are grouped by the string returned by _repr. The result is a
    list of rows, one per group, each of the form::
      [representation, number of objects, accumulated _getsizeof of them].

    The order of the rows is not guaranteed.

    """
    tallies = {}
    for obj in objects:
        label = _repr(obj)
        size = _getsizeof(obj)
        entry = tallies.get(label)
        if entry is None:
            # first object seen for this representation
            tallies[label] = [label, 1, size]
        else:
            entry[1] += 1
            entry[2] += size
    return list(tallies.values())
136
137
def get_diff(left, right):
    """Get the difference of two summaries.

    Subtracts the values of the left summary from the values of the right
    summary, i.e. the result describes what changed going from ``left`` to
    ``right``.
    Rows whose type appears on both sides are included with the difference
    of their counts and sizes (0 and 0 if nothing changed).
    Rows which appear only in ``right`` are included unchanged, rows which
    appear only in ``left`` are included with negated count and size.
    If the number of elements of a row of the diff is 0, but the total size
    is not, it means that objects likely have changed, but not their number,
    thus resulting in a changed size.

    The first column of every row is used as a lookup key and therefore has
    to be hashable (it is a string for summaries built by summarize).

    The returned rows are always new lists; neither input is modified and
    later changes to the result do not leak back into ``left`` or ``right``.

    """
    # index the left rows once instead of rescanning them for every right row
    left_rows = {}
    for row_l in left:
        left_rows.setdefault(row_l[0], []).append(row_l)
    right_types = set(row_r[0] for row_r in right)

    res = []
    for row_r in right:
        matches = left_rows.get(row_r[0])
        if matches:
            for row_l in matches:
                res.append([row_r[0], row_r[1] - row_l[1],
                            row_r[2] - row_l[2]])
        else:
            # copy, so that the diff does not alias a row of the input
            res.append(list(row_r))

    for row_l in left:
        if row_l[0] not in right_types:
            res.append([row_l[0], -row_l[1], -row_l[2]])
    return res
169
170
def format_(rows, limit=15, sort='size', order='descending'):
    """Format the rows as a summary.

    The rows are copied before they are sorted and formatted, so the passed
    summary is left untouched.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    Returns the lines produced by _format_table for a header row followed by
    the selected rows. The size column of each listed row is passed through
    stringutils.pp before formatting.

    Raises ValueError if sort or order is not one of the accepted values.
    """
    localrows = [list(row) for row in rows]
    # input validation
    sortby = ['type', '#', 'size']
    if sort not in sortby:
        raise ValueError("invalid sort, should be one of " + str(sortby))
    orders = ['ascending', 'descending']
    if order not in orders:
        raise ValueError("invalid order, should be one of " + str(orders))
    # sort rows; the position of `sort` in sortby is also the column index
    column = sortby.index(sort)

    def sort_key(row):
        # The first column already holds the textual type representation.
        # Passing it through _repr again would yield the same key for every
        # row (the representation of its own type) and disable the sort.
        if column == 0:
            return str(row[0])
        return row[column]

    localrows.sort(key=sort_key, reverse=(order == "descending"))
    # limit rows
    localrows = localrows[0:limit]
    for row in localrows:
        row[2] = stringutils.pp(row[2])
    # prepend the header row
    localrows.insert(0, ["types", "# objects", "total size"])
    return _format_table(localrows)
207
208
def _format_table(rows, header=True):
    """Lazily render a list of lists as the lines of a text table.

    Every cell is converted with str() and right-aligned in a column that is
    as wide as its longest cell plus one space on each side. Columns are
    separated by " | ".

    Keyword arguments:
    header -- if True a border line made of "=" is emitted after the first
              row, i.e. the first row is treated as a table header

    inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662
    """
    fill_char = "="
    # text placed between two neighbouring columns
    separator = " | "
    # number of spaces reserved on each side of the widest cell of a column
    pad = 1
    # one of left, center, right
    alignment = 'right'
    align = {'left': str.ljust,
             'center': str.center,
             'right': str.rjust}[alignment.lower()]
    # transpose the rows to measure each column
    widths = [max(len(str(cell)) + 2 * pad for cell in column)
              for column in zip(*rows)]
    rule = separator.join(fill_char * width for width in widths)
    for position, row in enumerate(rows):
        yield separator.join(align(str(cell), width)
                             for cell, width in zip(row, widths))
        # the border follows the very first row only
        if header and position == 0:
            yield rule
240
241
def print_(rows, limit=15, sort='size', order='descending'):
    """Write the summary produced by format_ to standard output.

    All arguments are handed to format_ unchanged; each line it produces is
    printed on its own.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    """
    formatted = format_(rows, limit=limit, sort=sort, order=order)
    for text in formatted:
        print(text)
253
254
# regular expressions used by _repr to replace default type representations
# str(type) reads "<type 'x'>" on Python 2 and "<class 'x'>" on Python 3;
# both prefixes have to go, otherwise only the suffix would be stripped
type_prefix = re.compile(r"^<(?:type|class) '")
# upper-case hex digits are accepted too, in case addresses are printed so
address = re.compile(r' at 0x[0-9a-fA-F]+')
type_suffix = re.compile(r"'>$")


def _repr(o, verbosity=1):
    """Get a meaningful object representation.

    This function should be used when the simple str(o) output would result
    in too general data. E.g. "<type 'instance'" is less meaningful than
    "instance: Foo".

    If the type of ``o`` is registered in ``representations``, the registered
    callable for the requested verbosity is used, otherwise str(type(o)).
    Memory addresses and the "<type '...'>" / "<class '...'>" wrapper are
    removed from the result.

    Keyword arguments:
    verbosity -- 0 forces the plain type representation; n >= 1 selects the
                 n-th registered representation of the type. Values larger
                 than the number of registered representations select the
                 most verbose one.

    """
    t = type(o)
    if (verbosity == 0) or (t not in representations):
        res = str(t)
    else:
        candidates = representations[t]
        # verbosity is 1-based while the list is 0-based; clamp to the last
        # entry so that verbosity == len(candidates) + 1 does not overrun
        index = min(verbosity, len(candidates)) - 1
        res = candidates[index](o)

    res = address.sub('', res)
    res = type_prefix.sub('', res)
    res = type_suffix.sub('', res)

    return res
288
289
def _traverse(summary, function, *args):
    """Apply function to a summary and to everything it contains.

    function is called as ``function(obj, *args)``, in this order, for:
    - the summary itself
    - each row, directly followed by
    - each item of that row

    Return values of function are ignored.
    """
    def visit(target):
        function(target, *args)

    visit(summary)
    for record in summary:
        visit(record)
        for cell in record:
            visit(cell)
304
305
def _subtract(summary, o):
    """Remove object o from the summary by subtracting its count and size.

    Every row whose first column equals _repr(o) gets its count reduced by
    one and its size reduced by _getsizeof(o). If there is no such row, a
    row with a count of -1 and the negated size is appended instead.

    The summary is modified in place and returned.
    """
    label = _repr(o)
    amount = 1
    size = _getsizeof(o)
    matched = False
    for entry in summary:
        if entry[0] != label:
            continue
        entry[1], entry[2] = entry[1] - amount, entry[2] - size
        matched = True
    if not matched:
        summary.append([label, -amount, -size])
    return summary
317
318
def _sweep(summary):
    """Return a new list without the rows that carry no information.

    A row is dropped only if both its number of objects and its total size
    are zero. The passed summary itself is not changed.

    """
    kept = []
    for entry in summary:
        if entry[2] != 0:
            kept.append(entry)
        elif entry[1] != 0:
            kept.append(entry)
    return kept
325