"""A collection of functions to summarize object information.
This module provides several functions which will help you to analyze object
information which was gathered. Often it is sufficient to work with aggregated
data instead of handling the entire set of existing objects. For example, a
memory leak can be identified simply based on the number and size of existing
objects.
A summary contains information about objects in a table-like manner.
Technically, it is a list of lists. Each of these lists represents a row,
whereas the first column reflects the object type, the second column the number
of objects, and the third column the size of all these objects. This allows a
simple table-like output like the following:
=============  ============  =============
        types     # objects     total size
=============  ============  =============
<type 'dict'>             2            560
 <type 'str'>             3            126
 <type 'int'>             4             96
<type 'long'>             2             66
<type 'list'>             1             40
=============  ============  =============
Another advantage of summaries is that they influence the system you analyze
only to a minimum. Working with references to existing objects will keep these
objects alive. Most of the time this is not the desired behavior (as it will
have an impact on the observations). Using summaries reduces this effect
greatly.
output representation
---------------------
The output representation of types is defined in summary.representations.
Every type defined in this dictionary will be represented as specified. Each
definition has a list of different representations. The later a representation
appears in this list, the higher its verbosity level. For types which are not
defined in summary.representations the default str() representation will be
used.
By default, summaries will use the verbosity level 1 for any encountered type.
The reason is that several computations are done with summaries and rows have
to remain comparable. Therefore information which reflects an object's state,
e.g. the current line number of a frame, should not be included. You may add
more detailed information at higher verbosity levels than 1.
"""
import re
import sys
import types
from pympler.util import stringutils
# Use sys.getsizeof where it exists; it is not available prior to Python 2.6,
# in which case fall back to pympler's asizeof.flatsize.
try:
    from sys import getsizeof as _getsizeof
except ImportError:
    from pympler.asizeof import flatsize
    _getsizeof = flatsize
# Maps a type to a list of callables that each render an instance of that type
# as a string. Later list entries are more verbose; _repr picks the entry at
# index ``verbosity - 1``.
representations = {}


def _init_representations():
    """Register the default representations in ``representations``.

    Every registered value is a list of one-argument callables returning a
    string. The later a callable appears in its list, the more detail it
    includes about the object.
    """
    global representations
    # The legacy types and attributes used in this branch (types.ClassType,
    # types.InstanceType, im_func, im_class) do not exist on Python 3, so they
    # have to stay behind the version guard.
    if sys.hexversion < 0x2040000:
        classobj = [
            lambda c: "classobj(%s)" % repr(c),
        ]
        representations[types.ClassType] = classobj

        instance = [
            lambda f: "instance(%s)" % repr(f.__class__),
        ]
        representations[types.InstanceType] = instance

        instancemethod = [
            lambda i: "instancemethod (%s)" % (repr(i.im_func)),
            lambda i: "instancemethod (%s, %s)" % (repr(i.im_class),
                                                   repr(i.im_func)),
        ]
        representations[types.MethodType] = instancemethod

    # Only co_firstlineno is used for frames, never the current line number,
    # so the representation of a frame does not change while it executes.
    frame = [
        lambda f: "frame (codename: %s)" % (f.f_code.co_name),
        lambda f: "frame (codename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_firstlineno),
        lambda f: "frame (codename: %s, filename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_filename,
                   f.f_code.co_firstlineno),
    ]
    representations[types.FrameType] = frame

    _dict = [
        lambda d: str(type(d)),
        lambda d: "dict, len=%s" % len(d),
    ]
    representations[dict] = _dict

    function = [
        lambda f: "function (%s)" % f.__name__,
        # The attribute is ``__module__``; the former ``f.__module`` raised
        # AttributeError whenever this verbosity level was requested.
        lambda f: "function (%s.%s)" % (f.__module__, f.__name__),
    ]
    representations[types.FunctionType] = function

    _list = [
        lambda lst: str(type(lst)),
        lambda lst: "list, len=%s" % len(lst),
    ]
    representations[list] = _list

    module = [lambda m: "module(%s)" % m.__name__]
    representations[types.ModuleType] = module

    _set = [
        lambda s: str(type(s)),
        lambda s: "set, len=%s" % len(s),
    ]
    representations[set] = _set


# Fill the table once at import time; without this call ``representations``
# stays empty and every lookup falls back to the plain type string.
_init_representations()
def summarize(objects):
    """Build a summary for an iterable of objects.

    The objects are grouped by the string ``_repr`` returns for them. The
    result is a list of rows, one per group, each of the form::

        [type representation, number of objects, total size of the objects]

    No guarantee regarding the order of the rows is given.
    """
    groups = {}
    for obj in objects:
        label = _repr(obj)
        size = _getsizeof(obj)
        entry = groups.get(label)
        if entry is None:
            # first object of this kind: start a new row
            groups[label] = [label, 1, size]
        else:
            entry[1] += 1
            entry[2] += size
    return list(groups.values())
def get_diff(left, right):
    """Compute the difference between two summaries.

    For every type present in both summaries the resulting row holds the
    values of the right summary minus the values of the left summary. Types
    found only in the right summary are taken over unchanged, types found
    only in the left summary appear with negated values.

    Rows which are identical on both sides are still part of the result, with
    0 for both the number of objects and the total size.

    A row with 0 objects but a non-zero total size indicates that objects
    have likely changed in size while their number stayed the same.
    """
    diff = []

    # rows of the right summary, reduced by their counterparts on the left
    for r_row in right:
        counterparts = [l_row for l_row in left if r_row[0] == l_row[0]]
        if counterparts:
            diff.extend([r_row[0], r_row[1] - l_row[1], r_row[2] - l_row[2]]
                        for l_row in counterparts)
        else:
            diff.append(r_row)

    # rows which vanished, i.e. exist only in the left summary
    for l_row in left:
        if not any(l_row[0] == r_row[0] for r_row in right):
            diff.append([l_row[0], -l_row[1], -l_row[2]])

    return diff
def format_(rows, limit=15, sort='size', order='descending'):
    """Format the rows as a summary.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    Returns whatever ``_format_table`` produces for the sorted and truncated
    rows, preceded by a header row.

    Raises ValueError if ``sort`` or ``order`` is not one of the values
    listed above.
    """
    # work on copies so the rows passed in are neither reordered nor modified
    localrows = [list(row) for row in rows]

    # input validation
    sortby = ['type', '#', 'size']
    if sort not in sortby:
        raise ValueError("invalid sort, should be one of" + str(sortby))
    orders = ['ascending', 'descending']
    if order not in orders:
        raise ValueError("invalid order, should be one of" + str(orders))

    # sort rows
    column = sortby.index(sort)
    if column == 0:
        # Sort by the text of the type column itself. Passing that text
        # through _repr would yield the same key for every row, which left
        # the rows unsorted.
        def sort_key(row):
            return str(row[0])
    else:
        def sort_key(row):
            return row[column]
    localrows.sort(key=sort_key, reverse=(order == 'descending'))

    # limit rows
    localrows = localrows[0:limit]
    for row in localrows:
        row[2] = stringutils.pp(row[2])

    # prepend the header and render the table
    localrows.insert(0, ["types", "# objects", "total size"])
    return _format_table(localrows)
def _format_table(rows, header=True):
    """Format a list of lists as a pretty table.

    This is a generator yielding the table line by line. ``rows`` is
    iterated twice (once to measure the columns, once to render them).

    Keyword arguments:
    header -- if True the first row is treated as a table header, i.e. a
              border line is emitted right after it

    inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662
    """
    border = "="
    # vertical delimiter
    vdelim = " | "
    # padding nr. of spaces are left around the longest element in the
    # column
    padding = 1
    # may be left,center,right
    alignment = 'right'
    justify = {'left': str.ljust,
               'center': str.center,
               'right': str.rjust}[alignment.lower()]
    # calculate column widths (longest item in each col
    # plus "padding" nr of spaces on both sides)
    cols = zip(*rows)
    colWidths = [max([len(str(item)) + 2 * padding for item in col])
                 for col in cols]
    borderline = vdelim.join([w * border for w in colWidths])
    for row in rows:
        yield vdelim.join([justify(str(item), width)
                           for (item, width) in zip(row, colWidths)])
        if header:
            # the border is emitted once only, below the very first row
            yield borderline
            header = False
def print_(rows, limit=15, sort='size', order='descending'):
    """Print the rows as a summary.

    The arguments are handed to ``format_`` unchanged; every line it
    produces is printed.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    """
    table_lines = format_(rows, limit=limit, sort=sort, order=order)
    for table_line in table_lines:
        print(table_line)
# regular expressions used by _repr to replace default type representations
# (types are rendered as "<type 'x'>" by Python 2 and "<class 'x'>" by
# Python 3; both prefixes have to be recognized)
type_prefix = re.compile(r"^<(?:type|class) '")
address = re.compile(r' at 0x[0-9a-f]+')
type_suffix = re.compile(r"'>$")


def _repr(o, verbosity=1):
    """Get a meaningful object representation.

    This function should be used when the simple str(o) output would result in
    too general data. E.g. "<type 'instance'" is less meaningful than
    "instance: Foo".

    Keyword arguments:
    verbosity -- 0 returns the plain type name; n >= 1 selects the n-th
                 representation registered for the type in
                 ``representations``. Values beyond the number of registered
                 representations select the most verbose one.

    Memory addresses as well as the leading "<type '" / "<class '" and the
    trailing "'>" are stripped from the result.
    """
    t = type(o)
    if (verbosity == 0) or (t not in representations):
        res = str(t)
    else:
        levels = representations[t]
        # Level n lives at index n - 1. Clamp to the last entry; the previous
        # check compared after decrementing and therefore let a verbosity of
        # len(levels) + 1 through, raising IndexError.
        index = min(verbosity, len(levels)) - 1
        res = levels[index](o)

    res = address.sub('', res)
    res = type_prefix.sub('', res)
    res = type_suffix.sub('', res)

    return res
def _traverse(summary, function, *args):
    """Call function for every object a summary is made of.

    ``function`` receives the object as first parameter, followed by
    ``args``. It is called, in this order, for:
    - the summary itself
    - each row, directly followed by
    - each item of that row
    """
    def visit(node):
        function(node, *args)

    visit(summary)
    for row in summary:
        visit(row)
        for cell in row:
            visit(cell)
def _subtract(summary, o):
    """Remove object o from the summary by subtracting its count and size.

    Every row whose type representation equals the one of ``o`` is reduced
    in place by one object and by the size of ``o``. If there is no such
    row, a new row with negative values is appended. The (modified) summary
    is returned.
    """
    label = _repr(o)
    size = _getsizeof(o)
    matched = False
    for entry in summary:
        if entry[0] == label:
            entry[1], entry[2] = entry[1] - 1, entry[2] - size
            matched = True
    if not matched:
        summary.append([label, -1, -size])
    return summary
def _sweep(summary):
    """Drop the rows that carry no information.

    A row is dropped when both its total size and its number of objects are
    zero. The remaining rows are returned in a new list; the summary passed
    in is left untouched.

    """
    kept = []
    for row in summary:
        if row[2] == 0 and row[1] == 0:
            continue
        kept.append(row)
    return kept
324    return [row for row in summary if ((row[2] != 0) or (row[1] != 0))]