"""A collection of functions to summarize object information.

This module provides several functions which will help you to analyze object
information which was gathered. Often it is sufficient to work with aggregated
data instead of handling the entire set of existing objects. For example, a
memory leak can be identified simply based on the number and size of existing
objects.

A summary contains information about objects in a table-like manner.
Technically, it is a list of lists. Each of these lists represents a row,
whereas the first column reflects the object type, the second column the number
of objects, and the third column the size of all these objects. This allows a
simple table-like output like the following:

============= ============ =============
        types    # objects    total size
============= ============ =============
<type 'dict'>            2           560
 <type 'str'>            3           126
 <type 'int'>            4            96
<type 'long'>            2            66
<type 'list'>            1            40
============= ============ =============

Another advantage of summaries is that they influence the system you analyze
only to a minimum. Working with references to existing objects will keep these
objects alive. Most of the time this is not the desired behavior (as it will
have an impact on the observations). Using summaries reduces this effect
greatly.

output representation
---------------------

The output representation of types is defined in summary.representations.
Every type defined in this dictionary will be represented as specified. Each
definition has a list of different representations. The later a representation
appears in this list, the higher its verbosity level. For types which are not
defined in summary.representations the default str() representation will be
used.

Per default, summaries will use the verbosity level 1 for any encountered type.
The reason is that several computations are done with summaries and rows have
to remain comparable. Therefore information which reflects an object's state,
e.g. the current line number of a frame, should not be included. You may add
more detailed information at higher verbosity levels than 1.
"""

import re
import sys
import types

from pympler.util import stringutils
# default to asizeof if sys.getsizeof is not available (prior to Python 2.6)
try:
    from sys import getsizeof as _getsizeof
except ImportError:
    from pympler.asizeof import flatsize
    _getsizeof = flatsize

# Maps a type to a list of one-argument callables. The callable at index i
# produces the representation used for verbosity level i + 1 (see _repr).
representations = {}


def _init_representations():
    """Populate the module-level ``representations`` dictionary.

    Each registered type gets a list of callables ordered by increasing
    verbosity. The first entry of every list must not depend on mutable
    object state, because it is what ``summarize`` uses to group objects.
    """
    # NOTE(review): the entries in this branch use names that only exist on
    # Python 2 (types.ClassType, types.InstanceType, im_func/im_class). The
    # guard as written only triggers before Python 2.4 -- confirm whether a
    # Python 3 boundary was intended. The threshold is left untouched here.
    if sys.hexversion < 0x2040000:
        classobj = [
            lambda c: "classobj(%s)" % repr(c),
        ]
        representations[types.ClassType] = classobj
        instance = [
            lambda f: "instance(%s)" % repr(f.__class__),
        ]
        representations[types.InstanceType] = instance
        instancemethod = [
            lambda i: "instancemethod (%s)" % (repr(i.im_func)),
            lambda i: "instancemethod (%s, %s)" % (repr(i.im_class),
                                                   repr(i.im_func)),
        ]
        representations[types.MethodType] = instancemethod
    frame = [
        lambda f: "frame (codename: %s)" % (f.f_code.co_name),
        lambda f: "frame (codename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_firstlineno),
        lambda f: "frame (codename: %s, filename: %s, codeline: %s)" %
                  (f.f_code.co_name, f.f_code.co_filename,
                   f.f_code.co_firstlineno)
    ]
    representations[types.FrameType] = frame
    _dict = [
        lambda d: str(type(d)),
        lambda d: "dict, len=%s" % len(d),
    ]
    representations[dict] = _dict
    function = [
        lambda f: "function (%s)" % f.__name__,
        # The attribute is ``__module__``; ``__module`` does not exist and
        # raised AttributeError at verbosity level 2.
        lambda f: "function (%s.%s)" % (f.__module__, f.__name__),
    ]
    representations[types.FunctionType] = function
    _list = [
        lambda l: str(type(l)),
        lambda l: "list, len=%s" % len(l)
    ]
    representations[list] = _list
    module = [lambda m: "module(%s)" % m.__name__]
    representations[types.ModuleType] = module
    _set = [
        lambda s: str(type(s)),
        lambda s: "set, len=%s" % len(s)
    ]
    representations[set] = _set

_init_representations()


def summarize(objects):
    """Summarize an objects list.

    Return a list of lists, whereas each row consists of::
      [str(type), number of objects of this type, total size of these objects].

    Objects are grouped by their ``_repr`` string at the default verbosity.
    No guarantee regarding the order is given.

    """
    count = {}
    total_size = {}
    for o in objects:
        otype = _repr(o)
        count[otype] = count.get(otype, 0) + 1
        total_size[otype] = total_size.get(otype, 0) + _getsizeof(o)
    return [[otype, count[otype], total_size[otype]] for otype in count]


def get_diff(left, right):
    """Get the difference of two summaries.

    Subtracts the values of the left summary from the values of the right
    summary, i.e. the result describes what changed going from ``left`` to
    ``right``.
    If similar rows appear on both sides, they are included in the summary
    with 0 for number of elements and total size.
    If the number of elements of a row of the diff is 0, but the total size is
    not, it means that objects likely have changed, but not their number, thus
    resulting in a changed size.

    Rows that exist only in ``right`` are copied unchanged; rows that exist
    only in ``left`` appear with negated count and size. The returned rows are
    new lists, so modifying the diff never modifies ``left`` or ``right``.

    """
    # Index the left rows by type so each right row is matched without
    # rescanning the whole left summary. A list per type keeps the behavior
    # for (unusual) summaries that contain the same type more than once.
    left_by_type = {}
    for row_l in left:
        left_by_type.setdefault(row_l[0], []).append(row_l)

    res = []
    right_types = set()
    for row_r in right:
        right_types.add(row_r[0])
        matches = left_by_type.get(row_r[0])
        if matches:
            for row_l in matches:
                res.append([row_r[0], row_r[1] - row_l[1],
                            row_r[2] - row_l[2]])
        else:
            # Copy the row: appending ``row_r`` itself would alias the
            # caller's summary.
            res.append(list(row_r))

    for row_l in left:
        if row_l[0] not in right_types:
            res.append([row_l[0], -row_l[1], -row_l[2]])
    return res


def format_(rows, limit=15, sort='size', order='descending'):
    """Format the rows as a summary.

    Returns an iterable of strings, one per output line (header, border and
    at most ``limit`` data rows). The input rows are not modified.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    Raises ValueError if ``sort`` or ``order`` is not one of the values above.
    """
    # work on copies, the size column is replaced by a string below
    localrows = [list(row) for row in rows]
    # input validation
    sortby = ['type', '#', 'size']
    if sort not in sortby:
        raise ValueError("invalid sort, should be one of " + str(sortby))
    orders = ['ascending', 'descending']
    if order not in orders:
        raise ValueError("invalid order, should be one of " + str(orders))

    # sort rows
    column = sortby.index(sort)
    if column == 0:
        def sort_key(row):
            # Rows produced by summarize() already carry the type as a
            # string. Running _repr() on that string yields the same value
            # for every row, which made sorting by type a no-op.
            label = row[0]
            if isinstance(label, str):
                return label
            return _repr(label)
    else:
        def sort_key(row):
            return row[column]
    localrows.sort(key=sort_key, reverse=(order == 'descending'))

    # limit rows
    localrows = localrows[0:limit]
    for row in localrows:
        row[2] = stringutils.pp(row[2])
    # print rows
    localrows.insert(0, ["types", "# objects", "total size"])
    return _format_table(localrows)


def _format_table(rows, header=True):
    """Format a list of lists as a pretty table.

    This is a generator yielding one formatted line per row. Every cell is
    right-justified to the width of the longest cell in its column plus the
    padding on both sides.

    Keyword arguments:
    header -- if True the first row is treated as a table header, i.e. a
              border line is emitted right after it

    inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662
    """
    border = "="
    # vertical delimiter
    vdelim = " | "
    # number of spaces left around the longest element in the column
    padding = 1
    justify = str.rjust
    # calculate column widths (longest item in each col
    # plus "padding" nr of spaces on both sides)
    col_widths = [max([len(str(item)) + 2 * padding for item in col])
                  for col in zip(*rows)]
    borderline = vdelim.join([w * border for w in col_widths])
    for row in rows:
        yield vdelim.join([justify(str(item), width)
                           for (item, width) in zip(row, col_widths)])
        if header:
            yield borderline
            # only the first row is followed by a border
            header = False


def print_(rows, limit=15, sort='size', order='descending'):
    """Print the rows as a summary.

    Keyword arguments:
    limit -- the maximum number of elements to be listed
    sort  -- sort elements by 'size', 'type', or '#'
    order -- sort 'ascending' or 'descending'

    Raises ValueError if ``sort`` or ``order`` is not one of the values above.

    """
    for line in format_(rows, limit=limit, sort=sort, order=order):
        print(line)


# regular expressions used by _repr to replace default type representations
# str(type) is "<type 'x'>" on Python 2 and "<class 'x'>" on Python 3
type_prefix = re.compile(r"^<(?:type|class) '")
# object addresses may be printed with upper-case hex digits on some platforms
address = re.compile(r' at 0x[0-9a-fA-F]+')
type_suffix = re.compile(r"'>$")


def _repr(o, verbosity=1):
    """Get meaningful object representation.

    This function should be used when the simple str(o) output would result in
    too general data. E.g. "<type 'instance'" is less meaningful than
    "instance: Foo".

    Memory addresses and the "<type '...'>" / "<class '...'>" wrapper are
    stripped from the result.

    Keyword arguments:
    verbosity -- 0 always yields the plain str(type(o)) representation;
                 level n >= 1 selects the n-th entry registered for the type
                 in ``representations``. Levels beyond the registered entries
                 fall back to the most verbose entry available.

    """
    t = type(o)
    if (verbosity == 0) or (t not in representations):
        res = str(t)
    else:
        candidates = representations[t]
        # Levels are 1-based, list indices 0-based. Clamp into the valid
        # range so an over-large (or negative) level cannot raise IndexError.
        index = min(max(verbosity - 1, 0), len(candidates) - 1)
        res = candidates[index](o)

    res = address.sub('', res)
    res = type_prefix.sub('', res)
    res = type_suffix.sub('', res)

    return res


def _traverse(summary, function, *args):
    """Traverse all objects of a summary and call function with each as a
    parameter.

    Using this function, the following objects will be traversed:
    - the summary
    - each row
    - each item of a row

    Any additional positional arguments are passed on to every call.
    """
    function(summary, *args)
    for row in summary:
        function(row, *args)
        for item in row:
            function(item, *args)


def _subtract(summary, o):
    """Remove object o from the summary by subtracting its size.

    Every row matching the representation of ``o`` has its count reduced by
    one and its total size reduced by the size of ``o``. If no row matches, a
    new row with negative count and size is appended. The summary is modified
    in place and also returned.
    """
    otype = _repr(o)
    size = _getsizeof(o)
    found = False
    for row in summary:
        if row[0] == otype:
            row[1], row[2] = row[1] - 1, row[2] - size
            found = True
    if not found:
        summary.append([otype, -1, -size])
    return summary


def _sweep(summary):
    """Remove all rows in which the total size and the total number of
    objects is zero.

    Returns a new list; the given summary is left untouched.

    """
    return [row for row in summary if ((row[2] != 0) or (row[1] != 0))]