1"""Class for printing reports on profiled python code."""
2
3# Written by James Roskind
4# Based on prior profile module by Sjoerd Mullender...
5#   which was hacked somewhat by: Guido van Rossum
6
7# Copyright Disney Enterprises, Inc.  All Rights Reserved.
8# Licensed to PSF under a Contributor Agreement
9#
10# Licensed under the Apache License, Version 2.0 (the "License");
11# you may not use this file except in compliance with the License.
12# You may obtain a copy of the License at
13#
14# http://www.apache.org/licenses/LICENSE-2.0
15#
16# Unless required by applicable law or agreed to in writing, software
17# distributed under the License is distributed on an "AS IS" BASIS,
18# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
19# either express or implied.  See the License for the specific language
20# governing permissions and limitations under the License.
21
22
23import sys
24import os
25import time
26import marshal
27import re
28from functools import cmp_to_key
29
30__all__ = ["Stats"]
31
class Stats:
    """Create reports from data generated by the Profile class.

    A Stats object imports its data either by direct access to a profiler
    object (anything offering ``create_stats()`` and ``stats``) or by
    reading a dictionary that was written to a file with marshal.

    The constructor and add() take arbitrarily many sources, so several
    profile runs can be combined.  An optional ``stream`` keyword argument
    selects where reports are written (default: sys.stdout).

    All the print methods take arguments that restrict what is printed:
      * a float in [0.0, 1.0) is the fraction of the available lines to
        keep (e.g. .1 keeps the first 10%);
      * an integer is the number of lines to keep;
      * a string is a regular expression matched against the standard
        function name.

    sort_stats() accepts the legacy numeric codes -1, 0, 1, 2 or any number
    of key names (see sort_arg_dict_default); unambiguous abbreviations of
    the key names are accepted as well.  For example
    sort_stats('time', 'name') sorts on the major key 'internal time' and
    on the minor key 'function name'.

    Most methods return self, so commands can be chained:
        Stats('foo', 'goo').strip_dirs().sort_stats('calls').print_stats(5)
    """

    # Text and integer types are spelled differently on Python 2 and 3;
    # resolve them once so the methods below run unchanged on both.
    try:
        _string_types = (basestring,)
        _integer_types = (int, long)
    except NameError:
        _string_types = (str,)
        _integer_types = (int,)

    def __init__(self, *args, **kwds):
        """Load the first source (if any) and merge in the remaining ones.

        The only keyword argument accepted is ``stream``; anything else
        raises ValueError.
        """
        # Python 2 cannot declare a keyword-only argument after *args, so
        # "stream" is pulled out of **kwds and leftovers are rejected.
        self.stream = kwds.pop("stream", sys.stdout)
        if kwds:
            extras = ", ".join(["%s=%s" % (k, kwds[k]) for k in sorted(kwds)])
            raise ValueError("unrecognized keyword args: %s" % extras)
        if args:
            arg, args = args[0], args[1:]
        else:
            arg = None
        self.init(arg)
        self.add(*args)

    def _write(self, text):
        """Write text to the report stream (sys.stdout if stream is None)."""
        stream = self.stream
        if stream is None:
            # ``print >> None`` used to fall back to sys.stdout.
            stream = sys.stdout
        stream.write(text)

    def init(self, arg):
        """Reset every aggregate and load the statistics found in arg."""
        self.all_callees = None  # calc only if needed
        self.files = []
        self.fcn_list = None
        self.total_tt = 0
        self.total_calls = 0
        self.prim_calls = 0
        self.max_name_len = 0
        self.top_level = {}
        self.stats = {}
        self.sort_arg_dict = {}
        self.load_stats(arg)
        succeeded = False
        try:
            self.get_top_level_stats()
            succeeded = True
        finally:
            # Report the offending source; the exception still propagates.
            if not succeeded:
                text = "Invalid timing data"
                if self.files:
                    text += " %s" % (self.files[-1],)
                self._write(text + "\n")

    def load_stats(self, arg):
        """Fill self.stats from a file name or a profiler object.

        None yields an empty Stats object.  A string is opened as a
        marshal dump; an object with create_stats() has its stats taken
        over.  Raises TypeError when no statistics could be obtained.
        """
        if arg is None:
            # No source given: start empty so data can be add()ed later.
            self.stats = {}
            return
        if not arg:
            self.stats = {}
        elif isinstance(arg, self._string_types):
            # NOTE: marshal is not safe against malicious data; only load
            # profile dumps from trusted sources.
            with open(arg, 'rb') as f:
                self.stats = marshal.load(f)
            try:
                file_stats = os.stat(arg)
                arg = time.ctime(file_stats.st_mtime) + "    " + arg
            except (EnvironmentError, ValueError, OverflowError):
                # Best effort only: keep the bare name without a timestamp.
                pass
            self.files = [arg]
        elif hasattr(arg, 'create_stats'):
            arg.create_stats()
            self.stats = arg.stats
            arg.stats = {}
        if not self.stats:
            raise TypeError("Cannot create or construct a %r object from %r"
                            % (self.__class__, arg))
        return

    def get_top_level_stats(self):
        """Accumulate totals, top-level functions and the widest name."""
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            self.total_calls += nc
            self.prim_calls += cc
            self.total_tt += tt
            if ("jprofile", 0, "profiler") in callers:
                self.top_level[func] = None
            name_len = len(func_std_string(func))
            if name_len > self.max_name_len:
                self.max_name_len = name_len

    def add(self, *arg_list):
        """Merge further sources (Stats objects, file names, profilers).

        Sources are merged last-to-first, which is the order the former
        recursive implementation produced.  Returns self.
        """
        for other in reversed(arg_list):
            if type(self) != type(other) or self.__class__ != other.__class__:
                # Keep load diagnostics on the stream this object uses.
                other = Stats(other, stream=self.stream)
            self.files += other.files
            self.total_calls += other.total_calls
            self.prim_calls += other.prim_calls
            self.total_tt += other.total_tt
            for func in other.top_level:
                self.top_level[func] = None

            if self.max_name_len < other.max_name_len:
                self.max_name_len = other.max_name_len

            # Any previous ordering is stale once new data is merged.
            self.fcn_list = None

            for func, stat in other.stats.items():
                if func in self.stats:
                    old_func_stat = self.stats[func]
                else:
                    old_func_stat = (0, 0, 0, 0, {},)
                self.stats[func] = add_func_stats(old_func_stat, stat)
        return self

    def dump_stats(self, filename):
        """Write the profile data to a file we know how to load back."""
        with open(filename, 'wb') as f:
            marshal.dump(self.stats, f)

    # Sort keys: for each name, the (tuple index, direction) pairs used for
    # sorting, along with a printable description.  Indices refer to the
    # rows built in sort_stats(): cc, nc, tt, ct, file, line, name, stdname.
    sort_arg_dict_default = {
              "calls"     : (((1,-1),              ), "call count"),
              "ncalls"    : (((1,-1),              ), "call count"),
              "cumtime"   : (((3,-1),              ), "cumulative time"),
              "cumulative": (((3,-1),              ), "cumulative time"),
              "file"      : (((4, 1),              ), "file name"),
              "filename"  : (((4, 1),              ), "file name"),
              "line"      : (((5, 1),              ), "line number"),
              "module"    : (((4, 1),              ), "file name"),
              "name"      : (((6, 1),              ), "function name"),
              "nfl"       : (((6, 1),(4, 1),(5, 1),), "name/file/line"),
              "pcalls"    : (((0,-1),              ), "primitive call count"),
              "stdname"   : (((7, 1),              ), "standard name"),
              "time"      : (((2,-1),              ), "internal time"),
              "tottime"   : (((2,-1),              ), "internal time"),
              }

    def get_sort_arg_defs(self):
        """Expand all abbreviations that are unique.

        Returns a dict mapping every sort key, and every prefix of a key
        that selects a single sort definition, to that definition.  A
        prefix shared by keys with different definitions is dropped.  A
        full key name is always kept, even when it is also a prefix of
        another key (e.g. "file" and "filename").
        """
        if not self.sort_arg_dict:
            expanded = {}
            ambiguous = set()
            for word, definition in self.sort_arg_dict_default.items():
                for end in range(len(word), 0, -1):
                    fragment = word[:end]
                    if expanded.setdefault(fragment, definition) != definition:
                        ambiguous.add(fragment)
            for fragment in ambiguous:
                del expanded[fragment]
            # An exact key name must never lose to an abbreviation clash.
            expanded.update(self.sort_arg_dict_default)
            self.sort_arg_dict = expanded
        return self.sort_arg_dict

    def sort_stats(self, *field):
        """Order the function list by the given sort keys; return self.

        With no argument the ordering is cleared.  A single integer is
        mapped through the legacy codes (-1, 0, 1, 2).  Unknown keys raise
        KeyError.
        """
        if not field:
            self.fcn_list = 0
            return self
        if len(field) == 1 and isinstance(field[0], self._integer_types):
            # Be compatible with old profiler
            field = [ {-1: "stdname",
                       0:  "calls",
                       1:  "time",
                       2:  "cumulative"}[field[0]] ]

        sort_arg_defs = self.get_sort_arg_defs()
        sort_tuple = ()
        self.sort_type = ""
        connector = ""
        for word in field:
            sort_tuple = sort_tuple + sort_arg_defs[word][0]
            self.sort_type += connector + sort_arg_defs[word][1]
            connector = ", "

        # One row per function: (cc, nc, tt, ct, file, line, name, stdname,
        # func); the trailing func is what ends up in fcn_list.
        stats_list = []
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            stats_list.append((cc, nc, tt, ct) + func +
                              (func_std_string(func), func))

        stats_list.sort(key=cmp_to_key(TupleComp(sort_tuple).compare))

        self.fcn_list = [row[-1] for row in stats_list]
        return self

    def reverse_order(self):
        """Reverse the current ordering, if there is one; return self."""
        if self.fcn_list:
            self.fcn_list.reverse()
        return self

    def strip_dirs(self):
        """Drop leading path information from all file names; return self.

        Entries that become indistinguishable are added together.  The
        ordering and the callee cache are discarded.
        """
        oldstats = self.stats
        self.stats = newstats = {}
        max_name_len = 0
        for func, (cc, nc, tt, ct, callers) in oldstats.items():
            newfunc = func_strip_path(func)
            name_len = len(func_std_string(newfunc))
            if name_len > max_name_len:
                max_name_len = name_len
            newcallers = {}
            for func2, caller in callers.items():
                newcallers[func_strip_path(func2)] = caller

            if newfunc in newstats:
                newstats[newfunc] = add_func_stats(
                                        newstats[newfunc],
                                        (cc, nc, tt, ct, newcallers))
            else:
                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
        old_top = self.top_level
        self.top_level = new_top = {}
        for func in old_top:
            new_top[func_strip_path(func)] = None

        self.max_name_len = max_name_len

        self.fcn_list = None
        self.all_callees = None
        return self

    def calc_callees(self):
        """Build self.all_callees by inverting the per-function callers."""
        if self.all_callees:
            return
        self.all_callees = all_callees = {}
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            if func not in all_callees:
                all_callees[func] = {}
            for func2, caller in callers.items():
                if func2 not in all_callees:
                    all_callees[func2] = {}
                all_callees[func2][func] = caller
        return

    #******************************************************************
    # The following functions support actual printing of reports
    #******************************************************************

    # Optional "amount" is either a line count, or a percentage of lines.

    def eval_print_amount(self, sel, list, msg):
        """Apply one restriction to a function list.

        sel is a regular expression (string), a fraction (float in
        [0.0, 1.0)) or a line count (integer smaller than the list).
        Returns (new_list, msg) with a note appended to msg whenever the
        list was shortened or the regular expression was invalid.
        """
        entries = list  # the parameter name shadows the builtin
        selected = entries
        if isinstance(sel, self._string_types):
            try:
                rex = re.compile(sel)
            except re.error:
                msg += "   <Invalid regular expression %r>\n" % (sel,)
                return selected, msg
            selected = [func for func in entries
                        if rex.search(func_std_string(func))]
        else:
            count = len(entries)
            if isinstance(sel, float) and 0.0 <= sel < 1.0:
                selected = entries[:int(count * sel + .5)]
            elif isinstance(sel, self._integer_types) and 0 <= sel < count:
                selected = entries[:sel]
        if len(entries) != len(selected):
            msg += "   List reduced from %r to %r due to restriction <%r>\n" % (
                len(entries), len(selected), sel)

        return selected, msg

    def get_print_list(self, sel_list):
        """Return (name column width, functions to print).

        Applies every restriction in sel_list in turn and writes the
        explanatory header.  Returns (0, []) without writing anything when
        nothing is left to print.
        """
        width = self.max_name_len
        if self.fcn_list:
            stat_list = self.fcn_list[:]
            msg = "   Ordered by: " + self.sort_type + '\n'
        else:
            stat_list = list(self.stats)
            msg = "   Random listing order was used\n"

        for selection in sel_list:
            stat_list, msg = self.eval_print_amount(selection, stat_list, msg)

        count = len(stat_list)

        if not stat_list:
            return 0, stat_list
        self._write(msg + "\n")
        if count < len(self.stats):
            # Only a subset is shown: size the column for that subset.
            width = 0
            for func in stat_list:
                name_len = len(func_std_string(func))
                if name_len > width:
                    width = name_len
        return width+2, stat_list

    def print_stats(self, *amount):
        """Write the summary and the per-function table; return self."""
        for filename in self.files:
            self._write("%s\n" % (filename,))
        if self.files:
            self._write("\n")
        indent = ' ' * 8
        for func in self.top_level:
            self._write("%s %s\n" % (indent, func_get_function_name(func)))

        summary = "%s %s function calls" % (indent, self.total_calls)
        if self.total_calls != self.prim_calls:
            summary += " (%d primitive calls)" % self.prim_calls
        summary += " in %.3f seconds" % self.total_tt
        self._write(summary + "\n")
        self._write("\n")
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_title()
            for func in funcs:
                self.print_line(func)
            self._write("\n\n")
        return self

    def print_callees(self, *amount):
        """Write, for each selected function, what it called; return self."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.calc_callees()

            self.print_call_heading(width, "called...")
            for func in funcs:
                if func in self.all_callees:
                    self.print_call_line(width, func, self.all_callees[func])
                else:
                    self.print_call_line(width, func, {})
            self._write("\n\n")
        return self

    def print_callers(self, *amount):
        """Write, for each selected function, who called it; return self."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_call_heading(width, "was called by...")
            for func in funcs:
                cc, nc, tt, ct, callers = self.stats[func]
                self.print_call_line(width, func, callers, "<-")
            self._write("\n\n")
        return self

    def print_call_heading(self, name_size, column_title):
        """Write the heading of a callers/callees report."""
        self._write("Function ".ljust(name_size) + column_title + "\n")
        # print sub-header only if we have new-style callers
        subheader = False
        for cc, nc, tt, ct, callers in self.stats.values():
            if callers:
                value = next(iter(callers.values()))
                subheader = isinstance(value, tuple)
                break
        if subheader:
            self._write(" "*name_size + "    ncalls  tottime  cumtime\n")

    def print_call_line(self, name_size, source, call_dict, arrow="->"):
        """Write one function and every entry of its call dictionary."""
        self._write(func_std_string(source).ljust(name_size) + arrow)
        if not call_dict:
            self._write("\n")
            return
        indent = ""
        for func in sorted(call_dict):
            name = func_std_string(func)
            value = call_dict[func]
            if isinstance(value, tuple):
                # tuple values: (nc, cc, tt, ct) per caller/callee pair
                nc, cc, tt, ct = value
                if nc != cc:
                    substats = '%d/%d' % (nc, cc)
                else:
                    substats = '%d' % (nc,)
                substats = '%s %s %s  %s' % (substats.rjust(7+2*len(indent)),
                                             f8(tt), f8(ct), name)
                left_width = name_size + 1
            else:
                # plain values: just a call count
                substats = '%s(%r) %s' % (name, value, f8(self.stats[func][3]))
                left_width = name_size + 3
            # The first entry shares the line with the function name,
            # separated by the single space Python 2's print used to add;
            # later entries are indented to line up underneath it.
            if indent:
                prefix = indent * left_width
            else:
                prefix = " "
            self._write(prefix + substats + "\n")
            indent = " "

    def print_title(self):
        """Write the column titles of the per-function table."""
        self._write('   ncalls  tottime  percall  cumtime  percall'
                    ' filename:lineno(function)\n')

    def print_line(self, func):  # hack : should print percentages
        """Write the table row for one function."""
        cc, nc, tt, ct, callers = self.stats[func]
        calls = str(nc)
        if nc != cc:
            calls = calls + '/' + str(cc)
        columns = [calls.rjust(9), f8(tt)]
        # The per-call columns are left blank when there were no calls.
        if nc == 0:
            columns.append(' '*8)
        else:
            columns.append(f8(float(tt)/nc))
        columns.append(f8(ct))
        if cc == 0:
            columns.append(' '*8)
        else:
            columns.append(f8(float(ct)/cc))
        columns.append(func_std_string(func))
        self._write(" ".join(columns) + "\n")
450
class TupleComp:
    """Generic comparison of two tuples on selected positions.

    An instance stores a sequence of (tuple-index, direction) pairs, listed
    from most significant to least significant; direction is the value
    returned when the left item is the larger one (so 1 sorts ascending and
    -1 sorts descending).  The compare() method can be handed to sort()
    (through functools.cmp_to_key) to sort a list of tuples in the order
    the instance describes.
    """

    def __init__(self, comp_select_list):
        self.comp_select_list = comp_select_list

    def compare(self, left, right):
        """Return a negative, zero or positive number, cmp()-style."""
        for position, sign in self.comp_select_list:
            lhs, rhs = left[position], right[position]
            if lhs < rhs:
                return -sign
            if lhs > rhs:
                return sign
        # Equal on every selected position.
        return 0
471
472#**************************************************************************
473# func_name is a triple (file:string, line:int, name:string)
474
def func_strip_path(func_name):
    """Return the (file, line, name) triple with the file's directory removed."""
    path, lineno, function = func_name
    return (os.path.basename(path), lineno, function)
478
def func_get_function_name(func):
    """Return the function-name element (index 2) of a func triple."""
    function = func[2]
    return function
481
def func_std_string(func_name): # match what old profile produced
    """Format a (file, line, name) triple the way reports display it."""
    if func_name[:2] != ('~', 0):
        return "%s:%d(%s)" % func_name
    # special case for built-in functions: they carry no file or line
    builtin = func_name[2]
    if builtin.startswith('<') and builtin.endswith('>'):
        return '{%s}' % builtin[1:-1]
    return builtin
492
493#**************************************************************************
# The following functions combine statistics for pairs of functions.
495# The bulk of the processing involves correctly handling "call" lists,
496# such as callers and callees.
497#**************************************************************************
498
def add_func_stats(target, source):
    """Return the sum of two profile entries.

    Each entry is a (cc, nc, tt, ct, callers) tuple; the four numbers are
    added pairwise and the callers are merged with add_callers().
    """
    src_cc, src_nc, src_tt, src_ct, src_callers = source
    dst_cc, dst_nc, dst_tt, dst_ct, dst_callers = target
    merged_callers = add_callers(dst_callers, src_callers)
    return (src_cc + dst_cc, src_nc + dst_nc, src_tt + dst_tt,
            src_ct + dst_ct, merged_callers)
505
def add_callers(target, source):
    """Combine two caller dictionaries into a new dictionary.

    Neither argument is modified.  A function present in both gets its
    values added: element by element when the source value is a tuple,
    otherwise with ``+=``.
    """
    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    new_callers = dict(target)
    for func, caller in source.items():
        if func in new_callers:
            if isinstance(caller, tuple):
                # format used by cProfile
                new_callers[func] = tuple(
                    new + old for new, old in zip(caller, new_callers[func]))
            else:
                # format used by profile
                new_callers[func] += caller
        else:
            new_callers[func] = caller
    return new_callers
523
def count_calls(callers):
    """Sum the caller statistics to get total number of calls received."""
    # values() rather than itervalues() so this runs on Python 2 and 3 alike.
    return sum(callers.values())
530
531#**************************************************************************
532# The following functions support printing of reports
533#**************************************************************************
534
def f8(x):
    """Format a number in a field 8 wide with 3 decimal places."""
    template = "%8.3f"
    return template % x
537
538#**************************************************************************
539# Statistics browser added by ESR, April 2001
540#**************************************************************************
541
if __name__ == '__main__':
    import cmd
    try:
        # Imported only for its side effect: line editing at the prompt.
        import readline
    except ImportError:
        pass

    class ProfileBrowser(cmd.Cmd):
        """Interactive command loop for inspecting a profile dump."""

        def __init__(self, profile=None):
            cmd.Cmd.__init__(self)
            self.prompt = "% "
            self.stats = None
            self.stream = sys.stdout
            if profile is not None:
                self.do_read(profile)

        def _say(self, text=""):
            """Write one line of text to the browser's output stream."""
            self.stream.write(text + "\n")

        def generic(self, fn, line):
            """Parse line into restrictions and call the Stats method fn.

            Each word becomes an int, a float in [0, 1] or, failing both,
            a regular-expression string.
            """
            args = line.split()
            processed = []
            for term in args:
                try:
                    processed.append(int(term))
                    continue
                except ValueError:
                    pass
                try:
                    frac = float(term)
                    if frac > 1 or frac < 0:
                        self._say("Fraction argument must be in [0, 1]")
                        continue
                    processed.append(frac)
                    continue
                except ValueError:
                    pass
                processed.append(term)
            if self.stats:
                getattr(self.stats, fn)(*processed)
            else:
                self._say("No statistics object is loaded.")
            return 0
        def generic_help(self):
            self._say("Arguments may be:")
            self._say("* An integer maximum number of entries to print.")
            self._say("* A decimal fractional number between 0 and 1, controlling")
            self._say("  what fraction of selected entries to print.")
            self._say("* A regular expression; only entries with function names")
            self._say("  that match it are printed.")

        def do_add(self, line):
            if self.stats:
                # A bad file name must not terminate the interactive session.
                try:
                    self.stats.add(line)
                except EnvironmentError as err:
                    self._say("Failed to load statistics for %s: %s" % (line, err))
                except Exception as err:
                    self._say("%s: %s" % (err.__class__.__name__, err))
            else:
                self._say("No statistics object is loaded.")
            return 0
        def help_add(self):
            self._say("Add profile info from given file to current statistics object.")

        def do_callees(self, line):
            return self.generic('print_callees', line)
        def help_callees(self):
            self._say("Print callees statistics from the current stat object.")
            self.generic_help()

        def do_callers(self, line):
            return self.generic('print_callers', line)
        def help_callers(self):
            self._say("Print callers statistics from the current stat object.")
            self.generic_help()

        def do_EOF(self, line):
            self._say("")
            return 1
        def help_EOF(self):
            self._say("Leave the profile brower.")

        def do_quit(self, line):
            return 1
        def help_quit(self):
            self._say("Leave the profile brower.")

        def do_read(self, line):
            if line:
                try:
                    self.stats = Stats(line)
                except IOError as err:
                    # Show just the OS error text when there is one; an
                    # IOError built from a single argument has none.
                    if len(err.args) > 1:
                        self._say("%s" % (err.args[1],))
                    else:
                        self._say("%s" % (err,))
                    return
                except Exception as err:
                    self._say("%s: %s" % (err.__class__.__name__, err))
                    return
                self.prompt = line + "% "
            elif len(self.prompt) > 2:
                # No argument: reload the file named in the prompt.
                line = self.prompt[:-2]
                self.do_read(line)
            else:
                self._say("No statistics object is current -- cannot reload.")
            return 0
        def help_read(self):
            self._say("Read in profile data from a specified file.")
            self._say("Without argument, reload the current file.")

        def do_reverse(self, line):
            if self.stats:
                self.stats.reverse_order()
            else:
                self._say("No statistics object is loaded.")
            return 0
        def help_reverse(self):
            self._say("Reverse the sort order of the profiling report.")

        def do_sort(self, line):
            if not self.stats:
                self._say("No statistics object is loaded.")
                return
            abbrevs = self.stats.get_sort_arg_defs()
            if line and all((x in abbrevs) for x in line.split()):
                self.stats.sort_stats(*line.split())
            else:
                self._say("Valid sort keys (unique prefixes are accepted):")
                for (key, value) in Stats.sort_arg_dict_default.items():
                    self._say("%s -- %s" % (key, value[1]))
            return 0
        def help_sort(self):
            self._say("Sort profile data according to specified keys.")
            self._say("(Typing `sort' without arguments lists valid keys.)")
        def complete_sort(self, text, *args):
            return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]

        def do_stats(self, line):
            return self.generic('print_stats', line)
        def help_stats(self):
            self._say("Print statistics from the current stat object.")
            self.generic_help()

        def do_strip(self, line):
            if self.stats:
                self.stats.strip_dirs()
            else:
                self._say("No statistics object is loaded.")
        def help_strip(self):
            self._say("Strip leading path information from filenames in the report.")

        def help_help(self):
            self._say("Show help for a given command.")

        def postcmd(self, stop, line):
            if stop:
                return stop
            return None

    # The optional first command-line argument names a profile to load.
    if len(sys.argv) > 1:
        initprofile = sys.argv[1]
    else:
        initprofile = None
    try:
        browser = ProfileBrowser(initprofile)
        browser._say("Welcome to the profile statistics browser.")
        browser.cmdloop()
        browser._say("Goodbye.")
    except KeyboardInterrupt:
        pass
704
705# That's all, folks.
706