1#!/usr/local/bin/python3.7
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7#   which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc.  All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied.  See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import sys
28import time
29import marshal
30
31__all__ = ["run", "runctx", "Profile"]
32
33# Sample timer for use with
34#i_count = 0
35#def integer_timer():
36#       global i_count
37#       i_count = i_count + 1
38#       return i_count
39#itimes = integer_timer # replace with C coded timer returning integers
40
41class _Utils:
42    """Support class for utility functions which are shared by
43    profile.py and cProfile.py modules.
44    Not supposed to be used directly.
45    """
46
47    def __init__(self, profiler):
48        self.profiler = profiler
49
50    def run(self, statement, filename, sort):
51        prof = self.profiler()
52        try:
53            prof.run(statement)
54        except SystemExit:
55            pass
56        finally:
57            self._show(prof, filename, sort)
58
59    def runctx(self, statement, globals, locals, filename, sort):
60        prof = self.profiler()
61        try:
62            prof.runctx(statement, globals, locals)
63        except SystemExit:
64            pass
65        finally:
66            self._show(prof, filename, sort)
67
68    def _show(self, prof, filename, sort):
69        if filename is not None:
70            prof.dump_stats(filename)
71        else:
72            prof.print_stats(sort)
73
74
75#**************************************************************************
76# The following are the static member functions for the profiler class
77# Note that an instance of Profile() is *not* needed to call them.
78#**************************************************************************
79
80def run(statement, filename=None, sort=-1):
81    """Run statement under profiler optionally saving results in filename
82
83    This function takes a single argument that can be passed to the
84    "exec" statement, and an optional file name.  In all cases this
85    routine attempts to "exec" its first argument and gather profiling
86    statistics from the execution. If no file name is present, then this
87    function automatically prints a simple profiling report, sorted by the
88    standard name string (file/line/function-name) that is presented in
89    each line.
90    """
91    return _Utils(Profile).run(statement, filename, sort)
92
93def runctx(statement, globals, locals, filename=None, sort=-1):
94    """Run statement under profiler, supplying your own globals and locals,
95    optionally saving results in filename.
96
97    statement and filename have the same semantics as profile.run
98    """
99    return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
100
101
102class Profile:
103    """Profiler class.
104
105    self.cur is always a tuple.  Each such tuple corresponds to a stack
106    frame that is currently active (self.cur[-2]).  The following are the
107    definitions of its members.  We use this external "parallel stack" to
108    avoid contaminating the program that we are profiling. (old profiler
109    used to write into the frames local dictionary!!) Derived classes
110    can change the definition of some entries, as long as they leave
111    [-2:] intact (frame and previous tuple).  In case an internal error is
112    detected, the -3 element is used as the function name.
113
114    [ 0] = Time that needs to be charged to the parent frame's function.
115           It is used so that a function call will not have to access the
116           timing data for the parent frame.
117    [ 1] = Total time spent in this frame's function, excluding time in
118           subfunctions (this latter is tallied in cur[2]).
119    [ 2] = Total time spent in subfunctions, excluding time executing the
120           frame's function (this latter is tallied in cur[1]).
121    [-3] = Name of the function that corresponds to this frame.
122    [-2] = Actual frame that we correspond to (used to sync exception handling).
123    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
124
125    Timing data for each function is stored as a 5-tuple in the dictionary
126    self.timings[].  The index is always the name stored in self.cur[-3].
127    The following are the definitions of the members:
128
129    [0] = The number of times this function was called, not counting direct
130          or indirect recursion,
131    [1] = Number of times this function appears on the stack, minus one
132    [2] = Total time spent internal to this function
133    [3] = Cumulative time that this function was present on the stack.  In
134          non-recursive functions, this is the total execution time from start
135          to finish of each invocation of a function, including time spent in
136          all subfunctions.
137    [4] = A dictionary indicating for each function name, the number of times
138          it was called by us.
139    """
140
141    bias = 0  # calibration constant
142
143    def __init__(self, timer=None, bias=None):
144        self.timings = {}
145        self.cur = None
146        self.cmd = ""
147        self.c_func_name = ""
148
149        if bias is None:
150            bias = self.bias
151        self.bias = bias     # Materialize in local dict for lookup speed.
152
153        if not timer:
154            self.timer = self.get_time = time.process_time
155            self.dispatcher = self.trace_dispatch_i
156        else:
157            self.timer = timer
158            t = self.timer() # test out timer function
159            try:
160                length = len(t)
161            except TypeError:
162                self.get_time = timer
163                self.dispatcher = self.trace_dispatch_i
164            else:
165                if length == 2:
166                    self.dispatcher = self.trace_dispatch
167                else:
168                    self.dispatcher = self.trace_dispatch_l
169                # This get_time() implementation needs to be defined
170                # here to capture the passed-in timer in the parameter
171                # list (for performance).  Note that we can't assume
172                # the timer() result contains two values in all
173                # cases.
174                def get_time_timer(timer=timer, sum=sum):
175                    return sum(timer())
176                self.get_time = get_time_timer
177        self.t = self.get_time()
178        self.simulate_call('profiler')
179
180    # Heavily optimized dispatch routine for time.process_time() timer
181
182    def trace_dispatch(self, frame, event, arg):
183        timer = self.timer
184        t = timer()
185        t = t[0] + t[1] - self.t - self.bias
186
187        if event == "c_call":
188            self.c_func_name = arg.__name__
189
190        if self.dispatch[event](self, frame,t):
191            t = timer()
192            self.t = t[0] + t[1]
193        else:
194            r = timer()
195            self.t = r[0] + r[1] - t # put back unrecorded delta
196
197    # Dispatch routine for best timer program (return = scalar, fastest if
198    # an integer but float works too -- and time.process_time() relies on that).
199
200    def trace_dispatch_i(self, frame, event, arg):
201        timer = self.timer
202        t = timer() - self.t - self.bias
203
204        if event == "c_call":
205            self.c_func_name = arg.__name__
206
207        if self.dispatch[event](self, frame, t):
208            self.t = timer()
209        else:
210            self.t = timer() - t  # put back unrecorded delta
211
212    # Dispatch routine for macintosh (timer returns time in ticks of
213    # 1/60th second)
214
215    def trace_dispatch_mac(self, frame, event, arg):
216        timer = self.timer
217        t = timer()/60.0 - self.t - self.bias
218
219        if event == "c_call":
220            self.c_func_name = arg.__name__
221
222        if self.dispatch[event](self, frame, t):
223            self.t = timer()/60.0
224        else:
225            self.t = timer()/60.0 - t  # put back unrecorded delta
226
227    # SLOW generic dispatch routine for timer returning lists of numbers
228
229    def trace_dispatch_l(self, frame, event, arg):
230        get_time = self.get_time
231        t = get_time() - self.t - self.bias
232
233        if event == "c_call":
234            self.c_func_name = arg.__name__
235
236        if self.dispatch[event](self, frame, t):
237            self.t = get_time()
238        else:
239            self.t = get_time() - t # put back unrecorded delta
240
241    # In the event handlers, the first 3 elements of self.cur are unpacked
242    # into vrbls w/ 3-letter names.  The last two characters are meant to be
243    # mnemonic:
244    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
245    #     _it  self.cur[1] "internal time" time spent directly in the function
246    #     _et  self.cur[2] "external time" time spent in subfunctions
247
248    def trace_dispatch_exception(self, frame, t):
249        rpt, rit, ret, rfn, rframe, rcur = self.cur
250        if (rframe is not frame) and rcur:
251            return self.trace_dispatch_return(rframe, t)
252        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
253        return 1
254
255
256    def trace_dispatch_call(self, frame, t):
257        if self.cur and frame.f_back is not self.cur[-2]:
258            rpt, rit, ret, rfn, rframe, rcur = self.cur
259            if not isinstance(rframe, Profile.fake_frame):
260                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
261                                                       rframe, rframe.f_back,
262                                                       frame, frame.f_back)
263                self.trace_dispatch_return(rframe, 0)
264                assert (self.cur is None or \
265                        frame.f_back is self.cur[-2]), ("Bad call",
266                                                        self.cur[-3])
267        fcode = frame.f_code
268        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
269        self.cur = (t, 0, 0, fn, frame, self.cur)
270        timings = self.timings
271        if fn in timings:
272            cc, ns, tt, ct, callers = timings[fn]
273            timings[fn] = cc, ns + 1, tt, ct, callers
274        else:
275            timings[fn] = 0, 0, 0, 0, {}
276        return 1
277
278    def trace_dispatch_c_call (self, frame, t):
279        fn = ("", 0, self.c_func_name)
280        self.cur = (t, 0, 0, fn, frame, self.cur)
281        timings = self.timings
282        if fn in timings:
283            cc, ns, tt, ct, callers = timings[fn]
284            timings[fn] = cc, ns+1, tt, ct, callers
285        else:
286            timings[fn] = 0, 0, 0, 0, {}
287        return 1
288
289    def trace_dispatch_return(self, frame, t):
290        if frame is not self.cur[-2]:
291            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
292            self.trace_dispatch_return(self.cur[-2], 0)
293
294        # Prefix "r" means part of the Returning or exiting frame.
295        # Prefix "p" means part of the Previous or Parent or older frame.
296
297        rpt, rit, ret, rfn, frame, rcur = self.cur
298        rit = rit + t
299        frame_total = rit + ret
300
301        ppt, pit, pet, pfn, pframe, pcur = rcur
302        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
303
304        timings = self.timings
305        cc, ns, tt, ct, callers = timings[rfn]
306        if not ns:
307            # This is the only occurrence of the function on the stack.
308            # Else this is a (directly or indirectly) recursive call, and
309            # its cumulative time will get updated when the topmost call to
310            # it returns.
311            ct = ct + frame_total
312            cc = cc + 1
313
314        if pfn in callers:
315            callers[pfn] = callers[pfn] + 1  # hack: gather more
316            # stats such as the amount of time added to ct courtesy
317            # of this specific call, and the contribution to cc
318            # courtesy of this call.
319        else:
320            callers[pfn] = 1
321
322        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
323
324        return 1
325
326
327    dispatch = {
328        "call": trace_dispatch_call,
329        "exception": trace_dispatch_exception,
330        "return": trace_dispatch_return,
331        "c_call": trace_dispatch_c_call,
332        "c_exception": trace_dispatch_return,  # the C function returned
333        "c_return": trace_dispatch_return,
334        }
335
336
337    # The next few functions play with self.cmd. By carefully preloading
338    # our parallel stack, we can force the profiled result to include
339    # an arbitrary string as the name of the calling function.
340    # We use self.cmd as that string, and the resulting stats look
341    # very nice :-).
342
343    def set_cmd(self, cmd):
344        if self.cur[-1]: return   # already set
345        self.cmd = cmd
346        self.simulate_call(cmd)
347
348    class fake_code:
349        def __init__(self, filename, line, name):
350            self.co_filename = filename
351            self.co_line = line
352            self.co_name = name
353            self.co_firstlineno = 0
354
355        def __repr__(self):
356            return repr((self.co_filename, self.co_line, self.co_name))
357
358    class fake_frame:
359        def __init__(self, code, prior):
360            self.f_code = code
361            self.f_back = prior
362
363    def simulate_call(self, name):
364        code = self.fake_code('profile', 0, name)
365        if self.cur:
366            pframe = self.cur[-2]
367        else:
368            pframe = None
369        frame = self.fake_frame(code, pframe)
370        self.dispatch['call'](self, frame, 0)
371
372    # collect stats from pending stack, including getting final
373    # timings for self.cmd frame.
374
375    def simulate_cmd_complete(self):
376        get_time = self.get_time
377        t = get_time() - self.t
378        while self.cur[-1]:
379            # We *can* cause assertion errors here if
380            # dispatch_trace_return checks for a frame match!
381            self.dispatch['return'](self, self.cur[-2], t)
382            t = 0
383        self.t = get_time() - t
384
385
386    def print_stats(self, sort=-1):
387        import pstats
388        pstats.Stats(self).strip_dirs().sort_stats(sort). \
389                  print_stats()
390
391    def dump_stats(self, file):
392        with open(file, 'wb') as f:
393            self.create_stats()
394            marshal.dump(self.stats, f)
395
396    def create_stats(self):
397        self.simulate_cmd_complete()
398        self.snapshot_stats()
399
400    def snapshot_stats(self):
401        self.stats = {}
402        for func, (cc, ns, tt, ct, callers) in self.timings.items():
403            callers = callers.copy()
404            nc = 0
405            for callcnt in callers.values():
406                nc += callcnt
407            self.stats[func] = cc, nc, tt, ct, callers
408
409
410    # The following two methods can be called by clients to use
411    # a profiler to profile a statement, given as a string.
412
413    def run(self, cmd):
414        import __main__
415        dict = __main__.__dict__
416        return self.runctx(cmd, dict, dict)
417
418    def runctx(self, cmd, globals, locals):
419        self.set_cmd(cmd)
420        sys.setprofile(self.dispatcher)
421        try:
422            exec(cmd, globals, locals)
423        finally:
424            sys.setprofile(None)
425        return self
426
427    # This method is more useful to profile a single function call.
428    def runcall(*args, **kw):
429        if len(args) >= 2:
430            self, func, *args = args
431        elif not args:
432            raise TypeError("descriptor 'runcall' of 'Profile' object "
433                            "needs an argument")
434        elif 'func' in kw:
435            func = kw.pop('func')
436            self, *args = args
437        else:
438            raise TypeError('runcall expected at least 1 positional argument, '
439                            'got %d' % (len(args)-1))
440
441        self.set_cmd(repr(func))
442        sys.setprofile(self.dispatcher)
443        try:
444            return func(*args, **kw)
445        finally:
446            sys.setprofile(None)
447
448
449    #******************************************************************
450    # The following calculates the overhead for using a profiler.  The
451    # problem is that it takes a fair amount of time for the profiler
452    # to stop the stopwatch (from the time it receives an event).
453    # Similarly, there is a delay from the time that the profiler
454    # re-starts the stopwatch before the user's code really gets to
455    # continue.  The following code tries to measure the difference on
456    # a per-event basis.
457    #
458    # Note that this difference is only significant if there are a lot of
459    # events, and relatively little user code per event.  For example,
460    # code with small functions will typically benefit from having the
461    # profiler calibrated for the current platform.  This *could* be
462    # done on the fly during init() time, but it is not worth the
463    # effort.  Also note that if too large a value specified, then
464    # execution time on some functions will actually appear as a
465    # negative number.  It is *normal* for some functions (with very
466    # low call counts) to have such negative stats, even if the
467    # calibration figure is "correct."
468    #
469    # One alternative to profile-time calibration adjustments (i.e.,
470    # adding in the magic little delta during each event) is to track
471    # more carefully the number of events (and cumulatively, the number
472    # of events during sub functions) that are seen.  If this were
473    # done, then the arithmetic could be done after the fact (i.e., at
474    # display time).  Currently, we track only call/return events.
475    # These values can be deduced by examining the callees and callers
476    # vectors for each functions.  Hence we *can* almost correct the
477    # internal time figure at print time (note that we currently don't
478    # track exception event processing counts).  Unfortunately, there
479    # is currently no similar information for cumulative sub-function
480    # time.  It would not be hard to "get all this info" at profiler
481    # time.  Specifically, we would have to extend the tuples to keep
482    # counts of this in each frame, and then extend the defs of timing
483    # tuples to include the significant two figures. I'm a bit fearful
484    # that this additional feature will slow the heavily optimized
485    # event/time ratio (i.e., the profiler would run slower, fur a very
486    # low "value added" feature.)
487    #**************************************************************
488
489    def calibrate(self, m, verbose=0):
490        if self.__class__ is not Profile:
491            raise TypeError("Subclasses must override .calibrate().")
492
493        saved_bias = self.bias
494        self.bias = 0
495        try:
496            return self._calibrate_inner(m, verbose)
497        finally:
498            self.bias = saved_bias
499
500    def _calibrate_inner(self, m, verbose):
501        get_time = self.get_time
502
503        # Set up a test case to be run with and without profiling.  Include
504        # lots of calls, because we're trying to quantify stopwatch overhead.
505        # Do not raise any exceptions, though, because we want to know
506        # exactly how many profile events are generated (one call event, +
507        # one return event, per Python-level call).
508
509        def f1(n):
510            for i in range(n):
511                x = 1
512
513        def f(m, f1=f1):
514            for i in range(m):
515                f1(100)
516
517        f(m)    # warm up the cache
518
519        # elapsed_noprofile <- time f(m) takes without profiling.
520        t0 = get_time()
521        f(m)
522        t1 = get_time()
523        elapsed_noprofile = t1 - t0
524        if verbose:
525            print("elapsed time without profiling =", elapsed_noprofile)
526
527        # elapsed_profile <- time f(m) takes with profiling.  The difference
528        # is profiling overhead, only some of which the profiler subtracts
529        # out on its own.
530        p = Profile()
531        t0 = get_time()
532        p.runctx('f(m)', globals(), locals())
533        t1 = get_time()
534        elapsed_profile = t1 - t0
535        if verbose:
536            print("elapsed time with profiling =", elapsed_profile)
537
538        # reported_time <- "CPU seconds" the profiler charged to f and f1.
539        total_calls = 0.0
540        reported_time = 0.0
541        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
542                p.timings.items():
543            if funcname in ("f", "f1"):
544                total_calls += cc
545                reported_time += tt
546
547        if verbose:
548            print("'CPU seconds' profiler reported =", reported_time)
549            print("total # calls =", total_calls)
550        if total_calls != m + 1:
551            raise ValueError("internal error: total calls = %d" % total_calls)
552
553        # reported_time - elapsed_noprofile = overhead the profiler wasn't
554        # able to measure.  Divide by twice the number of calls (since there
555        # are two profiler events per call in this test) to get the hidden
556        # overhead per event.
557        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
558        if verbose:
559            print("mean stopwatch overhead per profile event =", mean)
560        return mean
561
562#****************************************************************************
563
564def main():
565    import os
566    from optparse import OptionParser
567
568    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
569    parser = OptionParser(usage=usage)
570    parser.allow_interspersed_args = False
571    parser.add_option('-o', '--outfile', dest="outfile",
572        help="Save stats to <outfile>", default=None)
573    parser.add_option('-s', '--sort', dest="sort",
574        help="Sort order when printing to stdout, based on pstats.Stats class",
575        default=-1)
576
577    if not sys.argv[1:]:
578        parser.print_usage()
579        sys.exit(2)
580
581    (options, args) = parser.parse_args()
582    sys.argv[:] = args
583
584    if len(args) > 0:
585        progname = args[0]
586        sys.path.insert(0, os.path.dirname(progname))
587        with open(progname, 'rb') as fp:
588            code = compile(fp.read(), progname, 'exec')
589        globs = {
590            '__file__': progname,
591            '__name__': '__main__',
592            '__package__': None,
593            '__cached__': None,
594        }
595        runctx(code, globs, None, options.outfile, options.sort)
596    else:
597        parser.print_usage()
598    return parser
599
600# When invoked as main program, invoke the profiler on a script
601if __name__ == '__main__':
602    main()
603