1package Benchmark; 2 3use strict; 4 5 6=head1 NAME 7 8Benchmark - benchmark running times of Perl code 9 10=head1 SYNOPSIS 11 12 use Benchmark qw(:all) ; 13 14 timethis ($count, "code"); 15 16 # Use Perl code in strings... 17 timethese($count, { 18 'Name1' => '...code1...', 19 'Name2' => '...code2...', 20 }); 21 22 # ... or use subroutine references. 23 timethese($count, { 24 'Name1' => sub { ...code1... }, 25 'Name2' => sub { ...code2... }, 26 }); 27 28 # cmpthese can be used both ways as well 29 cmpthese($count, { 30 'Name1' => '...code1...', 31 'Name2' => '...code2...', 32 }); 33 34 cmpthese($count, { 35 'Name1' => sub { ...code1... }, 36 'Name2' => sub { ...code2... }, 37 }); 38 39 # ...or in two stages 40 $results = timethese($count, 41 { 42 'Name1' => sub { ...code1... }, 43 'Name2' => sub { ...code2... }, 44 }, 45 'none' 46 ); 47 cmpthese( $results ) ; 48 49 $t = timeit($count, '...other code...') 50 print "$count loops of other code took:",timestr($t),"\n"; 51 52 $t = countit($time, '...other code...') 53 $count = $t->iters ; 54 print "$count loops of other code took:",timestr($t),"\n"; 55 56 # enable hires wallclock timing if possible 57 use Benchmark ':hireswallclock'; 58 59=head1 DESCRIPTION 60 61The Benchmark module encapsulates a number of routines to help you 62figure out how long it takes to execute some code. 63 64timethis - run a chunk of code several times 65 66timethese - run several chunks of code several times 67 68cmpthese - print results of timethese as a comparison chart 69 70timeit - run a chunk of code and see how long it goes 71 72countit - see how many times a chunk of code runs in a given time 73 74 75=head2 Methods 76 77=over 10 78 79=item new 80 81Returns the current time. Example: 82 83 use Benchmark; 84 $t0 = Benchmark->new; 85 # ... your code here ... 86 $t1 = Benchmark->new; 87 $td = timediff($t1, $t0); 88 print "the code took:",timestr($td),"\n"; 89 90=item debug 91 92Enables or disable debugging by setting the C<$Benchmark::Debug> flag: 93 94 Benchmark->debug(1); 95 $t = timeit(10, ' 5 ** $Global '); 96 Benchmark->debug(0); 97 98=item iters 99 100Returns the number of iterations. 101 102=back 103 104=head2 Standard Exports 105 106The following routines will be exported into your namespace 107if you use the Benchmark module: 108 109=over 10 110 111=item timeit(COUNT, CODE) 112 113Arguments: COUNT is the number of times to run the loop, and CODE is 114the code to run. CODE may be either a code reference or a string to 115be eval'd; either way it will be run in the caller's package. 116 117Returns: a Benchmark object. 118 119=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] ) 120 121Time COUNT iterations of CODE. CODE may be a string to eval or a 122code reference; either way the CODE will run in the caller's package. 123Results will be printed to STDOUT as TITLE followed by the times. 124TITLE defaults to "timethis COUNT" if none is provided. STYLE 125determines the format of the output, as described for timestr() below. 126 127The COUNT can be zero or negative: this means the I<minimum number of 128CPU seconds> to run. A zero signifies the default of 3 seconds. For 129example to run at least for 10 seconds: 130 131 timethis(-10, $code) 132 133or to run two pieces of code tests for at least 3 seconds: 134 135 timethese(0, { test1 => '...', test2 => '...'}) 136 137CPU seconds is, in UNIX terms, the user time plus the system time of 138the process itself, as opposed to the real (wallclock) time and the 139time spent by the child processes. Less than 0.1 seconds is not 140accepted (-0.01 as the count, for example, will cause a fatal runtime 141exception). 142 143Note that the CPU seconds is the B<minimum> time: CPU scheduling and 144other operating system factors may complicate the attempt so that a 145little bit more time is spent. The benchmark output will, however, 146also tell the number of C<$code> runs/second, which should be a more 147interesting number than the actually spent seconds. 148 149Returns a Benchmark object. 150 151=item timethese ( COUNT, CODEHASHREF, [ STYLE ] ) 152 153The CODEHASHREF is a reference to a hash containing names as keys 154and either a string to eval or a code reference for each value. 155For each (KEY, VALUE) pair in the CODEHASHREF, this routine will 156call 157 158 timethis(COUNT, VALUE, KEY, STYLE) 159 160The routines are called in string comparison order of KEY. 161 162The COUNT can be zero or negative, see timethis(). 163 164Returns a hash reference of Benchmark objects, keyed by name. 165 166=item timediff ( T1, T2 ) 167 168Returns the difference between two Benchmark times as a Benchmark 169object suitable for passing to timestr(). 170 171=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] ) 172 173Returns a string that formats the times in the TIMEDIFF object in 174the requested STYLE. TIMEDIFF is expected to be a Benchmark object 175similar to that returned by timediff(). 176 177STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows 178each of the 5 times available ('wallclock' time, user time, system time, 179user time of children, and system time of children). 'noc' shows all 180except the two children times. 'nop' shows only wallclock and the 181two children times. 'auto' (the default) will act as 'all' unless 182the children times are both zero, in which case it acts as 'noc'. 183'none' prevents output. 184 185FORMAT is the L<printf(3)>-style format specifier (without the 186leading '%') to use to print the times. It defaults to '5.2f'. 187 188=back 189 190=head2 Optional Exports 191 192The following routines will be exported into your namespace 193if you specifically ask that they be imported: 194 195=over 10 196 197=item clearcache ( COUNT ) 198 199Clear the cached time for COUNT rounds of the null loop. 200 201=item clearallcache ( ) 202 203Clear all cached times. 204 205=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] ) 206 207=item cmpthese ( RESULTSHASHREF, [ STYLE ] ) 208 209Optionally calls timethese(), then outputs comparison chart. This: 210 211 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ; 212 213outputs a chart like: 214 215 Rate b a 216 b 2831802/s -- -61% 217 a 7208959/s 155% -- 218 219This chart is sorted from slowest to fastest, and shows the percent speed 220difference between each pair of tests. 221 222C<cmpthese> can also be passed the data structure that timethese() returns: 223 224 $results = timethese( -1, 225 { a => "++\$i", b => "\$i *= 2" } ) ; 226 cmpthese( $results ); 227 228in case you want to see both sets of results. 229If the first argument is an unblessed hash reference, 230that is RESULTSHASHREF; otherwise that is COUNT. 231 232Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the 233above chart, including labels. This: 234 235 my $rows = cmpthese( -1, 236 { a => '++$i', b => '$i *= 2' }, "none" ); 237 238returns a data structure like: 239 240 [ 241 [ '', 'Rate', 'b', 'a' ], 242 [ 'b', '2885232/s', '--', '-59%' ], 243 [ 'a', '7099126/s', '146%', '--' ], 244 ] 245 246B<NOTE>: This result value differs from previous versions, which returned 247the C<timethese()> result structure. If you want that, just use the two 248statement C<timethese>...C<cmpthese> idiom shown above. 249 250Incidentally, note the variance in the result values between the two examples; 251this is typical of benchmarking. If this were a real benchmark, you would 252probably want to run a lot more iterations. 253 254=item countit(TIME, CODE) 255 256Arguments: TIME is the minimum length of time to run CODE for, and CODE is 257the code to run. CODE may be either a code reference or a string to 258be eval'd; either way it will be run in the caller's package. 259 260TIME is I<not> negative. countit() will run the loop many times to 261calculate the speed of CODE before running it for TIME. The actual 262time run for will usually be greater than TIME due to system clock 263resolution, so it's best to look at the number of iterations divided 264by the times that you are concerned with, not just the iterations. 265 266Returns: a Benchmark object. 267 268=item disablecache ( ) 269 270Disable caching of timings for the null loop. This will force Benchmark 271to recalculate these timings for each new piece of code timed. 272 273=item enablecache ( ) 274 275Enable caching of timings for the null loop. The time taken for COUNT 276rounds of the null loop will be calculated only once for each 277different COUNT used. 278 279=item timesum ( T1, T2 ) 280 281Returns the sum of two Benchmark times as a Benchmark object suitable 282for passing to timestr(). 283 284=back 285 286=head2 :hireswallclock 287 288If the Time::HiRes module has been installed, you can specify the 289special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not 290available, the tag will be silently ignored). This tag will cause the 291wallclock time to be measured in microseconds, instead of integer 292seconds. Note though that the speed computations are still conducted 293in CPU time, not wallclock time. 294 295=head1 Benchmark Object 296 297Many of the functions in this module return a Benchmark object, 298or in the case of C<timethese()>, a reference to a hash, the values of 299which are Benchmark objects. This is useful if you want to store or 300further process results from Benchmark functions. 301 302Internally the Benchmark object holds timing values, 303described in L</"NOTES"> below. 304The following methods can be used to access them: 305 306=over 4 307 308=item cpu_p 309 310Total CPU (User + System) of the main (parent) process. 311 312=item cpu_c 313 314Total CPU (User + System) of any children processes. 315 316=item cpu_a 317 318Total CPU of parent and any children processes. 319 320=item real 321 322Real elapsed time "wallclock seconds". 323 324=item iters 325 326Number of iterations run. 327 328=back 329 330The following illustrates use of the Benchmark object: 331 332 $result = timethis(100000, sub { ... }); 333 print "total CPU = ", $result->cpu_a, "\n"; 334 335=head1 NOTES 336 337The data is stored as a list of values from the time and times 338functions: 339 340 ($real, $user, $system, $children_user, $children_system, $iters) 341 342in seconds for the whole loop (not divided by the number of rounds). 343 344The timing is done using time(3) and times(3). 345 346Code is executed in the caller's package. 347 348The time of the null loop (a loop with the same 349number of rounds but empty loop body) is subtracted 350from the time of the real loop. 351 352The null loop times can be cached, the key being the 353number of rounds. The caching can be controlled using 354calls like these: 355 356 clearcache($key); 357 clearallcache(); 358 359 disablecache(); 360 enablecache(); 361 362Caching is off by default, as it can (usually slightly) decrease 363accuracy and does not usually noticeably affect runtimes. 364 365=head1 EXAMPLES 366 367For example, 368 369 use Benchmark qw( cmpthese ) ; 370 $x = 3; 371 cmpthese( -5, { 372 a => sub{$x*$x}, 373 b => sub{$x**2}, 374 } ); 375 376outputs something like this: 377 378 Benchmark: running a, b, each for at least 5 CPU seconds... 379 Rate b a 380 b 1559428/s -- -62% 381 a 4152037/s 166% -- 382 383 384while 385 386 use Benchmark qw( timethese cmpthese ) ; 387 $x = 3; 388 $r = timethese( -5, { 389 a => sub{$x*$x}, 390 b => sub{$x**2}, 391 } ); 392 cmpthese $r; 393 394outputs something like this: 395 396 Benchmark: running a, b, each for at least 5 CPU seconds... 397 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743) 398 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452) 399 Rate b a 400 b 1574945/s -- -59% 401 a 3835056/s 144% -- 402 403 404=head1 INHERITANCE 405 406Benchmark inherits from no other class, except of course 407from Exporter. 408 409=head1 CAVEATS 410 411Comparing eval'd strings with code references will give you 412inaccurate results: a code reference will show a slightly slower 413execution time than the equivalent eval'd string. 414 415The real time timing is done using time(2) and 416the granularity is therefore only one second. 417 418Short tests may produce negative figures because perl 419can appear to take longer to execute the empty loop 420than a short test; try: 421 422 timethis(100,'1'); 423 424The system time of the null loop might be slightly 425more than the system time of the loop with the actual 426code and therefore the difference might end up being E<lt> 0. 427 428=head1 SEE ALSO 429 430L<Devel::NYTProf> - a Perl code profiler 431 432=head1 AUTHORS 433 434Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>> 435 436=head1 MODIFICATION HISTORY 437 438September 8th, 1994; by Tim Bunce. 439 440March 28th, 1997; by Hugo van der Sanden: added support for code 441references and the already documented 'debug' method; revamped 442documentation. 443 444April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time 445functionality. 446 447September, 1999; by Barrie Slaymaker: math fixes and accuracy and 448efficiency tweaks. Added cmpthese(). A result is now returned from 449timethese(). Exposed countit() (was runfor()). 450 451December, 2001; by Nicholas Clark: make timestr() recognise the style 'none' 452and return an empty string. If cmpthese is calling timethese, make it pass the 453style in. (so that 'none' will suppress output). Make sub new dump its 454debugging output to STDERR, to be consistent with everything else. 455All bugs found while writing a regression test. 456 457September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag. 458 459February, 2004; by Chia-liang Kao: make cmpthese and timestr use time 460statistics for children instead of parent when the style is 'nop'. 461 462November, 2007; by Christophe Grosjean: make cmpthese and timestr compute 463time consistently with style argument, default is 'all' not 'noc' any more. 464 465=cut 466 467# evaluate something in a clean lexical environment 468sub _doeval { no strict; eval shift } 469 470# 471# put any lexicals at file scope AFTER here 472# 473 474use Carp; 475use Exporter; 476 477our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION); 478 479@ISA=qw(Exporter); 480@EXPORT=qw(timeit timethis timethese timediff timestr); 481@EXPORT_OK=qw(timesum cmpthese countit 482 clearcache clearallcache disablecache enablecache); 483%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ; 484 485$VERSION = 1.22; 486 487# --- ':hireswallclock' special handling 488 489my $hirestime; 490 491sub mytime () { time } 492 493init(); 494 495sub BEGIN { 496 if (eval 'require Time::HiRes') { 497 import Time::HiRes qw(time); 498 $hirestime = \&Time::HiRes::time; 499 } 500} 501 502sub import { 503 my $class = shift; 504 if (grep { $_ eq ":hireswallclock" } @_) { 505 @_ = grep { $_ ne ":hireswallclock" } @_; 506 local $^W=0; 507 *mytime = $hirestime if defined $hirestime; 508 } 509 Benchmark->export_to_level(1, $class, @_); 510} 511 512our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style, 513 %_Usage, %Cache, $Do_Cache); 514 515sub init { 516 $Debug = 0; 517 $Min_Count = 4; 518 $Min_CPU = 0.4; 519 $Default_Format = '5.2f'; 520 $Default_Style = 'auto'; 521 # The cache can cause a slight loss of sys time accuracy. If a 522 # user does many tests (>10) with *very* large counts (>10000) 523 # or works on a very slow machine the cache may be useful. 524 disablecache(); 525 clearallcache(); 526} 527 528sub debug { $Debug = ($_[1] != 0); } 529 530sub usage { 531 my $calling_sub = (caller(1))[3]; 532 $calling_sub =~ s/^Benchmark:://; 533 return $_Usage{$calling_sub} || ''; 534} 535 536# The cache needs two branches: 's' for strings and 'c' for code. The 537# empty loop is different in these two cases. 538 539$_Usage{clearcache} = <<'USAGE'; 540usage: clearcache($count); 541USAGE 542 543sub clearcache { 544 die usage unless @_ == 1; 545 delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"}; 546} 547 548$_Usage{clearallcache} = <<'USAGE'; 549usage: clearallcache(); 550USAGE 551 552sub clearallcache { 553 die usage if @_; 554 %Cache = (); 555} 556 557$_Usage{enablecache} = <<'USAGE'; 558usage: enablecache(); 559USAGE 560 561sub enablecache { 562 die usage if @_; 563 $Do_Cache = 1; 564} 565 566$_Usage{disablecache} = <<'USAGE'; 567usage: disablecache(); 568USAGE 569 570sub disablecache { 571 die usage if @_; 572 $Do_Cache = 0; 573} 574 575 576# --- Functions to process the 'time' data type 577 578sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0); 579 print STDERR "new=@t\n" if $Debug; 580 bless \@t; } 581 582sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; } 583sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; } 584sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; } 585sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; } 586sub iters { $_[0]->[5] ; } 587 588# return the sum of various times: which ones depending on $style 589 590sub elapsed { 591 my ($self, $style) = @_; 592 $style = "" unless defined $style; 593 594 return $self->cpu_c if $style eq 'nop'; 595 return $self->cpu_p if $style eq 'noc'; 596 return $self->cpu_a; 597} 598 599 600$_Usage{timediff} = <<'USAGE'; 601usage: $result_diff = timediff($result1, $result2); 602USAGE 603 604sub timediff { 605 my($a, $b) = @_; 606 607 die usage unless ref $a and ref $b; 608 609 my @r; 610 for (my $i=0; $i < @$a; ++$i) { 611 push(@r, $a->[$i] - $b->[$i]); 612 } 613 #die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]|@$b[1,2])\n" 614 # if ($r[1] + $r[2]) < 0; 615 bless \@r; 616} 617 618$_Usage{timesum} = <<'USAGE'; 619usage: $sum = timesum($result1, $result2); 620USAGE 621 622sub timesum { 623 my($a, $b) = @_; 624 625 die usage unless ref $a and ref $b; 626 627 my @r; 628 for (my $i=0; $i < @$a; ++$i) { 629 push(@r, $a->[$i] + $b->[$i]); 630 } 631 bless \@r; 632} 633 634 635$_Usage{timestr} = <<'USAGE'; 636usage: $formatted_result = timestr($result1); 637USAGE 638 639sub timestr { 640 my($tr, $style, $f) = @_; 641 642 die usage unless ref $tr; 643 644 my @t = @$tr; 645 warn "bad time value (@t)" unless @t==6; 646 my($r, $pu, $ps, $cu, $cs, $n) = @t; 647 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a); 648 $f = $Default_Format unless defined $f; 649 # format a time in the required style, other formats may be added here 650 $style ||= $Default_Style; 651 return '' if $style eq 'none'; 652 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto'; 653 my $s = "@t $style"; # default for unknown style 654 my $w = $hirestime ? "%2g" : "%2d"; 655 $s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 656 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all'; 657 $s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)", 658 $r,$pu,$ps,$pt) if $style eq 'noc'; 659 $s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)", 660 $r,$cu,$cs,$ct) if $style eq 'nop'; 661 my $elapsed = $tr->elapsed($style); 662 $s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed; 663 $s; 664} 665 666sub timedebug { 667 my($msg, $t) = @_; 668 print STDERR "$msg",timestr($t),"\n" if $Debug; 669} 670 671# --- Functions implementing low-level support for timing loops 672 673$_Usage{runloop} = <<'USAGE'; 674usage: runloop($number, [$string | $coderef]) 675USAGE 676 677sub runloop { 678 my($n, $c) = @_; 679 680 $n+=0; # force numeric now, so garbage won't creep into the eval 681 croak "negative loopcount $n" if $n<0; 682 confess usage unless defined $c; 683 my($t0, $t1, $td); # before, after, difference 684 685 # find package of caller so we can execute code there 686 my($curpack) = caller(0); 687 my($i, $pack)= 0; 688 while (($pack) = caller(++$i)) { 689 last if $pack ne $curpack; 690 } 691 692 my ($subcode, $subref); 693 if (ref $c eq 'CODE') { 694 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }"; 695 $subref = eval $subcode; 696 } 697 else { 698 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }"; 699 $subref = _doeval($subcode); 700 } 701 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@; 702 print STDERR "runloop $n '$subcode'\n" if $Debug; 703 704 # Wait for the user timer to tick. This makes the error range more like 705 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This 706 # may not seem important, but it significantly reduces the chances of 707 # getting a too low initial $n in the initial, 'find the minimum' loop 708 # in &countit. This, in turn, can reduce the number of calls to 709 # &runloop a lot, and thus reduce additive errors. 710 # 711 # Note that its possible for the act of reading the system clock to 712 # burn lots of system CPU while we burn very little user clock in the 713 # busy loop, which can cause the loop to run for a very long wall time. 714 # So gradually ramp up the duration of the loop. See RT #122003 715 # 716 my $tbase = Benchmark->new(0)->[1]; 717 my $limit = 1; 718 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) { 719 for (my $i=0; $i < $limit; $i++) { my $x = $i / 1.5 } # burn user CPU 720 $limit *= 1.1; 721 } 722 $subref->(); 723 $t1 = Benchmark->new($n); 724 $td = &timediff($t1, $t0); 725 timedebug("runloop:",$td); 726 $td; 727} 728 729$_Usage{timeit} = <<'USAGE'; 730usage: $result = timeit($count, 'code' ); or 731 $result = timeit($count, sub { code } ); 732USAGE 733 734sub timeit { 735 my($n, $code) = @_; 736 my($wn, $wc, $wd); 737 738 die usage unless defined $code and 739 (!ref $code or ref $code eq 'CODE'); 740 741 printf STDERR "timeit $n $code\n" if $Debug; 742 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' ); 743 if ($Do_Cache && exists $Cache{$cache_key} ) { 744 $wn = $Cache{$cache_key}; 745 } else { 746 $wn = &runloop($n, ref( $code ) ? sub { } : '' ); 747 # Can't let our baseline have any iterations, or they get subtracted 748 # out of the result. 749 $wn->[5] = 0; 750 $Cache{$cache_key} = $wn; 751 } 752 753 $wc = &runloop($n, $code); 754 755 $wd = timediff($wc, $wn); 756 timedebug("timeit: ",$wc); 757 timedebug(" - ",$wn); 758 timedebug(" = ",$wd); 759 760 $wd; 761} 762 763 764my $default_for = 3; 765my $min_for = 0.1; 766 767 768$_Usage{countit} = <<'USAGE'; 769usage: $result = countit($time, 'code' ); or 770 $result = countit($time, sub { code } ); 771USAGE 772 773sub countit { 774 my ( $tmax, $code ) = @_; 775 776 die usage unless @_; 777 778 if ( not defined $tmax or $tmax == 0 ) { 779 $tmax = $default_for; 780 } elsif ( $tmax < 0 ) { 781 $tmax = -$tmax; 782 } 783 784 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n" 785 if $tmax < $min_for; 786 787 my ($n, $tc); 788 789 # First find the minimum $n that gives a significant timing. 790 my $zeros=0; 791 for ($n = 1; ; $n *= 2 ) { 792 my $t0 = Benchmark->new(0); 793 my $td = timeit($n, $code); 794 my $t1 = Benchmark->new(0); 795 $tc = $td->[1] + $td->[2]; 796 if ( $tc <= 0 and $n > 1024 ) { 797 my $d = timediff($t1, $t0); 798 # note that $d is the total CPU time taken to call timeit(), 799 # while $tc is is difference in CPU secs between the empty run 800 # and the code run. If the code is trivial, its possible 801 # for $d to get large while $tc is still zero (or slightly 802 # negative). Bail out once timeit() starts taking more than a 803 # few seconds without noticeable difference. 804 if ($d->[1] + $d->[2] > 8 805 || ++$zeros > 16) 806 { 807 die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n"; 808 } 809 } else { 810 $zeros = 0; 811 } 812 last if $tc > 0.1; 813 } 814 815 my $nmin = $n; 816 817 # Get $n high enough that we can guess the final $n with some accuracy. 818 my $tpra = 0.1 * $tmax; # Target/time practice. 819 while ( $tc < $tpra ) { 820 # The 5% fudge is to keep us from iterating again all 821 # that often (this speeds overall responsiveness when $tmax is big 822 # and we guess a little low). This does not noticeably affect 823 # accuracy since we're not counting these times. 824 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation. 825 my $td = timeit($n, $code); 826 my $new_tc = $td->[1] + $td->[2]; 827 # Make sure we are making progress. 828 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc; 829 } 830 831 # Now, do the 'for real' timing(s), repeating until we exceed 832 # the max. 833 my $ntot = 0; 834 my $rtot = 0; 835 my $utot = 0.0; 836 my $stot = 0.0; 837 my $cutot = 0.0; 838 my $cstot = 0.0; 839 my $ttot = 0.0; 840 841 # The 5% fudge is because $n is often a few % low even for routines 842 # with stable times and avoiding extra timeit()s is nice for 843 # accuracy's sake. 844 $n = int( $n * ( 1.05 * $tmax / $tc ) ); 845 $zeros=0; 846 while () { 847 my $td = timeit($n, $code); 848 $ntot += $n; 849 $rtot += $td->[0]; 850 $utot += $td->[1]; 851 $stot += $td->[2]; 852 $cutot += $td->[3]; 853 $cstot += $td->[4]; 854 $ttot = $utot + $stot; 855 last if $ttot >= $tmax; 856 if ( $ttot <= 0 ) { 857 ++$zeros > 16 858 and die "Timing is consistently zero, cannot benchmark. N=$n\n"; 859 } else { 860 $zeros = 0; 861 } 862 $ttot = 0.01 if $ttot < 0.01; 863 my $r = $tmax / $ttot - 1; # Linear approximation. 864 $n = int( $r * $ntot ); 865 $n = $nmin if $n < $nmin; 866 } 867 868 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ]; 869} 870 871# --- Functions implementing high-level time-then-print utilities 872 873sub n_to_for { 874 my $n = shift; 875 return $n == 0 ? $default_for : $n < 0 ? -$n : undef; 876} 877 878$_Usage{timethis} = <<'USAGE'; 879usage: $result = timethis($time, 'code' ); or 880 $result = timethis($time, sub { code } ); 881USAGE 882 883sub timethis{ 884 my($n, $code, $title, $style) = @_; 885 my($t, $forn); 886 887 die usage unless defined $code and 888 (!ref $code or ref $code eq 'CODE'); 889 890 if ( $n > 0 ) { 891 croak "non-integer loopcount $n, stopped" if int($n)<$n; 892 $t = timeit($n, $code); 893 $title = "timethis $n" unless defined $title; 894 } else { 895 my $fort = n_to_for( $n ); 896 $t = countit( $fort, $code ); 897 $title = "timethis for $fort" unless defined $title; 898 $forn = $t->[-1]; 899 } 900 local $| = 1; 901 $style = "" unless defined $style; 902 printf("%10s: ", $title) unless $style eq 'none'; 903 print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none'; 904 905 $n = $forn if defined $forn; 906 907 if ($t->elapsed($style) < 0) { 908 # due to clock granularity and variable CPU speed and load, 909 # on quick code with a small number of loops, it's possible for 910 # the empty loop to appear to take longer than the real loop 911 # (e.g. 1 tick versus 0 ticks). This leads to a negative elapsed 912 # time. In this case, floor it at zero, to stop bizarre results. 913 print " (warning: too few iterations for a reliable count)\n"; 914 $t->[$_] = 0 for 1..4; 915 } 916 917 # A conservative warning to spot very silly tests. 918 # Don't assume that your benchmark is ok simply because 919 # you don't get this warning! 920 print " (warning: too few iterations for a reliable count)\n" 921 if $n < $Min_Count 922 || ($t->real < 1 && $n < 1000) 923 || $t->cpu_a < $Min_CPU; 924 $t; 925} 926 927 928$_Usage{timethese} = <<'USAGE'; 929usage: timethese($count, { Name1 => 'code1', ... }); or 930 timethese($count, { Name1 => sub { code1 }, ... }); 931USAGE 932 933sub timethese{ 934 my($n, $alt, $style) = @_; 935 die usage unless ref $alt eq 'HASH'; 936 937 my @names = sort keys %$alt; 938 $style = "" unless defined $style; 939 print "Benchmark: " unless $style eq 'none'; 940 if ( $n > 0 ) { 941 croak "non-integer loopcount $n, stopped" if int($n)<$n; 942 print "timing $n iterations of" unless $style eq 'none'; 943 } else { 944 print "running" unless $style eq 'none'; 945 } 946 print " ", join(', ',@names) unless $style eq 'none'; 947 unless ( $n > 0 ) { 948 my $for = n_to_for( $n ); 949 print ", each" if $n > 1 && $style ne 'none'; 950 print " for at least $for CPU seconds" unless $style eq 'none'; 951 } 952 print "...\n" unless $style eq 'none'; 953 954 # we could save the results in an array and produce a summary here 955 # sum, min, max, avg etc etc 956 my %results; 957 foreach my $name (@names) { 958 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style); 959 } 960 961 return \%results; 962} 963 964 965$_Usage{cmpthese} = <<'USAGE'; 966usage: cmpthese($count, { Name1 => 'code1', ... }); or 967 cmpthese($count, { Name1 => sub { code1 }, ... }); or 968 cmpthese($result, $style); 969USAGE 970 971sub cmpthese{ 972 my ($results, $style); 973 974 # $count can be a blessed object. 975 if ( ref $_[0] eq 'HASH' ) { 976 ($results, $style) = @_; 977 } 978 else { 979 my($count, $code) = @_[0,1]; 980 $style = $_[2] if defined $_[2]; 981 982 die usage unless ref $code eq 'HASH'; 983 984 $results = timethese($count, $code, ($style || "none")); 985 } 986 987 $style = "" unless defined $style; 988 989 # Flatten in to an array of arrays with the name as the first field 990 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results; 991 992 for (@vals) { 993 # recreate the pre-flattened Benchmark object 994 my $tmp_bm = bless [ @{$_}[1..$#$_] ]; 995 my $elapsed = $tmp_bm->elapsed($style); 996 # The epsilon fudge here is to prevent div by 0. Since clock 997 # resolutions are much larger, it's below the noise floor. 998 my $rate = $_->[6]/(($elapsed)+0.000000000000001); 999 $_->[7] = $rate; 1000 } 1001 1002 # Sort by rate 1003 @vals = sort { $a->[7] <=> $b->[7] } @vals; 1004 1005 # If more than half of the rates are greater than one... 1006 my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0; 1007 1008 my @rows; 1009 my @col_widths; 1010 1011 my @top_row = ( 1012 '', 1013 $display_as_rate ? 'Rate' : 's/iter', 1014 map { $_->[0] } @vals 1015 ); 1016 1017 push @rows, \@top_row; 1018 @col_widths = map { length( $_ ) } @top_row; 1019 1020 # Build the data rows 1021 # We leave the last column in even though it never has any data. Perhaps 1022 # it should go away. Also, perhaps a style for a single column of 1023 # percentages might be nice. 1024 for my $row_val ( @vals ) { 1025 my @row; 1026 1027 # Column 0 = test name 1028 push @row, $row_val->[0]; 1029 $col_widths[0] = length( $row_val->[0] ) 1030 if length( $row_val->[0] ) > $col_widths[0]; 1031 1032 # Column 1 = performance 1033 my $row_rate = $row_val->[7]; 1034 1035 # We assume that we'll never get a 0 rate. 1036 my $rate = $display_as_rate ? $row_rate : 1 / $row_rate; 1037 1038 # Only give a few decimal places before switching to sci. notation, 1039 # since the results aren't usually that accurate anyway. 1040 my $format = 1041 $rate >= 100 ? 1042 "%0.0f" : 1043 $rate >= 10 ? 1044 "%0.1f" : 1045 $rate >= 1 ? 1046 "%0.2f" : 1047 $rate >= 0.1 ? 1048 "%0.3f" : 1049 "%0.2e"; 1050 1051 $format .= "/s" 1052 if $display_as_rate; 1053 1054 my $formatted_rate = sprintf( $format, $rate ); 1055 push @row, $formatted_rate; 1056 $col_widths[1] = length( $formatted_rate ) 1057 if length( $formatted_rate ) > $col_widths[1]; 1058 1059 # Columns 2..N = performance ratios 1060 my $skip_rest = 0; 1061 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) { 1062 my $col_val = $vals[$col_num]; 1063 my $out; 1064 if ( $skip_rest ) { 1065 $out = ''; 1066 } 1067 elsif ( $col_val->[0] eq $row_val->[0] ) { 1068 $out = "--"; 1069 # $skip_rest = 1; 1070 } 1071 else { 1072 my $col_rate = $col_val->[7]; 1073 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 ); 1074 } 1075 push @row, $out; 1076 $col_widths[$col_num+2] = length( $out ) 1077 if length( $out ) > $col_widths[$col_num+2]; 1078 1079 # A little weirdness to set the first column width properly 1080 $col_widths[$col_num+2] = length( $col_val->[0] ) 1081 if length( $col_val->[0] ) > $col_widths[$col_num+2]; 1082 } 1083 push @rows, \@row; 1084 } 1085 1086 return \@rows if $style eq "none"; 1087 1088 # Equalize column widths in the chart as much as possible without 1089 # exceeding 80 characters. This does not use or affect cols 0 or 1. 1090 my @sorted_width_refs = 1091 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths]; 1092 my $max_width = ${$sorted_width_refs[-1]}; 1093 1094 my $total = @col_widths - 1 ; 1095 for ( @col_widths ) { $total += $_ } 1096 1097 STRETCHER: 1098 while ( $total < 80 ) { 1099 my $min_width = ${$sorted_width_refs[0]}; 1100 last 1101 if $min_width == $max_width; 1102 for ( @sorted_width_refs ) { 1103 last 1104 if $$_ > $min_width; 1105 ++$$_; 1106 ++$total; 1107 last STRETCHER 1108 if $total >= 80; 1109 } 1110 } 1111 1112 # Dump the output 1113 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n"; 1114 substr( $format, 1, 0 ) = '-'; 1115 for ( @rows ) { 1116 printf $format, @$_; 1117 } 1118 1119 return \@rows ; 1120} 1121 1122 11231; 1124