1package DBI::ProfileData; 2use strict; 3 4=head1 NAME 5 6DBI::ProfileData - manipulate DBI::ProfileDumper data dumps 7 8=head1 SYNOPSIS 9 10The easiest way to use this module is through the dbiprof frontend 11(see L<dbiprof> for details): 12 13 dbiprof --number 15 --sort count 14 15This module can also be used to roll your own profile analysis: 16 17 # load data from dbi.prof 18 $prof = DBI::ProfileData->new(File => "dbi.prof"); 19 20 # get a count of the records (unique paths) in the data set 21 $count = $prof->count(); 22 23 # sort by longest overall time 24 $prof->sort(field => "longest"); 25 26 # sort by longest overall time, least to greatest 27 $prof->sort(field => "longest", reverse => 1); 28 29 # exclude records with key2 eq 'disconnect' 30 $prof->exclude(key2 => 'disconnect'); 31 32 # exclude records with key1 matching /^UPDATE/i 33 $prof->exclude(key1 => qr/^UPDATE/i); 34 35 # remove all records except those where key1 matches /^SELECT/i 36 $prof->match(key1 => qr/^SELECT/i); 37 38 # produce a formatted report with the given number of items 39 $report = $prof->report(number => 10); 40 41 # clone the profile data set 42 $clone = $prof->clone(); 43 44 # get access to hash of header values 45 $header = $prof->header(); 46 47 # get access to sorted array of nodes 48 $nodes = $prof->nodes(); 49 50 # format a single node in the same style as report() 51 $text = $prof->format($nodes->[0]); 52 53 # get access to Data hash in DBI::Profile format 54 $Data = $prof->Data(); 55 56=head1 DESCRIPTION 57 58This module offers the ability to read, manipulate and format 59L<DBI::ProfileDumper> profile data. 60 61Conceptually, a profile consists of a series of records, or nodes, 62each of each has a set of statistics and set of keys. Each record 63must have a unique set of keys, but there is no requirement that every 64record have the same number of keys. 65 66=head1 METHODS 67 68The following methods are supported by DBI::ProfileData objects. 69 70=cut 71 72our $VERSION = "2.010008"; 73 74use Carp qw(croak); 75use Symbol; 76use Fcntl qw(:flock); 77 78use DBI::Profile qw(dbi_profile_merge); 79 80# some constants for use with node data arrays 81sub COUNT () { 0 }; 82sub TOTAL () { 1 }; 83sub FIRST () { 2 }; 84sub SHORTEST () { 3 }; 85sub LONGEST () { 4 }; 86sub FIRST_AT () { 5 }; 87sub LAST_AT () { 6 }; 88sub PATH () { 7 }; 89 90 91my $HAS_FLOCK = (defined $ENV{DBI_PROFILE_FLOCK}) 92 ? $ENV{DBI_PROFILE_FLOCK} 93 : do { local $@; eval { flock STDOUT, 0; 1 } }; 94 95 96=head2 $prof = DBI::ProfileData->new(File => "dbi.prof") 97 98=head2 $prof = DBI::ProfileData->new(File => "dbi.prof", Filter => sub { ... }) 99 100=head2 $prof = DBI::ProfileData->new(Files => [ "dbi.prof.1", "dbi.prof.2" ]) 101 102Creates a new DBI::ProfileData object. Takes either a single file 103through the File option or a list of Files in an array ref. If 104multiple files are specified then the header data from the first file 105is used. 106 107=head3 Files 108 109Reference to an array of file names to read. 110 111=head3 File 112 113Name of file to read. Takes precedence over C<Files>. 114 115=head3 DeleteFiles 116 117If true, the files are deleted after being read. 118 119Actually the files are renamed with a C<deleteme> suffix before being read, 120and then, after reading all the files, they're all deleted together. 121 122The files are locked while being read which, combined with the rename, makes it 123safe to 'consume' files that are still being generated by L<DBI::ProfileDumper>. 124 125=head3 Filter 126 127The C<Filter> parameter can be used to supply a code reference that can 128manipulate the profile data as it is being read. This is most useful for 129editing SQL statements so that slightly different statements in the raw data 130will be merged and aggregated in the loaded data. For example: 131 132 Filter => sub { 133 my ($path_ref, $data_ref) = @_; 134 s/foo = '.*?'/foo = '...'/ for @$path_ref; 135 } 136 137Here's an example that performs some normalization on the SQL. It converts all 138numbers to C<N> and all quoted strings to C<S>. It can also convert digits to 139N within names. Finally, it summarizes long "IN (...)" clauses. 140 141It's aggressive and simplistic, but it's often sufficient, and serves as an 142example that you can tailor to suit your own needs: 143 144 Filter => sub { 145 my ($path_ref, $data_ref) = @_; 146 local $_ = $path_ref->[0]; # whichever element contains the SQL Statement 147 s/\b\d+\b/N/g; # 42 -> N 148 s/\b0x[0-9A-Fa-f]+\b/N/g; # 0xFE -> N 149 s/'.*?'/'S'/g; # single quoted strings (doesn't handle escapes) 150 s/".*?"/"S"/g; # double quoted strings (doesn't handle escapes) 151 # convert names like log_20001231 into log_NNNNNNNN, controlled by $opt{n} 152 s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n}; 153 # abbreviate massive "in (...)" statements and similar 154 s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg; 155 } 156 157It's often better to perform this kinds of normalization in the DBI while the 158data is being collected, to avoid too much memory being used by storing profile 159data for many different SQL statement. See L<DBI::Profile>. 160 161=cut 162 163sub new { 164 my $pkg = shift; 165 my $self = { 166 Files => [ "dbi.prof" ], 167 Filter => undef, 168 DeleteFiles => 0, 169 LockFile => $HAS_FLOCK, 170 _header => {}, 171 _nodes => [], 172 _node_lookup => {}, 173 _sort => 'none', 174 @_ 175 }; 176 bless $self, $pkg; 177 178 # File (singular) overrides Files (plural) 179 $self->{Files} = [ $self->{File} ] if exists $self->{File}; 180 181 $self->_read_files(); 182 return $self; 183} 184 185# read files into _header and _nodes 186sub _read_files { 187 my $self = shift; 188 my $files = $self->{Files}; 189 my $read_header = 0; 190 my @files_to_delete; 191 192 my $fh = gensym; 193 foreach (@$files) { 194 my $filename = $_; 195 196 if ($self->{DeleteFiles}) { 197 my $newfilename = $filename . ".deleteme"; 198 if ($^O eq 'VMS') { 199 # VMS default filesystem can only have one period 200 $newfilename = $filename . 'deleteme'; 201 } 202 # will clobber an existing $newfilename 203 rename($filename, $newfilename) 204 or croak "Can't rename($filename, $newfilename): $!"; 205 # On a versioned filesystem we want old versions to be removed 206 1 while (unlink $filename); 207 $filename = $newfilename; 208 } 209 210 open($fh, "<", $filename) 211 or croak("Unable to read profile file '$filename': $!"); 212 213 # lock the file in case it's still being written to 214 # (we'll be forced to wait till the write is complete) 215 flock($fh, LOCK_SH) if $self->{LockFile}; 216 217 if (-s $fh) { # not empty 218 $self->_read_header($fh, $filename, $read_header ? 0 : 1); 219 $read_header = 1; 220 $self->_read_body($fh, $filename); 221 } 222 close($fh); # and release lock 223 224 push @files_to_delete, $filename 225 if $self->{DeleteFiles}; 226 } 227 for (@files_to_delete){ 228 # for versioned file systems 229 1 while (unlink $_); 230 if(-e $_){ 231 warn "Can't delete '$_': $!"; 232 } 233 } 234 235 # discard node_lookup now that all files are read 236 delete $self->{_node_lookup}; 237} 238 239# read the header from the given $fh named $filename. Discards the 240# data unless $keep. 241sub _read_header { 242 my ($self, $fh, $filename, $keep) = @_; 243 244 # get profiler module id 245 my $first = <$fh>; 246 chomp $first; 247 $self->{_profiler} = $first if $keep; 248 249 # collect variables from the header 250 local $_; 251 while (<$fh>) { 252 chomp; 253 last unless length $_; 254 /^(\S+)\s*=\s*(.*)/ 255 or croak("Syntax error in header in $filename line $.: $_"); 256 # XXX should compare new with existing (from previous file) 257 # and warn if they differ (different program or path) 258 $self->{_header}{$1} = unescape_key($2) if $keep; 259 } 260} 261 262 263sub unescape_key { # inverse of escape_key() in DBI::ProfileDumper 264 local $_ = shift; 265 s/(?<!\\)\\n/\n/g; # expand \n, unless it's a \\n 266 s/(?<!\\)\\r/\r/g; # expand \r, unless it's a \\r 267 s/\\\\/\\/g; # \\ to \ 268 return $_; 269} 270 271 272# reads the body of the profile data 273sub _read_body { 274 my ($self, $fh, $filename) = @_; 275 my $nodes = $self->{_nodes}; 276 my $lookup = $self->{_node_lookup}; 277 my $filter = $self->{Filter}; 278 279 # build up node array 280 my @path = (""); 281 my (@data, $path_key); 282 local $_; 283 while (<$fh>) { 284 chomp; 285 if (/^\+\s+(\d+)\s?(.*)/) { 286 # it's a key 287 my ($key, $index) = ($2, $1 - 1); 288 289 $#path = $index; # truncate path to new length 290 $path[$index] = unescape_key($key); # place new key at end 291 292 } 293 elsif (s/^=\s+//) { 294 # it's data - file in the node array with the path in index 0 295 # (the optional minus is to make it more robust against systems 296 # with unstable high-res clocks - typically due to poor NTP config 297 # of kernel SMP behaviour, i.e. min time may be -0.000008)) 298 299 @data = split / /, $_; 300 301 # corrupt data? 302 croak("Invalid number of fields in $filename line $.: $_") 303 unless @data == 7; 304 croak("Invalid leaf node characters $filename line $.: $_") 305 unless m/^[-+ 0-9eE\.]+$/; 306 307 # hook to enable pre-processing of the data - such as mangling SQL 308 # so that slightly different statements get treated as the same 309 # and so merged in the results 310 $filter->(\@path, \@data) if $filter; 311 312 # elements of @path can't have NULLs in them, so this 313 # forms a unique string per @path. If there's some way I 314 # can get this without arbitrarily stripping out a 315 # character I'd be happy to hear it! 316 $path_key = join("\0",@path); 317 318 # look for previous entry 319 if (exists $lookup->{$path_key}) { 320 # merge in the new data 321 dbi_profile_merge($nodes->[$lookup->{$path_key}], \@data); 322 } else { 323 # insert a new node - nodes are arrays with data in 0-6 324 # and path data after that 325 push(@$nodes, [ @data, @path ]); 326 327 # record node in %seen 328 $lookup->{$path_key} = $#$nodes; 329 } 330 } 331 else { 332 croak("Invalid line type syntax error in $filename line $.: $_"); 333 } 334 } 335} 336 337 338 339=head2 $copy = $prof->clone(); 340 341Clone a profile data set creating a new object. 342 343=cut 344 345sub clone { 346 my $self = shift; 347 348 # start with a simple copy 349 my $clone = bless { %$self }, ref($self); 350 351 # deep copy nodes 352 $clone->{_nodes} = [ map { [ @$_ ] } @{$self->{_nodes}} ]; 353 354 # deep copy header 355 $clone->{_header} = { %{$self->{_header}} }; 356 357 return $clone; 358} 359 360=head2 $header = $prof->header(); 361 362Returns a reference to a hash of header values. These are the key 363value pairs included in the header section of the L<DBI::ProfileDumper> 364data format. For example: 365 366 $header = { 367 Path => [ '!Statement', '!MethodName' ], 368 Program => 't/42profile_data.t', 369 }; 370 371Note that modifying this hash will modify the header data stored 372inside the profile object. 373 374=cut 375 376sub header { shift->{_header} } 377 378 379=head2 $nodes = $prof->nodes() 380 381Returns a reference the sorted nodes array. Each element in the array 382is a single record in the data set. The first seven elements are the 383same as the elements provided by L<DBI::Profile>. After that each key is 384in a separate element. For example: 385 386 $nodes = [ 387 [ 388 2, # 0, count 389 0.0312958955764771, # 1, total duration 390 0.000490069389343262, # 2, first duration 391 0.000176072120666504, # 3, shortest duration 392 0.00140702724456787, # 4, longest duration 393 1023115819.83019, # 5, time of first event 394 1023115819.86576, # 6, time of last event 395 'SELECT foo FROM bar' # 7, key1 396 'execute' # 8, key2 397 # 6+N, keyN 398 ], 399 # ... 400 ]; 401 402Note that modifying this array will modify the node data stored inside 403the profile object. 404 405=cut 406 407sub nodes { shift->{_nodes} } 408 409 410=head2 $count = $prof->count() 411 412Returns the number of items in the profile data set. 413 414=cut 415 416sub count { scalar @{shift->{_nodes}} } 417 418 419=head2 $prof->sort(field => "field") 420 421=head2 $prof->sort(field => "field", reverse => 1) 422 423Sorts data by the given field. Available fields are: 424 425 longest 426 total 427 count 428 shortest 429 430The default sort is greatest to smallest, which is the opposite of the 431normal Perl meaning. This, however, matches the expected behavior of 432the dbiprof frontend. 433 434=cut 435 436 437# sorts data by one of the available fields 438{ 439 my %FIELDS = ( 440 longest => LONGEST, 441 total => TOTAL, 442 count => COUNT, 443 shortest => SHORTEST, 444 key1 => PATH+0, 445 key2 => PATH+1, 446 key3 => PATH+2, 447 ); 448 sub sort { 449 my $self = shift; 450 my $nodes = $self->{_nodes}; 451 my %opt = @_; 452 453 croak("Missing required field option.") unless $opt{field}; 454 455 my $index = $FIELDS{$opt{field}}; 456 457 croak("Unrecognized sort field '$opt{field}'.") 458 unless defined $index; 459 460 # sort over index 461 if ($opt{reverse}) { 462 @$nodes = sort { 463 $a->[$index] <=> $b->[$index] 464 } @$nodes; 465 } else { 466 @$nodes = sort { 467 $b->[$index] <=> $a->[$index] 468 } @$nodes; 469 } 470 471 # remember how we're sorted 472 $self->{_sort} = $opt{field}; 473 474 return $self; 475 } 476} 477 478 479=head2 $count = $prof->exclude(key2 => "disconnect") 480 481=head2 $count = $prof->exclude(key2 => "disconnect", case_sensitive => 1) 482 483=head2 $count = $prof->exclude(key1 => qr/^SELECT/i) 484 485Removes records from the data set that match the given string or 486regular expression. This method modifies the data in a permanent 487fashion - use clone() first to maintain the original data after 488exclude(). Returns the number of nodes left in the profile data set. 489 490=cut 491 492sub exclude { 493 my $self = shift; 494 my $nodes = $self->{_nodes}; 495 my %opt = @_; 496 497 # find key index number 498 my ($index, $val); 499 foreach (keys %opt) { 500 if (/^key(\d+)$/) { 501 $index = PATH + $1 - 1; 502 $val = $opt{$_}; 503 last; 504 } 505 } 506 croak("Missing required keyN option.") unless $index; 507 508 if (UNIVERSAL::isa($val,"Regexp")) { 509 # regex match 510 @$nodes = grep { 511 $#$_ < $index or $_->[$index] !~ /$val/ 512 } @$nodes; 513 } else { 514 if ($opt{case_sensitive}) { 515 @$nodes = grep { 516 $#$_ < $index or $_->[$index] ne $val; 517 } @$nodes; 518 } else { 519 $val = lc $val; 520 @$nodes = grep { 521 $#$_ < $index or lc($_->[$index]) ne $val; 522 } @$nodes; 523 } 524 } 525 526 return scalar @$nodes; 527} 528 529 530=head2 $count = $prof->match(key2 => "disconnect") 531 532=head2 $count = $prof->match(key2 => "disconnect", case_sensitive => 1) 533 534=head2 $count = $prof->match(key1 => qr/^SELECT/i) 535 536Removes records from the data set that do not match the given string 537or regular expression. This method modifies the data in a permanent 538fashion - use clone() first to maintain the original data after 539match(). Returns the number of nodes left in the profile data set. 540 541=cut 542 543sub match { 544 my $self = shift; 545 my $nodes = $self->{_nodes}; 546 my %opt = @_; 547 548 # find key index number 549 my ($index, $val); 550 foreach (keys %opt) { 551 if (/^key(\d+)$/) { 552 $index = PATH + $1 - 1; 553 $val = $opt{$_}; 554 last; 555 } 556 } 557 croak("Missing required keyN option.") unless $index; 558 559 if (UNIVERSAL::isa($val,"Regexp")) { 560 # regex match 561 @$nodes = grep { 562 $#$_ >= $index and $_->[$index] =~ /$val/ 563 } @$nodes; 564 } else { 565 if ($opt{case_sensitive}) { 566 @$nodes = grep { 567 $#$_ >= $index and $_->[$index] eq $val; 568 } @$nodes; 569 } else { 570 $val = lc $val; 571 @$nodes = grep { 572 $#$_ >= $index and lc($_->[$index]) eq $val; 573 } @$nodes; 574 } 575 } 576 577 return scalar @$nodes; 578} 579 580 581=head2 $Data = $prof->Data() 582 583Returns the same Data hash structure as seen in L<DBI::Profile>. This 584structure is not sorted. The nodes() structure probably makes more 585sense for most analysis. 586 587=cut 588 589sub Data { 590 my $self = shift; 591 my (%Data, @data, $ptr); 592 593 foreach my $node (@{$self->{_nodes}}) { 594 # traverse to key location 595 $ptr = \%Data; 596 foreach my $key (@{$node}[PATH .. $#$node - 1]) { 597 $ptr->{$key} = {} unless exists $ptr->{$key}; 598 $ptr = $ptr->{$key}; 599 } 600 601 # slice out node data 602 $ptr->{$node->[-1]} = [ @{$node}[0 .. 6] ]; 603 } 604 605 return \%Data; 606} 607 608 609=head2 $text = $prof->format($nodes->[0]) 610 611Formats a single node into a human-readable block of text. 612 613=cut 614 615sub format { 616 my ($self, $node) = @_; 617 my $format; 618 619 # setup keys 620 my $keys = ""; 621 for (my $i = PATH; $i <= $#$node; $i++) { 622 my $key = $node->[$i]; 623 624 # remove leading and trailing space 625 $key =~ s/^\s+//; 626 $key =~ s/\s+$//; 627 628 # if key has newlines or is long take special precautions 629 if (length($key) > 72 or $key =~ /\n/) { 630 $keys .= " Key " . ($i - PATH + 1) . " :\n\n$key\n\n"; 631 } else { 632 $keys .= " Key " . ($i - PATH + 1) . " : $key\n"; 633 } 634 } 635 636 # nodes with multiple runs get the long entry format, nodes with 637 # just one run get a single count. 638 if ($node->[COUNT] > 1) { 639 $format = <<END; 640 Count : %d 641 Total Time : %3.6f seconds 642 Longest Time : %3.6f seconds 643 Shortest Time : %3.6f seconds 644 Average Time : %3.6f seconds 645END 646 return sprintf($format, @{$node}[COUNT,TOTAL,LONGEST,SHORTEST], 647 $node->[TOTAL] / $node->[COUNT]) . $keys; 648 } else { 649 $format = <<END; 650 Count : %d 651 Time : %3.6f seconds 652END 653 654 return sprintf($format, @{$node}[COUNT,TOTAL]) . $keys; 655 656 } 657} 658 659 660=head2 $text = $prof->report(number => 10) 661 662Produces a report with the given number of items. 663 664=cut 665 666sub report { 667 my $self = shift; 668 my $nodes = $self->{_nodes}; 669 my %opt = @_; 670 671 croak("Missing required number option") unless exists $opt{number}; 672 673 $opt{number} = @$nodes if @$nodes < $opt{number}; 674 675 my $report = $self->_report_header($opt{number}); 676 for (0 .. $opt{number} - 1) { 677 $report .= sprintf("#" x 5 . "[ %d ]". "#" x 59 . "\n", 678 $_ + 1); 679 $report .= $self->format($nodes->[$_]); 680 $report .= "\n"; 681 } 682 return $report; 683} 684 685# format the header for report() 686sub _report_header { 687 my ($self, $number) = @_; 688 my $nodes = $self->{_nodes}; 689 my $node_count = @$nodes; 690 691 # find total runtime and method count 692 my ($time, $count) = (0,0); 693 foreach my $node (@$nodes) { 694 $time += $node->[TOTAL]; 695 $count += $node->[COUNT]; 696 } 697 698 my $header = <<END; 699 700DBI Profile Data ($self->{_profiler}) 701 702END 703 704 # output header fields 705 while (my ($key, $value) = each %{$self->{_header}}) { 706 $header .= sprintf(" %-13s : %s\n", $key, $value); 707 } 708 709 # output summary data fields 710 $header .= sprintf(<<END, $node_count, $number, $self->{_sort}, $count, $time); 711 Total Records : %d (showing %d, sorted by %s) 712 Total Count : %d 713 Total Runtime : %3.6f seconds 714 715END 716 717 return $header; 718} 719 720 7211; 722 723__END__ 724 725=head1 AUTHOR 726 727Sam Tregar <sam@tregar.com> 728 729=head1 COPYRIGHT AND LICENSE 730 731Copyright (C) 2002 Sam Tregar 732 733This program is free software; you can redistribute it and/or modify 734it under the same terms as Perl 5 itself. 735 736=cut 737