1package DBI::ProfileData;
2use strict;
3
4=head1 NAME
5
6DBI::ProfileData - manipulate DBI::ProfileDumper data dumps
7
8=head1 SYNOPSIS
9
10The easiest way to use this module is through the dbiprof frontend
11(see L<dbiprof> for details):
12
13  dbiprof --number 15 --sort count
14
15This module can also be used to roll your own profile analysis:
16
17  # load data from dbi.prof
18  $prof = DBI::ProfileData->new(File => "dbi.prof");
19
20  # get a count of the records (unique paths) in the data set
21  $count = $prof->count();
22
23  # sort by longest overall time
24  $prof->sort(field => "longest");
25
26  # sort by longest overall time, least to greatest
27  $prof->sort(field => "longest", reverse => 1);
28
29  # exclude records with key2 eq 'disconnect'
30  $prof->exclude(key2 => 'disconnect');
31
32  # exclude records with key1 matching /^UPDATE/i
33  $prof->exclude(key1 => qr/^UPDATE/i);
34
35  # remove all records except those where key1 matches /^SELECT/i
36  $prof->match(key1 => qr/^SELECT/i);
37
38  # produce a formatted report with the given number of items
39  $report = $prof->report(number => 10);
40
41  # clone the profile data set
42  $clone = $prof->clone();
43
44  # get access to hash of header values
45  $header = $prof->header();
46
47  # get access to sorted array of nodes
48  $nodes = $prof->nodes();
49
50  # format a single node in the same style as report()
51  $text = $prof->format($nodes->[0]);
52
53  # get access to Data hash in DBI::Profile format
54  $Data = $prof->Data();
55
56=head1 DESCRIPTION
57
58This module offers the ability to read, manipulate and format
59L<DBI::ProfileDumper> profile data.
60
61Conceptually, a profile consists of a series of records, or nodes,
62each of each has a set of statistics and set of keys.  Each record
63must have a unique set of keys, but there is no requirement that every
64record have the same number of keys.
65
66=head1 METHODS
67
68The following methods are supported by DBI::ProfileData objects.
69
70=cut
71
72our $VERSION = "2.010008";
73
74use Carp qw(croak);
75use Symbol;
76use Fcntl qw(:flock);
77
78use DBI::Profile qw(dbi_profile_merge);
79
80# some constants for use with node data arrays
81sub COUNT     () { 0 };
82sub TOTAL     () { 1 };
83sub FIRST     () { 2 };
84sub SHORTEST  () { 3 };
85sub LONGEST   () { 4 };
86sub FIRST_AT  () { 5 };
87sub LAST_AT   () { 6 };
88sub PATH      () { 7 };
89
90
91my $HAS_FLOCK = (defined $ENV{DBI_PROFILE_FLOCK})
92    ? $ENV{DBI_PROFILE_FLOCK}
93    : do { local $@; eval { flock STDOUT, 0; 1 } };
94
95
96=head2 $prof = DBI::ProfileData->new(File => "dbi.prof")
97
98=head2 $prof = DBI::ProfileData->new(File => "dbi.prof", Filter => sub { ... })
99
100=head2 $prof = DBI::ProfileData->new(Files => [ "dbi.prof.1", "dbi.prof.2" ])
101
102Creates a new DBI::ProfileData object.  Takes either a single file
103through the File option or a list of Files in an array ref.  If
104multiple files are specified then the header data from the first file
105is used.
106
107=head3 Files
108
109Reference to an array of file names to read.
110
111=head3 File
112
113Name of file to read. Takes precedence over C<Files>.
114
115=head3 DeleteFiles
116
117If true, the files are deleted after being read.
118
119Actually the files are renamed with a C<deleteme> suffix before being read,
120and then, after reading all the files, they're all deleted together.
121
122The files are locked while being read which, combined with the rename, makes it
123safe to 'consume' files that are still being generated by L<DBI::ProfileDumper>.
124
125=head3 Filter
126
127The C<Filter> parameter can be used to supply a code reference that can
128manipulate the profile data as it is being read. This is most useful for
129editing SQL statements so that slightly different statements in the raw data
130will be merged and aggregated in the loaded data. For example:
131
132  Filter => sub {
133      my ($path_ref, $data_ref) = @_;
134      s/foo = '.*?'/foo = '...'/ for @$path_ref;
135  }
136
137Here's an example that performs some normalization on the SQL. It converts all
138numbers to C<N> and all quoted strings to C<S>.  It can also convert digits to
139N within names. Finally, it summarizes long "IN (...)" clauses.
140
141It's aggressive and simplistic, but it's often sufficient, and serves as an
142example that you can tailor to suit your own needs:
143
144  Filter => sub {
145      my ($path_ref, $data_ref) = @_;
146      local $_ = $path_ref->[0]; # whichever element contains the SQL Statement
147      s/\b\d+\b/N/g;             # 42 -> N
148      s/\b0x[0-9A-Fa-f]+\b/N/g;  # 0xFE -> N
149      s/'.*?'/'S'/g;             # single quoted strings (doesn't handle escapes)
150      s/".*?"/"S"/g;             # double quoted strings (doesn't handle escapes)
151      # convert names like log_20001231 into log_NNNNNNNN, controlled by $opt{n}
152      s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n};
153      # abbreviate massive "in (...)" statements and similar
154      s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg;
155  }
156
157It's often better to perform this kinds of normalization in the DBI while the
158data is being collected, to avoid too much memory being used by storing profile
159data for many different SQL statement. See L<DBI::Profile>.
160
161=cut
162
163sub new {
164    my $pkg = shift;
165    my $self = {
166                Files        => [ "dbi.prof" ],
167		Filter       => undef,
168                DeleteFiles  => 0,
169                LockFile     => $HAS_FLOCK,
170                _header      => {},
171                _nodes       => [],
172                _node_lookup => {},
173                _sort        => 'none',
174                @_
175               };
176    bless $self, $pkg;
177
178    # File (singular) overrides Files (plural)
179    $self->{Files} = [ $self->{File} ] if exists $self->{File};
180
181    $self->_read_files();
182    return $self;
183}
184
185# read files into _header and _nodes
186sub _read_files {
187    my $self = shift;
188    my $files  = $self->{Files};
189    my $read_header = 0;
190    my @files_to_delete;
191
192    my $fh = gensym;
193    foreach (@$files) {
194        my $filename = $_;
195
196        if ($self->{DeleteFiles}) {
197            my $newfilename = $filename . ".deleteme";
198	    if ($^O eq 'VMS') {
199		# VMS default filesystem can only have one period
200		$newfilename = $filename . 'deleteme';
201	    }
202            # will clobber an existing $newfilename
203            rename($filename, $newfilename)
204                or croak "Can't rename($filename, $newfilename): $!";
205	    # On a versioned filesystem we want old versions to be removed
206	    1 while (unlink $filename);
207            $filename = $newfilename;
208        }
209
210        open($fh, "<", $filename)
211          or croak("Unable to read profile file '$filename': $!");
212
213        # lock the file in case it's still being written to
214        # (we'll be forced to wait till the write is complete)
215        flock($fh, LOCK_SH) if $self->{LockFile};
216
217        if (-s $fh) {   # not empty
218            $self->_read_header($fh, $filename, $read_header ? 0 : 1);
219            $read_header = 1;
220            $self->_read_body($fh, $filename);
221        }
222        close($fh); # and release lock
223
224        push @files_to_delete, $filename
225            if $self->{DeleteFiles};
226    }
227    for (@files_to_delete){
228	# for versioned file systems
229	1 while (unlink $_);
230	if(-e $_){
231	    warn "Can't delete '$_': $!";
232	}
233    }
234
235    # discard node_lookup now that all files are read
236    delete $self->{_node_lookup};
237}
238
239# read the header from the given $fh named $filename.  Discards the
240# data unless $keep.
241sub _read_header {
242    my ($self, $fh, $filename, $keep) = @_;
243
244    # get profiler module id
245    my $first = <$fh>;
246    chomp $first;
247    $self->{_profiler} = $first if $keep;
248
249    # collect variables from the header
250    local $_;
251    while (<$fh>) {
252        chomp;
253        last unless length $_;
254        /^(\S+)\s*=\s*(.*)/
255          or croak("Syntax error in header in $filename line $.: $_");
256        # XXX should compare new with existing (from previous file)
257        # and warn if they differ (different program or path)
258        $self->{_header}{$1} = unescape_key($2) if $keep;
259    }
260}
261
262
263sub unescape_key {  # inverse of escape_key() in DBI::ProfileDumper
264    local $_ = shift;
265    s/(?<!\\)\\n/\n/g; # expand \n, unless it's a \\n
266    s/(?<!\\)\\r/\r/g; # expand \r, unless it's a \\r
267    s/\\\\/\\/g;       # \\ to \
268    return $_;
269}
270
271
272# reads the body of the profile data
273sub _read_body {
274    my ($self, $fh, $filename) = @_;
275    my $nodes = $self->{_nodes};
276    my $lookup = $self->{_node_lookup};
277    my $filter = $self->{Filter};
278
279    # build up node array
280    my @path = ("");
281    my (@data, $path_key);
282    local $_;
283    while (<$fh>) {
284        chomp;
285        if (/^\+\s+(\d+)\s?(.*)/) {
286            # it's a key
287            my ($key, $index) = ($2, $1 - 1);
288
289            $#path = $index;      # truncate path to new length
290            $path[$index] = unescape_key($key); # place new key at end
291
292        }
293	elsif (s/^=\s+//) {
294            # it's data - file in the node array with the path in index 0
295	    # (the optional minus is to make it more robust against systems
296	    # with unstable high-res clocks - typically due to poor NTP config
297	    # of kernel SMP behaviour, i.e. min time may be -0.000008))
298
299            @data = split / /, $_;
300
301            # corrupt data?
302            croak("Invalid number of fields in $filename line $.: $_")
303                unless @data == 7;
304            croak("Invalid leaf node characters $filename line $.: $_")
305                unless m/^[-+ 0-9eE\.]+$/;
306
307	    # hook to enable pre-processing of the data - such as mangling SQL
308	    # so that slightly different statements get treated as the same
309	    # and so merged in the results
310	    $filter->(\@path, \@data) if $filter;
311
312            # elements of @path can't have NULLs in them, so this
313            # forms a unique string per @path.  If there's some way I
314            # can get this without arbitrarily stripping out a
315            # character I'd be happy to hear it!
316            $path_key = join("\0",@path);
317
318            # look for previous entry
319            if (exists $lookup->{$path_key}) {
320                # merge in the new data
321		dbi_profile_merge($nodes->[$lookup->{$path_key}], \@data);
322            } else {
323                # insert a new node - nodes are arrays with data in 0-6
324                # and path data after that
325                push(@$nodes, [ @data, @path ]);
326
327                # record node in %seen
328                $lookup->{$path_key} = $#$nodes;
329            }
330        }
331	else {
332            croak("Invalid line type syntax error in $filename line $.: $_");
333	}
334    }
335}
336
337
338
339=head2 $copy = $prof->clone();
340
341Clone a profile data set creating a new object.
342
343=cut
344
345sub clone {
346    my $self = shift;
347
348    # start with a simple copy
349    my $clone = bless { %$self }, ref($self);
350
351    # deep copy nodes
352    $clone->{_nodes}  = [ map { [ @$_ ] } @{$self->{_nodes}} ];
353
354    # deep copy header
355    $clone->{_header} = { %{$self->{_header}} };
356
357    return $clone;
358}
359
360=head2 $header = $prof->header();
361
362Returns a reference to a hash of header values.  These are the key
363value pairs included in the header section of the L<DBI::ProfileDumper>
364data format.  For example:
365
366  $header = {
367    Path    => [ '!Statement', '!MethodName' ],
368    Program => 't/42profile_data.t',
369  };
370
371Note that modifying this hash will modify the header data stored
372inside the profile object.
373
374=cut
375
376sub header { shift->{_header} }
377
378
379=head2 $nodes = $prof->nodes()
380
381Returns a reference the sorted nodes array.  Each element in the array
382is a single record in the data set.  The first seven elements are the
383same as the elements provided by L<DBI::Profile>.  After that each key is
384in a separate element.  For example:
385
386 $nodes = [
387            [
388              2,                      # 0, count
389              0.0312958955764771,     # 1, total duration
390              0.000490069389343262,   # 2, first duration
391              0.000176072120666504,   # 3, shortest duration
392              0.00140702724456787,    # 4, longest duration
393              1023115819.83019,       # 5, time of first event
394              1023115819.86576,       # 6, time of last event
395              'SELECT foo FROM bar'   # 7, key1
396              'execute'               # 8, key2
397                                      # 6+N, keyN
398            ],
399                                      # ...
400          ];
401
402Note that modifying this array will modify the node data stored inside
403the profile object.
404
405=cut
406
407sub nodes { shift->{_nodes} }
408
409
410=head2 $count = $prof->count()
411
412Returns the number of items in the profile data set.
413
414=cut
415
416sub count { scalar @{shift->{_nodes}} }
417
418
419=head2 $prof->sort(field => "field")
420
421=head2 $prof->sort(field => "field", reverse => 1)
422
423Sorts data by the given field.  Available fields are:
424
425  longest
426  total
427  count
428  shortest
429
430The default sort is greatest to smallest, which is the opposite of the
431normal Perl meaning.  This, however, matches the expected behavior of
432the dbiprof frontend.
433
434=cut
435
436
437# sorts data by one of the available fields
438{
439    my %FIELDS = (
440                  longest  => LONGEST,
441                  total    => TOTAL,
442                  count    => COUNT,
443                  shortest => SHORTEST,
444                  key1     => PATH+0,
445                  key2     => PATH+1,
446                  key3     => PATH+2,
447                 );
448    sub sort {
449        my $self = shift;
450        my $nodes = $self->{_nodes};
451        my %opt = @_;
452
453        croak("Missing required field option.") unless $opt{field};
454
455        my $index = $FIELDS{$opt{field}};
456
457        croak("Unrecognized sort field '$opt{field}'.")
458          unless defined $index;
459
460        # sort over index
461        if ($opt{reverse}) {
462            @$nodes = sort {
463                $a->[$index] <=> $b->[$index]
464            } @$nodes;
465        } else {
466            @$nodes = sort {
467                $b->[$index] <=> $a->[$index]
468            } @$nodes;
469        }
470
471        # remember how we're sorted
472        $self->{_sort} = $opt{field};
473
474        return $self;
475    }
476}
477
478
479=head2 $count = $prof->exclude(key2 => "disconnect")
480
481=head2 $count = $prof->exclude(key2 => "disconnect", case_sensitive => 1)
482
483=head2 $count = $prof->exclude(key1 => qr/^SELECT/i)
484
485Removes records from the data set that match the given string or
486regular expression.  This method modifies the data in a permanent
487fashion - use clone() first to maintain the original data after
488exclude().  Returns the number of nodes left in the profile data set.
489
490=cut
491
492sub exclude {
493    my $self = shift;
494    my $nodes = $self->{_nodes};
495    my %opt = @_;
496
497    # find key index number
498    my ($index, $val);
499    foreach (keys %opt) {
500        if (/^key(\d+)$/) {
501            $index   = PATH + $1 - 1;
502            $val     = $opt{$_};
503            last;
504        }
505    }
506    croak("Missing required keyN option.") unless $index;
507
508    if (UNIVERSAL::isa($val,"Regexp")) {
509        # regex match
510        @$nodes = grep {
511            $#$_ < $index or $_->[$index] !~ /$val/
512        } @$nodes;
513    } else {
514        if ($opt{case_sensitive}) {
515            @$nodes = grep {
516                $#$_ < $index or $_->[$index] ne $val;
517            } @$nodes;
518        } else {
519            $val = lc $val;
520            @$nodes = grep {
521                $#$_ < $index or lc($_->[$index]) ne $val;
522            } @$nodes;
523        }
524    }
525
526    return scalar @$nodes;
527}
528
529
530=head2 $count = $prof->match(key2 => "disconnect")
531
532=head2 $count = $prof->match(key2 => "disconnect", case_sensitive => 1)
533
534=head2 $count = $prof->match(key1 => qr/^SELECT/i)
535
536Removes records from the data set that do not match the given string
537or regular expression.  This method modifies the data in a permanent
538fashion - use clone() first to maintain the original data after
539match().  Returns the number of nodes left in the profile data set.
540
541=cut
542
543sub match {
544    my $self = shift;
545    my $nodes = $self->{_nodes};
546    my %opt = @_;
547
548    # find key index number
549    my ($index, $val);
550    foreach (keys %opt) {
551        if (/^key(\d+)$/) {
552            $index   = PATH + $1 - 1;
553            $val     = $opt{$_};
554            last;
555        }
556    }
557    croak("Missing required keyN option.") unless $index;
558
559    if (UNIVERSAL::isa($val,"Regexp")) {
560        # regex match
561        @$nodes = grep {
562            $#$_ >= $index and $_->[$index] =~ /$val/
563        } @$nodes;
564    } else {
565        if ($opt{case_sensitive}) {
566            @$nodes = grep {
567                $#$_ >= $index and $_->[$index] eq $val;
568            } @$nodes;
569        } else {
570            $val = lc $val;
571            @$nodes = grep {
572                $#$_ >= $index and lc($_->[$index]) eq $val;
573            } @$nodes;
574        }
575    }
576
577    return scalar @$nodes;
578}
579
580
581=head2 $Data = $prof->Data()
582
583Returns the same Data hash structure as seen in L<DBI::Profile>.  This
584structure is not sorted.  The nodes() structure probably makes more
585sense for most analysis.
586
587=cut
588
589sub Data {
590    my $self = shift;
591    my (%Data, @data, $ptr);
592
593    foreach my $node (@{$self->{_nodes}}) {
594        # traverse to key location
595        $ptr = \%Data;
596        foreach my $key (@{$node}[PATH .. $#$node - 1]) {
597            $ptr->{$key} = {} unless exists $ptr->{$key};
598            $ptr = $ptr->{$key};
599        }
600
601        # slice out node data
602        $ptr->{$node->[-1]} = [ @{$node}[0 .. 6] ];
603    }
604
605    return \%Data;
606}
607
608
609=head2 $text = $prof->format($nodes->[0])
610
611Formats a single node into a human-readable block of text.
612
613=cut
614
615sub format {
616    my ($self, $node) = @_;
617    my $format;
618
619    # setup keys
620    my $keys = "";
621    for (my $i = PATH; $i <= $#$node; $i++) {
622        my $key = $node->[$i];
623
624        # remove leading and trailing space
625        $key =~ s/^\s+//;
626        $key =~ s/\s+$//;
627
628        # if key has newlines or is long take special precautions
629        if (length($key) > 72 or $key =~ /\n/) {
630            $keys .= "  Key " . ($i - PATH + 1) . "         :\n\n$key\n\n";
631        } else {
632            $keys .= "  Key " . ($i - PATH + 1) . "         : $key\n";
633        }
634    }
635
636    # nodes with multiple runs get the long entry format, nodes with
637    # just one run get a single count.
638    if ($node->[COUNT] > 1) {
639        $format = <<END;
640  Count         : %d
641  Total Time    : %3.6f seconds
642  Longest Time  : %3.6f seconds
643  Shortest Time : %3.6f seconds
644  Average Time  : %3.6f seconds
645END
646        return sprintf($format, @{$node}[COUNT,TOTAL,LONGEST,SHORTEST],
647                       $node->[TOTAL] / $node->[COUNT]) . $keys;
648    } else {
649        $format = <<END;
650  Count         : %d
651  Time          : %3.6f seconds
652END
653
654        return sprintf($format, @{$node}[COUNT,TOTAL]) . $keys;
655
656    }
657}
658
659
660=head2 $text = $prof->report(number => 10)
661
662Produces a report with the given number of items.
663
664=cut
665
666sub report {
667    my $self  = shift;
668    my $nodes = $self->{_nodes};
669    my %opt   = @_;
670
671    croak("Missing required number option") unless exists $opt{number};
672
673    $opt{number} = @$nodes if @$nodes < $opt{number};
674
675    my $report = $self->_report_header($opt{number});
676    for (0 .. $opt{number} - 1) {
677        $report .= sprintf("#" x 5  . "[ %d ]". "#" x 59 . "\n",
678                           $_ + 1);
679        $report .= $self->format($nodes->[$_]);
680        $report .= "\n";
681    }
682    return $report;
683}
684
685# format the header for report()
686sub _report_header {
687    my ($self, $number) = @_;
688    my $nodes = $self->{_nodes};
689    my $node_count = @$nodes;
690
691    # find total runtime and method count
692    my ($time, $count) = (0,0);
693    foreach my $node (@$nodes) {
694        $time  += $node->[TOTAL];
695        $count += $node->[COUNT];
696    }
697
698    my $header = <<END;
699
700DBI Profile Data ($self->{_profiler})
701
702END
703
704    # output header fields
705    while (my ($key, $value) = each %{$self->{_header}}) {
706        $header .= sprintf("  %-13s : %s\n", $key, $value);
707    }
708
709    # output summary data fields
710    $header .= sprintf(<<END, $node_count, $number, $self->{_sort}, $count, $time);
711  Total Records : %d (showing %d, sorted by %s)
712  Total Count   : %d
713  Total Runtime : %3.6f seconds
714
715END
716
717    return $header;
718}
719
720
7211;
722
723__END__
724
725=head1 AUTHOR
726
727Sam Tregar <sam@tregar.com>
728
729=head1 COPYRIGHT AND LICENSE
730
731Copyright (C) 2002 Sam Tregar
732
733This program is free software; you can redistribute it and/or modify
734it under the same terms as Perl 5 itself.
735
736=cut
737