1#!/usr/bin/env perl
2
3# This program is part of Percona Toolkit: http://www.percona.com/software/
4# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
5# notices and disclaimers.
6
7use strict;
8use warnings FATAL => 'all';
9
10# This tool is "fat-packed": most of its dependent modules are embedded
11# in this file.  Setting %INC to this file for each module makes Perl aware
12# of this so it will not try to load the module from @INC.  See the tool's
13# documentation for a full list of dependencies.
14BEGIN {
15   $INC{$_} = __FILE__ for map { (my $pkg = "$_.pm") =~ s!::!/!g; $pkg } (qw(
16      Percona::Toolkit
17      Lmo::Utils
18      Lmo::Meta
19      Lmo::Object
20      Lmo::Types
21      Lmo
22      DSNParser
23      Quoter
24      OptionParser
25      Transformers
26      QueryRewriter
27      Processlist
28      TcpdumpParser
29      MySQLProtocolParser
30      SlowLogParser
31      SlowLogWriter
32      EventAggregator
33      ReportFormatter
34      QueryReportFormatter
35      JSONReportFormatter
36      EventTimeline
37      QueryParser
38      TableParser
39      QueryReview
40      QueryHistory
41      Daemon
42      BinaryLogParser
43      GeneralLogParser
44      RawLogParser
45      ProtocolParser
46      MasterSlave
47      Progress
48      FileIterator
49      Runtime
50      Pipeline
51      HTTP::Micro
52      VersionCheck
53   ));
54}
55
56# ###########################################################################
57# Percona::Toolkit package
58# This package is a copy without comments from the original.  The original
59# with comments and its test file can be found in the Bazaar repository at,
60#   lib/Percona/Toolkit.pm
61#   t/lib/Percona/Toolkit.t
62# See https://launchpad.net/percona-toolkit for more information.
63# ###########################################################################
64{
65package Percona::Toolkit;
66
67our $VERSION = '3.3.0';
68
69use strict;
70use warnings FATAL => 'all';
71use English qw(-no_match_vars);
72use constant PTDEBUG => $ENV{PTDEBUG} || 0;
73
74use Carp qw(carp cluck);
75use Data::Dumper qw();
76
77require Exporter;
78our @ISA         = qw(Exporter);
79our @EXPORT_OK   = qw(
80   have_required_args
81   Dumper
82   _d
83);
84
85sub have_required_args {
86   my ($args, @required_args) = @_;
87   my $have_required_args = 1;
88   foreach my $arg ( @required_args ) {
89      if ( !defined $args->{$arg} ) {
90         $have_required_args = 0;
91         carp "Argument $arg is not defined";
92      }
93   }
94   cluck unless $have_required_args;  # print backtrace
95   return $have_required_args;
96}
97
98sub Dumper {
99   local $Data::Dumper::Indent    = 1;
100   local $Data::Dumper::Sortkeys  = 1;
101   local $Data::Dumper::Quotekeys = 0;
102   Data::Dumper::Dumper(@_);
103}
104
105sub _d {
106   my ($package, undef, $line) = caller 0;
107   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
108        map { defined $_ ? $_ : 'undef' }
109        @_;
110   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
111}
112
1131;
114}
115# ###########################################################################
116# End Percona::Toolkit package
117# ###########################################################################
118
119# ###########################################################################
120# Lmo::Utils package
121# This package is a copy without comments from the original.  The original
122# with comments and its test file can be found in the Bazaar repository at,
123#   lib/Lmo/Utils.pm
124#   t/lib/Lmo/Utils.t
125# See https://launchpad.net/percona-toolkit for more information.
126# ###########################################################################
127{
128package Lmo::Utils;
129
130use strict;
131use warnings qw( FATAL all );
132require Exporter;
133our (@ISA, @EXPORT, @EXPORT_OK);
134
135BEGIN {
136   @ISA = qw(Exporter);
137   @EXPORT = @EXPORT_OK = qw(
138      _install_coderef
139      _unimport_coderefs
140      _glob_for
141      _stash_for
142   );
143}
144
145{
146   no strict 'refs';
147   sub _glob_for {
148      return \*{shift()}
149   }
150
151   sub _stash_for {
152      return \%{ shift() . "::" };
153   }
154}
155
156sub _install_coderef {
157   my ($to, $code) = @_;
158
159   return *{ _glob_for $to } = $code;
160}
161
162sub _unimport_coderefs {
163   my ($target, @names) = @_;
164   return unless @names;
165   my $stash = _stash_for($target);
166   foreach my $name (@names) {
167      if ($stash->{$name} and defined(&{$stash->{$name}})) {
168         delete $stash->{$name};
169      }
170   }
171}
172
1731;
174}
175# ###########################################################################
176# End Lmo::Utils package
177# ###########################################################################
178
179# ###########################################################################
180# Lmo::Meta package
181# This package is a copy without comments from the original.  The original
182# with comments and its test file can be found in the Bazaar repository at,
183#   lib/Lmo/Meta.pm
184#   t/lib/Lmo/Meta.t
185# See https://launchpad.net/percona-toolkit for more information.
186# ###########################################################################
187{
188package Lmo::Meta;
189use strict;
190use warnings qw( FATAL all );
191
192my %metadata_for;
193
194sub new {
195   my $class = shift;
196   return bless { @_ }, $class
197}
198
199sub metadata_for {
200   my $self    = shift;
201   my ($class) = @_;
202
203   return $metadata_for{$class} ||= {};
204}
205
206sub class { shift->{class} }
207
208sub attributes {
209   my $self = shift;
210   return keys %{$self->metadata_for($self->class)}
211}
212
213sub attributes_for_new {
214   my $self = shift;
215   my @attributes;
216
217   my $class_metadata = $self->metadata_for($self->class);
218   while ( my ($attr, $meta) = each %$class_metadata ) {
219      if ( exists $meta->{init_arg} ) {
220         push @attributes, $meta->{init_arg}
221               if defined $meta->{init_arg};
222      }
223      else {
224         push @attributes, $attr;
225      }
226   }
227   return @attributes;
228}
229
2301;
231}
232# ###########################################################################
233# End Lmo::Meta package
234# ###########################################################################
235
236# ###########################################################################
237# Lmo::Object package
238# This package is a copy without comments from the original.  The original
239# with comments and its test file can be found in the Bazaar repository at,
240#   lib/Lmo/Object.pm
241#   t/lib/Lmo/Object.t
242# See https://launchpad.net/percona-toolkit for more information.
243# ###########################################################################
244{
245package Lmo::Object;
246
247use strict;
248use warnings qw( FATAL all );
249
250use Carp ();
251use Scalar::Util qw(blessed);
252
253use Lmo::Meta;
254use Lmo::Utils qw(_glob_for);
255
256sub new {
257   my $class = shift;
258   my $args  = $class->BUILDARGS(@_);
259
260   my $class_metadata = Lmo::Meta->metadata_for($class);
261
262   my @args_to_delete;
263   while ( my ($attr, $meta) = each %$class_metadata ) {
264      next unless exists $meta->{init_arg};
265      my $init_arg = $meta->{init_arg};
266
267      if ( defined $init_arg ) {
268         $args->{$attr} = delete $args->{$init_arg};
269      }
270      else {
271         push @args_to_delete, $attr;
272      }
273   }
274
275   delete $args->{$_} for @args_to_delete;
276
277   for my $attribute ( keys %$args ) {
278      if ( my $coerce = $class_metadata->{$attribute}{coerce} ) {
279         $args->{$attribute} = $coerce->($args->{$attribute});
280      }
281      if ( my $isa_check = $class_metadata->{$attribute}{isa} ) {
282         my ($check_name, $check_sub) = @$isa_check;
283         $check_sub->($args->{$attribute});
284      }
285   }
286
287   while ( my ($attribute, $meta) = each %$class_metadata ) {
288      next unless $meta->{required};
289      Carp::confess("Attribute ($attribute) is required for $class")
290         if ! exists $args->{$attribute}
291   }
292
293   my $self = bless $args, $class;
294
295   my @build_subs;
296   my $linearized_isa = mro::get_linear_isa($class);
297
298   for my $isa_class ( @$linearized_isa ) {
299      unshift @build_subs, *{ _glob_for "${isa_class}::BUILD" }{CODE};
300   }
301   my @args = %$args;
302   for my $sub (grep { defined($_) && exists &$_ } @build_subs) {
303      $sub->( $self, @args);
304   }
305   return $self;
306}
307
308sub BUILDARGS {
309   shift; # No need for the classname
310   if ( @_ == 1 && ref($_[0]) ) {
311      Carp::confess("Single parameters to new() must be a HASH ref, not $_[0]")
312         unless ref($_[0]) eq ref({});
313      return {%{$_[0]}} # We want a new reference, always
314   }
315   else {
316      return { @_ };
317   }
318}
319
320sub meta {
321   my $class = shift;
322   $class    = Scalar::Util::blessed($class) || $class;
323   return Lmo::Meta->new(class => $class);
324}
325
3261;
327}
328# ###########################################################################
329# End Lmo::Object package
330# ###########################################################################
331
332# ###########################################################################
333# Lmo::Types package
334# This package is a copy without comments from the original.  The original
335# with comments and its test file can be found in the Bazaar repository at,
336#   lib/Lmo/Types.pm
337#   t/lib/Lmo/Types.t
338# See https://launchpad.net/percona-toolkit for more information.
339# ###########################################################################
340{
341package Lmo::Types;
342
343use strict;
344use warnings qw( FATAL all );
345
346use Carp ();
347use Scalar::Util qw(looks_like_number blessed);
348
349
350our %TYPES = (
351   Bool   => sub { !$_[0] || (defined $_[0] && looks_like_number($_[0]) && $_[0] == 1) },
352   Num    => sub { defined $_[0] && looks_like_number($_[0]) },
353   Int    => sub { defined $_[0] && looks_like_number($_[0]) && $_[0] == int($_[0]) },
354   Str    => sub { defined $_[0] },
355   Object => sub { defined $_[0] && blessed($_[0]) },
356   FileHandle => sub { local $@; require IO::Handle; fileno($_[0]) && $_[0]->opened },
357
358   map {
359      my $type = /R/ ? $_ : uc $_;
360      $_ . "Ref" => sub { ref $_[0] eq $type }
361   } qw(Array Code Hash Regexp Glob Scalar)
362);
363
364sub check_type_constaints {
365   my ($attribute, $type_check, $check_name, $val) = @_;
366   ( ref($type_check) eq 'CODE'
367      ? $type_check->($val)
368      : (ref $val eq $type_check
369         || ($val && $val eq $type_check)
370         || (exists $TYPES{$type_check} && $TYPES{$type_check}->($val)))
371   )
372   || Carp::confess(
373        qq<Attribute ($attribute) does not pass the type constraint because: >
374      . qq<Validation failed for '$check_name' with value >
375      . (defined $val ? Lmo::Dumper($val) : 'undef') )
376}
377
378sub _nested_constraints {
379   my ($attribute, $aggregate_type, $type) = @_;
380
381   my $inner_types;
382   if ( $type =~ /\A(ArrayRef|Maybe)\[(.*)\]\z/ ) {
383      $inner_types = _nested_constraints($1, $2);
384   }
385   else {
386      $inner_types = $TYPES{$type};
387   }
388
389   if ( $aggregate_type eq 'ArrayRef' ) {
390      return sub {
391         my ($val) = @_;
392         return unless ref($val) eq ref([]);
393
394         if ($inner_types) {
395            for my $value ( @{$val} ) {
396               return unless $inner_types->($value)
397            }
398         }
399         else {
400            for my $value ( @{$val} ) {
401               return unless $value && ($value eq $type
402                        || (Scalar::Util::blessed($value) && $value->isa($type)));
403            }
404         }
405         return 1;
406      };
407   }
408   elsif ( $aggregate_type eq 'Maybe' ) {
409      return sub {
410         my ($value) = @_;
411         return 1 if ! defined($value);
412         if ($inner_types) {
413            return unless $inner_types->($value)
414         }
415         else {
416            return unless $value eq $type
417                        || (Scalar::Util::blessed($value) && $value->isa($type));
418         }
419         return 1;
420      }
421   }
422   else {
423      Carp::confess("Nested aggregate types are only implemented for ArrayRefs and Maybe");
424   }
425}
426
4271;
428}
429# ###########################################################################
430# End Lmo::Types package
431# ###########################################################################
432
433# ###########################################################################
434# Lmo package
435# This package is a copy without comments from the original.  The original
436# with comments and its test file can be found in the Bazaar repository at,
437#   lib/Lmo.pm
438#   t/lib/Lmo.t
439# See https://launchpad.net/percona-toolkit for more information.
440# ###########################################################################
441{
442BEGIN {
443$INC{"Lmo.pm"} = __FILE__;
444package Lmo;
445our $VERSION = '0.30_Percona'; # Forked from 0.30 of Mo.
446
447
448use strict;
449use warnings qw( FATAL all );
450
451use Carp ();
452use Scalar::Util qw(looks_like_number blessed);
453
454use Lmo::Meta;
455use Lmo::Object;
456use Lmo::Types;
457
458use Lmo::Utils;
459
460my %export_for;
461sub import {
462   warnings->import(qw(FATAL all));
463   strict->import();
464
465   my $caller     = scalar caller(); # Caller's package
466   my %exports = (
467      extends  => \&extends,
468      has      => \&has,
469      with     => \&with,
470      override => \&override,
471      confess  => \&Carp::confess,
472   );
473
474   $export_for{$caller} = \%exports;
475
476   for my $keyword ( keys %exports ) {
477      _install_coderef "${caller}::$keyword" => $exports{$keyword};
478   }
479
480   if ( !@{ *{ _glob_for "${caller}::ISA" }{ARRAY} || [] } ) {
481      @_ = "Lmo::Object";
482      goto *{ _glob_for "${caller}::extends" }{CODE};
483   }
484}
485
486sub extends {
487   my $caller = scalar caller();
488   for my $class ( @_ ) {
489      _load_module($class);
490   }
491   _set_package_isa($caller, @_);
492   _set_inherited_metadata($caller);
493}
494
495sub _load_module {
496   my ($class) = @_;
497
498   (my $file = $class) =~ s{::|'}{/}g;
499   $file .= '.pm';
500   { local $@; eval { require "$file" } } # or warn $@;
501   return;
502}
503
504sub with {
505   my $package = scalar caller();
506   require Role::Tiny;
507   for my $role ( @_ ) {
508      _load_module($role);
509      _role_attribute_metadata($package, $role);
510   }
511   Role::Tiny->apply_roles_to_package($package, @_);
512}
513
514sub _role_attribute_metadata {
515   my ($package, $role) = @_;
516
517   my $package_meta = Lmo::Meta->metadata_for($package);
518   my $role_meta    = Lmo::Meta->metadata_for($role);
519
520   %$package_meta = (%$role_meta, %$package_meta);
521}
522
523sub has {
524   my $names  = shift;
525   my $caller = scalar caller();
526
527   my $class_metadata = Lmo::Meta->metadata_for($caller);
528
529   for my $attribute ( ref $names ? @$names : $names ) {
530      my %args   = @_;
531      my $method = ($args{is} || '') eq 'ro'
532         ? sub {
533            Carp::confess("Cannot assign a value to a read-only accessor at reader ${caller}::${attribute}")
534               if $#_;
535            return $_[0]{$attribute};
536         }
537         : sub {
538            return $#_
539                  ? $_[0]{$attribute} = $_[1]
540                  : $_[0]{$attribute};
541         };
542
543      $class_metadata->{$attribute} = ();
544
545      if ( my $type_check = $args{isa} ) {
546         my $check_name = $type_check;
547
548         if ( my ($aggregate_type, $inner_type) = $type_check =~ /\A(ArrayRef|Maybe)\[(.*)\]\z/ ) {
549            $type_check = Lmo::Types::_nested_constraints($attribute, $aggregate_type, $inner_type);
550         }
551
552         my $check_sub = sub {
553            my ($new_val) = @_;
554            Lmo::Types::check_type_constaints($attribute, $type_check, $check_name, $new_val);
555         };
556
557         $class_metadata->{$attribute}{isa} = [$check_name, $check_sub];
558         my $orig_method = $method;
559         $method = sub {
560            $check_sub->($_[1]) if $#_;
561            goto &$orig_method;
562         };
563      }
564
565      if ( my $builder = $args{builder} ) {
566         my $original_method = $method;
567         $method = sub {
568               $#_
569                  ? goto &$original_method
570                  : ! exists $_[0]{$attribute}
571                     ? $_[0]{$attribute} = $_[0]->$builder
572                     : goto &$original_method
573         };
574      }
575
576      if ( my $code = $args{default} ) {
577         Carp::confess("${caller}::${attribute}'s default is $code, but should be a coderef")
578               unless ref($code) eq 'CODE';
579         my $original_method = $method;
580         $method = sub {
581               $#_
582                  ? goto &$original_method
583                  : ! exists $_[0]{$attribute}
584                     ? $_[0]{$attribute} = $_[0]->$code
585                     : goto &$original_method
586         };
587      }
588
589      if ( my $role = $args{does} ) {
590         my $original_method = $method;
591         $method = sub {
592            if ( $#_ ) {
593               Carp::confess(qq<Attribute ($attribute) doesn't consume a '$role' role">)
594                  unless Scalar::Util::blessed($_[1]) && eval { $_[1]->does($role) }
595            }
596            goto &$original_method
597         };
598      }
599
600      if ( my $coercion = $args{coerce} ) {
601         $class_metadata->{$attribute}{coerce} = $coercion;
602         my $original_method = $method;
603         $method = sub {
604            if ( $#_ ) {
605               return $original_method->($_[0], $coercion->($_[1]))
606            }
607            goto &$original_method;
608         }
609      }
610
611      _install_coderef "${caller}::$attribute" => $method;
612
613      if ( $args{required} ) {
614         $class_metadata->{$attribute}{required} = 1;
615      }
616
617      if ($args{clearer}) {
618         _install_coderef "${caller}::$args{clearer}"
619            => sub { delete shift->{$attribute} }
620      }
621
622      if ($args{predicate}) {
623         _install_coderef "${caller}::$args{predicate}"
624            => sub { exists shift->{$attribute} }
625      }
626
627      if ($args{handles}) {
628         _has_handles($caller, $attribute, \%args);
629      }
630
631      if (exists $args{init_arg}) {
632         $class_metadata->{$attribute}{init_arg} = $args{init_arg};
633      }
634   }
635}
636
637sub _has_handles {
638   my ($caller, $attribute, $args) = @_;
639   my $handles = $args->{handles};
640
641   my $ref = ref $handles;
642   my $kv;
643   if ( $ref eq ref [] ) {
644         $kv = { map { $_,$_ } @{$handles} };
645   }
646   elsif ( $ref eq ref {} ) {
647         $kv = $handles;
648   }
649   elsif ( $ref eq ref qr// ) {
650         Carp::confess("Cannot delegate methods based on a Regexp without a type constraint (isa)")
651            unless $args->{isa};
652         my $target_class = $args->{isa};
653         $kv = {
654            map   { $_, $_     }
655            grep  { $_ =~ $handles }
656            grep  { !exists $Lmo::Object::{$_} && $target_class->can($_) }
657            grep  { !$export_for{$target_class}->{$_} }
658            keys %{ _stash_for $target_class }
659         };
660   }
661   else {
662         Carp::confess("handles for $ref not yet implemented");
663   }
664
665   while ( my ($method, $target) = each %{$kv} ) {
666         my $name = _glob_for "${caller}::$method";
667         Carp::confess("You cannot overwrite a locally defined method ($method) with a delegation")
668            if defined &$name;
669
670         my ($target, @curried_args) = ref($target) ? @$target : $target;
671         *$name = sub {
672            my $self        = shift;
673            my $delegate_to = $self->$attribute();
674            my $error = "Cannot delegate $method to $target because the value of $attribute";
675            Carp::confess("$error is not defined") unless $delegate_to;
676            Carp::confess("$error is not an object (got '$delegate_to')")
677               unless Scalar::Util::blessed($delegate_to) || (!ref($delegate_to) && $delegate_to->can($target));
678            return $delegate_to->$target(@curried_args, @_);
679         }
680   }
681}
682
683sub _set_package_isa {
684   my ($package, @new_isa) = @_;
685   my $package_isa  = \*{ _glob_for "${package}::ISA" };
686   @{*$package_isa} = @new_isa;
687}
688
689sub _set_inherited_metadata {
690   my $class = shift;
691   my $class_metadata = Lmo::Meta->metadata_for($class);
692   my $linearized_isa = mro::get_linear_isa($class);
693   my %new_metadata;
694
695   for my $isa_class (reverse @$linearized_isa) {
696      my $isa_metadata = Lmo::Meta->metadata_for($isa_class);
697      %new_metadata = (
698         %new_metadata,
699         %$isa_metadata,
700      );
701   }
702   %$class_metadata = %new_metadata;
703}
704
705sub unimport {
706   my $caller = scalar caller();
707   my $target = caller;
708  _unimport_coderefs($target, keys %{$export_for{$caller}});
709}
710
711sub Dumper {
712   require Data::Dumper;
713   local $Data::Dumper::Indent    = 0;
714   local $Data::Dumper::Sortkeys  = 0;
715   local $Data::Dumper::Quotekeys = 0;
716   local $Data::Dumper::Terse     = 1;
717
718   Data::Dumper::Dumper(@_)
719}
720
721BEGIN {
722   if ($] >= 5.010) {
723      { local $@; require mro; }
724   }
725   else {
726      local $@;
727      eval {
728         require MRO::Compat;
729      } or do {
730         *mro::get_linear_isa = *mro::get_linear_isa_dfs = sub {
731            no strict 'refs';
732
733            my $classname = shift;
734
735            my @lin = ($classname);
736            my %stored;
737            foreach my $parent (@{"$classname\::ISA"}) {
738               my $plin = mro::get_linear_isa_dfs($parent);
739               foreach (@$plin) {
740                     next if exists $stored{$_};
741                     push(@lin, $_);
742                     $stored{$_} = 1;
743               }
744            }
745            return \@lin;
746         };
747      }
748   }
749}
750
751sub override {
752   my ($methods, $code) = @_;
753   my $caller          = scalar caller;
754
755   for my $method ( ref($methods) ? @$methods : $methods ) {
756      my $full_method     = "${caller}::${method}";
757      *{_glob_for $full_method} = $code;
758   }
759}
760
761}
7621;
763}
764# ###########################################################################
765# End Lmo package
766# ###########################################################################
767
768# ###########################################################################
769# DSNParser package
770# This package is a copy without comments from the original.  The original
771# with comments and its test file can be found in the Bazaar repository at,
772#   lib/DSNParser.pm
773#   t/lib/DSNParser.t
774# See https://launchpad.net/percona-toolkit for more information.
775# ###########################################################################
776{
777package DSNParser;
778
779use strict;
780use warnings FATAL => 'all';
781use English qw(-no_match_vars);
782use constant PTDEBUG => $ENV{PTDEBUG} || 0;
783
784use Data::Dumper;
785$Data::Dumper::Indent    = 0;
786$Data::Dumper::Quotekeys = 0;
787
788my $dsn_sep = qr/(?<!\\),/;
789
790eval {
791   require DBI;
792};
793my $have_dbi = $EVAL_ERROR ? 0 : 1;
794
795sub new {
796   my ( $class, %args ) = @_;
797   foreach my $arg ( qw(opts) ) {
798      die "I need a $arg argument" unless $args{$arg};
799   }
800   my $self = {
801      opts => {}  # h, P, u, etc.  Should come from DSN OPTIONS section in POD.
802   };
803   foreach my $opt ( @{$args{opts}} ) {
804      if ( !$opt->{key} || !$opt->{desc} ) {
805         die "Invalid DSN option: ", Dumper($opt);
806      }
807      PTDEBUG && _d('DSN option:',
808         join(', ',
809            map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
810               keys %$opt
811         )
812      );
813      $self->{opts}->{$opt->{key}} = {
814         dsn  => $opt->{dsn},
815         desc => $opt->{desc},
816         copy => $opt->{copy} || 0,
817      };
818   }
819   return bless $self, $class;
820}
821
822sub prop {
823   my ( $self, $prop, $value ) = @_;
824   if ( @_ > 2 ) {
825      PTDEBUG && _d('Setting', $prop, 'property');
826      $self->{$prop} = $value;
827   }
828   return $self->{$prop};
829}
830
831sub parse {
832   my ( $self, $dsn, $prev, $defaults ) = @_;
833   if ( !$dsn ) {
834      PTDEBUG && _d('No DSN to parse');
835      return;
836   }
837   PTDEBUG && _d('Parsing', $dsn);
838   $prev     ||= {};
839   $defaults ||= {};
840   my %given_props;
841   my %final_props;
842   my $opts = $self->{opts};
843
844   foreach my $dsn_part ( split($dsn_sep, $dsn) ) {
845      $dsn_part =~ s/\\,/,/g;
846      if ( my ($prop_key, $prop_val) = $dsn_part =~  m/^(.)=(.*)$/ ) {
847         $given_props{$prop_key} = $prop_val;
848      }
849      else {
850         PTDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
851         $given_props{h} = $dsn_part;
852      }
853   }
854
855   foreach my $key ( keys %$opts ) {
856      PTDEBUG && _d('Finding value for', $key);
857      $final_props{$key} = $given_props{$key};
858      if ( !defined $final_props{$key}
859           && defined $prev->{$key} && $opts->{$key}->{copy} )
860      {
861         $final_props{$key} = $prev->{$key};
862         PTDEBUG && _d('Copying value for', $key, 'from previous DSN');
863      }
864      if ( !defined $final_props{$key} ) {
865         $final_props{$key} = $defaults->{$key};
866         PTDEBUG && _d('Copying value for', $key, 'from defaults');
867      }
868   }
869
870   foreach my $key ( keys %given_props ) {
871      die "Unknown DSN option '$key' in '$dsn'.  For more details, "
872            . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
873            . "for complete documentation."
874         unless exists $opts->{$key};
875   }
876   if ( (my $required = $self->prop('required')) ) {
877      foreach my $key ( keys %$required ) {
878         die "Missing required DSN option '$key' in '$dsn'.  For more details, "
879               . "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
880               . "for complete documentation."
881            unless $final_props{$key};
882      }
883   }
884
885   return \%final_props;
886}
887
888sub parse_options {
889   my ( $self, $o ) = @_;
890   die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
891   my $dsn_string
892      = join(',',
893          map  { "$_=".$o->get($_); }
894          grep { $o->has($_) && $o->get($_) }
895          keys %{$self->{opts}}
896        );
897   PTDEBUG && _d('DSN string made from options:', $dsn_string);
898   return $self->parse($dsn_string);
899}
900
901sub as_string {
902   my ( $self, $dsn, $props ) = @_;
903   return $dsn unless ref $dsn;
904   my @keys = $props ? @$props : sort keys %$dsn;
905   return join(',',
906      map  { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
907      grep {
908         exists $self->{opts}->{$_}
909         && exists $dsn->{$_}
910         && defined $dsn->{$_}
911      } @keys);
912}
913
914sub usage {
915   my ( $self ) = @_;
916   my $usage
917      = "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n\n"
918      . "  KEY  COPY  MEANING\n"
919      . "  ===  ====  =============================================\n";
920   my %opts = %{$self->{opts}};
921   foreach my $key ( sort keys %opts ) {
922      $usage .= "  $key    "
923             .  ($opts{$key}->{copy} ? 'yes   ' : 'no    ')
924             .  ($opts{$key}->{desc} || '[No description]')
925             . "\n";
926   }
927   $usage .= "\n  If the DSN is a bareword, the word is treated as the 'h' key.\n";
928   return $usage;
929}
930
931sub get_cxn_params {
932   my ( $self, $info ) = @_;
933   my $dsn;
934   my %opts = %{$self->{opts}};
935   my $driver = $self->prop('dbidriver') || '';
936   if ( $driver eq 'Pg' ) {
937      $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
938         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
939                     grep { defined $info->{$_} }
940                     qw(h P));
941   }
942   else {
943      $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
944         . join(';', map  { "$opts{$_}->{dsn}=$info->{$_}" }
945                     grep { defined $info->{$_} }
946                     qw(F h P S A))
947         . ';mysql_read_default_group=client'
948         . ($info->{L} ? ';mysql_local_infile=1' : '');
949   }
950   PTDEBUG && _d($dsn);
951   return ($dsn, $info->{u}, $info->{p});
952}
953
954sub fill_in_dsn {
955   my ( $self, $dbh, $dsn ) = @_;
956   my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
957   my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
958   $user =~ s/@.*//;
959   $dsn->{h} ||= $vars->{hostname}->{Value};
960   $dsn->{S} ||= $vars->{'socket'}->{Value};
961   $dsn->{P} ||= $vars->{port}->{Value};
962   $dsn->{u} ||= $user;
963   $dsn->{D} ||= $db;
964}
965
966sub get_dbh {
967   my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
968   $opts ||= {};
969   my $defaults = {
970      AutoCommit         => 0,
971      RaiseError         => 1,
972      PrintError         => 0,
973      ShowErrorStatement => 1,
974      mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
975   };
976   @{$defaults}{ keys %$opts } = values %$opts;
977   if (delete $defaults->{L}) { # L for LOAD DATA LOCAL INFILE, our own extension
978      $defaults->{mysql_local_infile} = 1;
979   }
980
981   if ( $opts->{mysql_use_result} ) {
982      $defaults->{mysql_use_result} = 1;
983   }
984
985   if ( !$have_dbi ) {
986      die "Cannot connect to MySQL because the Perl DBI module is not "
987         . "installed or not found.  Run 'perl -MDBI' to see the directories "
988         . "that Perl searches for DBI.  If DBI is not installed, try:\n"
989         . "  Debian/Ubuntu  apt-get install libdbi-perl\n"
990         . "  RHEL/CentOS    yum install perl-DBI\n"
991         . "  OpenSolaris    pkg install pkg:/SUNWpmdbi\n";
992
993   }
994
995   my $dbh;
996   my $tries = 2;
997   while ( !$dbh && $tries-- ) {
998      PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
999         join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
1000
1001      $dbh = eval { DBI->connect($cxn_string, $user, $pass, $defaults) };
1002
1003      if ( !$dbh && $EVAL_ERROR ) {
1004         if ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
1005            die "Cannot connect to MySQL because the Perl DBD::mysql module is "
1006               . "not installed or not found.  Run 'perl -MDBD::mysql' to see "
1007               . "the directories that Perl searches for DBD::mysql.  If "
1008               . "DBD::mysql is not installed, try:\n"
1009               . "  Debian/Ubuntu  apt-get install libdbd-mysql-perl\n"
1010               . "  RHEL/CentOS    yum install perl-DBD-MySQL\n"
1011               . "  OpenSolaris    pgk install pkg:/SUNWapu13dbd-mysql\n";
1012         }
1013         elsif ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
1014            PTDEBUG && _d('Going to try again without utf8 support');
1015            delete $defaults->{mysql_enable_utf8};
1016         }
1017         if ( !$tries ) {
1018            die $EVAL_ERROR;
1019         }
1020      }
1021   }
1022
1023   if ( $cxn_string =~ m/mysql/i ) {
1024      my $sql;
1025
1026      if ( my ($charset) = $cxn_string =~ m/charset=([\w]+)/ ) {
1027         $sql = qq{/*!40101 SET NAMES "$charset"*/};
1028         PTDEBUG && _d($dbh, $sql);
1029         eval { $dbh->do($sql) };
1030         if ( $EVAL_ERROR ) {
1031            die "Error setting NAMES to $charset: $EVAL_ERROR";
1032         }
1033         PTDEBUG && _d('Enabling charset for STDOUT');
1034         if ( $charset eq 'utf8' ) {
1035            binmode(STDOUT, ':utf8')
1036               or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
1037         }
1038         else {
1039            binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
1040         }
1041      }
1042
1043      if ( my $vars = $self->prop('set-vars') ) {
1044         $self->set_vars($dbh, $vars);
1045      }
1046
1047      $sql = 'SELECT @@SQL_MODE';
1048      PTDEBUG && _d($dbh, $sql);
1049      my ($sql_mode) = eval { $dbh->selectrow_array($sql) };
1050      if ( $EVAL_ERROR ) {
1051         die "Error getting the current SQL_MODE: $EVAL_ERROR";
1052      }
1053
1054      $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
1055            . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
1056            . ($sql_mode ? ",$sql_mode" : '')
1057            . '\'*/';
1058      PTDEBUG && _d($dbh, $sql);
1059      eval { $dbh->do($sql) };
1060      if ( $EVAL_ERROR ) {
1061         die "Error setting SQL_QUOTE_SHOW_CREATE, SQL_MODE"
1062           . ($sql_mode ? " and $sql_mode" : '')
1063           . ": $EVAL_ERROR";
1064      }
1065   }
1066   my ($mysql_version) = eval { $dbh->selectrow_array('SELECT VERSION()') };
1067   if ($EVAL_ERROR) {
1068       die "Cannot get MySQL version: $EVAL_ERROR";
1069   }
1070
1071   my (undef, $character_set_server) = eval { $dbh->selectrow_array("SHOW VARIABLES LIKE 'character_set_server'") };
1072   if ($EVAL_ERROR) {
1073       die "Cannot get MySQL var character_set_server: $EVAL_ERROR";
1074   }
1075
1076   if ($mysql_version =~ m/^(\d+)\.(\d)\.(\d+).*/) {
1077       if ($1 >= 8 && $character_set_server =~ m/^utf8/) {
1078           $dbh->{mysql_enable_utf8} = 1;
1079           my $msg = "MySQL version $mysql_version >= 8 and character_set_server = $character_set_server\n".
1080                     "Setting: SET NAMES $character_set_server";
1081           PTDEBUG && _d($msg);
1082           eval { $dbh->do("SET NAMES 'utf8mb4'") };
1083           if ($EVAL_ERROR) {
1084               die "Cannot SET NAMES $character_set_server: $EVAL_ERROR";
1085           }
1086       }
1087   }
1088
1089   PTDEBUG && _d('DBH info: ',
1090      $dbh,
1091      Dumper($dbh->selectrow_hashref(
1092         'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
1093      'Connection info:',      $dbh->{mysql_hostinfo},
1094      'Character set info:',   Dumper($dbh->selectall_arrayref(
1095                     "SHOW VARIABLES LIKE 'character_set%'", { Slice => {}})),
1096      '$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
1097      '$DBI::VERSION:',        $DBI::VERSION,
1098   );
1099
1100   return $dbh;
1101}
1102
1103sub get_hostname {
1104   my ( $self, $dbh ) = @_;
1105   if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
1106      return $host;
1107   }
1108   my ( $hostname, $one ) = $dbh->selectrow_array(
1109      'SELECT /*!50038 @@hostname, */ 1');
1110   return $hostname;
1111}
1112
1113sub disconnect {
1114   my ( $self, $dbh ) = @_;
1115   PTDEBUG && $self->print_active_handles($dbh);
1116   $dbh->disconnect;
1117}
1118
1119sub print_active_handles {
1120   my ( $self, $thing, $level ) = @_;
1121   $level ||= 0;
1122   printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
1123      $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
1124      or die "Cannot print: $OS_ERROR";
1125   foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
1126      $self->print_active_handles( $handle, $level + 1 );
1127   }
1128}
1129
1130sub copy {
1131   my ( $self, $dsn_1, $dsn_2, %args ) = @_;
1132   die 'I need a dsn_1 argument' unless $dsn_1;
1133   die 'I need a dsn_2 argument' unless $dsn_2;
1134   my %new_dsn = map {
1135      my $key = $_;
1136      my $val;
1137      if ( $args{overwrite} ) {
1138         $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
1139      }
1140      else {
1141         $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
1142      }
1143      $key => $val;
1144   } keys %{$self->{opts}};
1145   return \%new_dsn;
1146}
1147
1148sub set_vars {
1149   my ($self, $dbh, $vars) = @_;
1150
1151   return unless $vars;
1152
1153   foreach my $var ( sort keys %$vars ) {
1154      my $val = $vars->{$var}->{val};
1155
1156      (my $quoted_var = $var) =~ s/_/\\_/;
1157      my ($var_exists, $current_val);
1158      eval {
1159         ($var_exists, $current_val) = $dbh->selectrow_array(
1160            "SHOW VARIABLES LIKE '$quoted_var'");
1161      };
1162      my $e = $EVAL_ERROR;
1163      if ( $e ) {
1164         PTDEBUG && _d($e);
1165      }
1166
1167      if ( $vars->{$var}->{default} && !$var_exists ) {
1168         PTDEBUG && _d('Not setting default var', $var,
1169            'because it does not exist');
1170         next;
1171      }
1172
1173      if ( $current_val && $current_val eq $val ) {
1174         PTDEBUG && _d('Not setting var', $var, 'because its value',
1175            'is already', $val);
1176         next;
1177      }
1178
1179      my $sql = "SET SESSION $var=$val";
1180      PTDEBUG && _d($dbh, $sql);
1181      eval { $dbh->do($sql) };
1182      if ( my $set_error = $EVAL_ERROR ) {
1183         chomp($set_error);
1184         $set_error =~ s/ at \S+ line \d+//;
1185         my $msg = "Error setting $var: $set_error";
1186         if ( $current_val ) {
1187            $msg .= "  The current value for $var is $current_val.  "
1188                  . "If the variable is read only (not dynamic), specify "
1189                  . "--set-vars $var=$current_val to avoid this warning, "
1190                  . "else manually set the variable and restart MySQL.";
1191         }
1192         warn $msg . "\n\n";
1193      }
1194   }
1195
1196   return;
1197}
1198
1199sub _d {
1200   my ($package, undef, $line) = caller 0;
1201   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1202        map { defined $_ ? $_ : 'undef' }
1203        @_;
1204   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1205}
1206
12071;
1208}
1209# ###########################################################################
1210# End DSNParser package
1211# ###########################################################################
1212
1213# ###########################################################################
1214# Quoter package
1215# This package is a copy without comments from the original.  The original
1216# with comments and its test file can be found in the Bazaar repository at,
1217#   lib/Quoter.pm
1218#   t/lib/Quoter.t
1219# See https://launchpad.net/percona-toolkit for more information.
1220# ###########################################################################
1221{
1222package Quoter;
1223
1224use strict;
1225use warnings FATAL => 'all';
1226use English qw(-no_match_vars);
1227use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1228
1229use Data::Dumper;
1230$Data::Dumper::Indent    = 1;
1231$Data::Dumper::Sortkeys  = 1;
1232$Data::Dumper::Quotekeys = 0;
1233
1234sub new {
1235   my ( $class, %args ) = @_;
1236   return bless {}, $class;
1237}
1238
1239sub quote {
1240   my ( $self, @vals ) = @_;
1241   foreach my $val ( @vals ) {
1242      $val =~ s/`/``/g;
1243   }
1244   return join('.', map { '`' . $_ . '`' } @vals);
1245}
1246
1247sub quote_val {
1248   my ( $self, $val, %args ) = @_;
1249
1250   return 'NULL' unless defined $val;          # undef = NULL
1251   return "''" if $val eq '';                  # blank string = ''
1252   return $val if $val =~ m/^0x[0-9a-fA-F]+$/  # quote hex data
1253                  && !$args{is_char};          # unless is_char is true
1254
1255   $val =~ s/(['\\])/\\$1/g;
1256   return "'$val'";
1257}
1258
1259sub split_unquote {
1260   my ( $self, $db_tbl, $default_db ) = @_;
1261   my ( $db, $tbl ) = split(/[.]/, $db_tbl);
1262   if ( !$tbl ) {
1263      $tbl = $db;
1264      $db  = $default_db;
1265   }
1266   for ($db, $tbl) {
1267      next unless $_;
1268      s/\A`//;
1269      s/`\z//;
1270      s/``/`/g;
1271   }
1272
1273   return ($db, $tbl);
1274}
1275
1276sub literal_like {
1277   my ( $self, $like ) = @_;
1278   return unless $like;
1279   $like =~ s/([%_])/\\$1/g;
1280   return "'$like'";
1281}
1282
1283sub join_quote {
1284   my ( $self, $default_db, $db_tbl ) = @_;
1285   return unless $db_tbl;
1286   my ($db, $tbl) = split(/[.]/, $db_tbl);
1287   if ( !$tbl ) {
1288      $tbl = $db;
1289      $db  = $default_db;
1290   }
1291   $db  = "`$db`"  if $db  && $db  !~ m/^`/;
1292   $tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/;
1293   return $db ? "$db.$tbl" : $tbl;
1294}
1295
1296sub serialize_list {
1297   my ( $self, @args ) = @_;
1298   PTDEBUG && _d('Serializing', Dumper(\@args));
1299   return unless @args;
1300
1301   my @parts;
1302   foreach my $arg  ( @args ) {
1303      if ( defined $arg ) {
1304         $arg =~ s/,/\\,/g;      # escape commas
1305         $arg =~ s/\\N/\\\\N/g;  # escape literal \N
1306         push @parts, $arg;
1307      }
1308      else {
1309         push @parts, '\N';
1310      }
1311   }
1312
1313   my $string = join(',', @parts);
1314   PTDEBUG && _d('Serialized: <', $string, '>');
1315   return $string;
1316}
1317
1318sub deserialize_list {
1319   my ( $self, $string ) = @_;
1320   PTDEBUG && _d('Deserializing <', $string, '>');
1321   die "Cannot deserialize an undefined string" unless defined $string;
1322
1323   my @parts;
1324   foreach my $arg ( split(/(?<!\\),/, $string) ) {
1325      if ( $arg eq '\N' ) {
1326         $arg = undef;
1327      }
1328      else {
1329         $arg =~ s/\\,/,/g;
1330         $arg =~ s/\\\\N/\\N/g;
1331      }
1332      push @parts, $arg;
1333   }
1334
1335   if ( !@parts ) {
1336      my $n_empty_strings = $string =~ tr/,//;
1337      $n_empty_strings++;
1338      PTDEBUG && _d($n_empty_strings, 'empty strings');
1339      map { push @parts, '' } 1..$n_empty_strings;
1340   }
1341   elsif ( $string =~ m/(?<!\\),$/ ) {
1342      PTDEBUG && _d('Last value is an empty string');
1343      push @parts, '';
1344   }
1345
1346   PTDEBUG && _d('Deserialized', Dumper(\@parts));
1347   return @parts;
1348}
1349
1350sub _d {
1351   my ($package, undef, $line) = caller 0;
1352   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1353        map { defined $_ ? $_ : 'undef' }
1354        @_;
1355   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1356}
1357
13581;
1359}
1360# ###########################################################################
1361# End Quoter package
1362# ###########################################################################
1363
1364# ###########################################################################
1365# OptionParser package
1366# This package is a copy without comments from the original.  The original
1367# with comments and its test file can be found in the Bazaar repository at,
1368#   lib/OptionParser.pm
1369#   t/lib/OptionParser.t
1370# See https://launchpad.net/percona-toolkit for more information.
1371# ###########################################################################
1372{
1373package OptionParser;
1374
1375use strict;
1376use warnings FATAL => 'all';
1377use English qw(-no_match_vars);
1378use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1379
1380use List::Util qw(max);
1381use Getopt::Long;
1382use Data::Dumper;
1383
1384my $POD_link_re = '[LC]<"?([^">]+)"?>';
1385
1386sub new {
1387   my ( $class, %args ) = @_;
1388   my @required_args = qw();
1389   foreach my $arg ( @required_args ) {
1390      die "I need a $arg argument" unless $args{$arg};
1391   }
1392
1393   my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
1394   $program_name ||= $PROGRAM_NAME;
1395   my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
1396
1397   my %attributes = (
1398      'type'       => 1,
1399      'short form' => 1,
1400      'group'      => 1,
1401      'default'    => 1,
1402      'cumulative' => 1,
1403      'negatable'  => 1,
1404      'repeatable' => 1,  # means it can be specified more than once
1405   );
1406
1407   my $self = {
1408      head1             => 'OPTIONS',        # These args are used internally
1409      skip_rules        => 0,                # to instantiate another Option-
1410      item              => '--(.*)',         # Parser obj that parses the
1411      attributes        => \%attributes,     # DSN OPTIONS section.  Tools
1412      parse_attributes  => \&_parse_attribs, # don't tinker with these args.
1413
1414      %args,
1415
1416      strict            => 1,  # disabled by a special rule
1417      program_name      => $program_name,
1418      opts              => {},
1419      got_opts          => 0,
1420      short_opts        => {},
1421      defaults          => {},
1422      groups            => {},
1423      allowed_groups    => {},
1424      errors            => [],
1425      rules             => [],  # desc of rules for --help
1426      mutex             => [],  # rule: opts are mutually exclusive
1427      atleast1          => [],  # rule: at least one opt is required
1428      disables          => {},  # rule: opt disables other opts
1429      defaults_to       => {},  # rule: opt defaults to value of other opt
1430      DSNParser         => undef,
1431      default_files     => [
1432         "/etc/percona-toolkit/percona-toolkit.conf",
1433         "/etc/percona-toolkit/$program_name.conf",
1434         "$home/.percona-toolkit.conf",
1435         "$home/.$program_name.conf",
1436      ],
1437      types             => {
1438         string => 's', # standard Getopt type
1439         int    => 'i', # standard Getopt type
1440         float  => 'f', # standard Getopt type
1441         Hash   => 'H', # hash, formed from a comma-separated list
1442         hash   => 'h', # hash as above, but only if a value is given
1443         Array  => 'A', # array, similar to Hash
1444         array  => 'a', # array, similar to hash
1445         DSN    => 'd', # DSN
1446         size   => 'z', # size with kMG suffix (powers of 2^10)
1447         time   => 'm', # time, with an optional suffix of s/h/m/d
1448      },
1449   };
1450
1451   return bless $self, $class;
1452}
1453
1454sub get_specs {
1455   my ( $self, $file ) = @_;
1456   $file ||= $self->{file} || __FILE__;
1457   my @specs = $self->_pod_to_specs($file);
1458   $self->_parse_specs(@specs);
1459
1460   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
1461   my $contents = do { local $/ = undef; <$fh> };
1462   close $fh;
1463   if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
1464      PTDEBUG && _d('Parsing DSN OPTIONS');
1465      my $dsn_attribs = {
1466         dsn  => 1,
1467         copy => 1,
1468      };
1469      my $parse_dsn_attribs = sub {
1470         my ( $self, $option, $attribs ) = @_;
1471         map {
1472            my $val = $attribs->{$_};
1473            if ( $val ) {
1474               $val    = $val eq 'yes' ? 1
1475                       : $val eq 'no'  ? 0
1476                       :                 $val;
1477               $attribs->{$_} = $val;
1478            }
1479         } keys %$attribs;
1480         return {
1481            key => $option,
1482            %$attribs,
1483         };
1484      };
1485      my $dsn_o = new OptionParser(
1486         description       => 'DSN OPTIONS',
1487         head1             => 'DSN OPTIONS',
1488         dsn               => 0,         # XXX don't infinitely recurse!
1489         item              => '\* (.)',  # key opts are a single character
1490         skip_rules        => 1,         # no rules before opts
1491         attributes        => $dsn_attribs,
1492         parse_attributes  => $parse_dsn_attribs,
1493      );
1494      my @dsn_opts = map {
1495         my $opts = {
1496            key  => $_->{spec}->{key},
1497            dsn  => $_->{spec}->{dsn},
1498            copy => $_->{spec}->{copy},
1499            desc => $_->{desc},
1500         };
1501         $opts;
1502      } $dsn_o->_pod_to_specs($file);
1503      $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
1504   }
1505
1506   if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
1507      $self->{version} = $1;
1508      PTDEBUG && _d($self->{version});
1509   }
1510
1511   return;
1512}
1513
1514sub DSNParser {
1515   my ( $self ) = @_;
1516   return $self->{DSNParser};
1517};
1518
1519sub get_defaults_files {
1520   my ( $self ) = @_;
1521   return @{$self->{default_files}};
1522}
1523
1524sub _pod_to_specs {
1525   my ( $self, $file ) = @_;
1526   $file ||= $self->{file} || __FILE__;
1527   open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
1528
1529   my @specs = ();
1530   my @rules = ();
1531   my $para;
1532
1533   local $INPUT_RECORD_SEPARATOR = '';
1534   while ( $para = <$fh> ) {
1535      next unless $para =~ m/^=head1 $self->{head1}/;
1536      last;
1537   }
1538
1539   while ( $para = <$fh> ) {
1540      last if $para =~ m/^=over/;
1541      next if $self->{skip_rules};
1542      chomp $para;
1543      $para =~ s/\s+/ /g;
1544      $para =~ s/$POD_link_re/$1/go;
1545      PTDEBUG && _d('Option rule:', $para);
1546      push @rules, $para;
1547   }
1548
1549   die "POD has no $self->{head1} section" unless $para;
1550
1551   do {
1552      if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
1553         chomp $para;
1554         PTDEBUG && _d($para);
1555         my %attribs;
1556
1557         $para = <$fh>; # read next paragraph, possibly attributes
1558
1559         if ( $para =~ m/: / ) { # attributes
1560            $para =~ s/\s+\Z//g;
1561            %attribs = map {
1562                  my ( $attrib, $val) = split(/: /, $_);
1563                  die "Unrecognized attribute for --$option: $attrib"
1564                     unless $self->{attributes}->{$attrib};
1565                  ($attrib, $val);
1566               } split(/; /, $para);
1567            if ( $attribs{'short form'} ) {
1568               $attribs{'short form'} =~ s/-//;
1569            }
1570            $para = <$fh>; # read next paragraph, probably short help desc
1571         }
1572         else {
1573            PTDEBUG && _d('Option has no attributes');
1574         }
1575
1576         $para =~ s/\s+\Z//g;
1577         $para =~ s/\s+/ /g;
1578         $para =~ s/$POD_link_re/$1/go;
1579
1580         $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
1581         PTDEBUG && _d('Short help:', $para);
1582
1583         die "No description after option spec $option" if $para =~ m/^=item/;
1584
1585         if ( my ($base_option) =  $option =~ m/^\[no\](.*)/ ) {
1586            $option = $base_option;
1587            $attribs{'negatable'} = 1;
1588         }
1589
1590         push @specs, {
1591            spec  => $self->{parse_attributes}->($self, $option, \%attribs),
1592            desc  => $para
1593               . (defined $attribs{default} ? " (default $attribs{default})" : ''),
1594            group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
1595            attributes => \%attribs
1596         };
1597      }
1598      while ( $para = <$fh> ) {
1599         last unless $para;
1600         if ( $para =~ m/^=head1/ ) {
1601            $para = undef; # Can't 'last' out of a do {} block.
1602            last;
1603         }
1604         last if $para =~ m/^=item /;
1605      }
1606   } while ( $para );
1607
1608   die "No valid specs in $self->{head1}" unless @specs;
1609
1610   close $fh;
1611   return @specs, @rules;
1612}
1613
1614sub _parse_specs {
1615   my ( $self, @specs ) = @_;
1616   my %disables; # special rule that requires deferred checking
1617
1618   foreach my $opt ( @specs ) {
1619      if ( ref $opt ) { # It's an option spec, not a rule.
1620         PTDEBUG && _d('Parsing opt spec:',
1621            map { ($_, '=>', $opt->{$_}) } keys %$opt);
1622
1623         my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
1624         if ( !$long ) {
1625            die "Cannot parse long option from spec $opt->{spec}";
1626         }
1627         $opt->{long} = $long;
1628
1629         die "Duplicate long option --$long" if exists $self->{opts}->{$long};
1630         $self->{opts}->{$long} = $opt;
1631
1632         if ( length $long == 1 ) {
1633            PTDEBUG && _d('Long opt', $long, 'looks like short opt');
1634            $self->{short_opts}->{$long} = $long;
1635         }
1636
1637         if ( $short ) {
1638            die "Duplicate short option -$short"
1639               if exists $self->{short_opts}->{$short};
1640            $self->{short_opts}->{$short} = $long;
1641            $opt->{short} = $short;
1642         }
1643         else {
1644            $opt->{short} = undef;
1645         }
1646
1647         $opt->{is_negatable}  = $opt->{spec} =~ m/!/        ? 1 : 0;
1648         $opt->{is_cumulative} = $opt->{spec} =~ m/\+/       ? 1 : 0;
1649         $opt->{is_repeatable} = $opt->{attributes}->{repeatable} ? 1 : 0;
1650         $opt->{is_required}   = $opt->{desc} =~ m/required/ ? 1 : 0;
1651
1652         $opt->{group} ||= 'default';
1653         $self->{groups}->{ $opt->{group} }->{$long} = 1;
1654
1655         $opt->{value} = undef;
1656         $opt->{got}   = 0;
1657
1658         my ( $type ) = $opt->{spec} =~ m/=(.)/;
1659         $opt->{type} = $type;
1660         PTDEBUG && _d($long, 'type:', $type);
1661
1662
1663         $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
1664
1665         if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
1666            $self->{defaults}->{$long} = defined $def ? $def : 1;
1667            PTDEBUG && _d($long, 'default:', $def);
1668         }
1669
1670         if ( $long eq 'config' ) {
1671            $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
1672         }
1673
1674         if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
1675            $disables{$long} = $dis;
1676            PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
1677         }
1678
1679         $self->{opts}->{$long} = $opt;
1680      }
1681      else { # It's an option rule, not a spec.
1682         PTDEBUG && _d('Parsing rule:', $opt);
1683         push @{$self->{rules}}, $opt;
1684         my @participants = $self->_get_participants($opt);
1685         my $rule_ok = 0;
1686
1687         if ( $opt =~ m/mutually exclusive|one and only one/ ) {
1688            $rule_ok = 1;
1689            push @{$self->{mutex}}, \@participants;
1690            PTDEBUG && _d(@participants, 'are mutually exclusive');
1691         }
1692         if ( $opt =~ m/at least one|one and only one/ ) {
1693            $rule_ok = 1;
1694            push @{$self->{atleast1}}, \@participants;
1695            PTDEBUG && _d(@participants, 'require at least one');
1696         }
1697         if ( $opt =~ m/default to/ ) {
1698            $rule_ok = 1;
1699            $self->{defaults_to}->{$participants[0]} = $participants[1];
1700            PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
1701         }
1702         if ( $opt =~ m/restricted to option groups/ ) {
1703            $rule_ok = 1;
1704            my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
1705            my @groups = split(',', $groups);
1706            %{$self->{allowed_groups}->{$participants[0]}} = map {
1707               s/\s+//;
1708               $_ => 1;
1709            } @groups;
1710         }
1711         if( $opt =~ m/accepts additional command-line arguments/ ) {
1712            $rule_ok = 1;
1713            $self->{strict} = 0;
1714            PTDEBUG && _d("Strict mode disabled by rule");
1715         }
1716
1717         die "Unrecognized option rule: $opt" unless $rule_ok;
1718      }
1719   }
1720
1721   foreach my $long ( keys %disables ) {
1722      my @participants = $self->_get_participants($disables{$long});
1723      $self->{disables}->{$long} = \@participants;
1724      PTDEBUG && _d('Option', $long, 'disables', @participants);
1725   }
1726
1727   return;
1728}
1729
1730sub _get_participants {
1731   my ( $self, $str ) = @_;
1732   my @participants;
1733   foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
1734      die "Option --$long does not exist while processing rule $str"
1735         unless exists $self->{opts}->{$long};
1736      push @participants, $long;
1737   }
1738   PTDEBUG && _d('Participants for', $str, ':', @participants);
1739   return @participants;
1740}
1741
1742sub opts {
1743   my ( $self ) = @_;
1744   my %opts = %{$self->{opts}};
1745   return %opts;
1746}
1747
1748sub short_opts {
1749   my ( $self ) = @_;
1750   my %short_opts = %{$self->{short_opts}};
1751   return %short_opts;
1752}
1753
1754sub set_defaults {
1755   my ( $self, %defaults ) = @_;
1756   $self->{defaults} = {};
1757   foreach my $long ( keys %defaults ) {
1758      die "Cannot set default for nonexistent option $long"
1759         unless exists $self->{opts}->{$long};
1760      $self->{defaults}->{$long} = $defaults{$long};
1761      PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
1762   }
1763   return;
1764}
1765
1766sub get_defaults {
1767   my ( $self ) = @_;
1768   return $self->{defaults};
1769}
1770
1771sub get_groups {
1772   my ( $self ) = @_;
1773   return $self->{groups};
1774}
1775
1776sub _set_option {
1777   my ( $self, $opt, $val ) = @_;
1778   my $long = exists $self->{opts}->{$opt}       ? $opt
1779            : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
1780            : die "Getopt::Long gave a nonexistent option: $opt";
1781   $opt = $self->{opts}->{$long};
1782   if ( $opt->{is_cumulative} ) {
1783      $opt->{value}++;
1784   }
1785   elsif ( ($opt->{type} || '') eq 's' && $val =~ m/^--?(.+)/ ) {
1786      my $next_opt = $1;
1787      if (    exists $self->{opts}->{$next_opt}
1788           || exists $self->{short_opts}->{$next_opt} ) {
1789         $self->save_error("--$long requires a string value");
1790         return;
1791      }
1792      else {
1793         if ($opt->{is_repeatable}) {
1794            push @{$opt->{value}} , $val;
1795         }
1796         else {
1797            $opt->{value} = $val;
1798         }
1799      }
1800   }
1801   else {
1802      if ($opt->{is_repeatable}) {
1803         push @{$opt->{value}} , $val;
1804      }
1805      else {
1806         $opt->{value} = $val;
1807      }
1808   }
1809   $opt->{got} = 1;
1810   PTDEBUG && _d('Got option', $long, '=', $val);
1811}
1812
1813sub get_opts {
1814   my ( $self ) = @_;
1815
1816   foreach my $long ( keys %{$self->{opts}} ) {
1817      $self->{opts}->{$long}->{got} = 0;
1818      $self->{opts}->{$long}->{value}
1819         = exists $self->{defaults}->{$long}       ? $self->{defaults}->{$long}
1820         : $self->{opts}->{$long}->{is_cumulative} ? 0
1821         : undef;
1822   }
1823   $self->{got_opts} = 0;
1824
1825   $self->{errors} = [];
1826
1827   if ( @ARGV && $ARGV[0] =~/^--config=/ ) {
1828      $ARGV[0] = substr($ARGV[0],9);
1829      $ARGV[0] =~ s/^'(.*)'$/$1/;
1830      $ARGV[0] =~ s/^"(.*)"$/$1/;
1831      $self->_set_option('config', shift @ARGV);
1832   }
1833   if ( @ARGV && $ARGV[0] eq "--config" ) {
1834      shift @ARGV;
1835      $self->_set_option('config', shift @ARGV);
1836   }
1837   if ( $self->has('config') ) {
1838      my @extra_args;
1839      foreach my $filename ( split(',', $self->get('config')) ) {
1840         eval {
1841            push @extra_args, $self->_read_config_file($filename);
1842         };
1843         if ( $EVAL_ERROR ) {
1844            if ( $self->got('config') ) {
1845               die $EVAL_ERROR;
1846            }
1847            elsif ( PTDEBUG ) {
1848               _d($EVAL_ERROR);
1849            }
1850         }
1851      }
1852      unshift @ARGV, @extra_args;
1853   }
1854
1855   Getopt::Long::Configure('no_ignore_case', 'bundling');
1856   GetOptions(
1857      map    { $_->{spec} => sub { $self->_set_option(@_); } }
1858      grep   { $_->{long} ne 'config' } # --config is handled specially above.
1859      values %{$self->{opts}}
1860   ) or $self->save_error('Error parsing options');
1861
1862   if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
1863      if ( $self->{version} ) {
1864         print $self->{version}, "\n";
1865         exit 0;
1866      }
1867      else {
1868         print "Error parsing version.  See the VERSION section of the tool's documentation.\n";
1869         exit 1;
1870      }
1871   }
1872
1873   if ( @ARGV && $self->{strict} ) {
1874      $self->save_error("Unrecognized command-line options @ARGV");
1875   }
1876
1877   foreach my $mutex ( @{$self->{mutex}} ) {
1878      my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
1879      if ( @set > 1 ) {
1880         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
1881                      @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
1882                 . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
1883                 . ' are mutually exclusive.';
1884         $self->save_error($err);
1885      }
1886   }
1887
1888   foreach my $required ( @{$self->{atleast1}} ) {
1889      my @set = grep { $self->{opts}->{$_}->{got} } @$required;
1890      if ( @set == 0 ) {
1891         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
1892                      @{$required}[ 0 .. scalar(@$required) - 2] )
1893                 .' or --'.$self->{opts}->{$required->[-1]}->{long};
1894         $self->save_error("Specify at least one of $err");
1895      }
1896   }
1897
1898   $self->_check_opts( keys %{$self->{opts}} );
1899   $self->{got_opts} = 1;
1900   return;
1901}
1902
1903sub _check_opts {
1904   my ( $self, @long ) = @_;
1905   my $long_last = scalar @long;
1906   while ( @long ) {
1907      foreach my $i ( 0..$#long ) {
1908         my $long = $long[$i];
1909         next unless $long;
1910         my $opt  = $self->{opts}->{$long};
1911         if ( $opt->{got} ) {
1912            if ( exists $self->{disables}->{$long} ) {
1913               my @disable_opts = @{$self->{disables}->{$long}};
1914               map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
1915               PTDEBUG && _d('Unset options', @disable_opts,
1916                  'because', $long,'disables them');
1917            }
1918
1919            if ( exists $self->{allowed_groups}->{$long} ) {
1920
1921               my @restricted_groups = grep {
1922                  !exists $self->{allowed_groups}->{$long}->{$_}
1923               } keys %{$self->{groups}};
1924
1925               my @restricted_opts;
1926               foreach my $restricted_group ( @restricted_groups ) {
1927                  RESTRICTED_OPT:
1928                  foreach my $restricted_opt (
1929                     keys %{$self->{groups}->{$restricted_group}} )
1930                  {
1931                     next RESTRICTED_OPT if $restricted_opt eq $long;
1932                     push @restricted_opts, $restricted_opt
1933                        if $self->{opts}->{$restricted_opt}->{got};
1934                  }
1935               }
1936
1937               if ( @restricted_opts ) {
1938                  my $err;
1939                  if ( @restricted_opts == 1 ) {
1940                     $err = "--$restricted_opts[0]";
1941                  }
1942                  else {
1943                     $err = join(', ',
1944                               map { "--$self->{opts}->{$_}->{long}" }
1945                               grep { $_ }
1946                               @restricted_opts[0..scalar(@restricted_opts) - 2]
1947                            )
1948                          . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
1949                  }
1950                  $self->save_error("--$long is not allowed with $err");
1951               }
1952            }
1953
1954         }
1955         elsif ( $opt->{is_required} ) {
1956            $self->save_error("Required option --$long must be specified");
1957         }
1958
1959         $self->_validate_type($opt);
1960         if ( $opt->{parsed} ) {
1961            delete $long[$i];
1962         }
1963         else {
1964            PTDEBUG && _d('Temporarily failed to parse', $long);
1965         }
1966      }
1967
1968      die "Failed to parse options, possibly due to circular dependencies"
1969         if @long == $long_last;
1970      $long_last = @long;
1971   }
1972
1973   return;
1974}
1975
1976sub _validate_type {
1977   my ( $self, $opt ) = @_;
1978   return unless $opt;
1979
1980   if ( !$opt->{type} ) {
1981      $opt->{parsed} = 1;
1982      return;
1983   }
1984
1985   my $val = $opt->{value};
1986
1987   if ( $val && $opt->{type} eq 'm' ) {  # type time
1988      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
1989      my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
1990      if ( !$suffix ) {
1991         my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
1992         $suffix = $s || 's';
1993         PTDEBUG && _d('No suffix given; using', $suffix, 'for',
1994            $opt->{long}, '(value:', $val, ')');
1995      }
1996      if ( $suffix =~ m/[smhd]/ ) {
1997         $val = $suffix eq 's' ? $num            # Seconds
1998              : $suffix eq 'm' ? $num * 60       # Minutes
1999              : $suffix eq 'h' ? $num * 3600     # Hours
2000              :                  $num * 86400;   # Days
2001         $opt->{value} = ($prefix || '') . $val;
2002         PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
2003      }
2004      else {
2005         $self->save_error("Invalid time suffix for --$opt->{long}");
2006      }
2007   }
2008   elsif ( $val && $opt->{type} eq 'd' ) {  # type DSN
2009      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
2010      my $prev = {};
2011      my $from_key = $self->{defaults_to}->{ $opt->{long} };
2012      if ( $from_key ) {
2013         PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
2014         if ( $self->{opts}->{$from_key}->{parsed} ) {
2015            $prev = $self->{opts}->{$from_key}->{value};
2016         }
2017         else {
2018            PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
2019               $from_key, 'parsed');
2020            return;
2021         }
2022      }
2023      my $defaults = $self->{DSNParser}->parse_options($self);
2024      if (!$opt->{attributes}->{repeatable}) {
2025          $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
2026      } else {
2027          my $values = [];
2028          for my $dsn_string (@$val) {
2029              push @$values, $self->{DSNParser}->parse($dsn_string, $prev, $defaults);
2030          }
2031          $opt->{value} = $values;
2032      }
2033   }
2034   elsif ( $val && $opt->{type} eq 'z' ) {  # type size
2035      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
2036      $self->_parse_size($opt, $val);
2037   }
2038   elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
2039      $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
2040   }
2041   elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
2042      $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
2043   }
2044   else {
2045      PTDEBUG && _d('Nothing to validate for option',
2046         $opt->{long}, 'type', $opt->{type}, 'value', $val);
2047   }
2048
2049   $opt->{parsed} = 1;
2050   return;
2051}
2052
2053sub get {
2054   my ( $self, $opt ) = @_;
2055   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
2056   die "Option $opt does not exist"
2057      unless $long && exists $self->{opts}->{$long};
2058   return $self->{opts}->{$long}->{value};
2059}
2060
2061sub got {
2062   my ( $self, $opt ) = @_;
2063   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
2064   die "Option $opt does not exist"
2065      unless $long && exists $self->{opts}->{$long};
2066   return $self->{opts}->{$long}->{got};
2067}
2068
2069sub has {
2070   my ( $self, $opt ) = @_;
2071   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
2072   return defined $long ? exists $self->{opts}->{$long} : 0;
2073}
2074
2075sub set {
2076   my ( $self, $opt, $val ) = @_;
2077   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
2078   die "Option $opt does not exist"
2079      unless $long && exists $self->{opts}->{$long};
2080   $self->{opts}->{$long}->{value} = $val;
2081   return;
2082}
2083
2084sub save_error {
2085   my ( $self, $error ) = @_;
2086   push @{$self->{errors}}, $error;
2087   return;
2088}
2089
2090sub errors {
2091   my ( $self ) = @_;
2092   return $self->{errors};
2093}
2094
2095sub usage {
2096   my ( $self ) = @_;
2097   warn "No usage string is set" unless $self->{usage}; # XXX
2098   return "Usage: " . ($self->{usage} || '') . "\n";
2099}
2100
2101sub descr {
2102   my ( $self ) = @_;
2103   warn "No description string is set" unless $self->{description}; # XXX
2104   my $descr  = ($self->{description} || $self->{program_name} || '')
2105              . "  For more details, please use the --help option, "
2106              . "or try 'perldoc $PROGRAM_NAME' "
2107              . "for complete documentation.";
2108   $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
2109      unless $ENV{DONT_BREAK_LINES};
2110   $descr =~ s/ +$//mg;
2111   return $descr;
2112}
2113
2114sub usage_or_errors {
2115   my ( $self, $file, $return ) = @_;
2116   $file ||= $self->{file} || __FILE__;
2117
2118   if ( !$self->{description} || !$self->{usage} ) {
2119      PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
2120      my %synop = $self->_parse_synopsis($file);
2121      $self->{description} ||= $synop{description};
2122      $self->{usage}       ||= $synop{usage};
2123      PTDEBUG && _d("Description:", $self->{description},
2124         "\nUsage:", $self->{usage});
2125   }
2126
2127   if ( $self->{opts}->{help}->{got} ) {
2128      print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
2129      exit 0 unless $return;
2130   }
2131   elsif ( scalar @{$self->{errors}} ) {
2132      print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
2133      exit 1 unless $return;
2134   }
2135
2136   return;
2137}
2138
2139sub print_errors {
2140   my ( $self ) = @_;
2141   my $usage = $self->usage() . "\n";
2142   if ( (my @errors = @{$self->{errors}}) ) {
2143      $usage .= join("\n  * ", 'Errors in command-line arguments:', @errors)
2144              . "\n";
2145   }
2146   return $usage . "\n" . $self->descr();
2147}
2148
2149sub print_usage {
2150   my ( $self ) = @_;
2151   die "Run get_opts() before print_usage()" unless $self->{got_opts};
2152   my @opts = values %{$self->{opts}};
2153
2154   my $maxl = max(
2155      map {
2156         length($_->{long})               # option long name
2157         + ($_->{is_negatable} ? 4 : 0)   # "[no]" if opt is negatable
2158         + ($_->{type} ? 2 : 0)           # "=x" where x is the opt type
2159      }
2160      @opts);
2161
2162   my $maxs = max(0,
2163      map {
2164         length($_)
2165         + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
2166         + ($self->{opts}->{$_}->{type} ? 2 : 0)
2167      }
2168      values %{$self->{short_opts}});
2169
2170   my $lcol = max($maxl, ($maxs + 3));
2171   my $rcol = 80 - $lcol - 6;
2172   my $rpad = ' ' x ( 80 - $rcol );
2173
2174   $maxs = max($lcol - 3, $maxs);
2175
2176   my $usage = $self->descr() . "\n" . $self->usage();
2177
2178   my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
2179   push @groups, 'default';
2180
2181   foreach my $group ( reverse @groups ) {
2182      $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
2183      foreach my $opt (
2184         sort { $a->{long} cmp $b->{long} }
2185         grep { $_->{group} eq $group }
2186         @opts )
2187      {
2188         my $long  = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
2189         my $short = $opt->{short};
2190         my $desc  = $opt->{desc};
2191
2192         $long .= $opt->{type} ? "=$opt->{type}" : "";
2193
2194         if ( $opt->{type} && $opt->{type} eq 'm' ) {
2195            my ($s) = $desc =~ m/\(suffix (.)\)/;
2196            $s    ||= 's';
2197            $desc =~ s/\s+\(suffix .\)//;
2198            $desc .= ".  Optional suffix s=seconds, m=minutes, h=hours, "
2199                   . "d=days; if no suffix, $s is used.";
2200         }
2201         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol}(?!\W))(?:\s+|(?<=\W)|$)/g);
2202         $desc =~ s/ +$//mg;
2203         if ( $short ) {
2204            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
2205         }
2206         else {
2207            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
2208         }
2209      }
2210   }
2211
2212   $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
2213
2214   if ( (my @rules = @{$self->{rules}}) ) {
2215      $usage .= "\nRules:\n\n";
2216      $usage .= join("\n", map { "  $_" } @rules) . "\n";
2217   }
2218   if ( $self->{DSNParser} ) {
2219      $usage .= "\n" . $self->{DSNParser}->usage();
2220   }
2221   $usage .= "\nOptions and values after processing arguments:\n\n";
2222   foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
2223      my $val   = $opt->{value};
2224      my $type  = $opt->{type} || '';
2225      my $bool  = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
2226      $val      = $bool              ? ( $val ? 'TRUE' : 'FALSE' )
2227                : !defined $val      ? '(No value)'
2228                : $type eq 'd'       ? $self->{DSNParser}->as_string($val)
2229                : $type =~ m/H|h/    ? join(',', sort keys %$val)
2230                : $type =~ m/A|a/    ? join(',', @$val)
2231                :                    $val;
2232      $usage .= sprintf("  --%-${lcol}s  %s\n", $opt->{long}, $val);
2233   }
2234   return $usage;
2235}
2236
2237sub prompt_noecho {
2238   shift @_ if ref $_[0] eq __PACKAGE__;
2239   my ( $prompt ) = @_;
2240   local $OUTPUT_AUTOFLUSH = 1;
2241   print STDERR $prompt
2242      or die "Cannot print: $OS_ERROR";
2243   my $response;
2244   eval {
2245      require Term::ReadKey;
2246      Term::ReadKey::ReadMode('noecho');
2247      chomp($response = <STDIN>);
2248      Term::ReadKey::ReadMode('normal');
2249      print "\n"
2250         or die "Cannot print: $OS_ERROR";
2251   };
2252   if ( $EVAL_ERROR ) {
2253      die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
2254   }
2255   return $response;
2256}
2257
2258sub _read_config_file {
2259   my ( $self, $filename ) = @_;
2260   open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
2261   my @args;
2262   my $prefix = '--';
2263   my $parse  = 1;
2264
2265   LINE:
2266   while ( my $line = <$fh> ) {
2267      chomp $line;
2268      next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
2269      $line =~ s/\s+#.*$//g;
2270      $line =~ s/^\s+|\s+$//g;
2271      if ( $line eq '--' ) {
2272         $prefix = '';
2273         $parse  = 0;
2274         next LINE;
2275      }
2276
2277      if (  $parse
2278            && !$self->has('version-check')
2279            && $line =~ /version-check/
2280      ) {
2281         next LINE;
2282      }
2283
2284      if ( $parse
2285         && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
2286      ) {
2287         push @args, grep { defined $_ } ("$prefix$opt", $arg);
2288      }
2289      elsif ( $line =~ m/./ ) {
2290         push @args, $line;
2291      }
2292      else {
2293         die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
2294      }
2295   }
2296   close $fh;
2297   return @args;
2298}
2299
2300sub read_para_after {
2301   my ( $self, $file, $regex ) = @_;
2302   open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
2303   local $INPUT_RECORD_SEPARATOR = '';
2304   my $para;
2305   while ( $para = <$fh> ) {
2306      next unless $para =~ m/^=pod$/m;
2307      last;
2308   }
2309   while ( $para = <$fh> ) {
2310      next unless $para =~ m/$regex/;
2311      last;
2312   }
2313   $para = <$fh>;
2314   chomp($para);
2315   close $fh or die "Can't close $file: $OS_ERROR";
2316   return $para;
2317}
2318
2319sub clone {
2320   my ( $self ) = @_;
2321
2322   my %clone = map {
2323      my $hashref  = $self->{$_};
2324      my $val_copy = {};
2325      foreach my $key ( keys %$hashref ) {
2326         my $ref = ref $hashref->{$key};
2327         $val_copy->{$key} = !$ref           ? $hashref->{$key}
2328                           : $ref eq 'HASH'  ? { %{$hashref->{$key}} }
2329                           : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
2330                           : $hashref->{$key};
2331      }
2332      $_ => $val_copy;
2333   } qw(opts short_opts defaults);
2334
2335   foreach my $scalar ( qw(got_opts) ) {
2336      $clone{$scalar} = $self->{$scalar};
2337   }
2338
2339   return bless \%clone;
2340}
2341
2342sub _parse_size {
2343   my ( $self, $opt, $val ) = @_;
2344
2345   if ( lc($val || '') eq 'null' ) {
2346      PTDEBUG && _d('NULL size for', $opt->{long});
2347      $opt->{value} = 'null';
2348      return;
2349   }
2350
2351   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
2352   my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
2353   if ( defined $num ) {
2354      if ( $factor ) {
2355         $num *= $factor_for{$factor};
2356         PTDEBUG && _d('Setting option', $opt->{y},
2357            'to num', $num, '* factor', $factor);
2358      }
2359      $opt->{value} = ($pre || '') . $num;
2360   }
2361   else {
2362      $self->save_error("Invalid size for --$opt->{long}: $val");
2363   }
2364   return;
2365}
2366
2367sub _parse_attribs {
2368   my ( $self, $option, $attribs ) = @_;
2369   my $types = $self->{types};
2370   return $option
2371      . ($attribs->{'short form'} ? '|' . $attribs->{'short form'}   : '' )
2372      . ($attribs->{'negatable'}  ? '!'                              : '' )
2373      . ($attribs->{'cumulative'} ? '+'                              : '' )
2374      . ($attribs->{'type'}       ? '=' . $types->{$attribs->{type}} : '' );
2375}
2376
2377sub _parse_synopsis {
2378   my ( $self, $file ) = @_;
2379   $file ||= $self->{file} || __FILE__;
2380   PTDEBUG && _d("Parsing SYNOPSIS in", $file);
2381
2382   local $INPUT_RECORD_SEPARATOR = '';  # read paragraphs
2383   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
2384   my $para;
2385   1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
2386   die "$file does not contain a SYNOPSIS section" unless $para;
2387   my @synop;
2388   for ( 1..2 ) {  # 1 for the usage, 2 for the description
2389      my $para = <$fh>;
2390      push @synop, $para;
2391   }
2392   close $fh;
2393   PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
2394   my ($usage, $desc) = @synop;
2395   die "The SYNOPSIS section in $file is not formatted properly"
2396      unless $usage && $desc;
2397
2398   $usage =~ s/^\s*Usage:\s+(.+)/$1/;
2399   chomp $usage;
2400
2401   $desc =~ s/\n/ /g;
2402   $desc =~ s/\s{2,}/ /g;
2403   $desc =~ s/\. ([A-Z][a-z])/.  $1/g;
2404   $desc =~ s/\s+$//;
2405
2406   return (
2407      description => $desc,
2408      usage       => $usage,
2409   );
2410};
2411
2412sub set_vars {
2413   my ($self, $file) = @_;
2414   $file ||= $self->{file} || __FILE__;
2415
2416   my %user_vars;
2417   my $user_vars = $self->has('set-vars') ? $self->get('set-vars') : undef;
2418   if ( $user_vars ) {
2419      foreach my $var_val ( @$user_vars ) {
2420         my ($var, $val) = $var_val =~ m/([^\s=]+)=(\S+)/;
2421         die "Invalid --set-vars value: $var_val\n" unless $var && defined $val;
2422         $user_vars{$var} = {
2423            val     => $val,
2424            default => 0,
2425         };
2426      }
2427   }
2428
2429   my %default_vars;
2430   my $default_vars = $self->read_para_after($file, qr/MAGIC_set_vars/);
2431   if ( $default_vars ) {
2432      %default_vars = map {
2433         my $var_val = $_;
2434         my ($var, $val) = $var_val =~ m/([^\s=]+)=(\S+)/;
2435         die "Invalid --set-vars value: $var_val\n" unless $var && defined $val;
2436         $var => {
2437            val     => $val,
2438            default => 1,
2439         };
2440      } split("\n", $default_vars);
2441   }
2442
2443   my %vars = (
2444      %default_vars, # first the tool's defaults
2445      %user_vars,    # then the user's which overwrite the defaults
2446   );
2447   PTDEBUG && _d('--set-vars:', Dumper(\%vars));
2448   return \%vars;
2449}
2450
2451sub _d {
2452   my ($package, undef, $line) = caller 0;
2453   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2454        map { defined $_ ? $_ : 'undef' }
2455        @_;
2456   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2457}
2458
2459if ( PTDEBUG ) {
2460   print STDERR '# ', $^X, ' ', $], "\n";
2461   if ( my $uname = `uname -a` ) {
2462      $uname =~ s/\s+/ /g;
2463      print STDERR "# $uname\n";
2464   }
2465   print STDERR '# Arguments: ',
2466      join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
2467}
2468
24691;
2470}
2471# ###########################################################################
2472# End OptionParser package
2473# ###########################################################################
2474
2475# ###########################################################################
2476# Transformers package
2477# This package is a copy without comments from the original.  The original
2478# with comments and its test file can be found in the Bazaar repository at,
2479#   lib/Transformers.pm
2480#   t/lib/Transformers.t
2481# See https://launchpad.net/percona-toolkit for more information.
2482# ###########################################################################
2483{
2484package Transformers;
2485
2486use strict;
2487use warnings FATAL => 'all';
2488use English qw(-no_match_vars);
2489use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2490
2491use Time::Local qw(timegm timelocal);
2492use Digest::MD5 qw(md5_hex);
2493use B qw();
2494
2495BEGIN {
2496   require Exporter;
2497   our @ISA         = qw(Exporter);
2498   our %EXPORT_TAGS = ();
2499   our @EXPORT      = ();
2500   our @EXPORT_OK   = qw(
2501      micro_t
2502      percentage_of
2503      secs_to_time
2504      time_to_secs
2505      shorten
2506      ts
2507      parse_timestamp
2508      unix_timestamp
2509      any_unix_timestamp
2510      make_checksum
2511      crc32
2512      encode_json
2513   );
2514}
2515
2516our $mysql_ts  = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
2517our $proper_ts = qr/(\d\d\d\d)-(\d\d)-(\d\d)[T ](\d\d):(\d\d):(\d\d)(\.\d+)?/;
2518our $n_ts      = qr/(\d{1,5})([shmd]?)/; # Limit \d{1,5} because \d{6} looks
2519
2520sub micro_t {
2521   my ( $t, %args ) = @_;
2522   my $p_ms = defined $args{p_ms} ? $args{p_ms} : 0;  # precision for ms vals
2523   my $p_s  = defined $args{p_s}  ? $args{p_s}  : 0;  # precision for s vals
2524   my $f;
2525
2526   $t = 0 if $t < 0;
2527
2528   $t = sprintf('%.17f', $t) if $t =~ /e/;
2529
2530   $t =~ s/\.(\d{1,6})\d*/\.$1/;
2531
2532   if ($t > 0 && $t <= 0.000999) {
2533      $f = ($t * 1000000) . 'us';
2534   }
2535   elsif ($t >= 0.001000 && $t <= 0.999999) {
2536      $f = sprintf("%.${p_ms}f", $t * 1000);
2537      $f = ($f * 1) . 'ms'; # * 1 to remove insignificant zeros
2538   }
2539   elsif ($t >= 1) {
2540      $f = sprintf("%.${p_s}f", $t);
2541      $f = ($f * 1) . 's'; # * 1 to remove insignificant zeros
2542   }
2543   else {
2544      $f = 0;  # $t should = 0 at this point
2545   }
2546
2547   return $f;
2548}
2549
2550sub percentage_of {
2551   my ( $is, $of, %args ) = @_;
2552   my $p   = $args{p} || 0; # float precision
2553   my $fmt = $p ? "%.${p}f" : "%d";
2554   return sprintf $fmt, ($is * 100) / ($of ||= 1);
2555}
2556
2557sub secs_to_time {
2558   my ( $secs, $fmt ) = @_;
2559   $secs ||= 0;
2560   return '00:00' unless $secs;
2561
2562   $fmt ||= $secs >= 86_400 ? 'd'
2563          : $secs >= 3_600  ? 'h'
2564          :                   'm';
2565
2566   return
2567      $fmt eq 'd' ? sprintf(
2568         "%d+%02d:%02d:%02d",
2569         int($secs / 86_400),
2570         int(($secs % 86_400) / 3_600),
2571         int(($secs % 3_600) / 60),
2572         $secs % 60)
2573      : $fmt eq 'h' ? sprintf(
2574         "%02d:%02d:%02d",
2575         int(($secs % 86_400) / 3_600),
2576         int(($secs % 3_600) / 60),
2577         $secs % 60)
2578      : sprintf(
2579         "%02d:%02d",
2580         int(($secs % 3_600) / 60),
2581         $secs % 60);
2582}
2583
2584sub time_to_secs {
2585   my ( $val, $default_suffix ) = @_;
2586   die "I need a val argument" unless defined $val;
2587   my $t = 0;
2588   my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
2589   $suffix = $suffix || $default_suffix || 's';
2590   if ( $suffix =~ m/[smhd]/ ) {
2591      $t = $suffix eq 's' ? $num * 1        # Seconds
2592         : $suffix eq 'm' ? $num * 60       # Minutes
2593         : $suffix eq 'h' ? $num * 3600     # Hours
2594         :                  $num * 86400;   # Days
2595
2596      $t *= -1 if $prefix && $prefix eq '-';
2597   }
2598   else {
2599      die "Invalid suffix for $val: $suffix";
2600   }
2601   return $t;
2602}
2603
2604sub shorten {
2605   my ( $num, %args ) = @_;
2606   my $p = defined $args{p} ? $args{p} : 2;     # float precision
2607   my $d = defined $args{d} ? $args{d} : 1_024; # divisor
2608   my $n = 0;
2609   my @units = ('', qw(k M G T P E Z Y));
2610   while ( $num >= $d && $n < @units - 1 ) {
2611      $num /= $d;
2612      ++$n;
2613   }
2614   return sprintf(
2615      $num =~ m/\./ || $n
2616         ? '%1$.'.$p.'f%2$s'
2617         : '%1$d',
2618      $num, $units[$n]);
2619}
2620
2621sub ts {
2622   my ( $time, $gmt ) = @_;
2623   my ( $sec, $min, $hour, $mday, $mon, $year )
2624      = $gmt ? gmtime($time) : localtime($time);
2625   $mon  += 1;
2626   $year += 1900;
2627   my $val = sprintf("%d-%02d-%02dT%02d:%02d:%02d",
2628      $year, $mon, $mday, $hour, $min, $sec);
2629   if ( my ($us) = $time =~ m/(\.\d+)$/ ) {
2630      $us = sprintf("%.6f", $us);
2631      $us =~ s/^0\././;
2632      $val .= $us;
2633   }
2634   return $val;
2635}
2636
2637sub parse_timestamp {
2638   my ( $val ) = @_;
2639   if ( my($y, $m, $d, $h, $i, $s, $f)
2640         = $val =~ m/^$mysql_ts$/ )
2641   {
2642      return sprintf "%d-%02d-%02d %02d:%02d:"
2643                     . (defined $f ? '%09.6f' : '%02d'),
2644                     $y + 2000, $m, $d, $h, $i, (defined $f ? $s + $f : $s);
2645   }
2646   elsif ( $val =~ m/^$proper_ts$/ ) {
2647      return $val;
2648   }
2649   return $val;
2650}
2651
2652sub unix_timestamp {
2653   my ( $val, $gmt ) = @_;
2654   if ( my($y, $m, $d, $h, $i, $s, $us) = $val =~ m/^$proper_ts$/ ) {
2655      $val = $gmt
2656         ? timegm($s, $i, $h, $d, $m - 1, $y)
2657         : timelocal($s, $i, $h, $d, $m - 1, $y);
2658      if ( defined $us ) {
2659         $us = sprintf('%.6f', $us);
2660         $us =~ s/^0\././;
2661         $val .= $us;
2662      }
2663   }
2664   return $val;
2665}
2666
2667sub any_unix_timestamp {
2668   my ( $val, $callback ) = @_;
2669
2670   if ( my ($n, $suffix) = $val =~ m/^$n_ts$/ ) {
2671      $n = $suffix eq 's' ? $n            # Seconds
2672         : $suffix eq 'm' ? $n * 60       # Minutes
2673         : $suffix eq 'h' ? $n * 3600     # Hours
2674         : $suffix eq 'd' ? $n * 86400    # Days
2675         :                  $n;           # default: Seconds
2676      PTDEBUG && _d('ts is now - N[shmd]:', $n);
2677      return time - $n;
2678   }
2679   elsif ( $val =~ m/^\d{9,}/ ) {
2680      PTDEBUG && _d('ts is already a unix timestamp');
2681      return $val;
2682   }
2683   elsif ( my ($ymd, $hms) = $val =~ m/^(\d{6})(?:\s+(\d+:\d+:\d+))?/ ) {
2684      PTDEBUG && _d('ts is MySQL slow log timestamp');
2685      $val .= ' 00:00:00' unless $hms;
2686      return unix_timestamp(parse_timestamp($val));
2687   }
2688   elsif ( ($ymd, $hms) = $val =~ m/^(\d{4}-\d\d-\d\d)(?:[T ](\d+:\d+:\d+))?/) {
2689      PTDEBUG && _d('ts is properly formatted timestamp');
2690      $val .= ' 00:00:00' unless $hms;
2691      return unix_timestamp($val);
2692   }
2693   else {
2694      PTDEBUG && _d('ts is MySQL expression');
2695      return $callback->($val) if $callback && ref $callback eq 'CODE';
2696   }
2697
2698   PTDEBUG && _d('Unknown ts type:', $val);
2699   return;
2700}
2701
2702sub make_checksum {
2703   my ( $val ) = @_;
2704   my $checksum = uc md5_hex($val);
2705   PTDEBUG && _d($checksum, 'checksum for', $val);
2706   return $checksum;
2707}
2708
2709sub crc32 {
2710   my ( $string ) = @_;
2711   return unless $string;
2712   my $poly = 0xEDB88320;
2713   my $crc  = 0xFFFFFFFF;
2714   foreach my $char ( split(//, $string) ) {
2715      my $comp = ($crc ^ ord($char)) & 0xFF;
2716      for ( 1 .. 8 ) {
2717         $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
2718      }
2719      $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
2720   }
2721   return $crc ^ 0xFFFFFFFF;
2722}
2723
2724my $got_json = eval { require JSON };
2725sub encode_json {
2726   return JSON::encode_json(@_) if $got_json;
2727   my ( $data ) = @_;
2728   return (object_to_json($data) || '');
2729}
2730
2731
2732sub object_to_json {
2733   my ($obj) = @_;
2734   my $type  = ref($obj);
2735
2736   if($type eq 'HASH'){
2737      return hash_to_json($obj);
2738   }
2739   elsif($type eq 'ARRAY'){
2740      return array_to_json($obj);
2741   }
2742   else {
2743      return value_to_json($obj);
2744   }
2745}
2746
2747sub hash_to_json {
2748   my ($obj) = @_;
2749   my @res;
2750   for my $k ( sort { $a cmp $b } keys %$obj ) {
2751      push @res, string_to_json( $k )
2752         .  ":"
2753         . ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
2754   }
2755   return '{' . ( @res ? join( ",", @res ) : '' )  . '}';
2756}
2757
2758sub array_to_json {
2759   my ($obj) = @_;
2760   my @res;
2761
2762   for my $v (@$obj) {
2763      push @res, object_to_json($v) || value_to_json($v);
2764   }
2765
2766   return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
2767}
2768
2769sub value_to_json {
2770   my ($value) = @_;
2771
2772   return 'null' if(!defined $value);
2773
2774   my $b_obj = B::svref_2object(\$value);  # for round trip problem
2775   my $flags = $b_obj->FLAGS;
2776   return $value # as is
2777      if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
2778
2779   my $type = ref($value);
2780
2781   if( !$type ) {
2782      return string_to_json($value);
2783   }
2784   else {
2785      return 'null';
2786   }
2787
2788}
2789
2790my %esc = (
2791   "\n" => '\n',
2792   "\r" => '\r',
2793   "\t" => '\t',
2794   "\f" => '\f',
2795   "\b" => '\b',
2796   "\"" => '\"',
2797   "\\" => '\\\\',
2798   "\'" => '\\\'',
2799);
2800
2801sub string_to_json {
2802   my ($arg) = @_;
2803
2804   $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
2805   $arg =~ s/\//\\\//g;
2806   $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
2807
2808   utf8::upgrade($arg);
2809   utf8::encode($arg);
2810
2811   return '"' . $arg . '"';
2812}
2813
2814sub _d {
2815   my ($package, undef, $line) = caller 0;
2816   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2817        map { defined $_ ? $_ : 'undef' }
2818        @_;
2819   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2820}
2821
28221;
2823}
2824# ###########################################################################
2825# End Transformers package
2826# ###########################################################################
2827
2828# ###########################################################################
2829# QueryRewriter package
2830# This package is a copy without comments from the original.  The original
2831# with comments and its test file can be found in the Bazaar repository at,
2832#   lib/QueryRewriter.pm
2833#   t/lib/QueryRewriter.t
2834# See https://launchpad.net/percona-toolkit for more information.
2835# ###########################################################################
2836{
2837package QueryRewriter;
2838
2839use strict;
2840use warnings FATAL => 'all';
2841use English qw(-no_match_vars);
2842use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2843
2844our $verbs   = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
2845                  |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
2846my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
2847my $bal;
2848$bal         = qr/
2849                  \(
2850                  (?:
2851                     (?> [^()]+ )    # Non-parens without backtracking
2852                     |
2853                     (??{ $bal })    # Group with matching parens
2854                  )*
2855                  \)
2856                 /x;
2857
2858my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/;  # One-line comments
2859my $mlc_re = qr#/\*[^!].*?\*/#sm;                  # But not /*!version */
2860my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm;             # For SHOW + /*!version */
2861my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm;     # Variation for SHOW
2862
2863
2864sub new {
2865   my ( $class, %args ) = @_;
2866   my $self = { %args };
2867   return bless $self, $class;
2868}
2869
2870sub strip_comments {
2871   my ( $self, $query ) = @_;
2872   return unless $query;
2873   $query =~ s/$mlc_re//go;
2874   $query =~ s/$olc_re//go;
2875   if ( $query =~ m/$vlc_rf/i ) { # contains show + version
2876      my $qualifier = $1 || '';
2877      $query =~ s/$vlc_re/$qualifier/go;
2878   }
2879   return $query;
2880}
2881
2882sub shorten {
2883   my ( $self, $query, $length ) = @_;
2884   $query =~ s{
2885      \A(
2886         (?:INSERT|REPLACE)
2887         (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
2888         (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
2889      )
2890      \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
2891      {$1 /*... omitted ...*/$2}xsi;
2892
2893   return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
2894
2895   my $last_length  = 0;
2896   my $query_length = length($query);
2897   while (
2898      $length          > 0
2899      && $query_length > $length
2900      && $query_length < ( $last_length || $query_length + 1 )
2901   ) {
2902      $last_length = $query_length;
2903      $query =~ s{
2904         (\bIN\s*\()    # The opening of an IN list
2905         ([^\)]+)       # Contents of the list, assuming no item contains paren
2906         (?=\))           # Close of the list
2907      }
2908      {
2909         $1 . __shorten($2)
2910      }gexsi;
2911   }
2912
2913   return $query;
2914}
2915
2916sub __shorten {
2917   my ( $snippet ) = @_;
2918   my @vals = split(/,/, $snippet);
2919   return $snippet unless @vals > 20;
2920   my @keep = splice(@vals, 0, 20);  # Remove and save the first 20 items
2921   return
2922      join(',', @keep)
2923      . "/*... omitted "
2924      . scalar(@vals)
2925      . " items ...*/";
2926}
2927
2928sub fingerprint {
2929   my ( $self, $query ) = @_;
2930
2931   $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
2932      && return 'mysqldump';
2933   $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/#     # pt-table-checksum, etc query
2934      && return 'percona-toolkit';
2935   $query =~ m/\Aadministrator command: /
2936      && return $query;
2937   $query =~ m/\A\s*(call\s+\S+)\(/i
2938      && return lc($1); # Warning! $1 used, be careful.
2939   if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
2940      $query = $beginning; # Shorten multi-value INSERT statements ASAP
2941   }
2942
2943   $query =~ s/$mlc_re//go;
2944   $query =~ s/$olc_re//go;
2945   $query =~ s/\Ause \S+\Z/use ?/i       # Abstract the DB in USE
2946      && return $query;
2947
2948   $query =~ s/\\["']//g;                # quoted strings
2949   $query =~ s/".*?"/?/sg;               # quoted strings
2950   $query =~ s/'.*?'/?/sg;               # quoted strings
2951
2952   $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
2953
2954   if ( $self->{match_md5_checksums} ) {
2955      $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
2956   }
2957
2958   if ( !$self->{match_embedded_numbers} ) {
2959      $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
2960   }
2961   else {
2962      $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
2963   }
2964
2965   if ( $self->{match_md5_checksums} ) {
2966      $query =~ s/[xb+-]\?/?/g;
2967   }
2968   else {
2969      $query =~ s/[xb.+-]\?/?/g;
2970   }
2971
2972   $query =~ s/\A\s+//;                  # Chop off leading whitespace
2973   chomp $query;                         # Kill trailing whitespace
2974   $query =~ tr[ \n\t\r\f][ ]s;          # Collapse whitespace
2975   $query = lc $query;
2976   $query =~ s/\bnull\b/?/g;             # Get rid of NULLs
2977   $query =~ s{                          # Collapse IN and VALUES lists
2978               \b(in|values?)(?:[\s,]*\([\s?,]*\))+
2979              }
2980              {$1(?+)}gx;
2981   $query =~ s{                          # Collapse UNION
2982               \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
2983              }
2984              {$1 /*repeat$2*/}xg;
2985   $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
2986
2987   if ( $query =~ m/\bORDER BY /gi ) {  # Find, anchor on ORDER BY clause
2988      1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
2989   }
2990
2991   return $query;
2992}
2993
2994sub distill_verbs {
2995   my ( $self, $query ) = @_;
2996
2997   $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
2998   $query =~ m/\A\s*use\s+/          && return "USE";
2999   $query =~ m/\A\s*UNLOCK TABLES/i  && return "UNLOCK";
3000   $query =~ m/\A\s*xa\s+(\S+)/i     && return "XA_$1";
3001
3002   if ( $query =~ m/\A\s*LOAD/i ) {
3003      my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
3004      $tbl ||= '';
3005      $tbl =~ s/`//g;
3006      return "LOAD DATA $tbl";
3007   }
3008
3009   if ( $query =~ m/\Aadministrator command:/ ) {
3010      $query =~ s/administrator command:/ADMIN/;
3011      $query = uc $query;
3012      return $query;
3013   }
3014
3015   $query = $self->strip_comments($query);
3016
3017   if ( $query =~ m/\A\s*SHOW\s+/i ) {
3018      PTDEBUG && _d($query);
3019
3020      $query = uc $query;
3021      $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
3022      $query =~ s/\s+COUNT[^)]+\)//g;
3023
3024      $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
3025
3026      $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
3027      $query =~ s/\s+/ /g;
3028      PTDEBUG && _d($query);
3029      return $query;
3030   }
3031
3032   eval $QueryParser::data_def_stmts;
3033   eval $QueryParser::tbl_ident;
3034   my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
3035   if ( $dds) {
3036      $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
3037      my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
3038      $obj = uc $obj if $obj;
3039      PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
3040      my ($db_or_tbl)
3041         = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
3042      PTDEBUG && _d('Matches db or table:', $db_or_tbl);
3043      return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
3044   }
3045
3046   my @verbs = $query =~ m/\b($verbs)\b/gio;
3047   @verbs    = do {
3048      my $last = '';
3049      grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
3050   };
3051
3052   if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
3053      PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
3054      my $union = grep { $_ eq 'UNION' } @verbs;
3055      @verbs    = $union ? qw(SELECT UNION) : qw(SELECT);
3056   }
3057
3058   my $verb_str = join(q{ }, @verbs);
3059   return $verb_str;
3060}
3061
3062sub __distill_tables {
3063   my ( $self, $query, $table, %args ) = @_;
3064   my $qp = $args{QueryParser} || $self->{QueryParser};
3065   die "I need a QueryParser argument" unless $qp;
3066
3067   my @tables = map {
3068      $_ =~ s/`//g;
3069      $_ =~ s/(_?)[0-9]+/$1?/g;
3070      $_;
3071   } grep { defined $_ } $qp->get_tables($query);
3072
3073   push @tables, $table if $table;
3074
3075   @tables = do {
3076      my $last = '';
3077      grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
3078   };
3079
3080   return @tables;
3081}
3082
3083sub distill {
3084   my ( $self, $query, %args ) = @_;
3085
3086   if ( $args{generic} ) {
3087      my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
3088      return '' unless $cmd;
3089      $query = (uc $cmd) . ($arg ? " $arg" : '');
3090   }
3091   else {
3092      my ($verbs, $table)  = $self->distill_verbs($query, %args);
3093
3094      if ( $verbs && $verbs =~ m/^SHOW/ ) {
3095         my %alias_for = qw(
3096            SCHEMA   DATABASE
3097            KEYS     INDEX
3098            INDEXES  INDEX
3099         );
3100         map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
3101         $query = $verbs;
3102      }
3103      elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
3104         return $verbs;
3105      }
3106      else {
3107         my @tables = $self->__distill_tables($query, $table, %args);
3108         $query     = join(q{ }, $verbs, @tables);
3109      }
3110   }
3111
3112   if ( $args{trf} ) {
3113      $query = $args{trf}->($query, %args);
3114   }
3115
3116   return $query;
3117}
3118
3119sub convert_to_select {
3120   my ( $self, $query ) = @_;
3121   return unless $query;
3122
3123   return if $query =~ m/=\s*\(\s*SELECT /i;
3124
3125   $query =~ s{
3126                 \A.*?
3127                 update(?:\s+(?:low_priority|ignore))?\s+(.*?)
3128                 \s+set\b(.*?)
3129                 (?:\s*where\b(.*?))?
3130                 (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
3131                 \Z
3132              }
3133              {__update_to_select($1, $2, $3, $4)}exsi
3134      || $query =~ s{
3135                    \A.*?
3136                    (?:insert(?:\s+ignore)?|replace)\s+
3137                    .*?\binto\b(.*?)\(([^\)]+)\)\s*
3138                    values?\s*(\(.*?\))\s*
3139                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
3140                    \Z
3141                 }
3142                 {__insert_to_select($1, $2, $3)}exsi
3143      || $query =~ s{
3144                    \A.*?
3145                    (?:insert(?:\s+ignore)?|replace)\s+
3146                    (?:.*?\binto)\b(.*?)\s*
3147                    set\s+(.*?)\s*
3148                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
3149                    \Z
3150                 }
3151                 {__insert_to_select_with_set($1, $2)}exsi
3152      || $query =~ s{
3153                    \A.*?
3154                    delete\s+(.*?)
3155                    \bfrom\b(.*)
3156                    \Z
3157                 }
3158                 {__delete_to_select($1, $2)}exsi;
3159   $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
3160   $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
3161   return $query;
3162}
3163
3164sub convert_select_list {
3165   my ( $self, $query ) = @_;
3166   $query =~ s{
3167               \A\s*select(.*?)\bfrom\b
3168              }
3169              {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
3170   return $query;
3171}
3172
3173sub __delete_to_select {
3174   my ( $delete, $join ) = @_;
3175   if ( $join =~ m/\bjoin\b/ ) {
3176      return "select 1 from $join";
3177   }
3178   return "select * from $join";
3179}
3180
3181sub __insert_to_select {
3182   my ( $tbl, $cols, $vals ) = @_;
3183   PTDEBUG && _d('Args:', @_);
3184   my @cols = split(/,/, $cols);
3185   PTDEBUG && _d('Cols:', @cols);
3186   $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
3187   my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
3188   PTDEBUG && _d('Vals:', @vals);
3189   if ( @cols == @vals ) {
3190      return "select * from $tbl where "
3191         . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
3192   }
3193   else {
3194      return "select * from $tbl limit 1";
3195   }
3196}
3197
3198sub __insert_to_select_with_set {
3199   my ( $from, $set ) = @_;
3200   $set =~ s/,/ and /g;
3201   return "select * from $from where $set ";
3202}
3203
3204sub __update_to_select {
3205   my ( $from, $set, $where, $limit ) = @_;
3206   return "select $set from $from "
3207      . ( $where ? "where $where" : '' )
3208      . ( $limit ? " $limit "      : '' );
3209}
3210
3211sub wrap_in_derived {
3212   my ( $self, $query ) = @_;
3213   return unless $query;
3214   return $query =~ m/\A\s*select/i
3215      ? "select 1 from ($query) as x limit 1"
3216      : $query;
3217}
3218
3219sub _d {
3220   my ($package, undef, $line) = caller 0;
3221   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3222        map { defined $_ ? $_ : 'undef' }
3223        @_;
3224   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3225}
3226
32271;
3228}
3229# ###########################################################################
3230# End QueryRewriter package
3231# ###########################################################################
3232
3233# ###########################################################################
3234# Processlist package
3235# This package is a copy without comments from the original.  The original
3236# with comments and its test file can be found in the Bazaar repository at,
3237#   lib/Processlist.pm
3238#   t/lib/Processlist.t
3239# See https://launchpad.net/percona-toolkit for more information.
3240# ###########################################################################
3241{
3242package Processlist;
3243
3244use strict;
3245use warnings FATAL => 'all';
3246use English qw(-no_match_vars);
3247use Time::HiRes qw(time usleep);
3248use List::Util qw(max);
3249use Data::Dumper;
3250$Data::Dumper::Indent    = 1;
3251$Data::Dumper::Sortkeys  = 1;
3252$Data::Dumper::Quotekeys = 0;
3253
3254use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3255use constant {
3256   ID      => 0,
3257   USER    => 1,
3258   HOST    => 2,
3259   DB      => 3,
3260   COMMAND => 4,
3261   TIME    => 5,
3262   STATE   => 6,
3263   INFO    => 7,
3264   START   => 8,  # Calculated start time of statement ($start - TIME)
3265   ETIME   => 9,  # Exec time of SHOW PROCESSLIST (margin of error in START)
3266   FSEEN   => 10, # First time ever seen
3267   PROFILE => 11, # Profile of individual STATE times
3268};
3269
3270
3271sub new {
3272   my ( $class, %args ) = @_;
3273   foreach my $arg ( qw(MasterSlave) ) {
3274      die "I need a $arg argument" unless $args{$arg};
3275   }
3276   my $kill_busy_commands = {};
3277   if ($args{kill_busy_commands}) {
3278       for my $command (split /,/,$args{kill_busy_commands}) {
3279           $command =~ s/^\s+|\s+$//g;
3280           $kill_busy_commands->{$command} = 1;
3281       }
3282   } else {
3283       $kill_busy_commands->{Query} = 1;
3284   }
3285   $args{kill_busy_commands} = $kill_busy_commands;
3286
3287   my $self = {
3288      %args,
3289      polls       => 0,
3290      last_poll   => 0,
3291      active_cxn  => {},  # keyed off ID
3292      event_cache => [],
3293      _reasons_for_matching => {},
3294   };
3295   return bless $self, $class;
3296}
3297
3298sub parse_event {
3299   my ( $self, %args ) = @_;
3300   my @required_args = qw(code);
3301   foreach my $arg ( @required_args ) {
3302     die "I need a $arg argument" unless $args{$arg};
3303   }
3304   my ($code) = @args{@required_args};
3305
3306   if ( @{$self->{event_cache}} ) {
3307      PTDEBUG && _d("Returning cached event");
3308      return shift @{$self->{event_cache}};
3309   }
3310
3311   if ( $self->{interval} && $self->{polls} ) {
3312      PTDEBUG && _d("Sleeping between polls");
3313      usleep($self->{interval});
3314   }
3315
3316   PTDEBUG && _d("Polling PROCESSLIST");
3317   my ($time, $etime) = @args{qw(time etime)};
3318   my $start          = $etime ? 0 : time;  # don't need start if etime given
3319   my $rows           = $code->();
3320   if ( !$rows ) {
3321      warn "Processlist callback did not return an arrayref";
3322      return;
3323   }
3324   $time  = time           unless $time;
3325   $etime = $time - $start unless $etime;
3326   $self->{polls}++;
3327   PTDEBUG && _d('Rows:', ($rows ? scalar @$rows : 0), 'in', $etime, 'seconds');
3328
3329   my $active_cxn = $self->{active_cxn};
3330   my $curr_cxn   = {};
3331   my @new_cxn    = ();
3332
3333   CURRENTLY_ACTIVE_CXN:
3334   foreach my $curr ( @$rows ) {
3335
3336      $curr_cxn->{$curr->[ID]} = $curr;
3337
3338      my $query_start = $time - ($curr->[TIME] || 0);
3339
3340      if ( $active_cxn->{$curr->[ID]} ) {
3341         PTDEBUG && _d('Checking existing cxn', $curr->[ID]);
3342         my $prev      = $active_cxn->{$curr->[ID]}; # previous state of cxn
3343         my $new_query = 0;
3344         my $fudge     = ($curr->[TIME] || 0) =~ m/\D/ ? 0.001 : 1; # micro-t?
3345
3346         if ( $prev->[INFO] ) {
3347            if ( !$curr->[INFO] || $prev->[INFO] ne $curr->[INFO] ) {
3348               PTDEBUG && _d('Info is different; new query');
3349               $new_query = 1;
3350            }
3351            elsif ( defined $curr->[TIME] && $curr->[TIME] < $prev->[TIME] ) {
3352               PTDEBUG && _d('Time is less than previous; new query');
3353               $new_query = 1;
3354            }
3355            elsif ( $curr->[INFO] && defined $curr->[TIME]
3356                    && $query_start - $etime - $prev->[START] > $fudge)
3357            {
3358               my $ms = $self->{MasterSlave};
3359
3360               my $is_repl_thread = $ms->is_replication_thread({
3361                                        Command => $curr->[COMMAND],
3362                                        User    => $curr->[USER],
3363                                        State   => $curr->[STATE],
3364                                        Id      => $curr->[ID]});
3365               if ( $is_repl_thread ) {
3366                  PTDEBUG &&
3367                  _d(q{Query has restarted but it's a replication thread, ignoring});
3368               }
3369               else {
3370                  PTDEBUG && _d('Query restarted; new query',
3371                     $query_start, $etime, $prev->[START], $fudge);
3372                  $new_query = 1;
3373               }
3374            }
3375
3376            if ( $new_query ) {
3377               $self->_update_profile($prev, $curr, $time);
3378               push @{$self->{event_cache}},
3379                  $self->make_event($prev, $time);
3380            }
3381         }
3382
3383         if ( $curr->[INFO] ) {
3384            if ( $prev->[INFO] && !$new_query ) {
3385               PTDEBUG && _d("Query on cxn", $curr->[ID], "hasn't changed");
3386               $self->_update_profile($prev, $curr, $time);
3387            }
3388            else {
3389               PTDEBUG && _d('Saving new query, state', $curr->[STATE]);
3390               push @new_cxn, [
3391                  @{$curr}[0..7],           # proc info
3392                  int($query_start),        # START
3393                  $etime,                   # ETIME
3394                  $time,                    # FSEEN
3395                  { ($curr->[STATE] || "") => 0 }, # PROFILE
3396               ];
3397            }
3398         }
3399      }
3400      else {
3401         PTDEBUG && _d('New cxn', $curr->[ID]);
3402         if ( $curr->[INFO] && defined $curr->[TIME] ) {
3403            PTDEBUG && _d('Saving query of new cxn, state', $curr->[STATE]);
3404            push @new_cxn, [
3405               @{$curr}[0..7],           # proc info
3406               int($query_start),        # START
3407               $etime,                   # ETIME
3408               $time,                    # FSEEN
3409               { ($curr->[STATE] || "") => 0 }, # PROFILE
3410            ];
3411         }
3412      }
3413   }  # CURRENTLY_ACTIVE_CXN
3414
3415   PREVIOUSLY_ACTIVE_CXN:
3416   foreach my $prev ( values %$active_cxn ) {
3417      if ( !$curr_cxn->{$prev->[ID]} ) {
3418         PTDEBUG && _d('cxn', $prev->[ID], 'ended');
3419         push @{$self->{event_cache}},
3420            $self->make_event($prev, $time);
3421         delete $active_cxn->{$prev->[ID]};
3422      }
3423      elsif (   ($curr_cxn->{$prev->[ID]}->[COMMAND] || "") eq 'Sleep'
3424             || !$curr_cxn->{$prev->[ID]}->[STATE]
3425             || !$curr_cxn->{$prev->[ID]}->[INFO] ) {
3426         PTDEBUG && _d('cxn', $prev->[ID], 'became idle');
3427         delete $active_cxn->{$prev->[ID]};
3428      }
3429   }
3430
3431   map { $active_cxn->{$_->[ID]} = $_; } @new_cxn;
3432
3433   $self->{last_poll} = $time;
3434
3435   my $event = shift @{$self->{event_cache}};
3436   PTDEBUG && _d(scalar @{$self->{event_cache}}, "events in cache");
3437   return $event;
3438}
3439
3440sub make_event {
3441   my ( $self, $row, $time ) = @_;
3442
3443   my $observed_time = $time - $row->[FSEEN];
3444   my $Query_time    = max($row->[TIME], $observed_time);
3445
3446
3447
3448
3449   my $event = {
3450      id         => $row->[ID],
3451      db         => $row->[DB],
3452      user       => $row->[USER],
3453      host       => $row->[HOST],
3454      arg        => $row->[INFO],
3455      bytes      => length($row->[INFO]),
3456      ts         => Transformers::ts($row->[START] + $row->[TIME]), # Query END time
3457      Query_time => $Query_time,
3458      Lock_time  => $row->[PROFILE]->{Locked} || 0,
3459   };
3460   PTDEBUG && _d('Properties of event:', Dumper($event));
3461   return $event;
3462}
3463
3464sub _get_active_cxn {
3465   my ( $self ) = @_;
3466   PTDEBUG && _d("Active cxn:", Dumper($self->{active_cxn}));
3467   return $self->{active_cxn};
3468}
3469
3470sub _update_profile {
3471   my ( $self, $prev, $curr, $time ) = @_;
3472   return unless $prev && $curr;
3473
3474   my $time_elapsed = $time - $self->{last_poll};
3475
3476
3477   if ( ($prev->[STATE] || "") eq ($curr->[STATE] || "") ) {
3478      PTDEBUG && _d("Query is still in", $curr->[STATE], "state");
3479      $prev->[PROFILE]->{$prev->[STATE] || ""} += $time_elapsed;
3480   }
3481   else {
3482      PTDEBUG && _d("Query changed from state", $prev->[STATE],
3483         "to", $curr->[STATE]);
3484      my $half_time = ($time_elapsed || 0) / 2;
3485
3486      $prev->[PROFILE]->{$prev->[STATE] || ""} += $half_time;
3487
3488      $prev->[STATE] = $curr->[STATE];
3489      $prev->[PROFILE]->{$curr->[STATE] || ""}  = $half_time;
3490   }
3491
3492   return;
3493}
3494
3495sub find {
3496   my ( $self, $proclist, %find_spec ) = @_;
3497   PTDEBUG && _d('find specs:', Dumper(\%find_spec));
3498   my $ms  = $self->{MasterSlave};
3499
3500   my @matches;
3501   $self->{_reasons_for_matching} = undef;
3502   QUERY:
3503   foreach my $query ( @$proclist ) {
3504      PTDEBUG && _d('Checking query', Dumper($query));
3505      my $matched = 0;
3506
3507      if (    !$find_spec{replication_threads}
3508           && $ms->is_replication_thread($query) ) {
3509         PTDEBUG && _d('Skipping replication thread');
3510         next QUERY;
3511      }
3512
3513      if ( $find_spec{busy_time} && exists($self->{kill_busy_commands}->{$query->{Command} || ''}) ) {
3514         next QUERY unless defined($query->{Time});
3515         if ( $query->{Time} < $find_spec{busy_time} ) {
3516            PTDEBUG && _d("Query isn't running long enough");
3517            next QUERY;
3518         }
3519         my $reason = 'Exceeds busy time';
3520         PTDEBUG && _d($reason);
3521         push @{$self->{_reasons_for_matching}->{$query} ||= []}, $reason;
3522         $matched++;
3523      }
3524
3525      if ( $find_spec{idle_time} && ($query->{Command} || '') eq 'Sleep' ) {
3526         next QUERY unless defined($query->{Time});
3527         if ( $query->{Time} < $find_spec{idle_time} ) {
3528            PTDEBUG && _d("Query isn't idle long enough");
3529            next QUERY;
3530         }
3531         my $reason = 'Exceeds idle time';
3532         PTDEBUG && _d($reason);
3533         push @{$self->{_reasons_for_matching}->{$query} ||= []}, $reason;
3534         $matched++;
3535      }
3536
3537      PROPERTY:
3538      foreach my $property ( qw(Id User Host db State Command Info) ) {
3539         my $filter = "_find_match_$property";
3540         if ( defined $find_spec{ignore}->{$property}
3541              && $self->$filter($query, $find_spec{ignore}->{$property}) ) {
3542            PTDEBUG && _d('Query matches ignore', $property, 'spec');
3543            next QUERY;
3544         }
3545         if ( defined $find_spec{match}->{$property} ) {
3546            if ( !$self->$filter($query, $find_spec{match}->{$property}) ) {
3547               PTDEBUG && _d('Query does not match', $property, 'spec');
3548               next QUERY;
3549            }
3550            my $reason = 'Query matches ' . $property . ' spec';
3551            PTDEBUG && _d($reason);
3552            push @{$self->{_reasons_for_matching}->{$query} ||= []}, $reason;
3553            $matched++;
3554         }
3555      }
3556      if ( $matched || $find_spec{all} ) {
3557         PTDEBUG && _d("Query matched one or more specs, adding");
3558         push @matches, $query;
3559         next QUERY;
3560      }
3561      PTDEBUG && _d('Query does not match any specs, ignoring');
3562   } # QUERY
3563
3564   return @matches;
3565}
3566
3567sub _find_match_Id {
3568   my ( $self, $query, $property ) = @_;
3569   return defined $property && defined $query->{Id} && $query->{Id} == $property;
3570}
3571
3572sub _find_match_User {
3573   my ( $self, $query, $property ) = @_;
3574   return defined $property && defined $query->{User}
3575      && $query->{User} =~ m/$property/;
3576}
3577
3578sub _find_match_Host {
3579   my ( $self, $query, $property ) = @_;
3580   return defined $property && defined $query->{Host}
3581      && $query->{Host} =~ m/$property/;
3582}
3583
3584sub _find_match_db {
3585   my ( $self, $query, $property ) = @_;
3586   return defined $property && defined $query->{db}
3587      && $query->{db} =~ m/$property/;
3588}
3589
3590sub _find_match_State {
3591   my ( $self, $query, $property ) = @_;
3592   return defined $property && defined $query->{State}
3593      && $query->{State} =~ m/$property/;
3594}
3595
3596sub _find_match_Command {
3597   my ( $self, $query, $property ) = @_;
3598   return defined $property && defined $query->{Command}
3599      && $query->{Command} =~ m/$property/;
3600}
3601
3602sub _find_match_Info {
3603   my ( $self, $query, $property ) = @_;
3604   return defined $property && defined $query->{Info}
3605      && $query->{Info} =~ m/$property/;
3606}
3607
3608sub _d {
3609   my ($package, undef, $line) = caller 0;
3610   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3611        map { defined $_ ? $_ : 'undef' }
3612        @_;
3613   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3614}
3615
36161;
3617}
3618# ###########################################################################
3619# End Processlist package
3620# ###########################################################################
3621
3622# ###########################################################################
3623# TcpdumpParser package
3624# This package is a copy without comments from the original.  The original
3625# with comments and its test file can be found in the Bazaar repository at,
3626#   lib/TcpdumpParser.pm
3627#   t/lib/TcpdumpParser.t
3628# See https://launchpad.net/percona-toolkit for more information.
3629# ###########################################################################
3630{
3631package TcpdumpParser;
3632
3633use strict;
3634use warnings FATAL => 'all';
3635use English qw(-no_match_vars);
3636use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3637
3638use Data::Dumper;
3639$Data::Dumper::Indent    = 1;
3640$Data::Dumper::Sortkeys  = 1;
3641$Data::Dumper::Quotekeys = 0;
3642
3643sub new {
3644   my ( $class, %args ) = @_;
3645   my $self = {};
3646   return bless $self, $class;
3647}
3648
3649sub parse_event {
3650   my ( $self, %args ) = @_;
3651   my @required_args = qw(next_event tell);
3652   foreach my $arg ( @required_args ) {
3653      die "I need a $arg argument" unless $args{$arg};
3654   }
3655   my ($next_event, $tell) = @args{@required_args};
3656
3657   local $INPUT_RECORD_SEPARATOR = "\n20";
3658
3659   my $pos_in_log = $tell->();
3660   while ( defined(my $raw_packet = $next_event->()) ) {
3661      next if $raw_packet =~ m/^$/;  # issue 564
3662      $pos_in_log -= 1 if $pos_in_log;
3663
3664      $raw_packet =~ s/\n20\Z//;
3665      $raw_packet = "20$raw_packet" if $raw_packet =~ /\A20-\d\d-\d\d/; # workaround for year 2020 problem
3666      $raw_packet = "20$raw_packet" unless $raw_packet =~ m/\A20/;
3667
3668      $raw_packet =~ s/0x0000:.+?(450.) /0x0000:  $1 /;
3669
3670      my $packet = $self->_parse_packet($raw_packet);
3671      $packet->{pos_in_log} = $pos_in_log;
3672      $packet->{raw_packet} = $raw_packet;
3673
3674      $args{stats}->{events_read}++ if $args{stats};
3675
3676      return $packet;
3677   }
3678
3679   $args{oktorun}->(0) if $args{oktorun};
3680   return;
3681}
3682
3683sub _parse_packet {
3684   my ( $self, $packet ) = @_;
3685   die "I need a packet" unless $packet;
3686
3687   my ( $ts, $source, $dest )  = $packet =~ m/\A(\S+ \S+).*? IP .*?(\S+) > (\S+):/;
3688   my ( $src_host, $src_port ) = $source =~ m/((?:\d+\.){3}\d+)\.(\w+)/;
3689   my ( $dst_host, $dst_port ) = $dest   =~ m/((?:\d+\.){3}\d+)\.(\w+)/;
3690
3691   $src_port = $self->port_number($src_port);
3692   $dst_port = $self->port_number($dst_port);
3693
3694   my $hex = qr/[0-9a-f]/;
3695   (my $data = join('', $packet =~ m/\s+0x$hex+:\s((?:\s$hex{2,4})+)/go)) =~ s/\s+//g;
3696
3697   my $ip_hlen = hex(substr($data, 1, 1)); # Num of 32-bit words in header.
3698   my $ip_plen = hex(substr($data, 4, 4)); # Num of BYTES in IPv4 datagram.
3699   my $complete = length($data) == 2 * $ip_plen ? 1 : 0;
3700
3701   my $tcp_hlen = hex(substr($data, ($ip_hlen + 3) * 8, 1));
3702
3703   my $seq = hex(substr($data, ($ip_hlen + 1) * 8, 8));
3704   my $ack = hex(substr($data, ($ip_hlen + 2) * 8, 8));
3705
3706   my $flags = hex(substr($data, (($ip_hlen + 3) * 8) + 2, 2));
3707
3708   $data = substr($data, ($ip_hlen + $tcp_hlen) * 8);
3709
3710   my $pkt = {
3711      ts        => $ts,
3712      seq       => $seq,
3713      ack       => $ack,
3714      fin       => $flags & 0x01,
3715      syn       => $flags & 0x02,
3716      rst       => $flags & 0x04,
3717      src_host  => $src_host,
3718      src_port  => $src_port,
3719      dst_host  => $dst_host,
3720      dst_port  => $dst_port,
3721      complete  => $complete,
3722      ip_hlen   => $ip_hlen,
3723      tcp_hlen  => $tcp_hlen,
3724      dgram_len => $ip_plen,
3725      data_len  => $ip_plen - (($ip_hlen + $tcp_hlen) * 4),
3726      data      => $data ? substr($data, 0, 10).(length $data > 10 ? '...' : '')
3727                         : '',
3728   };
3729   PTDEBUG && _d('packet:', Dumper($pkt));
3730   $pkt->{data} = $data;
3731   return $pkt;
3732}
3733
3734sub port_number {
3735   my ( $self, $port ) = @_;
3736   return unless $port;
3737   return $port eq 'mysql' ? 3306 : $port;
3738}
3739
3740sub _d {
3741   my ($package, undef, $line) = caller 0;
3742   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3743        map { defined $_ ? $_ : 'undef' }
3744        @_;
3745   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3746}
3747
37481;
3749}
3750# ###########################################################################
3751# End TcpdumpParser package
3752# ###########################################################################
3753
3754# ###########################################################################
3755# MySQLProtocolParser package
3756# This package is a copy without comments from the original.  The original
3757# with comments and its test file can be found in the Bazaar repository at,
3758#   lib/MySQLProtocolParser.pm
3759#   t/lib/MySQLProtocolParser.t
3760# See https://launchpad.net/percona-toolkit for more information.
3761# ###########################################################################
3762{
3763package MySQLProtocolParser;
3764
3765use strict;
3766use warnings FATAL => 'all';
3767use English qw(-no_match_vars);
3768use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3769
3770eval {
3771   require IO::Uncompress::Inflate; # yum: perl-IO-Compress-Zlib
3772   IO::Uncompress::Inflate->import(qw(inflate $InflateError));
3773};
3774
3775use Data::Dumper;
3776$Data::Dumper::Indent    = 1;
3777$Data::Dumper::Sortkeys  = 1;
3778$Data::Dumper::Quotekeys = 0;
3779
3780BEGIN { our @ISA = 'ProtocolParser'; }
3781
3782use constant {
3783   COM_SLEEP               => '00',
3784   COM_QUIT                => '01',
3785   COM_INIT_DB             => '02',
3786   COM_QUERY               => '03',
3787   COM_FIELD_LIST          => '04',
3788   COM_CREATE_DB           => '05',
3789   COM_DROP_DB             => '06',
3790   COM_REFRESH             => '07',
3791   COM_SHUTDOWN            => '08',
3792   COM_STATISTICS          => '09',
3793   COM_PROCESS_INFO        => '0a',
3794   COM_CONNECT             => '0b',
3795   COM_PROCESS_KILL        => '0c',
3796   COM_DEBUG               => '0d',
3797   COM_PING                => '0e',
3798   COM_TIME                => '0f',
3799   COM_DELAYED_INSERT      => '10',
3800   COM_CHANGE_USER         => '11',
3801   COM_BINLOG_DUMP         => '12',
3802   COM_TABLE_DUMP          => '13',
3803   COM_CONNECT_OUT         => '14',
3804   COM_REGISTER_SLAVE      => '15',
3805   COM_STMT_PREPARE        => '16',
3806   COM_STMT_EXECUTE        => '17',
3807   COM_STMT_SEND_LONG_DATA => '18',
3808   COM_STMT_CLOSE          => '19',
3809   COM_STMT_RESET          => '1a',
3810   COM_SET_OPTION          => '1b',
3811   COM_STMT_FETCH          => '1c',
3812   SERVER_QUERY_NO_GOOD_INDEX_USED => 16,
3813   SERVER_QUERY_NO_INDEX_USED      => 32,
3814};
3815
3816my %com_for = (
3817   '00' => 'COM_SLEEP',
3818   '01' => 'COM_QUIT',
3819   '02' => 'COM_INIT_DB',
3820   '03' => 'COM_QUERY',
3821   '04' => 'COM_FIELD_LIST',
3822   '05' => 'COM_CREATE_DB',
3823   '06' => 'COM_DROP_DB',
3824   '07' => 'COM_REFRESH',
3825   '08' => 'COM_SHUTDOWN',
3826   '09' => 'COM_STATISTICS',
3827   '0a' => 'COM_PROCESS_INFO',
3828   '0b' => 'COM_CONNECT',
3829   '0c' => 'COM_PROCESS_KILL',
3830   '0d' => 'COM_DEBUG',
3831   '0e' => 'COM_PING',
3832   '0f' => 'COM_TIME',
3833   '10' => 'COM_DELAYED_INSERT',
3834   '11' => 'COM_CHANGE_USER',
3835   '12' => 'COM_BINLOG_DUMP',
3836   '13' => 'COM_TABLE_DUMP',
3837   '14' => 'COM_CONNECT_OUT',
3838   '15' => 'COM_REGISTER_SLAVE',
3839   '16' => 'COM_STMT_PREPARE',
3840   '17' => 'COM_STMT_EXECUTE',
3841   '18' => 'COM_STMT_SEND_LONG_DATA',
3842   '19' => 'COM_STMT_CLOSE',
3843   '1a' => 'COM_STMT_RESET',
3844   '1b' => 'COM_SET_OPTION',
3845   '1c' => 'COM_STMT_FETCH',
3846);
3847
3848my %flag_for = (
3849   'CLIENT_LONG_PASSWORD'     => 1,       # new more secure passwords
3850   'CLIENT_FOUND_ROWS'        => 2,       # Found instead of affected rows
3851   'CLIENT_LONG_FLAG'         => 4,       # Get all column flags
3852   'CLIENT_CONNECT_WITH_DB'   => 8,       # One can specify db on connect
3853   'CLIENT_NO_SCHEMA'         => 16,      # Don't allow database.table.column
3854   'CLIENT_COMPRESS'          => 32,      # Can use compression protocol
3855   'CLIENT_ODBC'              => 64,      # Odbc client
3856   'CLIENT_LOCAL_FILES'       => 128,     # Can use LOAD DATA LOCAL
3857   'CLIENT_IGNORE_SPACE'      => 256,     # Ignore spaces before '('
3858   'CLIENT_PROTOCOL_41'       => 512,     # New 4.1 protocol
3859   'CLIENT_INTERACTIVE'       => 1024,    # This is an interactive client
3860   'CLIENT_SSL'               => 2048,    # Switch to SSL after handshake
3861   'CLIENT_IGNORE_SIGPIPE'    => 4096,    # IGNORE sigpipes
3862   'CLIENT_TRANSACTIONS'      => 8192,    # Client knows about transactions
3863   'CLIENT_RESERVED'          => 16384,   # Old flag for 4.1 protocol
3864   'CLIENT_SECURE_CONNECTION' => 32768,   # New 4.1 authentication
3865   'CLIENT_MULTI_STATEMENTS'  => 65536,   # Enable/disable multi-stmt support
3866   'CLIENT_MULTI_RESULTS'     => 131072,  # Enable/disable multi-results
3867);
3868
3869use constant {
3870   MYSQL_TYPE_DECIMAL      => 0,
3871   MYSQL_TYPE_TINY         => 1,
3872   MYSQL_TYPE_SHORT        => 2,
3873   MYSQL_TYPE_LONG         => 3,
3874   MYSQL_TYPE_FLOAT        => 4,
3875   MYSQL_TYPE_DOUBLE       => 5,
3876   MYSQL_TYPE_NULL         => 6,
3877   MYSQL_TYPE_TIMESTAMP    => 7,
3878   MYSQL_TYPE_LONGLONG     => 8,
3879   MYSQL_TYPE_INT24        => 9,
3880   MYSQL_TYPE_DATE         => 10,
3881   MYSQL_TYPE_TIME         => 11,
3882   MYSQL_TYPE_DATETIME     => 12,
3883   MYSQL_TYPE_YEAR         => 13,
3884   MYSQL_TYPE_NEWDATE      => 14,
3885   MYSQL_TYPE_VARCHAR      => 15,
3886   MYSQL_TYPE_BIT          => 16,
3887   MYSQL_TYPE_NEWDECIMAL   => 246,
3888   MYSQL_TYPE_ENUM         => 247,
3889   MYSQL_TYPE_SET          => 248,
3890   MYSQL_TYPE_TINY_BLOB    => 249,
3891   MYSQL_TYPE_MEDIUM_BLOB  => 250,
3892   MYSQL_TYPE_LONG_BLOB    => 251,
3893   MYSQL_TYPE_BLOB         => 252,
3894   MYSQL_TYPE_VAR_STRING   => 253,
3895   MYSQL_TYPE_STRING       => 254,
3896   MYSQL_TYPE_GEOMETRY     => 255,
3897};
3898
3899my %type_for = (
3900   0   => 'MYSQL_TYPE_DECIMAL',
3901   1   => 'MYSQL_TYPE_TINY',
3902   2   => 'MYSQL_TYPE_SHORT',
3903   3   => 'MYSQL_TYPE_LONG',
3904   4   => 'MYSQL_TYPE_FLOAT',
3905   5   => 'MYSQL_TYPE_DOUBLE',
3906   6   => 'MYSQL_TYPE_NULL',
3907   7   => 'MYSQL_TYPE_TIMESTAMP',
3908   8   => 'MYSQL_TYPE_LONGLONG',
3909   9   => 'MYSQL_TYPE_INT24',
3910   10  => 'MYSQL_TYPE_DATE',
3911   11  => 'MYSQL_TYPE_TIME',
3912   12  => 'MYSQL_TYPE_DATETIME',
3913   13  => 'MYSQL_TYPE_YEAR',
3914   14  => 'MYSQL_TYPE_NEWDATE',
3915   15  => 'MYSQL_TYPE_VARCHAR',
3916   16  => 'MYSQL_TYPE_BIT',
3917   246 => 'MYSQL_TYPE_NEWDECIMAL',
3918   247 => 'MYSQL_TYPE_ENUM',
3919   248 => 'MYSQL_TYPE_SET',
3920   249 => 'MYSQL_TYPE_TINY_BLOB',
3921   250 => 'MYSQL_TYPE_MEDIUM_BLOB',
3922   251 => 'MYSQL_TYPE_LONG_BLOB',
3923   252 => 'MYSQL_TYPE_BLOB',
3924   253 => 'MYSQL_TYPE_VAR_STRING',
3925   254 => 'MYSQL_TYPE_STRING',
3926   255 => 'MYSQL_TYPE_GEOMETRY',
3927);
3928
3929my %unpack_type = (
3930   MYSQL_TYPE_NULL       => sub { return 'NULL', 0; },
3931   MYSQL_TYPE_TINY       => sub { return to_num(@_, 1), 1; },
3932   MySQL_TYPE_SHORT      => sub { return to_num(@_, 2), 2; },
3933   MYSQL_TYPE_LONG       => sub { return to_num(@_, 4), 4; },
3934   MYSQL_TYPE_LONGLONG   => sub { return to_num(@_, 8), 8; },
3935   MYSQL_TYPE_DOUBLE     => sub { return to_double(@_), 8; },
3936   MYSQL_TYPE_VARCHAR    => \&unpack_string,
3937   MYSQL_TYPE_VAR_STRING => \&unpack_string,
3938   MYSQL_TYPE_STRING     => \&unpack_string,
3939);
3940
3941sub new {
3942   my ( $class, %args ) = @_;
3943
3944   my $self = {
3945      server         => $args{server},
3946      port           => $args{port} || '3306',
3947      version        => '41',    # MySQL proto version; not used yet
3948      sessions       => {},
3949      o              => $args{o},
3950      fake_thread_id => 2**32,   # see _make_event()
3951      null_event     => $args{null_event},
3952   };
3953   PTDEBUG && $self->{server} && _d('Watching only server', $self->{server});
3954   return bless $self, $class;
3955}
3956
3957sub parse_event {
3958   my ( $self, %args ) = @_;
3959   my @required_args = qw(event);
3960   foreach my $arg ( @required_args ) {
3961      die "I need a $arg argument" unless $args{$arg};
3962   }
3963   my $packet = @args{@required_args};
3964
3965   my $src_host = "$packet->{src_host}:$packet->{src_port}";
3966   my $dst_host = "$packet->{dst_host}:$packet->{dst_port}";
3967
3968   if ( my $server = $self->{server} ) {  # Watch only the given server.
3969      $server .= ":$self->{port}";
3970      if ( $src_host ne $server && $dst_host ne $server ) {
3971         PTDEBUG && _d('Packet is not to or from', $server);
3972         return $self->{null_event};
3973      }
3974   }
3975
3976   my $packet_from;
3977   my $client;
3978   if ( $src_host =~ m/:$self->{port}$/ ) {
3979      $packet_from = 'server';
3980      $client      = $dst_host;
3981   }
3982   elsif ( $dst_host =~ m/:$self->{port}$/ ) {
3983      $packet_from = 'client';
3984      $client      = $src_host;
3985   }
3986   else {
3987      PTDEBUG && _d('Packet is not to or from a MySQL server');
3988      return $self->{null_event};
3989   }
3990   PTDEBUG && _d('Client', $client);
3991
3992   my $packetno = -1;
3993   if ( $packet->{data_len} >= 5 ) {
3994      $packetno = to_num(substr($packet->{data}, 6, 2));
3995   }
3996   if ( !exists $self->{sessions}->{$client} ) {
3997      if ( $packet->{syn} ) {
3998         PTDEBUG && _d('New session (SYN)');
3999      }
4000      elsif ( $packetno == 0 ) {
4001         PTDEBUG && _d('New session (packetno 0)');
4002      }
4003      else {
4004         PTDEBUG && _d('Ignoring mid-stream', $packet_from, 'data,',
4005            'packetno', $packetno);
4006         return $self->{null_event};
4007      }
4008
4009      $self->{sessions}->{$client} = {
4010         client        => $client,
4011         ts            => $packet->{ts},
4012         state         => undef,
4013         compress      => undef,
4014         raw_packets   => [],
4015         buff          => '',
4016         sths          => {},
4017         attribs       => {},
4018         n_queries     => 0,
4019      };
4020   }
4021   my $session = $self->{sessions}->{$client};
4022   PTDEBUG && _d('Client state:', $session->{state});
4023
4024   push @{$session->{raw_packets}}, $packet->{raw_packet};
4025
4026   if ( $packet->{syn} && ($session->{n_queries} > 0 || $session->{state}) ) {
4027      PTDEBUG && _d('Client port reuse and last session did not quit');
4028      $self->fail_session($session,
4029            'client port reuse and last session did not quit');
4030      return $self->parse_event(%args);
4031   }
4032
4033   if ( $packet->{data_len} == 0 ) {
4034      PTDEBUG && _d('TCP control:',
4035         map { uc $_ } grep { $packet->{$_} } qw(syn ack fin rst));
4036      if ( $packet->{'fin'}
4037           && ($session->{state} || '') eq 'server_handshake' ) {
4038         PTDEBUG && _d('Client aborted connection');
4039         my $event = {
4040            cmd => 'Admin',
4041            arg => 'administrator command: Connect',
4042            ts  => $packet->{ts},
4043         };
4044         $session->{attribs}->{Error_msg} = 'Client closed connection during handshake';
4045         $event = $self->_make_event($event, $packet, $session);
4046         delete $self->{sessions}->{$session->{client}};
4047         return $event;
4048      }
4049      return $self->{null_event};
4050   }
4051
4052   if ( $session->{compress} ) {
4053      return unless $self->uncompress_packet($packet, $session);
4054   }
4055
4056   if ( $session->{buff} && $packet_from eq 'client' ) {
4057      $session->{buff}      .= $packet->{data};
4058      $packet->{data}        = $session->{buff};
4059      $session->{buff_left} -= $packet->{data_len};
4060
4061      $packet->{mysql_data_len} = $session->{mysql_data_len};
4062      $packet->{number}         = $session->{number};
4063
4064      PTDEBUG && _d('Appending data to buff; expecting',
4065         $session->{buff_left}, 'more bytes');
4066   }
4067   else {
4068      eval {
4069         remove_mysql_header($packet);
4070      };
4071      if ( $EVAL_ERROR ) {
4072         PTDEBUG && _d('remove_mysql_header() failed; failing session');
4073         $session->{EVAL_ERROR} = $EVAL_ERROR;
4074         $self->fail_session($session, 'remove_mysql_header() failed');
4075         return $self->{null_event};
4076      }
4077   }
4078
4079   my $event;
4080   if ( $packet_from eq 'server' ) {
4081      $event = $self->_packet_from_server($packet, $session, $args{misc});
4082   }
4083   elsif ( $packet_from eq 'client' ) {
4084      if ( $session->{buff} ) {
4085         if ( $session->{buff_left} <= 0 ) {
4086            PTDEBUG && _d('Data is complete');
4087            $self->_delete_buff($session);
4088         }
4089         else {
4090            return $self->{null_event};  # waiting for more data; buff_left was reported earlier
4091         }
4092      }
4093      elsif ( $packet->{mysql_data_len} > ($packet->{data_len} - 4) ) {
4094
4095         if ( $session->{cmd} && ($session->{state} || '') eq 'awaiting_reply' ) {
4096            PTDEBUG && _d('No server OK to previous command (frag)');
4097            $self->fail_session($session, 'no server OK to previous command');
4098            $packet->{data} = $packet->{mysql_hdr} . $packet->{data};
4099            return $self->parse_event(%args);
4100         }
4101
4102         $session->{buff}           = $packet->{data};
4103         $session->{mysql_data_len} = $packet->{mysql_data_len};
4104         $session->{number}         = $packet->{number};
4105
4106         $session->{buff_left}
4107            ||= $packet->{mysql_data_len} - ($packet->{data_len} - 4);
4108
4109         PTDEBUG && _d('Data not complete; expecting',
4110            $session->{buff_left}, 'more bytes');
4111         return $self->{null_event};
4112      }
4113
4114      if ( $session->{cmd} && ($session->{state} || '') eq 'awaiting_reply' ) {
4115         PTDEBUG && _d('No server OK to previous command');
4116         $self->fail_session($session, 'no server OK to previous command');
4117         $packet->{data} = $packet->{mysql_hdr} . $packet->{data};
4118         return $self->parse_event(%args);
4119      }
4120
4121      $event = $self->_packet_from_client($packet, $session, $args{misc});
4122   }
4123   else {
4124      die 'Packet origin unknown';
4125   }
4126
4127   PTDEBUG && _d('Done parsing packet; client state:', $session->{state});
4128   if ( $session->{closed} ) {
4129      delete $self->{sessions}->{$session->{client}};
4130      PTDEBUG && _d('Session deleted');
4131   }
4132
4133   $args{stats}->{events_parsed}++ if $args{stats};
4134   return $event || $self->{null_event};
4135}
4136
4137sub _packet_from_server {
4138   my ( $self, $packet, $session, $misc ) = @_;
4139   die "I need a packet"  unless $packet;
4140   die "I need a session" unless $session;
4141
4142   PTDEBUG && _d('Packet is from server; client state:', $session->{state});
4143
4144   if ( ($session->{server_seq} || '') eq $packet->{seq} ) {
4145      push @{ $session->{server_retransmissions} }, $packet->{seq};
4146      PTDEBUG && _d('TCP retransmission');
4147      return;
4148   }
4149   $session->{server_seq} = $packet->{seq};
4150
4151   my $data = $packet->{data};
4152
4153
4154   my ( $first_byte ) = substr($data, 0, 2, '');
4155   PTDEBUG && _d('First byte of packet:', $first_byte);
4156   if ( !$first_byte ) {
4157      $self->fail_session($session, 'no first byte');
4158      return;
4159   }
4160
4161   if ( !$session->{state} ) {
4162      if ( $first_byte eq '0a' && length $data >= 33 && $data =~ m/00{13}/ ) {
4163         my $handshake = parse_server_handshake_packet($data);
4164         if ( !$handshake ) {
4165            $self->fail_session($session, 'failed to parse server handshake');
4166            return;
4167         }
4168         $session->{state}     = 'server_handshake';
4169         $session->{thread_id} = $handshake->{thread_id};
4170
4171         $session->{ts} = $packet->{ts} unless $session->{ts};
4172      }
4173      elsif ( $session->{buff} ) {
4174         $self->fail_session($session,
4175            'got server response before full buffer');
4176         return;
4177      }
4178      else {
4179         PTDEBUG && _d('Ignoring mid-stream server response');
4180         return;
4181      }
4182   }
4183   else {
4184      if ( $first_byte eq '00' ) {
4185         if ( ($session->{state} || '') eq 'client_auth' ) {
4186
4187            $session->{compress} = $session->{will_compress};
4188            delete $session->{will_compress};
4189            PTDEBUG && $session->{compress} && _d('Packets will be compressed');
4190
4191            PTDEBUG && _d('Admin command: Connect');
4192            return $self->_make_event(
4193               {  cmd => 'Admin',
4194                  arg => 'administrator command: Connect',
4195                  ts  => $packet->{ts}, # Events are timestamped when they end
4196               },
4197               $packet, $session
4198            );
4199         }
4200         elsif ( $session->{cmd} ) {
4201            my $com = $session->{cmd}->{cmd};
4202            my $ok;
4203            if ( $com eq COM_STMT_PREPARE ) {
4204               PTDEBUG && _d('OK for prepared statement');
4205               $ok = parse_ok_prepared_statement_packet($data);
4206               if ( !$ok ) {
4207                  $self->fail_session($session,
4208                     'failed to parse OK prepared statement packet');
4209                  return;
4210               }
4211               my $sth_id = $ok->{sth_id};
4212               $session->{attribs}->{Statement_id} = $sth_id;
4213
4214               $session->{sths}->{$sth_id} = $ok;
4215               $session->{sths}->{$sth_id}->{statement}
4216                  = $session->{cmd}->{arg};
4217            }
4218            else {
4219               $ok  = parse_ok_packet($data);
4220               if ( !$ok ) {
4221                  $self->fail_session($session, 'failed to parse OK packet');
4222                  return;
4223               }
4224            }
4225
4226            my $arg;
4227            if ( $com eq COM_QUERY
4228                 || $com eq COM_STMT_EXECUTE || $com eq COM_STMT_RESET ) {
4229               $com = 'Query';
4230               $arg = $session->{cmd}->{arg};
4231            }
4232            elsif ( $com eq COM_STMT_PREPARE ) {
4233               $com = 'Query';
4234               $arg = "PREPARE $session->{cmd}->{arg}";
4235            }
4236            else {
4237               $arg = 'administrator command: '
4238                    . ucfirst(lc(substr($com_for{$com}, 4)));
4239               $com = 'Admin';
4240            }
4241
4242            return $self->_make_event(
4243               {  cmd           => $com,
4244                  arg           => $arg,
4245                  ts            => $packet->{ts},
4246                  Insert_id     => $ok->{insert_id},
4247                  Warning_count => $ok->{warnings},
4248                  Rows_affected => $ok->{affected_rows},
4249               },
4250               $packet, $session
4251            );
4252         }
4253         else {
4254            PTDEBUG && _d('Looks like an OK packet but session has no cmd');
4255         }
4256      }
4257      elsif ( $first_byte eq 'ff' ) {
4258         my $error = parse_error_packet($data);
4259         if ( !$error ) {
4260            $self->fail_session($session, 'failed to parse error packet');
4261            return;
4262         }
4263         my $event;
4264
4265         if (   $session->{state} eq 'client_auth'
4266             || $session->{state} eq 'server_handshake' ) {
4267            PTDEBUG && _d('Connection failed');
4268            $event = {
4269               cmd      => 'Admin',
4270               arg      => 'administrator command: Connect',
4271               ts       => $packet->{ts},
4272               Error_no => $error->{errno},
4273            };
4274            $session->{attribs}->{Error_msg} = $error->{message};
4275            $session->{closed} = 1;  # delete session when done
4276            return $self->_make_event($event, $packet, $session);
4277         }
4278         elsif ( $session->{cmd} ) {
4279            my $com = $session->{cmd}->{cmd};
4280            my $arg;
4281
4282            if ( $com eq COM_QUERY || $com eq COM_STMT_EXECUTE ) {
4283               $com = 'Query';
4284               $arg = $session->{cmd}->{arg};
4285            }
4286            else {
4287               $arg = 'administrator command: '
4288                    . ucfirst(lc(substr($com_for{$com}, 4)));
4289               $com = 'Admin';
4290            }
4291
4292            $event = {
4293               cmd => $com,
4294               arg => $arg,
4295               ts  => $packet->{ts},
4296            };
4297            if ( $error->{errno} ) {
4298               $event->{Error_no} = $error->{errno};
4299            }
4300            $session->{attribs}->{Error_msg} = $error->{message};
4301            return $self->_make_event($event, $packet, $session);
4302         }
4303         else {
4304            PTDEBUG && _d('Looks like an error packet but client is not '
4305               . 'authenticating and session has no cmd');
4306         }
4307      }
4308      elsif ( $first_byte eq 'fe' && $packet->{mysql_data_len} < 9 ) {
4309         if ( $packet->{mysql_data_len} == 1
4310              && $session->{state} eq 'client_auth'
4311              && $packet->{number} == 2 )
4312         {
4313            PTDEBUG && _d('Server has old password table;',
4314               'client will resend password using old algorithm');
4315            $session->{state} = 'client_auth_resend';
4316         }
4317         else {
4318            PTDEBUG && _d('Got an EOF packet');
4319            $self->fail_session($session, 'got an unexpected EOF packet');
4320         }
4321      }
4322      else {
4323         if ( $session->{cmd} ) {
4324            PTDEBUG && _d('Got a row/field/result packet');
4325            my $com = $session->{cmd}->{cmd};
4326            PTDEBUG && _d('Responding to client', $com_for{$com});
4327            my $event = { ts  => $packet->{ts} };
4328            if ( $com eq COM_QUERY || $com eq COM_STMT_EXECUTE ) {
4329               $event->{cmd} = 'Query';
4330               $event->{arg} = $session->{cmd}->{arg};
4331            }
4332            else {
4333               $event->{arg} = 'administrator command: '
4334                    . ucfirst(lc(substr($com_for{$com}, 4)));
4335               $event->{cmd} = 'Admin';
4336            }
4337
4338            if ( $packet->{complete} ) {
4339               my ( $warning_count, $status_flags )
4340                  = $data =~ m/fe(.{4})(.{4})\Z/;
4341               if ( $warning_count ) {
4342                  $event->{Warnings} = to_num($warning_count);
4343                  my $flags = to_num($status_flags); # TODO set all flags?
4344                  $event->{No_good_index_used}
4345                     = $flags & SERVER_QUERY_NO_GOOD_INDEX_USED ? 1 : 0;
4346                  $event->{No_index_used}
4347                     = $flags & SERVER_QUERY_NO_INDEX_USED ? 1 : 0;
4348               }
4349            }
4350
4351            return $self->_make_event($event, $packet, $session);
4352         }
4353         else {
4354            PTDEBUG && _d('Unknown in-stream server response');
4355         }
4356      }
4357   }
4358
4359   return;
4360}
4361
4362sub _packet_from_client {
4363   my ( $self, $packet, $session, $misc ) = @_;
4364   die "I need a packet"  unless $packet;
4365   die "I need a session" unless $session;
4366
4367   PTDEBUG && _d('Packet is from client; state:', $session->{state});
4368
4369   if ( ($session->{client_seq} || '') eq $packet->{seq} ) {
4370      push @{ $session->{client_retransmissions} }, $packet->{seq};
4371      PTDEBUG && _d('TCP retransmission');
4372      return;
4373   }
4374   $session->{client_seq} = $packet->{seq};
4375
4376   my $data  = $packet->{data};
4377   my $ts    = $packet->{ts};
4378
4379   if ( ($session->{state} || '') eq 'server_handshake' ) {
4380      PTDEBUG && _d('Expecting client authentication packet');
4381      my $handshake = parse_client_handshake_packet($data);
4382      if ( !$handshake ) {
4383         $self->fail_session($session, 'failed to parse client handshake');
4384         return;
4385      }
4386      $session->{state}         = 'client_auth';
4387      $session->{pos_in_log}    = $packet->{pos_in_log};
4388      $session->{user}          = $handshake->{user};
4389      $session->{db}            = $handshake->{db};
4390
4391      $session->{will_compress} = $handshake->{flags}->{CLIENT_COMPRESS};
4392   }
4393   elsif ( ($session->{state} || '') eq 'client_auth_resend' ) {
4394      PTDEBUG && _d('Client resending password using old algorithm');
4395      $session->{state} = 'client_auth';
4396   }
4397   elsif ( ($session->{state} || '') eq 'awaiting_reply' ) {
4398      my $arg = $session->{cmd}->{arg} ? substr($session->{cmd}->{arg}, 0, 50)
4399              : 'unknown';
4400      PTDEBUG && _d('More data for previous command:', $arg, '...');
4401      return;
4402   }
4403   else {
4404      if ( $packet->{number} != 0 ) {
4405         $self->fail_session($session, 'client cmd not packet 0');
4406         return;
4407      }
4408
4409      if ( !defined $session->{compress} ) {
4410         return unless $self->detect_compression($packet, $session);
4411         $data = $packet->{data};
4412      }
4413
4414      my $com = parse_com_packet($data, $packet->{mysql_data_len});
4415      if ( !$com ) {
4416         $self->fail_session($session, 'failed to parse COM packet');
4417         return;
4418      }
4419
4420      if ( $com->{code} eq COM_STMT_EXECUTE ) {
4421         PTDEBUG && _d('Execute prepared statement');
4422         my $exec = parse_execute_packet($com->{data}, $session->{sths});
4423         if ( !$exec ) {
4424            PTDEBUG && _d('Failed to parse execute packet');
4425            $session->{state} = undef;
4426            return;
4427         }
4428         $com->{data} = $exec->{arg};
4429         $session->{attribs}->{Statement_id} = $exec->{sth_id};
4430      }
4431      elsif ( $com->{code} eq COM_STMT_RESET ) {
4432         my $sth_id = get_sth_id($com->{data});
4433         if ( !$sth_id ) {
4434            $self->fail_session($session,
4435               'failed to parse prepared statement reset packet');
4436            return;
4437         }
4438         $com->{data} = "RESET $sth_id";
4439         $session->{attribs}->{Statement_id} = $sth_id;
4440      }
4441
4442      $session->{state}      = 'awaiting_reply';
4443      $session->{pos_in_log} = $packet->{pos_in_log};
4444      $session->{ts}         = $ts;
4445      $session->{cmd}        = {
4446         cmd => $com->{code},
4447         arg => $com->{data},
4448      };
4449
4450      if ( $com->{code} eq COM_QUIT ) { # Fire right away; will cleanup later.
4451         PTDEBUG && _d('Got a COM_QUIT');
4452
4453         $session->{closed} = 1;  # delete session when done
4454
4455         return $self->_make_event(
4456            {  cmd       => 'Admin',
4457               arg       => 'administrator command: Quit',
4458               ts        => $ts,
4459            },
4460            $packet, $session
4461         );
4462      }
4463      elsif ( $com->{code} eq COM_STMT_CLOSE ) {
4464         my $sth_id = get_sth_id($com->{data});
4465         if ( !$sth_id ) {
4466            $self->fail_session($session,
4467               'failed to parse prepared statement close packet');
4468            return;
4469         }
4470         delete $session->{sths}->{$sth_id};
4471         return $self->_make_event(
4472            {  cmd       => 'Query',
4473               arg       => "DEALLOCATE PREPARE $sth_id",
4474               ts        => $ts,
4475            },
4476            $packet, $session
4477         );
4478      }
4479   }
4480
4481   return;
4482}
4483
4484sub _make_event {
4485   my ( $self, $event, $packet, $session ) = @_;
4486   PTDEBUG && _d('Making event');
4487
4488   $session->{raw_packets}  = [];
4489   $self->_delete_buff($session);
4490
4491   if ( !$session->{thread_id} ) {
4492      PTDEBUG && _d('Giving session fake thread id', $self->{fake_thread_id});
4493      $session->{thread_id} = $self->{fake_thread_id}++;
4494   }
4495
4496   my ($host, $port) = $session->{client} =~ m/((?:\d+\.){3}\d+)\:(\w+)/;
4497   my $new_event = {
4498      cmd        => $event->{cmd},
4499      arg        => $event->{arg},
4500      bytes      => length( $event->{arg} ),
4501      ts         => tcp_timestamp( $event->{ts} ),
4502      host       => $host,
4503      ip         => $host,
4504      port       => $port,
4505      db         => $session->{db},
4506      user       => $session->{user},
4507      Thread_id  => $session->{thread_id},
4508      pos_in_log => $session->{pos_in_log},
4509      Query_time => timestamp_diff($session->{ts}, $packet->{ts}),
4510      Rows_affected      => ($event->{Rows_affected} || 0),
4511      Warning_count      => ($event->{Warning_count} || 0),
4512      No_good_index_used => ($event->{No_good_index_used} ? 'Yes' : 'No'),
4513      No_index_used      => ($event->{No_index_used}      ? 'Yes' : 'No'),
4514   };
4515   @{$new_event}{keys %{$session->{attribs}}} = values %{$session->{attribs}};
4516   foreach my $opt_attrib ( qw(Error_no) ) {
4517      if ( defined $event->{$opt_attrib} ) {
4518         $new_event->{$opt_attrib} = $event->{$opt_attrib};
4519      }
4520   }
4521   PTDEBUG && _d('Properties of event:', Dumper($new_event));
4522
4523   delete $session->{cmd};
4524
4525   $session->{state} = undef;
4526
4527   $session->{attribs} = {};
4528
4529   $session->{n_queries}++;
4530   $session->{server_retransmissions} = [];
4531   $session->{client_retransmissions} = [];
4532
4533   return $new_event;
4534}
4535
4536sub tcp_timestamp {
4537   my ( $ts ) = @_;
4538   $ts =~ s/^\d\d(\d\d)-(\d\d)-(\d\d)/$1$2$3/;
4539   return $ts;
4540}
4541
4542sub timestamp_diff {
4543   my ( $start, $end ) = @_;
4544   my $sd = substr($start, 0, 11, '');
4545   my $ed = substr($end,   0, 11, '');
4546   my ( $sh, $sm, $ss ) = split(/:/, $start);
4547   my ( $eh, $em, $es ) = split(/:/, $end);
4548   my $esecs = ($eh * 3600 + $em * 60 + $es);
4549   my $ssecs = ($sh * 3600 + $sm * 60 + $ss);
4550   if ( $sd eq $ed ) {
4551      return sprintf '%.6f', $esecs - $ssecs;
4552   }
4553   else { # Assume only one day boundary has been crossed, no DST, etc
4554      return sprintf '%.6f', ( 86_400 - $ssecs ) + $esecs;
4555   }
4556}
4557
4558sub to_string {
4559   my ( $data ) = @_;
4560   return pack('H*', $data);
4561}
4562
4563sub unpack_string {
4564   my ( $data ) = @_;
4565   my $len        = 0;
4566   my $encode_len = 0;
4567   ($data, $len, $encode_len) = decode_len($data);
4568   my $t = 'H' . ($len ? $len * 2 : '*');
4569   $data = pack($t, $data);
4570   return "\"$data\"", $encode_len + $len;
4571}
4572
4573sub decode_len {
4574   my ( $data ) = @_;
4575   return unless $data;
4576
4577   my $first_byte = to_num(substr($data, 0, 2, ''));
4578
4579   my $len;
4580   my $encode_len;
4581   if ( $first_byte <= 251 ) {
4582      $len        = $first_byte;
4583      $encode_len = 1;
4584   }
4585   elsif ( $first_byte == 252 ) {
4586      $len        = to_num(substr($data, 4, ''));
4587      $encode_len = 2;
4588   }
4589   elsif ( $first_byte == 253 ) {
4590      $len        = to_num(substr($data, 6, ''));
4591      $encode_len = 3;
4592   }
4593   elsif ( $first_byte == 254 ) {
4594      $len        = to_num(substr($data, 16, ''));
4595      $encode_len = 8;
4596   }
4597   else {
4598      PTDEBUG && _d('data:', $data, 'first byte:', $first_byte);
4599      die "Invalid length encoded byte: $first_byte";
4600   }
4601
4602   PTDEBUG && _d('len:', $len, 'encode len', $encode_len);
4603   return $data, $len, $encode_len;
4604}
4605
4606sub to_num {
4607   my ( $str, $len ) = @_;
4608   if ( $len ) {
4609      $str = substr($str, 0, $len * 2);
4610   }
4611   my @bytes = $str =~ m/(..)/g;
4612   my $result = 0;
4613   foreach my $i ( 0 .. $#bytes ) {
4614      $result += hex($bytes[$i]) * (16 ** ($i * 2));
4615   }
4616   return $result;
4617}
4618
4619sub to_double {
4620   my ( $str ) = @_;
4621   return unpack('d', pack('H*', $str));
4622}
4623
4624sub get_lcb {
4625   my ( $string ) = @_;
4626   my $first_byte = hex(substr($$string, 0, 2, ''));
4627   if ( $first_byte < 251 ) {
4628      return $first_byte;
4629   }
4630   elsif ( $first_byte == 252 ) {
4631      return to_num(substr($$string, 0, 4, ''));
4632   }
4633   elsif ( $first_byte == 253 ) {
4634      return to_num(substr($$string, 0, 6, ''));
4635   }
4636   elsif ( $first_byte == 254 ) {
4637      return to_num(substr($$string, 0, 16, ''));
4638   }
4639}
4640
4641sub parse_error_packet {
4642   my ( $data ) = @_;
4643   return unless $data;
4644   PTDEBUG && _d('ERROR data:', $data);
4645   if ( length $data < 16 ) {
4646      PTDEBUG && _d('Error packet is too short:', $data);
4647      return;
4648   }
4649   my $errno    = to_num(substr($data, 0, 4));
4650   my $marker   = to_string(substr($data, 4, 2));
4651   my $sqlstate = '';
4652   my $message  = '';
4653   if ( $marker eq '#' ) {
4654      $sqlstate = to_string(substr($data, 6, 10));
4655      $message  = to_string(substr($data, 16));
4656   }
4657   else {
4658      $marker  = '';
4659      $message = to_string(substr($data, 4));
4660   }
4661   return unless $message;
4662   my $pkt = {
4663      errno    => $errno,
4664      sqlstate => $marker . $sqlstate,
4665      message  => $message,
4666   };
4667   PTDEBUG && _d('Error packet:', Dumper($pkt));
4668   return $pkt;
4669}
4670
4671sub parse_ok_packet {
4672   my ( $data ) = @_;
4673   return unless $data;
4674   PTDEBUG && _d('OK data:', $data);
4675   if ( length $data < 12 ) {
4676      PTDEBUG && _d('OK packet is too short:', $data);
4677      return;
4678   }
4679   my $affected_rows = get_lcb(\$data);
4680   my $insert_id     = get_lcb(\$data);
4681   my $status        = to_num(substr($data, 0, 4, ''));
4682   my $warnings      = to_num(substr($data, 0, 4, ''));
4683   my $message       = to_string($data);
4684   my $pkt = {
4685      affected_rows => $affected_rows,
4686      insert_id     => $insert_id,
4687      status        => $status,
4688      warnings      => $warnings,
4689      message       => $message,
4690   };
4691   PTDEBUG && _d('OK packet:', Dumper($pkt));
4692   return $pkt;
4693}
4694
4695sub parse_ok_prepared_statement_packet {
4696   my ( $data ) = @_;
4697   return unless $data;
4698   PTDEBUG && _d('OK prepared statement data:', $data);
4699   if ( length $data < 8 ) {
4700      PTDEBUG && _d('OK prepared statement packet is too short:', $data);
4701      return;
4702   }
4703   my $sth_id     = to_num(substr($data, 0, 8, ''));
4704   my $num_cols   = to_num(substr($data, 0, 4, ''));
4705   my $num_params = to_num(substr($data, 0, 4, ''));
4706   my $pkt = {
4707      sth_id     => $sth_id,
4708      num_cols   => $num_cols,
4709      num_params => $num_params,
4710   };
4711   PTDEBUG && _d('OK prepared packet:', Dumper($pkt));
4712   return $pkt;
4713}
4714
4715sub parse_server_handshake_packet {
4716   my ( $data ) = @_;
4717   return unless $data;
4718   PTDEBUG && _d('Server handshake data:', $data);
4719   my $handshake_pattern = qr{
4720      ^                 # -----                ----
4721      (.+?)00           # n Null-Term String   server_version
4722      (.{8})            # 4                    thread_id
4723      .{16}             # 8                    scramble_buff
4724      .{2}              # 1                    filler: always 0x00
4725      (.{4})            # 2                    server_capabilities
4726      .{2}              # 1                    server_language
4727      .{4}              # 2                    server_status
4728      .{26}             # 13                   filler: always 0x00
4729   }x;
4730   my ( $server_version, $thread_id, $flags ) = $data =~ m/$handshake_pattern/;
4731   my $pkt = {
4732      server_version => to_string($server_version),
4733      thread_id      => to_num($thread_id),
4734      flags          => parse_flags($flags),
4735   };
4736   PTDEBUG && _d('Server handshake packet:', Dumper($pkt));
4737   return $pkt;
4738}
4739
4740sub parse_client_handshake_packet {
4741   my ( $data ) = @_;
4742   return unless $data;
4743   PTDEBUG && _d('Client handshake data:', $data);
4744   my ( $flags, $user, $buff_len ) = $data =~ m{
4745      ^
4746      (.{8})         # Client flags
4747      .{10}          # Max packet size, charset
4748      (?:00){23}     # Filler
4749      ((?:..)+?)00   # Null-terminated user name
4750      (..)           # Length-coding byte for scramble buff
4751   }x;
4752
4753   if ( !$buff_len ) {
4754      PTDEBUG && _d('Did not match client handshake packet');
4755      return;
4756   }
4757
4758   my $code_len = hex($buff_len);
4759   my $db;
4760
4761   my $capability_flags = to_num($flags); # $flags is stored as little endian.
4762
4763   if ($capability_flags & $flag_for{CLIENT_CONNECT_WITH_DB}) {
4764      ( $db ) = $data =~ m!
4765         ^.{64}${user}00..   # Everything matched before
4766         (?:..){$code_len}   # The scramble buffer
4767         (.*?)00.*\Z         # The database name
4768      !x;
4769   }
4770
4771   my $pkt = {
4772      user  => to_string($user),
4773      db    => $db ? to_string($db) : '',
4774      flags => parse_flags($flags),
4775   };
4776   PTDEBUG && _d('Client handshake packet:', Dumper($pkt));
4777   return $pkt;
4778}
4779
4780sub parse_com_packet {
4781   my ( $data, $len ) = @_;
4782   return unless $data && $len;
4783   PTDEBUG && _d('COM data:',
4784      (substr($data, 0, 100).(length $data > 100 ? '...' : '')),
4785      'len:', $len);
4786   my $code = substr($data, 0, 2);
4787   my $com  = $com_for{$code};
4788   if ( !$com ) {
4789      PTDEBUG && _d('Did not match COM packet');
4790      return;
4791   }
4792   if (    $code ne COM_STMT_EXECUTE
4793        && $code ne COM_STMT_CLOSE
4794        && $code ne COM_STMT_RESET )
4795   {
4796      $data = to_string(substr($data, 2, ($len - 1) * 2));
4797   }
4798   my $pkt = {
4799      code => $code,
4800      com  => $com,
4801      data => $data,
4802   };
4803   PTDEBUG && _d('COM packet:', Dumper($pkt));
4804   return $pkt;
4805}
4806
4807sub parse_execute_packet {
4808   my ( $data, $sths ) = @_;
4809   return unless $data && $sths;
4810
4811   my $sth_id = to_num(substr($data, 2, 8));
4812   return unless defined $sth_id;
4813
4814   my $sth = $sths->{$sth_id};
4815   if ( !$sth ) {
4816      PTDEBUG && _d('Skipping unknown statement handle', $sth_id);
4817      return;
4818   }
4819   my $null_count  = int(($sth->{num_params} + 7) / 8) || 1;
4820   my $null_bitmap = to_num(substr($data, 20, $null_count * 2));
4821   PTDEBUG && _d('NULL bitmap:', $null_bitmap, 'count:', $null_count);
4822
4823   substr($data, 0, 20 + ($null_count * 2), '');
4824
4825   my $new_params = to_num(substr($data, 0, 2, ''));
4826   my @types;
4827   if ( $new_params ) {
4828      PTDEBUG && _d('New param types');
4829      for my $i ( 0..($sth->{num_params}-1) ) {
4830         my $type = to_num(substr($data, 0, 4, ''));
4831         push @types, $type_for{$type};
4832         PTDEBUG && _d('Param', $i, 'type:', $type, $type_for{$type});
4833      }
4834      $sth->{types} = \@types;
4835   }
4836   else {
4837      @types = @{$sth->{types}} if $data;
4838   }
4839
4840
4841   my $arg  = $sth->{statement};
4842   PTDEBUG && _d('Statement:', $arg);
4843   for my $i ( 0..($sth->{num_params}-1) ) {
4844      my $val;
4845      my $len;  # in bytes
4846      if ( $null_bitmap & (2**$i) ) {
4847         PTDEBUG && _d('Param', $i, 'is NULL (bitmap)');
4848         $val = 'NULL';
4849         $len = 0;
4850      }
4851      else {
4852         if ( $unpack_type{$types[$i]} ) {
4853            ($val, $len) = $unpack_type{$types[$i]}->($data);
4854         }
4855         else {
4856            PTDEBUG && _d('No handler for param', $i, 'type', $types[$i]);
4857            $val = '?';
4858            $len = 0;
4859         }
4860      }
4861
4862      PTDEBUG && _d('Param', $i, 'val:', $val);
4863      $arg =~ s/\?/$val/;
4864
4865      substr($data, 0, $len * 2, '') if $len;
4866   }
4867
4868   my $pkt = {
4869      sth_id => $sth_id,
4870      arg    => "EXECUTE $arg",
4871   };
4872   PTDEBUG && _d('Execute packet:', Dumper($pkt));
4873   return $pkt;
4874}
4875
4876sub get_sth_id {
4877   my ( $data ) = @_;
4878   return unless $data;
4879   my $sth_id = to_num(substr($data, 2, 8));
4880   return $sth_id;
4881}
4882
4883sub parse_flags {
4884   my ( $flags ) = @_;
4885   die "I need flags" unless $flags;
4886   PTDEBUG && _d('Flag data:', $flags);
4887   my %flags     = %flag_for;
4888   my $flags_dec = to_num($flags);
4889   foreach my $flag ( keys %flag_for ) {
4890      my $flagno    = $flag_for{$flag};
4891      $flags{$flag} = ($flags_dec & $flagno ? 1 : 0);
4892   }
4893   return \%flags;
4894}
4895
4896sub uncompress_data {
4897   my ( $data, $len ) = @_;
4898   die "I need data" unless $data;
4899   die "I need a len argument" unless $len;
4900   die "I need a scalar reference to data" unless ref $data eq 'SCALAR';
4901   PTDEBUG && _d('Uncompressing data');
4902   our $InflateError;
4903
4904   my $comp_bin_data = pack('H*', $$data);
4905
4906   my $uncomp_bin_data = '';
4907   my $z = new IO::Uncompress::Inflate(
4908      \$comp_bin_data
4909   ) or die "IO::Uncompress::Inflate failed: $InflateError";
4910   my $status = $z->read(\$uncomp_bin_data, $len)
4911      or die "IO::Uncompress::Inflate failed: $InflateError";
4912
4913   my $uncomp_data = unpack('H*', $uncomp_bin_data);
4914
4915   return \$uncomp_data;
4916}
4917
4918sub detect_compression {
4919   my ( $self, $packet, $session ) = @_;
4920   PTDEBUG && _d('Checking for client compression');
4921   my $com = parse_com_packet($packet->{data}, $packet->{mysql_data_len});
4922   if ( $com && $com->{code} eq COM_SLEEP ) {
4923      PTDEBUG && _d('Client is using compression');
4924      $session->{compress} = 1;
4925
4926      $packet->{data} = $packet->{mysql_hdr} . $packet->{data};
4927      return 0 unless $self->uncompress_packet($packet, $session);
4928      remove_mysql_header($packet);
4929   }
4930   else {
4931      PTDEBUG && _d('Client is NOT using compression');
4932      $session->{compress} = 0;
4933   }
4934   return 1;
4935}
4936
4937sub uncompress_packet {
4938   my ( $self, $packet, $session ) = @_;
4939   die "I need a packet"  unless $packet;
4940   die "I need a session" unless $session;
4941
4942
4943   my $data;
4944   my $comp_hdr;
4945   my $comp_data_len;
4946   my $pkt_num;
4947   my $uncomp_data_len;
4948   eval {
4949      $data            = \$packet->{data};
4950      $comp_hdr        = substr($$data, 0, 14, '');
4951      $comp_data_len   = to_num(substr($comp_hdr, 0, 6));
4952      $pkt_num         = to_num(substr($comp_hdr, 6, 2));
4953      $uncomp_data_len = to_num(substr($comp_hdr, 8, 6));
4954      PTDEBUG && _d('Compression header data:', $comp_hdr,
4955         'compressed data len (bytes)', $comp_data_len,
4956         'number', $pkt_num,
4957         'uncompressed data len (bytes)', $uncomp_data_len);
4958   };
4959   if ( $EVAL_ERROR ) {
4960      $session->{EVAL_ERROR} = $EVAL_ERROR;
4961      $self->fail_session($session, 'failed to parse compression header');
4962      return 0;
4963   }
4964
4965   if ( $uncomp_data_len ) {
4966      eval {
4967         $data = uncompress_data($data, $uncomp_data_len);
4968         $packet->{data} = $$data;
4969      };
4970      if ( $EVAL_ERROR ) {
4971         $session->{EVAL_ERROR} = $EVAL_ERROR;
4972         $self->fail_session($session, 'failed to uncompress data');
4973         die "Cannot uncompress packet.  Check that IO::Uncompress::Inflate "
4974            . "is installed.\nError: $EVAL_ERROR";
4975      }
4976   }
4977   else {
4978      PTDEBUG && _d('Packet is not really compressed');
4979      $packet->{data} = $$data;
4980   }
4981
4982   return 1;
4983}
4984
4985sub remove_mysql_header {
4986   my ( $packet ) = @_;
4987   die "I need a packet" unless $packet;
4988
4989   my $mysql_hdr      = substr($packet->{data}, 0, 8, '');
4990   my $mysql_data_len = to_num(substr($mysql_hdr, 0, 6));
4991   my $pkt_num        = to_num(substr($mysql_hdr, 6, 2));
4992   PTDEBUG && _d('MySQL packet: header data', $mysql_hdr,
4993      'data len (bytes)', $mysql_data_len, 'number', $pkt_num);
4994
4995   $packet->{mysql_hdr}      = $mysql_hdr;
4996   $packet->{mysql_data_len} = $mysql_data_len;
4997   $packet->{number}         = $pkt_num;
4998
4999   return;
5000}
5001
5002sub _delete_buff {
5003   my ( $self, $session ) = @_;
5004   map { delete $session->{$_} } qw(buff buff_left mysql_data_len);
5005   return;
5006}
5007
5008sub _d {
5009   my ($package, undef, $line) = caller 0;
5010   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5011        map { defined $_ ? $_ : 'undef' }
5012        @_;
5013   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5014}
5015
50161;
5017}
5018# ###########################################################################
5019# End MySQLProtocolParser package
5020# ###########################################################################
5021
5022# ###########################################################################
5023# SlowLogParser package
5024# This package is a copy without comments from the original.  The original
5025# with comments and its test file can be found in the Bazaar repository at,
5026#   lib/SlowLogParser.pm
5027#   t/lib/SlowLogParser.t
5028# See https://launchpad.net/percona-toolkit for more information.
5029# ###########################################################################
5030{
5031package SlowLogParser;
5032
5033use strict;
5034use warnings FATAL => 'all';
5035use English qw(-no_match_vars);
5036use constant PTDEBUG => $ENV{PTDEBUG} || 0;
5037
5038use Data::Dumper;
5039$Data::Dumper::Indent    = 1;
5040$Data::Dumper::Sortkeys  = 1;
5041$Data::Dumper::Quotekeys = 0;
5042
5043sub new {
5044   my ( $class ) = @_;
5045   my $self = {
5046      pending => [],
5047      last_event_offset => undef,
5048   };
5049   return bless $self, $class;
5050}
5051
5052my $slow_log_ts_line = qr/^# Time: ((?:[0-9: ]{15})|(?:[-0-9: T]{19}))/;
5053my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]\s*(?:Id:\s*(\d+))?/;
5054my $slow_log_hd_line = qr{
5055      ^(?:
5056      T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix
5057      |
5058      [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
5059      |
5060      Time\s+Id\s+Command
5061      ).*\n
5062   }xm;
5063
5064sub parse_event {
5065   my ( $self, %args ) = @_;
5066   my @required_args = qw(next_event tell);
5067   foreach my $arg ( @required_args ) {
5068      die "I need a $arg argument" unless $args{$arg};
5069   }
5070   my ($next_event, $tell) = @args{@required_args};
5071
5072   my $pending = $self->{pending};
5073   local $INPUT_RECORD_SEPARATOR = ";\n#";
5074   my $trimlen    = length($INPUT_RECORD_SEPARATOR);
5075   my $pos_in_log = $tell->();
5076   my $stmt;
5077
5078   EVENT:
5079   while (
5080         defined($stmt = shift @$pending)
5081      or defined($stmt = $next_event->())
5082   ) {
5083      my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
5084      $self->{last_event_offset} = $pos_in_log;
5085      $pos_in_log = $tell->();
5086
5087      if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
5088         my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
5089         if ( @chunks > 1 ) {
5090            PTDEBUG && _d("Found multiple chunks");
5091            $stmt = shift @chunks;
5092            unshift @$pending, @chunks;
5093         }
5094      }
5095
5096      $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
5097      $stmt =~ s/;\n#?\Z//;
5098
5099
5100      my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
5101      my $pos = 0;
5102      my $len = length($stmt);
5103      my $found_arg = 0;
5104      LINE:
5105      while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
5106         $pos     = pos($stmt);  # Be careful not to mess this up!
5107         my $line = $1;          # Necessary for /g and pos() to work.
5108         PTDEBUG && _d($line);
5109
5110         if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {
5111
5112            if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
5113               PTDEBUG && _d("Got ts", $time);
5114               push @properties, 'ts', $time;
5115               ++$got_ts;
5116               if ( !$got_uh
5117                  && ( my ( $user, $host, $ip, $thread_id ) = $line =~ m/$slow_log_uh_line/o )
5118               ) {
5119                  PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
5120                  $host ||= $ip;  # sometimes host is missing when using skip-name-resolve (LP #issue 1262456)
5121                  push @properties, 'user', $user, 'host', $host, 'ip', $ip;
5122                  if ( $thread_id ) {
5123                     push @properties, 'Thread_id', $thread_id;
5124                 }
5125                 ++$got_uh;
5126               }
5127            }
5128
5129            elsif ( !$got_uh
5130                  && ( my ( $user, $host, $ip, $thread_id ) = $line =~ m/$slow_log_uh_line/o )
5131            ) {
5132                  PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
5133                  $host ||= $ip;  # sometimes host is missing when using skip-name-resolve (LP #issue 1262456)
5134                  push @properties, 'user', $user, 'host', $host, 'ip', $ip;
5135                  if ( $thread_id ) {
5136                     push @properties, 'Thread_id', $thread_id;
5137                 }
5138               ++$got_uh;
5139            }
5140
5141            elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
5142               PTDEBUG && _d("Got admin command");
5143               $line =~ s/^#\s+//;  # string leading "# ".
5144               push @properties, 'cmd', 'Admin', 'arg', $line;
5145               push @properties, 'bytes', length($properties[-1]);
5146               ++$found_arg;
5147               ++$got_ac;
5148            }
5149
5150            elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
5151               PTDEBUG && _d("Got some line with properties");
5152
5153               if ( $line =~ m/Schema:\s+\w+: / ) {
5154                  PTDEBUG && _d('Removing empty Schema attrib');
5155                  $line =~ s/Schema:\s+//;
5156                  PTDEBUG && _d($line);
5157               }
5158
5159               my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
5160               push @properties, @temp;
5161            }
5162
5163            elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
5164               PTDEBUG && _d("Got a default database:", $db);
5165               push @properties, 'db', $db;
5166               ++$got_db;
5167            }
5168
5169            elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
5170               PTDEBUG && _d("Got some setting:", $setting);
5171               push @properties, split(/,|\s*=\s*/, $setting);
5172               ++$got_set;
5173            }
5174
5175            if ( !$found_arg && $pos == $len ) {
5176               PTDEBUG && _d("Did not find arg, looking for special cases");
5177               local $INPUT_RECORD_SEPARATOR = ";\n";  # get next line
5178               if ( defined(my $l = $next_event->()) ) {
5179                  if ( $l =~ /^\s*[A-Z][a-z_]+: / ) {
5180                     PTDEBUG && _d("Found NULL query before", $l);
5181                     local $INPUT_RECORD_SEPARATOR = ";\n#";
5182                     my $rest_of_event = $next_event->();
5183                     push @{$self->{pending}}, $l . $rest_of_event;
5184                     push @properties, 'cmd', 'Query', 'arg', '/* No query */';
5185                     push @properties, 'bytes', 0;
5186                     $found_arg++;
5187                  }
5188                  else {
5189                     chomp $l;
5190                     $l =~ s/^\s+//;
5191                     PTDEBUG && _d("Found admin statement", $l);
5192                     push @properties, 'cmd', 'Admin', 'arg', $l;
5193                     push @properties, 'bytes', length($properties[-1]);
5194                     $found_arg++;
5195                  }
5196               }
5197               else {
5198                  PTDEBUG && _d("I can't figure out what to do with this line");
5199                  next EVENT;
5200               }
5201            }
5202         }
5203         else {
5204            PTDEBUG && _d("Got the query/arg line");
5205            my $arg = substr($stmt, $pos - length($line));
5206            push @properties, 'arg', $arg, 'bytes', length($arg);
5207            if ( $args{misc} && $args{misc}->{embed}
5208               && ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
5209            ) {
5210               push @properties, $e =~ m/$args{misc}->{capture}/g;
5211            }
5212            last LINE;
5213         }
5214      }
5215
5216      PTDEBUG && _d('Properties of event:', Dumper(\@properties));
5217      my $event = { @properties };
5218      if ( !$event->{arg} ) {
5219         PTDEBUG && _d('Partial event, no arg');
5220      }
5221      else {
5222         $self->{last_event_offset} = undef;
5223         if ( $args{stats} ) {
5224            $args{stats}->{events_read}++;
5225            $args{stats}->{events_parsed}++;
5226         }
5227      }
5228      return $event;
5229   } # EVENT
5230
5231   @$pending = ();
5232   $args{oktorun}->(0) if $args{oktorun};
5233   return;
5234}
5235
5236sub _d {
5237   my ($package, undef, $line) = caller 0;
5238   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5239        map { defined $_ ? $_ : 'undef' }
5240        @_;
5241   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5242}
5243
52441;
5245}
5246# ###########################################################################
5247# End SlowLogParser package
5248# ###########################################################################
5249
5250# ###########################################################################
5251# SlowLogWriter package
5252# This package is a copy without comments from the original.  The original
5253# with comments and its test file can be found in the Bazaar repository at,
5254#   lib/SlowLogWriter.pm
5255#   t/lib/SlowLogWriter.t
5256# See https://launchpad.net/percona-toolkit for more information.
5257# ###########################################################################
5258{
5259package SlowLogWriter;
5260
5261use strict;
5262use warnings FATAL => 'all';
5263use English qw(-no_match_vars);
5264use constant PTDEBUG => $ENV{PTDEBUG} || 0;
5265
5266sub new {
5267   my ( $class ) = @_;
5268   bless {}, $class;
5269}
5270
5271sub write {
5272   my ( $self, $fh, $event, $field ) = @_;
5273   if ( $event->{ts} ) {
5274      print $fh "# Time: $event->{ts}\n";
5275   }
5276   if ( $event->{user} ) {
5277      printf $fh "# User\@Host: %s[%s] \@ %s []\n",
5278         $event->{user}, $event->{user}, $event->{host};
5279   }
5280   if ( $event->{ip} && $event->{port} ) {
5281      printf $fh "# Client: $event->{ip}:$event->{port}\n";
5282   }
5283   if ( $event->{Thread_id} ) {
5284      printf $fh "# Thread_id: $event->{Thread_id}\n";
5285   }
5286
5287   my $percona_patched = exists $event->{QC_Hit} ? 1 : 0;
5288
5289   printf $fh
5290      "# Query_time: %.6f  Lock_time: %.6f  Rows_sent: %d  Rows_examined: %d\n",
5291      map { $_ || 0 }
5292         @{$event}{qw(Query_time Lock_time Rows_sent Rows_examined)};
5293
5294   if ( $percona_patched ) {
5295      printf $fh
5296         "# QC_Hit: %s  Full_scan: %s  Full_join: %s  Tmp_table: %s  Tmp_table_on_disk: %s\n# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %d\n",
5297         map { $_ || 0 }
5298            @{$event}{qw(QC_Hit Full_scan Full_join Tmp_table Tmp_table_on_disk Filesort Filesort_on_disk Merge_passes)};
5299
5300      if ( exists $event->{InnoDB_IO_r_ops} ) {
5301         printf $fh
5302            "#   InnoDB_IO_r_ops: %d  InnoDB_IO_r_bytes: %d  InnoDB_IO_r_wait: %s\n#   InnoDB_rec_lock_wait: %s  InnoDB_queue_wait: %s\n#   InnoDB_pages_distinct: %d\n",
5303            map { $_ || 0 }
5304               @{$event}{qw(InnoDB_IO_r_ops InnoDB_IO_r_bytes InnoDB_IO_r_wait InnoDB_rec_lock_wait InnoDB_queue_wait InnoDB_pages_distinct)};
5305
5306      }
5307      else {
5308         printf $fh "# No InnoDB statistics available for this query\n";
5309      }
5310   }
5311
5312   if ( $event->{db} ) {
5313      printf $fh "use %s;\n", $event->{db};
5314   }
5315   if ( $event->{arg} =~ m/^administrator command/ ) {
5316      print $fh '# ';
5317   }
5318   if ($field && $event->{$field}) {
5319       print $fh $event->{$field}, ";\n";
5320   } else {
5321       print $fh $event->{arg}, ";\n";
5322   }
5323
5324   return;
5325}
5326
5327sub _d {
5328   my ($package, undef, $line) = caller 0;
5329   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5330        map { defined $_ ? $_ : 'undef' }
5331        @_;
5332   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5333}
5334
53351;
5336}
5337# ###########################################################################
5338# End SlowLogWriter package
5339# ###########################################################################
5340
5341# ###########################################################################
5342# EventAggregator package
5343# This package is a copy without comments from the original.  The original
5344# with comments and its test file can be found in the Bazaar repository at,
5345#   lib/EventAggregator.pm
5346#   t/lib/EventAggregator.t
5347# See https://launchpad.net/percona-toolkit for more information.
5348# ###########################################################################
5349{
5350package EventAggregator;
5351
5352use strict;
5353use warnings FATAL => 'all';
5354use English qw(-no_match_vars);
5355use constant PTDEBUG => $ENV{PTDEBUG} || 0;
5356
5357use List::Util qw(min max);
5358use Data::Dumper;
5359$Data::Dumper::Indent    = 1;
5360$Data::Dumper::Sortkeys  = 1;
5361$Data::Dumper::Quotekeys = 0;
5362
5363use Digest::MD5 qw(md5);
5364
5365use constant BUCK_SIZE   => 1.05;
5366use constant BASE_LOG    => log(BUCK_SIZE);
5367use constant BASE_OFFSET => abs(1 - log(0.000001) / BASE_LOG); # 284.1617969
5368use constant NUM_BUCK    => 1000;
5369use constant MIN_BUCK    => .000001;
5370
5371my @buck_vals = map { bucket_value($_); } (0..NUM_BUCK-1);
5372
5373sub new {
5374   my ( $class, %args ) = @_;
5375   foreach my $arg ( qw(groupby worst) ) {
5376      die "I need a $arg argument" unless $args{$arg};
5377   }
5378   my $attributes = $args{attributes} || {};
5379   my $self = {
5380      groupby        => $args{groupby},
5381      detect_attribs => scalar keys %$attributes == 0 ? 1 : 0,
5382      all_attribs    => [ keys %$attributes ],
5383      ignore_attribs => {
5384         map  { $_ => $args{attributes}->{$_} }
5385         grep { $_ ne $args{groupby} }
5386         @{$args{ignore_attributes}}
5387      },
5388      attributes     => {
5389         map  { $_ => $args{attributes}->{$_} }
5390         grep { $_ ne $args{groupby} }
5391         keys %$attributes
5392      },
5393      alt_attribs    => {
5394         map  { $_ => make_alt_attrib(@{$args{attributes}->{$_}}) }
5395         grep { $_ ne $args{groupby} }
5396         keys %$attributes
5397      },
5398      worst          => $args{worst},
5399      unroll_limit   => $ENV{PT_QUERY_DIGEST_CHECK_ATTRIB_LIMIT} || 1000,
5400      attrib_limit   => $args{attrib_limit},
5401      result_classes => {},
5402      result_globals => {},
5403      result_samples => {},
5404      class_metrics  => {},
5405      global_metrics => {},
5406      n_events       => 0,
5407      unrolled_loops => undef,
5408      type_for       => { %{$args{type_for} || { Query_time => 'num' }} },
5409   };
5410   return bless $self, $class;
5411}
5412
5413sub reset_aggregated_data {
5414   my ( $self ) = @_;
5415   foreach my $class ( values %{$self->{result_classes}} ) {
5416      foreach my $attrib ( values %$class ) {
5417         delete @{$attrib}{keys %$attrib};
5418      }
5419   }
5420   foreach my $class ( values %{$self->{result_globals}} ) {
5421      delete @{$class}{keys %$class};
5422   }
5423   delete @{$self->{result_samples}}{keys %{$self->{result_samples}}};
5424   $self->{n_events} = 0;
5425}
5426
5427sub aggregate {
5428   my ( $self, $event ) = @_;
5429
5430   my $group_by = $event->{$self->{groupby}};
5431   return unless defined $group_by;
5432
5433   $self->{n_events}++;
5434   PTDEBUG && _d('Event', $self->{n_events});
5435
5436   return $self->{unrolled_loops}->($self, $event, $group_by)
5437      if $self->{unrolled_loops};
5438
5439   if ( $self->{n_events} <= $self->{unroll_limit} ) {
5440
5441      $self->add_new_attributes($event) if $self->{detect_attribs};
5442
5443      ATTRIB:
5444      foreach my $attrib ( keys %{$self->{attributes}} ) {
5445
5446         if ( !exists $event->{$attrib} ) {
5447            PTDEBUG && _d("attrib doesn't exist in event:", $attrib);
5448            my $alt_attrib = $self->{alt_attribs}->{$attrib}->($event);
5449            PTDEBUG && _d('alt attrib:', $alt_attrib);
5450            next ATTRIB unless $alt_attrib;
5451         }
5452
5453         GROUPBY:
5454         foreach my $val ( ref $group_by ? @$group_by : ($group_by) ) {
5455            my $class_attrib  = $self->{result_classes}->{$val}->{$attrib} ||= {};
5456            my $global_attrib = $self->{result_globals}->{$attrib} ||= {};
5457            my $samples       = $self->{result_samples};
5458            my $handler = $self->{handlers}->{ $attrib };
5459            if ( !$handler ) {
5460               $handler = $self->make_handler(
5461                  event      => $event,
5462                  attribute  => $attrib,
5463                  alternates => $self->{attributes}->{$attrib},
5464                  worst      => $self->{worst} eq $attrib,
5465               );
5466               $self->{handlers}->{$attrib} = $handler;
5467            }
5468            next GROUPBY unless $handler;
5469            $samples->{$val} ||= $event; # Initialize to the first event.
5470            $handler->($event, $class_attrib, $global_attrib, $samples, $group_by);
5471         }
5472      }
5473   }
5474   else {
5475      $self->_make_unrolled_loops($event);
5476      $self->{unrolled_loops}->($self, $event, $group_by);
5477   }
5478
5479   return;
5480}
5481
5482sub _make_unrolled_loops {
5483   my ( $self, $event ) = @_;
5484
5485   my $group_by = $event->{$self->{groupby}};
5486
5487   my @attrs   = grep { $self->{handlers}->{$_} } keys %{$self->{attributes}};
5488   my $globs   = $self->{result_globals}; # Global stats for each
5489   my $samples = $self->{result_samples};
5490
5491   my @lines = (
5492      'my ( $self, $event, $group_by ) = @_;',
5493      'my ($val, $class, $global, $idx);',
5494      (ref $group_by ? ('foreach my $group_by ( @$group_by ) {') : ()),
5495      'my $temp = $self->{result_classes}->{ $group_by }
5496         ||= { map { $_ => { } } @attrs };',
5497      '$samples->{$group_by} ||= $event;', # Always start with the first.
5498   );
5499   foreach my $i ( 0 .. $#attrs ) {
5500      push @lines, (
5501         '$class  = $temp->{\''  . $attrs[$i] . '\'};',
5502         '$global = $globs->{\'' . $attrs[$i] . '\'};',
5503         $self->{unrolled_for}->{$attrs[$i]},
5504      );
5505   }
5506   if ( ref $group_by ) {
5507      push @lines, '}'; # Close the loop opened above
5508   }
5509   @lines = map { s/^/   /gm; $_ } @lines; # Indent for debugging
5510   unshift @lines, 'sub {';
5511   push @lines, '}';
5512
5513   my $code = join("\n", @lines);
5514   PTDEBUG && _d('Unrolled subroutine:', @lines);
5515   my $sub = eval $code;
5516   die $EVAL_ERROR if $EVAL_ERROR;
5517   $self->{unrolled_loops} = $sub;
5518
5519   return;
5520}
5521
5522sub results {
5523   my ( $self ) = @_;
5524   return {
5525      classes => $self->{result_classes},
5526      globals => $self->{result_globals},
5527      samples => $self->{result_samples},
5528   };
5529}
5530
5531sub set_results {
5532   my ( $self, $results ) = @_;
5533   $self->{result_classes} = $results->{classes};
5534   $self->{result_globals} = $results->{globals};
5535   $self->{result_samples} = $results->{samples};
5536   return;
5537}
5538
5539sub stats {
5540   my ( $self ) = @_;
5541   return {
5542      classes => $self->{class_metrics},
5543      globals => $self->{global_metrics},
5544   };
5545}
5546
5547sub attributes {
5548   my ( $self ) = @_;
5549   return $self->{type_for};
5550}
5551
5552sub set_attribute_types {
5553   my ( $self, $attrib_types ) = @_;
5554   $self->{type_for} = $attrib_types;
5555   return;
5556}
5557
5558sub type_for {
5559   my ( $self, $attrib ) = @_;
5560   return $self->{type_for}->{$attrib};
5561}
5562
5563sub make_handler {
5564   my ( $self, %args ) = @_;
5565   my @required_args = qw(event attribute);
5566   foreach my $arg ( @required_args ) {
5567      die "I need a $arg argument" unless $args{$arg};
5568   }
5569   my ($event, $attrib) = @args{@required_args};
5570
5571   my $val;
5572   eval { $val= $self->_get_value(%args); };
5573   if ( $EVAL_ERROR ) {
5574      PTDEBUG && _d("Cannot make", $attrib, "handler:", $EVAL_ERROR);
5575      return;
5576   }
5577   return unless defined $val; # can't determine type if it's undef
5578
5579   my $float_re = qr{[+-]?(?:(?=\d|[.])\d+(?:[.])\d{0,})(?:E[+-]?\d+)?}i;
5580   my $type = $self->type_for($attrib)           ? $self->type_for($attrib)
5581            : $attrib =~ m/_crc$/                ? 'string'
5582            : $val    =~ m/^(?:\d+|$float_re)$/o ? 'num'
5583            : $val    =~ m/^(?:Yes|No)$/         ? 'bool'
5584            :                                      'string';
5585   PTDEBUG && _d('Type for', $attrib, 'is', $type, '(sample:', $val, ')');
5586   $self->{type_for}->{$attrib} = $type;
5587
5588   my @lines;
5589
5590   my %track = (
5591      sum => $type =~ m/num|bool/    ? 1 : 0,  # sum of values
5592      unq => $type =~ m/bool|string/ ? 1 : 0,  # count of unique values seen
5593      all => $type eq 'num'          ? 1 : 0,  # all values in bucketed list
5594   );
5595
5596   my $trf = ($type eq 'bool') ? q{(($val || '') eq 'Yes') ? 1 : 0}
5597           :                     undef;
5598   if ( $trf ) {
5599      push @lines, q{$val = } . $trf . ';';
5600   }
5601
5602   if ( $attrib eq 'Query_time' ) {
5603      push @lines, (
5604         '$val =~ s/^(\d+(?:\.\d+)?).*/$1/;',
5605         '$event->{\''.$attrib.'\'} = $val;',
5606      );
5607   }
5608
5609   if ( $type eq 'num' && $self->{attrib_limit} ) {
5610      push @lines, (
5611         "if ( \$val > $self->{attrib_limit} ) {",
5612         '   $val = $class->{last} ||= 0;',
5613         '}',
5614         '$class->{last} = $val;',
5615      );
5616   }
5617
5618   my $lt = $type eq 'num' ? '<' : 'lt';
5619   my $gt = $type eq 'num' ? '>' : 'gt';
5620   foreach my $place ( qw($class $global) ) {
5621      my @tmp;  # hold lines until PLACE placeholder is replaced
5622
5623      push @tmp, '++PLACE->{cnt};';  # count of all values seen
5624
5625      if ( $attrib =~ m/_crc$/ ) {
5626         push @tmp, '$val = $val % 1_000;';
5627      }
5628
5629      push @tmp, (
5630         'PLACE->{min} = $val if !defined PLACE->{min} || $val '
5631            . $lt . ' PLACE->{min};',
5632      );
5633      push @tmp, (
5634         'PLACE->{max} = $val if !defined PLACE->{max} || $val '
5635         . $gt . ' PLACE->{max};',
5636      );
5637      if ( $track{sum} ) {
5638         push @tmp, 'PLACE->{sum} += $val;';
5639      }
5640
5641      if ( $track{all} ) {
5642         push @tmp, (
5643            'exists PLACE->{all} or PLACE->{all} = {};',
5644            '++PLACE->{all}->{ EventAggregator::bucket_idx($val) };',
5645         );
5646      }
5647
5648      push @lines, map { s/PLACE/$place/g; $_ } @tmp;
5649   }
5650
5651   if ( $track{unq} ) {
5652      push @lines, '++$class->{unq}->{$val}';
5653   }
5654
5655   if ( $args{worst} ) {
5656      my $op = $type eq 'num' ? '>=' : 'ge';
5657      push @lines, (
5658         'if ( $val ' . $op . ' ($class->{max} || 0) ) {',
5659         '   $samples->{$group_by} = $event;',
5660         '}',
5661      );
5662   }
5663
5664   my @unrolled = (
5665      "\$val = \$event->{'$attrib'};",
5666
5667      ( map  { "\$val = \$event->{'$_'} unless defined \$val;" }
5668        grep { $_ ne $attrib } @{$args{alternates}}
5669      ),
5670
5671      'defined $val && do {',
5672         @lines,
5673      '};',
5674   );
5675   $self->{unrolled_for}->{$attrib} = join("\n", @unrolled);
5676
5677   my @code = (
5678      'sub {',
5679         'my ( $event, $class, $global, $samples, $group_by ) = @_;',
5680         'my ($val, $idx);',
5681
5682         $self->{unrolled_for}->{$attrib},
5683
5684         'return;',
5685      '}',
5686   );
5687   $self->{code_for}->{$attrib} = join("\n", @code);
5688   PTDEBUG && _d($attrib, 'handler code:', $self->{code_for}->{$attrib});
5689   my $sub = eval $self->{code_for}->{$attrib};
5690   if ( $EVAL_ERROR ) {
5691      die "Failed to compile $attrib handler code: $EVAL_ERROR";
5692   }
5693
5694   return $sub;
5695}
5696
5697sub bucket_idx {
5698   my ( $val ) = @_;
5699   return 0 if $val < MIN_BUCK;
5700   my $idx = int(BASE_OFFSET + log($val)/BASE_LOG);
5701   return $idx > (NUM_BUCK-1) ? (NUM_BUCK-1) : $idx;
5702}
5703
5704sub bucket_value {
5705   my ( $bucket ) = @_;
5706   return 0 if $bucket == 0;
5707   die "Invalid bucket: $bucket" if $bucket < 0 || $bucket > (NUM_BUCK-1);
5708   return (BUCK_SIZE**($bucket-1)) * MIN_BUCK;
5709}
5710
5711{
5712   my @buck_tens;
5713   sub buckets_of {
5714      return @buck_tens if @buck_tens;
5715
5716      my $start_bucket  = 0;
5717      my @base10_starts = (0);
5718      map { push @base10_starts, (10**$_)*MIN_BUCK } (1..7);
5719
5720      for my $base10_bucket ( 0..($#base10_starts-1) ) {
5721         my $next_bucket = bucket_idx( $base10_starts[$base10_bucket+1] );
5722         PTDEBUG && _d('Base 10 bucket', $base10_bucket, 'maps to',
5723            'base 1.05 buckets', $start_bucket, '..', $next_bucket-1);
5724         for my $base1_05_bucket ($start_bucket..($next_bucket-1)) {
5725            $buck_tens[$base1_05_bucket] = $base10_bucket;
5726         }
5727         $start_bucket = $next_bucket;
5728      }
5729
5730      map { $buck_tens[$_] = 7 } ($start_bucket..(NUM_BUCK-1));
5731
5732      return @buck_tens;
5733   }
5734}
5735
5736sub calculate_statistical_metrics {
5737   my ( $self, %args ) = @_;
5738   my $classes        = $self->{result_classes};
5739   my $globals        = $self->{result_globals};
5740   my $class_metrics  = $self->{class_metrics};
5741   my $global_metrics = $self->{global_metrics};
5742   PTDEBUG && _d('Calculating statistical_metrics');
5743   foreach my $attrib ( keys %$globals ) {
5744      if ( exists $globals->{$attrib}->{all} ) {
5745         $global_metrics->{$attrib}
5746            = $self->_calc_metrics(
5747               $globals->{$attrib}->{all},
5748               $globals->{$attrib},
5749            );
5750      }
5751
5752      foreach my $class ( keys %$classes ) {
5753         if ( exists $classes->{$class}->{$attrib}->{all} ) {
5754            $class_metrics->{$class}->{$attrib}
5755               = $self->_calc_metrics(
5756                  $classes->{$class}->{$attrib}->{all},
5757                  $classes->{$class}->{$attrib}
5758               );
5759         }
5760      }
5761   }
5762
5763   return;
5764}
5765
5766sub _calc_metrics {
5767   my ( $self, $vals, $args ) = @_;
5768   my $statistical_metrics = {
5769      pct_95    => 0,
5770      stddev    => 0,
5771      median    => 0,
5772      cutoff    => undef,
5773   };
5774
5775   return $statistical_metrics
5776      unless defined $vals && %$vals && $args->{cnt};
5777
5778   my $n_vals = $args->{cnt};
5779   if ( $n_vals == 1 || $args->{max} == $args->{min} ) {
5780      my $v      = $args->{max} || 0;
5781      my $bucket = int(6 + ( log($v > 0 ? $v : MIN_BUCK) / log(10)));
5782      $bucket    = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
5783      return {
5784         pct_95 => $v,
5785         stddev => 0,
5786         median => $v,
5787         cutoff => $n_vals,
5788      };
5789   }
5790   elsif ( $n_vals == 2 ) {
5791      foreach my $v ( $args->{min}, $args->{max} ) {
5792         my $bucket = int(6 + ( log($v && $v > 0 ? $v : MIN_BUCK) / log(10)));
5793         $bucket = $bucket > 7 ? 7 : $bucket < 0 ? 0 : $bucket;
5794      }
5795      my $v      = $args->{max} || 0;
5796      my $mean = (($args->{min} || 0) + $v) / 2;
5797      return {
5798         pct_95 => $v,
5799         stddev => sqrt((($v - $mean) ** 2) *2),
5800         median => $mean,
5801         cutoff => $n_vals,
5802      };
5803   }
5804
5805   my $cutoff = $n_vals >= 10 ? int ( $n_vals * 0.95 ) : $n_vals;
5806   $statistical_metrics->{cutoff} = $cutoff;
5807
5808   my $total_left = $n_vals;
5809   my $top_vals   = $n_vals - $cutoff; # vals > 95th
5810   my $sum_excl   = 0;
5811   my $sum        = 0;
5812   my $sumsq      = 0;
5813   my $mid        = int($n_vals / 2);
5814   my $median     = 0;
5815   my $prev       = NUM_BUCK-1; # Used for getting median when $cutoff is odd
5816   my $bucket_95  = 0; # top bucket in 95th
5817
5818   PTDEBUG && _d('total vals:', $total_left, 'top vals:', $top_vals, 'mid:', $mid);
5819
5820   my @buckets = map { 0 } (0..NUM_BUCK-1);
5821   map { $buckets[$_] = $vals->{$_} } keys %$vals;
5822   $vals = \@buckets;  # repoint vals from given hashref to our array
5823
5824   BUCKET:
5825   for my $bucket ( reverse 0..(NUM_BUCK-1) ) {
5826      my $val = $vals->[$bucket];
5827      next BUCKET unless $val;
5828
5829      $total_left -= $val;
5830      $sum_excl   += $val;
5831      $bucket_95   = $bucket if !$bucket_95 && $sum_excl > $top_vals;
5832
5833      if ( !$median && $total_left <= $mid ) {
5834         $median = (($cutoff % 2) || ($val > 1)) ? $buck_vals[$bucket]
5835                 : ($buck_vals[$bucket] + $buck_vals[$prev]) / 2;
5836      }
5837
5838      $sum    += $val * $buck_vals[$bucket];
5839      $sumsq  += $val * ($buck_vals[$bucket]**2);
5840      $prev   =  $bucket;
5841   }
5842
5843   my $var      = $sumsq/$n_vals - ( ($sum/$n_vals) ** 2 );
5844   my $stddev   = $var > 0 ? sqrt($var) : 0;
5845   my $maxstdev = (($args->{max} || 0) - ($args->{min} || 0)) / 2;
5846   $stddev      = $stddev > $maxstdev ? $maxstdev : $stddev;
5847
5848   PTDEBUG && _d('sum:', $sum, 'sumsq:', $sumsq, 'stddev:', $stddev,
5849      'median:', $median, 'prev bucket:', $prev,
5850      'total left:', $total_left, 'sum excl', $sum_excl,
5851      'bucket 95:', $bucket_95, $buck_vals[$bucket_95]);
5852
5853   $statistical_metrics->{stddev} = $stddev;
5854   $statistical_metrics->{pct_95} = $buck_vals[$bucket_95];
5855   $statistical_metrics->{median} = $median;
5856
5857   return $statistical_metrics;
5858}
5859
5860sub metrics {
5861   my ( $self, %args ) = @_;
5862   foreach my $arg ( qw(attrib where) ) {
5863      die "I need a $arg argument" unless defined $args{$arg};
5864   }
5865   my $attrib = $args{attrib};
5866   my $where   = $args{where};
5867
5868   my $stats      = $self->results();
5869   my $metrics    = $self->stats();
5870   my $store      = $stats->{classes}->{$where}->{$attrib};
5871   my $global_cnt = $stats->{globals}->{$attrib}->{cnt};
5872
5873   return {
5874      cnt    => $store->{cnt},
5875      pct    => $global_cnt && $store->{cnt} ? $store->{cnt} / $global_cnt : 0,
5876      sum    => $store->{sum},
5877      min    => $store->{min},
5878      max    => $store->{max},
5879      avg    => $store->{sum} && $store->{cnt} ? $store->{sum} / $store->{cnt} : 0,
5880      median => $metrics->{classes}->{$where}->{$attrib}->{median} || 0,
5881      pct_95 => $metrics->{classes}->{$where}->{$attrib}->{pct_95} || 0,
5882      stddev => $metrics->{classes}->{$where}->{$attrib}->{stddev} || 0,
5883   };
5884}
5885
5886sub top_events {
5887   my ( $self, %args ) = @_;
5888   my $classes = $self->{result_classes};
5889   my @sorted = reverse sort { # Sorted list of $groupby values
5890      $classes->{$a}->{$args{attrib}}->{$args{orderby}}
5891         <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}}
5892         || tiebreaker($classes->{$a}, $classes->{$b});
5893      } grep {
5894         defined $classes->{$_}->{$args{attrib}}->{$args{orderby}}
5895      } keys %$classes;  # this should first be sorted for test consistency, but many tests already in place would fail
5896   my @chosen;  # top events
5897   my @other;   # other events (< top)
5898   my ($total, $count) = (0, 0);
5899   foreach my $groupby ( @sorted ) {
5900      if (
5901         (!$args{total} || $total < $args{total} )
5902         && ( !$args{count} || $count < $args{count} )
5903      ) {
5904         push @chosen, [$groupby, 'top', $count+1];
5905      }
5906
5907      elsif ( $args{ol_attrib} && (!$args{ol_freq}
5908         || $classes->{$groupby}->{$args{ol_attrib}}->{cnt} >= $args{ol_freq})
5909      ) {
5910         my $stats = $self->{class_metrics}->{$groupby}->{$args{ol_attrib}};
5911         if ( ($stats->{pct_95} || 0) >= $args{ol_limit} ) {
5912            push @chosen, [$groupby, 'outlier', $count+1];
5913         }
5914         else {
5915            push @other, [$groupby, 'misc', $count+1];
5916         }
5917      }
5918
5919      else {
5920         push @other, [$groupby, 'misc', $count+1];
5921      }
5922
5923      $total += $classes->{$groupby}->{$args{attrib}}->{$args{orderby}};
5924      $count++;
5925   }
5926   return \@chosen, \@other;
5927}
5928
5929sub tiebreaker {
5930    my ($a, $b) = @_;
5931   if (defined $a->{pos_in_log}) {
5932      return $a->{pos_in_log}->{max} cmp $b->{pos_in_log}->{max};
5933   }
5934   return 0;
5935
5936}
5937
5938sub add_new_attributes {
5939   my ( $self, $event ) = @_;
5940   return unless $event;
5941
5942   map {
5943      my $attrib = $_;
5944      $self->{attributes}->{$attrib}  = [$attrib];
5945      $self->{alt_attribs}->{$attrib} = make_alt_attrib($attrib);
5946      push @{$self->{all_attribs}}, $attrib;
5947      PTDEBUG && _d('Added new attribute:', $attrib);
5948   }
5949   grep {
5950      $_ ne $self->{groupby}
5951      && !exists $self->{attributes}->{$_}
5952      && !exists $self->{ignore_attribs}->{$_}
5953   }
5954   keys %$event;
5955
5956   return;
5957}
5958
5959sub get_attributes {
5960   my ( $self ) = @_;
5961   return $self->{all_attribs};
5962}
5963
5964sub events_processed {
5965   my ( $self ) = @_;
5966   return $self->{n_events};
5967}
5968
5969sub make_alt_attrib {
5970   my ( @attribs ) = @_;
5971
5972   my $attrib = shift @attribs;  # Primary attribute.
5973   return sub {} unless @attribs;  # No alternates.
5974
5975   my @lines;
5976   push @lines, 'sub { my ( $event ) = @_; my $alt_attrib;';
5977   push @lines, map  {
5978         "\$alt_attrib = '$_' if !defined \$alt_attrib "
5979         . "&& exists \$event->{'$_'};"
5980      } @attribs;
5981   push @lines, 'return $alt_attrib; }';
5982   PTDEBUG && _d('alt attrib sub for', $attrib, ':', @lines);
5983   my $sub = eval join("\n", @lines);
5984   die if $EVAL_ERROR;
5985   return $sub;
5986}
5987
5988sub merge {
5989   my ( @ea_objs ) = @_;
5990   PTDEBUG && _d('Merging', scalar @ea_objs, 'ea');
5991   return unless scalar @ea_objs;
5992
5993   my $ea1   = shift @ea_objs;
5994   my $r1    = $ea1->results;
5995   my $worst = $ea1->{worst};  # for merging, finding worst sample
5996
5997   my %attrib_types = %{ $ea1->attributes() };
5998
5999   foreach my $ea ( @ea_objs ) {
6000      die "EventAggregator objects have different groupby: "
6001         . "$ea1->{groupby} and $ea->{groupby}"
6002         unless $ea1->{groupby} eq $ea->{groupby};
6003      die "EventAggregator objects have different worst: "
6004         . "$ea1->{worst} and $ea->{worst}"
6005         unless $ea1->{worst} eq $ea->{worst};
6006
6007      my $attrib_types = $ea->attributes();
6008      map {
6009         $attrib_types{$_} = $attrib_types->{$_}
6010            unless exists $attrib_types{$_};
6011      } keys %$attrib_types;
6012   }
6013
6014   my $r_merged = {
6015      classes => {},
6016      globals => _deep_copy_attribs($r1->{globals}),
6017      samples => {},
6018   };
6019   map {
6020      $r_merged->{classes}->{$_}
6021         = _deep_copy_attribs($r1->{classes}->{$_});
6022
6023      @{$r_merged->{samples}->{$_}}{keys %{$r1->{samples}->{$_}}}
6024         = values %{$r1->{samples}->{$_}};
6025   } keys %{$r1->{classes}};
6026
6027   for my $i ( 0..$#ea_objs ) {
6028      PTDEBUG && _d('Merging ea obj', ($i + 1));
6029      my $r2 = $ea_objs[$i]->results;
6030
6031      eval {
6032         CLASS:
6033         foreach my $class ( keys %{$r2->{classes}} ) {
6034            my $r1_class = $r_merged->{classes}->{$class};
6035            my $r2_class = $r2->{classes}->{$class};
6036
6037            if ( $r1_class && $r2_class ) {
6038               CLASS_ATTRIB:
6039               foreach my $attrib ( keys %$r2_class ) {
6040                  PTDEBUG && _d('merge', $attrib);
6041                  if ( $r1_class->{$attrib} && $r2_class->{$attrib} ) {
6042                     _add_attrib_vals($r1_class->{$attrib}, $r2_class->{$attrib});
6043                  }
6044                  elsif ( !$r1_class->{$attrib} ) {
6045                  PTDEBUG && _d('copy', $attrib);
6046                     $r1_class->{$attrib} =
6047                        _deep_copy_attrib_vals($r2_class->{$attrib})
6048                  }
6049               }
6050            }
6051            elsif ( !$r1_class ) {
6052               PTDEBUG && _d('copy class');
6053               $r_merged->{classes}->{$class} = _deep_copy_attribs($r2_class);
6054            }
6055
6056            my $new_worst_sample;
6057            if ( $r_merged->{samples}->{$class} && $r2->{samples}->{$class} ) {
6058               if (   $r2->{samples}->{$class}->{$worst}
6059                    > $r_merged->{samples}->{$class}->{$worst} ) {
6060                  $new_worst_sample = $r2->{samples}->{$class}
6061               }
6062            }
6063            elsif ( !$r_merged->{samples}->{$class} ) {
6064               $new_worst_sample = $r2->{samples}->{$class};
6065            }
6066            if ( $new_worst_sample ) {
6067               PTDEBUG && _d('New worst sample:', $worst, '=',
6068                  $new_worst_sample->{$worst}, 'item:', substr($class, 0, 100));
6069               my %new_sample;
6070               @new_sample{keys %$new_worst_sample}
6071                  = values %$new_worst_sample;
6072               $r_merged->{samples}->{$class} = \%new_sample;
6073            }
6074         }
6075      };
6076      if ( $EVAL_ERROR ) {
6077         warn "Error merging class/sample: $EVAL_ERROR";
6078      }
6079
6080      eval {
6081         GLOBAL_ATTRIB:
6082         PTDEBUG && _d('Merging global attributes');
6083         foreach my $attrib ( keys %{$r2->{globals}} ) {
6084            my $r1_global = $r_merged->{globals}->{$attrib};
6085            my $r2_global = $r2->{globals}->{$attrib};
6086
6087            if ( $r1_global && $r2_global ) {
6088               PTDEBUG && _d('merge', $attrib);
6089               _add_attrib_vals($r1_global, $r2_global);
6090            }
6091            elsif ( !$r1_global ) {
6092               PTDEBUG && _d('copy', $attrib);
6093               $r_merged->{globals}->{$attrib}
6094                  = _deep_copy_attrib_vals($r2_global);
6095            }
6096         }
6097      };
6098      if ( $EVAL_ERROR ) {
6099         warn "Error merging globals: $EVAL_ERROR";
6100      }
6101   }
6102
6103   my $ea_merged = new EventAggregator(
6104      groupby    => $ea1->{groupby},
6105      worst      => $ea1->{worst},
6106      attributes => { map { $_=>[$_] } keys %attrib_types },
6107   );
6108   $ea_merged->set_results($r_merged);
6109   $ea_merged->set_attribute_types(\%attrib_types);
6110   return $ea_merged;
6111}
6112
6113sub _add_attrib_vals {
6114   my ( $vals1, $vals2 ) = @_;
6115
6116   foreach my $val ( keys %$vals1 ) {
6117      my $val1 = $vals1->{$val};
6118      my $val2 = $vals2->{$val};
6119
6120      if ( (!ref $val1) && (!ref $val2) ) {
6121         die "undefined $val value" unless defined $val1 && defined $val2;
6122
6123         my $is_num = exists $vals1->{sum} ? 1 : 0;
6124         if ( $val eq 'max' ) {
6125            if ( $is_num ) {
6126               $vals1->{$val} = $val1 > $val2  ? $val1 : $val2;
6127            }
6128            else {
6129               $vals1->{$val} = $val1 gt $val2 ? $val1 : $val2;
6130            }
6131         }
6132         elsif ( $val eq 'min' ) {
6133            if ( $is_num ) {
6134               $vals1->{$val} = $val1 < $val2  ? $val1 : $val2;
6135            }
6136            else {
6137               $vals1->{$val} = $val1 lt $val2 ? $val1 : $val2;
6138            }
6139         }
6140         else {
6141            $vals1->{$val} += $val2;
6142         }
6143      }
6144      elsif ( (ref $val1 eq 'ARRAY') && (ref $val2 eq 'ARRAY') ) {
6145         die "Empty $val arrayref" unless @$val1 && @$val2;
6146         my $n_buckets = (scalar @$val1) - 1;
6147         for my $i ( 0..$n_buckets ) {
6148            $vals1->{$val}->[$i] += $val2->[$i];
6149         }
6150      }
6151      elsif ( (ref $val1 eq 'HASH')  && (ref $val2 eq 'HASH')  ) {
6152         die "Empty $val hashref" unless %$val1 and %$val2;
6153         map { $vals1->{$val}->{$_} += $val2->{$_} } keys %$val2;
6154      }
6155      else {
6156         PTDEBUG && _d('vals1:', Dumper($vals1));
6157         PTDEBUG && _d('vals2:', Dumper($vals2));
6158         die "$val type mismatch";
6159      }
6160   }
6161
6162   return;
6163}
6164
6165sub _deep_copy_attribs {
6166   my ( $attribs ) = @_;
6167   my $copy = {};
6168   foreach my $attrib ( keys %$attribs ) {
6169      $copy->{$attrib} = _deep_copy_attrib_vals($attribs->{$attrib});
6170   }
6171   return $copy;
6172}
6173
6174sub _deep_copy_attrib_vals {
6175   my ( $vals ) = @_;
6176   my $copy;
6177   if ( ref $vals eq 'HASH' ) {
6178      $copy = {};
6179      foreach my $val ( keys %$vals ) {
6180         if ( my $ref_type = ref $val ) {
6181            if ( $ref_type eq 'ARRAY' ) {
6182               my $n_elems = (scalar @$val) - 1;
6183               $copy->{$val} = [ map { undef } ( 0..$n_elems ) ];
6184               for my $i ( 0..$n_elems ) {
6185                  $copy->{$val}->[$i] = $vals->{$val}->[$i];
6186               }
6187            }
6188            elsif ( $ref_type eq 'HASH' ) {
6189               $copy->{$val} = {};
6190               map { $copy->{$val}->{$_} += $vals->{$val}->{$_} }
6191                  keys %{$vals->{$val}}
6192            }
6193            else {
6194               die "I don't know how to deep copy a $ref_type reference";
6195            }
6196         }
6197         else {
6198            $copy->{$val} = $vals->{$val};
6199         }
6200      }
6201   }
6202   else {
6203      $copy = $vals;
6204   }
6205   return $copy;
6206}
6207
6208sub _get_value {
6209   my ( $self, %args ) = @_;
6210   my ($event, $attrib, $alts) = @args{qw(event attribute alternates)};
6211   return unless $event && $attrib;
6212
6213   my $value;
6214   if ( exists $event->{$attrib} ) {
6215      $value = $event->{$attrib};
6216   }
6217   elsif ( $alts ) {
6218      my $found_value = 0;
6219      foreach my $alt_attrib( @$alts ) {
6220         if ( exists $event->{$alt_attrib} ) {
6221            $value       = $event->{$alt_attrib};
6222            $found_value = 1;
6223            last;
6224         }
6225      }
6226      die "Event does not have attribute $attrib or any of its alternates"
6227         unless $found_value;
6228   }
6229   else {
6230      die "Event does not have attribute $attrib and there are no alterantes";
6231   }
6232
6233   return $value;
6234}
6235
6236sub _d {
6237   my ($package, undef, $line) = caller 0;
6238   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
6239        map { defined $_ ? $_ : 'undef' }
6240        @_;
6241   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
6242}
6243
62441;
6245}
6246# ###########################################################################
6247# End EventAggregator package
6248# ###########################################################################
6249
6250# ###########################################################################
6251# ReportFormatter package
6252# This package is a copy without comments from the original.  The original
6253# with comments and its test file can be found in the Bazaar repository at,
6254#   lib/ReportFormatter.pm
6255#   t/lib/ReportFormatter.t
6256# See https://launchpad.net/percona-toolkit for more information.
6257# ###########################################################################
6258{
6259package ReportFormatter;
6260
6261use Lmo;
6262use English qw(-no_match_vars);
6263use constant PTDEBUG => $ENV{PTDEBUG} || 0;
6264
6265use List::Util qw(min max);
6266use POSIX qw(ceil);
6267
6268eval { require Term::ReadKey };
6269my $have_term = $EVAL_ERROR ? 0 : 1;
6270
6271
6272has underline_header => (
6273   is      => 'ro',
6274   isa     => 'Bool',
6275   default => sub { 1 },
6276);
6277has line_prefix => (
6278   is      => 'ro',
6279   isa     => 'Str',
6280   default => sub { '# ' },
6281);
6282has line_width => (
6283   is      => 'ro',
6284   isa     => 'Int',
6285   default => sub { 78 },
6286);
6287has column_spacing => (
6288   is      => 'ro',
6289   isa     => 'Str',
6290   default => sub { ' ' },
6291);
6292has extend_right => (
6293   is      => 'ro',
6294   isa     => 'Bool',
6295   default => sub { '' },
6296);
6297has truncate_line_mark => (
6298   is      => 'ro',
6299   isa     => 'Str',
6300   default => sub { '...' },
6301);
6302has column_errors => (
6303   is      => 'ro',
6304   isa     => 'Str',
6305   default => sub { 'warn' },
6306);
6307has truncate_header_side => (
6308   is      => 'ro',
6309   isa     => 'Str',
6310   default => sub { 'left' },
6311);
6312has strip_whitespace => (
6313   is      => 'ro',
6314   isa     => 'Bool',
6315   default => sub { 1 },
6316);
6317has title => (
6318   is        => 'rw',
6319   isa       => 'Str',
6320   predicate => 'has_title',
6321);
6322
6323
6324has n_cols => (
6325   is      => 'rw',
6326   isa     => 'Int',
6327   default => sub { 0 },
6328   init_arg => undef,
6329);
6330
6331has cols => (
6332   is       => 'ro',
6333   isa      => 'ArrayRef',
6334   init_arg => undef,
6335   default  => sub { [] },
6336   clearer  => 'clear_cols',
6337);
6338
6339has lines => (
6340   is       => 'ro',
6341   isa      => 'ArrayRef',
6342   init_arg => undef,
6343   default  => sub { [] },
6344   clearer  => 'clear_lines',
6345);
6346
6347has truncate_headers => (
6348   is       => 'rw',
6349   isa      => 'Bool',
6350   default  => sub { undef },
6351   init_arg => undef,
6352   clearer  => 'clear_truncate_headers',
6353);
6354
6355sub BUILDARGS {
6356   my $class = shift;
6357   my $args  = $class->SUPER::BUILDARGS(@_);
6358
6359   if ( ($args->{line_width} || '') eq 'auto' ) {
6360      die "Cannot auto-detect line width because the Term::ReadKey module "
6361         . "is not installed" unless $have_term;
6362      ($args->{line_width}) = GetTerminalSize();
6363      PTDEBUG && _d('Line width:', $args->{line_width});
6364   }
6365
6366   return $args;
6367}
6368
6369sub set_columns {
6370   my ( $self, @cols ) = @_;
6371   my $min_hdr_wid = 0;  # check that header fits on line
6372   my $used_width  = 0;
6373   my @auto_width_cols;
6374
6375   for my $i ( 0..$#cols ) {
6376      my $col      = $cols[$i];
6377      my $col_name = $col->{name};
6378      my $col_len  = length $col_name;
6379      die "Column does not have a name" unless defined $col_name;
6380
6381      if ( $col->{width} ) {
6382         $col->{width_pct} = ceil(($col->{width} * 100) / $self->line_width());
6383         PTDEBUG && _d('col:', $col_name, 'width:', $col->{width}, 'chars =',
6384            $col->{width_pct}, '%');
6385      }
6386
6387      if ( $col->{width_pct} ) {
6388         $used_width += $col->{width_pct};
6389      }
6390      else {
6391         PTDEBUG && _d('Auto width col:', $col_name);
6392         $col->{auto_width} = 1;
6393         push @auto_width_cols, $i;
6394      }
6395
6396      $col->{truncate}        = 1 unless defined $col->{truncate};
6397      $col->{truncate_mark}   = '...' unless defined $col->{truncate_mark};
6398      $col->{truncate_side} ||= 'right';
6399      $col->{undef_value}     = '' unless defined $col->{undef_value};
6400
6401      $col->{min_val} = 0;
6402      $col->{max_val} = 0;
6403
6404      $min_hdr_wid        += $col_len;
6405      $col->{header_width} = $col_len;
6406
6407      $col->{right_most} = 1 if $i == $#cols;
6408
6409      push @{$self->cols}, $col;
6410   }
6411
6412   $self->n_cols( scalar @cols );
6413
6414   if ( ($used_width || 0) > 100 ) {
6415      die "Total width_pct for all columns is >100%";
6416   }
6417
6418   if ( @auto_width_cols ) {
6419      my $wid_per_col = int((100 - $used_width) / scalar @auto_width_cols);
6420      PTDEBUG && _d('Line width left:', (100-$used_width), '%;',
6421         'each auto width col:', $wid_per_col, '%');
6422      map { $self->cols->[$_]->{width_pct} = $wid_per_col } @auto_width_cols;
6423   }
6424
6425   $min_hdr_wid += ($self->n_cols() - 1) * length $self->column_spacing();
6426   PTDEBUG && _d('min header width:', $min_hdr_wid);
6427   if ( $min_hdr_wid > $self->line_width() ) {
6428      PTDEBUG && _d('Will truncate headers because min header width',
6429         $min_hdr_wid, '> line width', $self->line_width());
6430      $self->truncate_headers(1);
6431   }
6432
6433   return;
6434}
6435
6436sub add_line {
6437   my ( $self, @vals ) = @_;
6438   my $n_vals = scalar @vals;
6439   if ( $n_vals != $self->n_cols() ) {
6440      $self->_column_error("Number of values $n_vals does not match "
6441         . "number of columns " . $self->n_cols());
6442   }
6443   for my $i ( 0..($n_vals-1) ) {
6444      my $col   = $self->cols->[$i];
6445      my $val   = defined $vals[$i] ? $vals[$i] : $col->{undef_value};
6446      if ( $self->strip_whitespace() ) {
6447         $val =~ s/^\s+//g;
6448         $val =~ s/\s+$//;
6449         $vals[$i] = $val;
6450      }
6451      my $width = length $val;
6452      $col->{min_val} = min($width, ($col->{min_val} || $width));
6453      $col->{max_val} = max($width, ($col->{max_val} || $width));
6454   }
6455   push @{$self->lines}, \@vals;
6456   return;
6457}
6458
6459sub get_report {
6460   my ( $self, %args ) = @_;
6461
6462   $self->_calculate_column_widths();
6463   if ( $self->truncate_headers() ) {
6464      $self->_truncate_headers();
6465   }
6466   $self->_truncate_line_values(%args);
6467
6468   my @col_fmts = $self->_make_column_formats();
6469   my $fmt      = $self->line_prefix()
6470                . join($self->column_spacing(), @col_fmts);
6471   PTDEBUG && _d('Format:', $fmt);
6472
6473   (my $hdr_fmt = $fmt) =~ s/%([^-])/%-$1/g;
6474
6475   my @lines;
6476   push @lines, $self->line_prefix() . $self->title() if $self->has_title();
6477   push @lines, $self->_truncate_line(
6478         sprintf($hdr_fmt, map { $_->{name} } @{$self->cols}),
6479         strip => 1,
6480         mark  => '',
6481   );
6482
6483   if ( $self->underline_header() ) {
6484      my @underlines = map { '=' x $_->{print_width} } @{$self->cols};
6485      push @lines, $self->_truncate_line(
6486         sprintf($fmt, map { $_ || '' } @underlines),
6487         mark  => '',
6488      );
6489   }
6490
6491   push @lines, map {
6492      my $vals = $_;
6493      my $i    = 0;
6494      my @vals = map {
6495            my $val = defined $_ ? $_ : $self->cols->[$i++]->{undef_value};
6496            $val = '' if !defined $val;
6497            $val =~ s/\n/ /g;
6498            $val;
6499      } @$vals;
6500      my $line = sprintf($fmt, @vals);
6501      if ( $self->extend_right() ) {
6502         $line;
6503      }
6504      else {
6505         $self->_truncate_line($line);
6506      }
6507   } @{$self->lines};
6508
6509   $self->clear_cols();
6510   $self->clear_lines();
6511   $self->clear_truncate_headers();
6512
6513   return join("\n", @lines) . "\n";
6514}
6515
6516sub truncate_value {
6517   my ( $self, $col, $val, $width, $side ) = @_;
6518   return $val if length $val <= $width;
6519   return $val if $col->{right_most} && $self->extend_right();
6520   $side  ||= $col->{truncate_side};
6521   my $mark = $col->{truncate_mark};
6522   if ( $side eq 'right' ) {
6523      $val  = substr($val, 0, $width - length $mark);
6524      $val .= $mark;
6525   }
6526   elsif ( $side eq 'left') {
6527      $val = $mark . substr($val, -1 * $width + length $mark);
6528   }
6529   else {
6530      PTDEBUG && _d("I don't know how to", $side, "truncate values");
6531   }
6532   return $val;
6533}
6534
6535sub _calculate_column_widths {
6536   my ( $self ) = @_;
6537
6538   my $extra_space = 0;
6539   foreach my $col ( @{$self->cols} ) {
6540      my $print_width = int($self->line_width() * ($col->{width_pct} / 100));
6541
6542      PTDEBUG && _d('col:', $col->{name}, 'width pct:', $col->{width_pct},
6543         'char width:', $print_width,
6544         'min val:', $col->{min_val}, 'max val:', $col->{max_val});
6545
6546      if ( $col->{auto_width} ) {
6547         if ( $col->{min_val} && $print_width < $col->{min_val} ) {
6548            PTDEBUG && _d('Increased to min val width:', $col->{min_val});
6549            $print_width = $col->{min_val};
6550         }
6551         elsif ( $col->{max_val} &&  $print_width > $col->{max_val} ) {
6552            PTDEBUG && _d('Reduced to max val width:', $col->{max_val});
6553            $extra_space += $print_width - $col->{max_val};
6554            $print_width  = $col->{max_val};
6555         }
6556      }
6557
6558      $col->{print_width} = $print_width;
6559      PTDEBUG && _d('print width:', $col->{print_width});
6560   }
6561
6562   PTDEBUG && _d('Extra space:', $extra_space);
6563   while ( $extra_space-- ) {
6564      foreach my $col ( @{$self->cols} ) {
6565         if (    $col->{auto_width}
6566              && (    $col->{print_width} < $col->{max_val}
6567                   || $col->{print_width} < $col->{header_width})
6568         ) {
6569            $col->{print_width}++;
6570         }
6571      }
6572   }
6573
6574   return;
6575}
6576
6577sub _truncate_headers {
6578   my ( $self, $col ) = @_;
6579   my $side = $self->truncate_header_side();
6580   foreach my $col ( @{$self->cols} ) {
6581      my $col_name    = $col->{name};
6582      my $print_width = $col->{print_width};
6583      next if length $col_name <= $print_width;
6584      $col->{name}  = $self->truncate_value($col, $col_name, $print_width, $side);
6585      PTDEBUG && _d('Truncated hdr', $col_name, 'to', $col->{name},
6586         'max width:', $print_width);
6587   }
6588   return;
6589}
6590
6591sub _truncate_line_values {
6592   my ( $self, %args ) = @_;
6593   my $n_vals = $self->n_cols() - 1;
6594   foreach my $vals ( @{$self->lines} ) {
6595      for my $i ( 0..$n_vals ) {
6596         my $col   = $self->cols->[$i];
6597         my $val   = defined $vals->[$i] ? $vals->[$i] : $col->{undef_value};
6598         my $width = length $val;
6599
6600         if ( $col->{print_width} && $width > $col->{print_width} ) {
6601            if ( !$col->{truncate} ) {
6602               $self->_column_error("Value '$val' is too wide for column "
6603                  . $col->{name});
6604            }
6605
6606            my $callback    = $args{truncate_callback};
6607            my $print_width = $col->{print_width};
6608            $val = $callback ? $callback->($col, $val, $print_width)
6609                 :             $self->truncate_value($col, $val, $print_width);
6610            PTDEBUG && _d('Truncated val', $vals->[$i], 'to', $val,
6611               '; max width:', $print_width);
6612            $vals->[$i] = $val;
6613         }
6614      }
6615   }
6616   return;
6617}
6618
6619sub _make_column_formats {
6620   my ( $self ) = @_;
6621   my @col_fmts;
6622   my $n_cols = $self->n_cols() - 1;
6623   for my $i ( 0..$n_cols ) {
6624      my $col = $self->cols->[$i];
6625
6626      my $width = $col->{right_most} && !$col->{right_justify} ? ''
6627                : $col->{print_width};
6628
6629      my $col_fmt  = '%' . ($col->{right_justify} ? '' : '-') . $width . 's';
6630      push @col_fmts, $col_fmt;
6631   }
6632   return @col_fmts;
6633}
6634
6635sub _truncate_line {
6636   my ( $self, $line, %args ) = @_;
6637   my $mark = defined $args{mark} ? $args{mark} : $self->truncate_line_mark();
6638   if ( $line ) {
6639      $line =~ s/\s+$// if $args{strip};
6640      my $len  = length($line);
6641      if ( $len > $self->line_width() ) {
6642         $line  = substr($line, 0, $self->line_width() - length $mark);
6643         $line .= $mark if $mark;
6644      }
6645   }
6646   return $line;
6647}
6648
6649sub _column_error {
6650   my ( $self, $err ) = @_;
6651   my $msg = "Column error: $err";
6652   $self->column_errors() eq 'die' ? die $msg : warn $msg;
6653   return;
6654}
6655
6656sub _d {
6657   my ($package, undef, $line) = caller 0;
6658   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
6659        map { defined $_ ? $_ : 'undef' }
6660        @_;
6661   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
6662}
6663
6664no Lmo;
66651;
6666}
6667# ###########################################################################
6668# End ReportFormatter package
6669# ###########################################################################
6670
6671# ###########################################################################
6672# QueryReportFormatter package
6673# This package is a copy without comments from the original.  The original
6674# with comments and its test file can be found in the Bazaar repository at,
6675#   lib/QueryReportFormatter.pm
6676#   t/lib/QueryReportFormatter.t
6677# See https://launchpad.net/percona-toolkit for more information.
6678# ###########################################################################
6679{
6680package QueryReportFormatter;
6681
6682use Lmo;
6683use English qw(-no_match_vars);
6684use POSIX qw(floor);
6685
6686Transformers->import(qw(
6687   shorten micro_t parse_timestamp unix_timestamp make_checksum percentage_of
6688   crc32
6689));
6690
6691use constant PTDEBUG           => $ENV{PTDEBUG} || 0;
6692use constant LINE_LENGTH       => 74;
6693use constant MAX_STRING_LENGTH => 10;
6694
6695{ local $EVAL_ERROR; eval { require Quoter } };
6696{ local $EVAL_ERROR; eval { require ReportFormatter } };
6697
6698has Quoter => (
6699   is      => 'ro',
6700   isa     => 'Quoter',
6701   default => sub { Quoter->new() },
6702);
6703
6704has label_width => (
6705   is      => 'ro',
6706   isa     => 'Int',
6707);
6708
6709has global_headers => (
6710   is      => 'ro',
6711   isa     => 'ArrayRef',
6712   default => sub { [qw(    total min max avg 95% stddev median)] },
6713);
6714
6715has event_headers => (
6716   is      => 'ro',
6717   isa     => 'ArrayRef',
6718   default => sub { [qw(pct total min max avg 95% stddev median)] },
6719);
6720
6721has show_all => (
6722   is      => 'ro',
6723   isa     => 'HashRef',
6724   default => sub { {} },
6725);
6726
6727has ReportFormatter => (
6728   is      => 'ro',
6729   isa     => 'ReportFormatter',
6730   builder => '_build_report_formatter',
6731);
6732
6733sub _build_report_formatter {
6734   return ReportFormatter->new(
6735      line_width       => LINE_LENGTH,
6736      extend_right     => 1,
6737   );
6738}
6739
6740sub BUILDARGS {
6741   my $class = shift;
6742   my $args  = $class->SUPER::BUILDARGS(@_);
6743
6744   foreach my $arg ( qw(OptionParser QueryRewriter) ) {
6745      die "I need a $arg argument" unless $args->{$arg};
6746   }
6747
6748   my $label_width = $args->{label_width} ||= 12;
6749   PTDEBUG && _d('Label width:', $label_width);
6750
6751   my $o = delete $args->{OptionParser};
6752   my $self = {
6753      %$args,
6754      options        => {
6755         shorten          => 1024,
6756         report_all       => $o->get('report-all'),
6757         report_histogram => $o->get('report-histogram'),
6758         output           => $o->got('output') ? $o->get('output') : '',
6759      },
6760      num_format     => '# %1$-'.$label_width.'s %2$3s %3$7s %4$7s %5$7s %6$7s %7$7s %8$7s %9$7s',
6761      bool_format    => '# %1$-'.$label_width.'s %2$3d%% yes, %3$3d%% no',
6762      string_format  => '# %1$-'.$label_width.'s %2$s',
6763      no_partitions  => 0,
6764      hidden_attrib  => {   # Don't sort/print these attribs in the reports.
6765         arg         => 1, # They're usually handled specially, or not
6766         fingerprint => 1, # printed at all.
6767         pos_in_log  => 1,
6768         ts          => 1,
6769      },
6770   };
6771   if (!defined($self->{max_hostname_length})) {
6772       $self->{max_hostname_length} = MAX_STRING_LENGTH;
6773   }
6774   if (!defined($self->{max_line_length})) {
6775       $self->{max_line_length} = LINE_LENGTH;
6776   }
6777   return $self;
6778}
6779
6780sub print_reports {
6781   my ( $self, %args ) = @_;
6782   foreach my $arg ( qw(reports ea worst orderby groupby) ) {
6783      die "I need a $arg argument" unless exists $args{$arg};
6784   }
6785   my $reports = $args{reports};
6786   my $group   = $args{group};
6787   my $last_report;
6788
6789   foreach my $report ( @$reports ) {
6790      PTDEBUG && _d('Printing', $report, 'report');
6791      my $report_output = $self->$report(%args);
6792      if ( $report_output ) {
6793         print "\n"
6794            if !$last_report || !($group->{$last_report} && $group->{$report});
6795         print $report_output;
6796      }
6797      else {
6798         PTDEBUG && _d('No', $report, 'report');
6799      }
6800      $last_report = $report;
6801   }
6802
6803   return;
6804}
6805
6806sub rusage {
6807   my ( $self ) = @_;
6808   my ( $rss, $vsz, $user, $system ) = ( 0, 0, 0, 0 );
6809   my $rusage = '';
6810   eval {
6811      my $mem = `ps -o rss,vsz -p $PID 2>&1`;
6812      ( $rss, $vsz ) = $mem =~ m/(\d+)/g;
6813      ( $user, $system ) = times();
6814      $rusage = sprintf "# %s user time, %s system time, %s rss, %s vsz\n",
6815         micro_t( $user,   p_s => 1, p_ms => 1 ),
6816         micro_t( $system, p_s => 1, p_ms => 1 ),
6817         shorten( ($rss || 0) * 1_024 ),
6818         shorten( ($vsz || 0) * 1_024 );
6819   };
6820   if ( $EVAL_ERROR ) {
6821      PTDEBUG && _d($EVAL_ERROR);
6822   }
6823   return $rusage ? $rusage : "# Could not get rusage\n";
6824}
6825
6826sub date {
6827   my ( $self ) = @_;
6828   return "# Current date: " . (scalar localtime) . "\n";
6829}
6830
6831sub hostname {
6832   my ( $self ) = @_;
6833   my $hostname = `hostname`;
6834   if ( $hostname ) {
6835      chomp $hostname;
6836      return "# Hostname: $hostname\n";
6837   }
6838   return;
6839}
6840
6841sub files {
6842   my ( $self, %args ) = @_;
6843   if ( $args{files} ) {
6844      return "# Files: " . join(', ', map { $_->{name} } @{$args{files}}) . "\n";
6845   }
6846   return;
6847}
6848
6849sub header {
6850   my ( $self, %args ) = @_;
6851   foreach my $arg ( qw(ea orderby) ) {
6852      die "I need a $arg argument" unless defined $args{$arg};
6853   }
6854   my $ea      = $args{ea};
6855   my $orderby = $args{orderby};
6856   my $results = $ea->results();
6857   my @result;
6858
6859   my $global_cnt = $results->{globals}->{$orderby}->{cnt} || 0;
6860
6861   my ($qps, $conc) = (0, 0);
6862   if ( $global_cnt && $results->{globals}->{ts}
6863      && ($results->{globals}->{ts}->{max} || '')
6864         gt ($results->{globals}->{ts}->{min} || '')
6865   ) {
6866      eval {
6867         my $min  = parse_timestamp($results->{globals}->{ts}->{min});
6868         my $max  = parse_timestamp($results->{globals}->{ts}->{max});
6869         my $diff = unix_timestamp($max) - unix_timestamp($min);
6870         $qps     = $global_cnt / ($diff || 1);
6871         $conc    = $results->{globals}->{$args{orderby}}->{sum} / $diff;
6872      };
6873   }
6874
6875   PTDEBUG && _d('global_cnt:', $global_cnt, 'unique:',
6876      scalar keys %{$results->{classes}}, 'qps:', $qps, 'conc:', $conc);
6877   my $line = sprintf(
6878      '# Overall: %s total, %s unique, %s QPS, %sx concurrency ',
6879      shorten($global_cnt, d=>1_000),
6880      shorten(scalar keys %{$results->{classes}}, d=>1_000),
6881      shorten($qps  || 0, d=>1_000),
6882      shorten($conc || 0, d=>1_000));
6883   $line .= ('_' x (LINE_LENGTH - length($line) + $self->label_width() - 12));
6884   push @result, $line;
6885
6886   if ( my $ts = $results->{globals}->{ts} ) {
6887      my $time_range = $self->format_time_range($ts) || "unknown";
6888      push @result, "# Time range: $time_range";
6889   }
6890
6891   if ( $results->{globals}->{rate_limit} ) {
6892      print "# Rate limits apply\n";
6893   }
6894
6895   push @result, $self->make_global_header();
6896
6897   my $attribs = $self->sort_attribs( $ea );
6898
6899   foreach my $type ( qw(num innodb) ) {
6900      if ( $type eq 'innodb' && @{$attribs->{$type}} ) {
6901         push @result, "# InnoDB:";
6902      };
6903
6904      NUM_ATTRIB:
6905      foreach my $attrib ( @{$attribs->{$type}} ) {
6906         next unless exists $results->{globals}->{$attrib};
6907         my $store   = $results->{globals}->{$attrib};
6908         my $metrics = $ea->stats()->{globals}->{$attrib};
6909         my $func    = $attrib =~ m/time|wait$/ ? \&micro_t : \&shorten;
6910         my @values  = (
6911            @{$store}{qw(sum min max)},
6912            $store->{sum} / $store->{cnt},
6913            @{$metrics}{qw(pct_95 stddev median)},
6914         );
6915         @values = map { defined $_ ? $func->($_) : '' } @values;
6916
6917         push @result,
6918            sprintf $self->{num_format},
6919               $self->make_label($attrib), '', @values;
6920      }
6921   }
6922
6923   if ( @{$attribs->{bool}} ) {
6924      push @result, "# Boolean:";
6925      my $printed_bools = 0;
6926      BOOL_ATTRIB:
6927      foreach my $attrib ( @{$attribs->{bool}} ) {
6928         next unless exists $results->{globals}->{$attrib};
6929
6930         my $store = $results->{globals}->{$attrib};
6931         if ( $store->{sum} > 0 ) {
6932            push @result,
6933               sprintf $self->{bool_format},
6934                  $self->make_label($attrib), $self->bool_percents($store);
6935            $printed_bools = 1;
6936         }
6937      }
6938      pop @result unless $printed_bools;
6939   }
6940
6941   return join("\n", map { s/\s+$//; $_ } @result) . "\n";
6942}
6943
6944sub query_report_values {
6945   my ($self, %args) = @_;
6946   foreach my $arg ( qw(ea worst orderby groupby) ) {
6947      die "I need a $arg argument" unless defined $arg;
6948   }
6949   my $ea      = $args{ea};
6950   my $groupby = $args{groupby};
6951   my $worst   = $args{worst};
6952
6953   my $q   = $self->Quoter;
6954   my $qv  = $self->{QueryReview};
6955   my $qr  = $self->{QueryRewriter};
6956
6957   my @values;
6958   ITEM:
6959   foreach my $top_event ( @$worst ) {
6960      my $item       = $top_event->[0];
6961      my $reason     = $args{explain_why} ? $top_event->[1] : '';
6962      my $rank       = $top_event->[2];
6963      my $stats      = $ea->results->{classes}->{$item};
6964      my $sample     = $ea->results->{samples}->{$item};
6965      my $samp_query = ($self->{options}->{output} eq 'secure-slowlog') ? $sample->{fingerprint} || '' : $sample->{arg} || '';
6966
6967      my %item_vals = (
6968         item       => $item,
6969         samp_query => $samp_query,
6970         rank       => ($rank || 0),
6971         reason     => $reason,
6972      );
6973
6974      my $review_vals;
6975      if ( $qv ) {
6976         $review_vals = $qv->get_review_info($item);
6977         next ITEM if $review_vals->{reviewed_by} && !$self->{options}->{report_all};
6978         for my $col ( $qv->review_cols() ) {
6979            push @{$item_vals{review_vals}}, [$col, $review_vals->{$col}];
6980         }
6981      }
6982
6983      $item_vals{default_db} = $sample->{db}       ? $sample->{db}
6984                              : $stats->{db}->{unq} ? keys %{$stats->{db}->{unq}}
6985                              :                       undef;
6986      $item_vals{tables} = [$self->{QueryParser}->extract_tables(
6987            query      => $samp_query,
6988            default_db => $item_vals{default_db},
6989            Quoter     => $self->Quoter,
6990         )];
6991
6992      if ( $samp_query && ($args{variations} && @{$args{variations}}) ) {
6993         $item_vals{crc} = crc32($samp_query);
6994      }
6995
6996      push @values, \%item_vals;
6997   }
6998   return \@values;
6999}
7000
7001sub query_report {
7002   my ( $self, %args ) = @_;
7003
7004   my $ea      = $args{ea};
7005   my $groupby = $args{groupby};
7006   my $report_values = $self->query_report_values(%args);
7007
7008   my $qr  = $self->{QueryRewriter};
7009
7010   my $report = '';
7011
7012   if ( $args{print_header} ) {
7013      $report .= "# " . ( '#' x 72 ) . "\n"
7014               . "# Report grouped by $groupby\n"
7015               . '# ' . ( '#' x 72 ) . "\n\n";
7016   }
7017
7018   my $attribs = $self->sort_attribs( $ea );
7019
7020   ITEM:
7021   foreach my $vals ( @$report_values ) {
7022      my $item = $vals->{item};
7023      $report .= "\n" if $vals->{rank} > 1;  # space between each event report
7024      $report .= $self->event_report(
7025         %args,
7026         item    => $item,
7027         sample  => $ea->results->{samples}->{$item},
7028         rank    => $vals->{rank},
7029         reason  => $vals->{reason},
7030         attribs => $attribs,
7031         db      => $vals->{default_db},
7032      );
7033
7034      if ( $self->{options}->{report_histogram} ) {
7035         $report .= $self->chart_distro(
7036            %args,
7037            attrib => $self->{options}->{report_histogram},
7038            item   => $vals->{item},
7039         );
7040      }
7041
7042      if ( $vals->{review_vals} ) {
7043         $report .= "# Review information\n";
7044         foreach my $elem ( @{$vals->{review_vals}} ) {
7045            my ($col, $val) = @$elem;
7046            if ( !$val || $val ne '0000-00-00 00:00:00' ) { # issue 202
7047               $report .= sprintf "# %13s: %-s\n", $col, ($val ? $val : '');
7048            }
7049         }
7050      }
7051
7052      my $partitions_msg = $self->{no_partitions} ? '' : '/*!50100 PARTITIONS*/';
7053      if ( $groupby eq 'fingerprint' ) {
7054         my $samp_query = $qr->shorten($vals->{samp_query}, $self->{options}->{shorten})
7055            if $self->{options}->{shorten};
7056
7057         PTDEBUG && _d("Fingerprint\n#    $vals->{item}\n");
7058
7059         $report .= $self->tables_report($vals->{tables}, \%args);
7060
7061         if ( $vals->{crc} ) {
7062            $report.= "# CRC " . ($vals->{crc} % 1_000) . "\n";
7063         }
7064
7065         my $log_type = $args{log_type} || '';
7066         my $mark     = $args{no_v_format} ? '' : '\G';
7067
7068         if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
7069            if ( $item =~ m/^(?:insert|replace)/ ) { # No EXPLAIN
7070               $report .= "$samp_query${mark}\n";
7071            }
7072            else {
7073               $report .= "# EXPLAIN $partitions_msg\n$samp_query${mark}\n";
7074               $report .= $self->explain_report($samp_query, $vals->{default_db});
7075            }
7076         }
7077         else {
7078            $report .= "$samp_query${mark}\n";
7079            my $converted = $qr->convert_to_select($samp_query);
7080            if ( $converted
7081                 && $converted =~ m/^[\(\s]*select/i ) {
7082               $report .= "# Converted for EXPLAIN\n# EXPLAIN $partitions_msg\n$converted${mark}\n";
7083            }
7084         }
7085      }
7086      else {
7087         if ( $groupby eq 'tables' ) {
7088            my ( $db, $tbl ) = $self->Quoter->split_unquote($item);
7089            $report .= $self->tables_report([ [$db, $tbl] ], \%args);
7090         }
7091         $report .= "$item\n";
7092      }
7093   }
7094
7095   return $report;
7096}
7097
7098sub event_report_values {
7099   my ($self, %args) = @_;
7100
7101   my $ea   = $args{ea};
7102   my $item = $args{item};
7103   my $orderby = $args{orderby};
7104   my $results = $ea->results();
7105
7106   my %vals;
7107
7108   my $store = $results->{classes}->{$item};
7109
7110   return unless $store;
7111
7112   my $global_cnt = $results->{globals}->{$orderby}->{cnt};
7113   my $class_cnt  = $store->{$orderby}->{cnt};
7114
7115   my ($qps, $conc) = (0, 0);
7116   if ( $global_cnt && $store->{ts}
7117      && ($store->{ts}->{max} || '')
7118         gt ($store->{ts}->{min} || '')
7119   ) {
7120      eval {
7121         my $min  = parse_timestamp($store->{ts}->{min});
7122         my $max  = parse_timestamp($store->{ts}->{max});
7123         my $diff = unix_timestamp($max) - unix_timestamp($min);
7124         $qps     = $class_cnt / $diff;
7125         $conc    = $store->{$orderby}->{sum} / $diff;
7126      };
7127   }
7128
7129   $vals{groupby}     = $ea->{groupby};
7130   $vals{qps}         = $qps  || 0;
7131   $vals{concurrency} = $conc || 0;
7132   $vals{checksum}    = make_checksum($item);
7133   $vals{pos_in_log}  = $results->{samples}->{$item}->{pos_in_log} || 0;
7134   $vals{reason}      = $args{reason};
7135   $vals{variance_to_mean} = do {
7136      my $query_time = $ea->metrics(where => $item, attrib => 'Query_time');
7137      $query_time->{stddev}**2 / ($query_time->{avg} || 1)
7138   };
7139
7140   $vals{counts} = {
7141      class_cnt        => $class_cnt,
7142      global_cnt       => $global_cnt,
7143   };
7144
7145   if ( my $ts = $store->{ts}) {
7146      $vals{time_range} = $self->format_time_range($ts) || "unknown";
7147   }
7148
7149   my $attribs = $args{attribs};
7150   if ( !$attribs ) {
7151      $attribs = $self->sort_attribs( $ea );
7152   }
7153
7154   $vals{attributes} = { map { $_ => [] } qw(num innodb bool string) };
7155
7156   foreach my $type ( qw(num innodb) ) {
7157
7158      NUM_ATTRIB:
7159      foreach my $attrib ( @{$attribs->{$type}} ) {
7160         next NUM_ATTRIB unless exists $store->{$attrib};
7161         my $vals = $store->{$attrib};
7162         next unless scalar %$vals;
7163
7164         my $pct;
7165         my $func    = $attrib =~ m/time|wait$/ ? \&micro_t : \&shorten;
7166         my $metrics = $ea->stats()->{classes}->{$item}->{$attrib};
7167         my @values = (
7168            @{$vals}{qw(sum min max)},
7169            $vals->{sum} / $vals->{cnt},
7170            @{$metrics}{qw(pct_95 stddev median)},
7171         );
7172         @values = map { defined $_ ? $func->($_) : '' } @values;
7173         $pct   = percentage_of(
7174            $vals->{sum}, $results->{globals}->{$attrib}->{sum});
7175
7176         push @{$vals{attributes}{$type}},
7177               [ $attrib, $pct, @values ];
7178      }
7179   }
7180
7181   if ( @{$attribs->{bool}} ) {
7182      BOOL_ATTRIB:
7183      foreach my $attrib ( @{$attribs->{bool}} ) {
7184         next BOOL_ATTRIB unless exists $store->{$attrib};
7185         my $vals = $store->{$attrib};
7186         next unless scalar %$vals;
7187
7188         if ( $vals->{sum} > 0 ) {
7189            push @{$vals{attributes}{bool}},
7190                  [ $attrib, $self->bool_percents($vals) ];
7191         }
7192      }
7193   }
7194
7195   if ( @{$attribs->{string}} ) {
7196      STRING_ATTRIB:
7197      foreach my $attrib ( @{$attribs->{string}} ) {
7198         next STRING_ATTRIB unless exists $store->{$attrib};
7199         my $vals = $store->{$attrib};
7200         next unless scalar %$vals;
7201
7202         push @{$vals{attributes}{string}},
7203               [ $attrib, $vals ];
7204      }
7205   }
7206
7207
7208   return \%vals;
7209}
7210
7211
7212sub event_report {
7213   my ( $self, %args ) = @_;
7214   foreach my $arg ( qw(ea item orderby) ) {
7215      die "I need a $arg argument" unless defined $args{$arg};
7216   }
7217
7218   my $item = $args{item};
7219   my $val  = $self->event_report_values(%args);
7220   my @result;
7221
7222   return "# No such event $item\n" unless $val;
7223
7224   my $line = sprintf(
7225      '# %s %d: %s QPS, %sx concurrency, ID 0x%s at byte %.f ',
7226      ($val->{groupby} eq 'fingerprint' ? 'Query' : 'Item'),
7227      $args{rank} || 0,
7228      shorten($val->{qps}, d=>1_000),
7229      shorten($val->{concurrency}, d=>1_000),
7230      $val->{checksum},
7231      $val->{pos_in_log},
7232   );
7233   my $underscores = LINE_LENGTH - length($line) + $self->label_width() - 12;
7234   if ( $underscores < 0 ) {
7235      $underscores = 0;
7236   }
7237   $line .= ('_' x $underscores);
7238   push @result, $line;
7239
7240   if ( $val->{reason} ) {
7241      push @result,
7242         "# This item is included in the report because it matches "
7243            . ($val->{reason} eq 'top' ? '--limit.' : '--outliers.');
7244   }
7245
7246   push @result,
7247      sprintf("# Scores: V/M = %.2f", $val->{variance_to_mean} );
7248
7249   if ( $val->{time_range} ) {
7250      push @result, "# Time range: $val->{time_range}";
7251   }
7252
7253   push @result, $self->make_event_header();
7254
7255   push @result,
7256      sprintf $self->{num_format}, 'Count',
7257         percentage_of($val->{counts}{class_cnt}, $val->{counts}{global_cnt}),
7258         $val->{counts}{class_cnt},
7259         map { '' } (1..8);
7260
7261
7262   my $attribs = $val->{attributes};
7263
7264   foreach my $type ( qw(num innodb) ) {
7265      if ( $type eq 'innodb' && @{$attribs->{$type}} ) {
7266         push @result, "# InnoDB:";
7267      };
7268
7269      NUM_ATTRIB:
7270      foreach my $attrib ( @{$attribs->{$type}} ) {
7271         my ($attrib_name, @vals) = @$attrib;
7272         push @result,
7273            sprintf $self->{num_format},
7274               $self->make_label($attrib_name), @vals;
7275      }
7276   }
7277
7278   if ( @{$attribs->{bool}} ) {
7279      push @result, "# Boolean:";
7280      BOOL_ATTRIB:
7281      foreach my $attrib ( @{$attribs->{bool}} ) {
7282         my ($attrib_name, @vals) = @$attrib;
7283         push @result,
7284            sprintf $self->{bool_format},
7285               $self->make_label($attrib_name), @vals;
7286      }
7287   }
7288
7289   if ( @{$attribs->{string}} ) {
7290      push @result, "# String:";
7291      STRING_ATTRIB:
7292      foreach my $attrib ( @{$attribs->{string}} ) {
7293         my ($attrib_name, $vals) = @$attrib;
7294         push @result,
7295            sprintf $self->{string_format},
7296               $self->make_label($attrib_name),
7297               $self->format_string_list($attrib_name, $vals, $val->{counts}{class_cnt});
7298      }
7299   }
7300
7301
7302   return join("\n", map { s/\s+$//; $_ } @result) . "\n";
7303}
7304
7305sub chart_distro {
7306   my ( $self, %args ) = @_;
7307   foreach my $arg ( qw(ea item attrib) ) {
7308      die "I need a $arg argument" unless defined $args{$arg};
7309   }
7310   my $ea     = $args{ea};
7311   my $item   = $args{item};
7312   my $attrib = $args{attrib};
7313
7314   my $results = $ea->results();
7315   my $store   = $results->{classes}->{$item}->{$attrib};
7316   my $vals    = $store->{all};
7317   return "" unless defined $vals && scalar %$vals;
7318
7319   my @buck_tens = $ea->buckets_of(10);
7320   my @distro = map { 0 } (0 .. 7);
7321
7322   my @buckets = map { 0 } (0..999);
7323   map { $buckets[$_] = $vals->{$_} } keys %$vals;
7324   $vals = \@buckets;  # repoint vals from given hashref to our array
7325
7326   map { $distro[$buck_tens[$_]] += $vals->[$_] } (1 .. @$vals - 1);
7327
7328   my $vals_per_mark; # number of vals represented by 1 #-mark
7329   my $max_val        = 0;
7330   my $max_disp_width = 64;
7331   my $bar_fmt        = "# %5s%s";
7332   my @distro_labels  = qw(1us 10us 100us 1ms 10ms 100ms 1s 10s+);
7333   my @results        = "# $attrib distribution";
7334
7335   foreach my $n_vals ( @distro ) {
7336      $max_val = $n_vals if $n_vals > $max_val;
7337   }
7338   $vals_per_mark = $max_val / $max_disp_width;
7339
7340   foreach my $i ( 0 .. $#distro ) {
7341      my $n_vals  = $distro[$i];
7342      my $n_marks = $n_vals / ($vals_per_mark || 1);
7343
7344      $n_marks = 1 if $n_marks < 1 && $n_vals > 0;
7345
7346      my $bar = ($n_marks ? '  ' : '') . '#' x $n_marks;
7347      push @results, sprintf $bar_fmt, $distro_labels[$i], $bar;
7348   }
7349
7350   return join("\n", @results) . "\n";
7351}
7352
7353sub profile {
7354   my ( $self, %args ) = @_;
7355   foreach my $arg ( qw(ea worst groupby) ) {
7356      die "I need a $arg argument" unless defined $arg;
7357   }
7358   my $ea      = $args{ea};
7359   my $worst   = $args{worst};
7360   my $other   = $args{other};
7361   my $groupby = $args{groupby};
7362
7363   my $qr  = $self->{QueryRewriter};
7364
7365   my $results = $ea->results();
7366   my $total_r = $results->{globals}->{Query_time}->{sum} || 0;
7367
7368   my @profiles;
7369   foreach my $top_event ( @$worst ) {
7370      my $item       = $top_event->[0];
7371      my $rank       = $top_event->[2];
7372      my $stats      = $ea->results->{classes}->{$item};
7373      my $sample     = $ea->results->{samples}->{$item};
7374      my $samp_query = $sample->{arg} || '';
7375      my $query_time = $ea->metrics(where => $item, attrib => 'Query_time');
7376
7377      my %profile    = (
7378         rank   => $rank,
7379         r      => $stats->{Query_time}->{sum},
7380         cnt    => $stats->{Query_time}->{cnt},
7381         sample => $groupby eq 'fingerprint' ?
7382                    $qr->distill($samp_query, %{$args{distill_args}}) : $item,
7383         id     => $groupby eq 'fingerprint' ? make_checksum($item)   : '',
7384         vmr    => ($query_time->{stddev}**2) / ($query_time->{avg} || 1),
7385      );
7386
7387      push @profiles, \%profile;
7388   }
7389
7390   my $report = $self->ReportFormatter();
7391   $report->title('Profile');
7392   my @cols = (
7393      { name => 'Rank',          right_justify => 1,             },
7394      { name => 'Query ID', width => 35                          },
7395      { name => 'Response time', right_justify => 1,             },
7396      { name => 'Calls',         right_justify => 1,             },
7397      { name => 'R/Call',        right_justify => 1,             },
7398      { name => 'V/M',           right_justify => 1, width => 5, },
7399      { name => 'Item',                                          },
7400   );
7401   $report->set_columns(@cols);
7402
7403   foreach my $item ( sort { $a->{rank} <=> $b->{rank} } @profiles ) {
7404      my $rt  = sprintf('%10.4f', $item->{r});
7405      my $rtp = sprintf('%4.1f%%', $item->{r} / ($total_r || 1) * 100);
7406      my $rc  = sprintf('%8.4f', $item->{r} / $item->{cnt});
7407      my $vmr = sprintf('%4.2f', $item->{vmr});
7408      my @vals = (
7409         $item->{rank},
7410         "0x$item->{id}",
7411         "$rt $rtp",
7412         $item->{cnt},
7413         $rc,
7414         $vmr,
7415         $item->{sample},
7416      );
7417      $report->add_line(@vals);
7418   }
7419
7420   if ( $other && @$other ) {
7421      my $misc = {
7422            r   => 0,
7423            cnt => 0,
7424      };
7425      foreach my $other_event ( @$other ) {
7426         my $item      = $other_event->[0];
7427         my $stats     = $ea->results->{classes}->{$item};
7428         $misc->{r}   += $stats->{Query_time}->{sum};
7429         $misc->{cnt} += $stats->{Query_time}->{cnt};
7430      }
7431      my $rt  = sprintf('%10.4f', $misc->{r});
7432      my $rtp = sprintf('%4.1f%%', $misc->{r} / ($total_r || 1) * 100);
7433      my $rc  = sprintf('%8.4f', $misc->{r} / $misc->{cnt});
7434      $report->add_line(
7435         "MISC",
7436         "0xMISC",
7437         "$rt $rtp",
7438         $misc->{cnt},
7439         $rc,
7440         '0.0',  # variance-to-mean ratio is not meaningful here
7441         "<".scalar @$other." ITEMS>",
7442      );
7443   }
7444
7445   return $report->get_report();
7446}
7447
7448sub prepared {
7449   my ( $self, %args ) = @_;
7450   foreach my $arg ( qw(ea worst groupby) ) {
7451      die "I need a $arg argument" unless defined $arg;
7452   }
7453   my $ea      = $args{ea};
7454   my $worst   = $args{worst};
7455   my $groupby = $args{groupby};
7456
7457   my $qr = $self->{QueryRewriter};
7458
7459   my @prepared;       # prepared statements
7460   my %seen_prepared;  # report each PREP-EXEC pair once
7461   my $total_r = 0;
7462
7463   foreach my $top_event ( @$worst ) {
7464      my $item       = $top_event->[0];
7465      my $rank       = $top_event->[2];
7466      my $stats      = $ea->results->{classes}->{$item};
7467      my $sample     = $ea->results->{samples}->{$item};
7468      my $samp_query = $sample->{arg} || '';
7469
7470      $total_r += $stats->{Query_time}->{sum};
7471      next unless $stats->{Statement_id} && $item =~ m/^(?:prepare|execute) /;
7472
7473      my ($prep_stmt, $prep, $prep_r, $prep_cnt);
7474      my ($exec_stmt, $exec, $exec_r, $exec_cnt);
7475
7476      if ( $item =~ m/^prepare / ) {
7477         $prep_stmt           = $item;
7478         ($exec_stmt = $item) =~ s/^prepare /execute /;
7479      }
7480      else {
7481         ($prep_stmt = $item) =~ s/^execute /prepare /;
7482         $exec_stmt           = $item;
7483      }
7484
7485      if ( !$seen_prepared{$prep_stmt}++ ) {
7486         if ( exists $ea->results->{classes}->{$exec_stmt} ) {
7487            $exec     = $ea->results->{classes}->{$exec_stmt};
7488            $exec_r   = $exec->{Query_time}->{sum};
7489            $exec_cnt = $exec->{Query_time}->{cnt};
7490         }
7491         else {
7492            PTDEBUG && _d('Statement prepared but not executed:', $item);
7493            $exec_r   = 0;
7494            $exec_cnt = 0;
7495         }
7496
7497         if ( exists $ea->results->{classes}->{$prep_stmt} ) {
7498            $prep     = $ea->results->{classes}->{$prep_stmt};
7499            $prep_r   = $prep->{Query_time}->{sum};
7500            $prep_cnt = scalar keys %{$prep->{Statement_id}->{unq}},
7501         }
7502         else {
7503            PTDEBUG && _d('Statement executed but not prepared:', $item);
7504            $prep_r   = 0;
7505            $prep_cnt = 0;
7506         }
7507
7508         push @prepared, {
7509            prep_r   => $prep_r,
7510            prep_cnt => $prep_cnt,
7511            exec_r   => $exec_r,
7512            exec_cnt => $exec_cnt,
7513            rank     => $rank,
7514            sample   => $groupby eq 'fingerprint'
7515                          ? $qr->distill($samp_query, %{$args{distill_args}})
7516                          : $item,
7517            id       => $groupby eq 'fingerprint' ? make_checksum($item)
7518                                                  : '',
7519         };
7520      }
7521   }
7522
7523   return unless scalar @prepared;
7524
7525   my $report = $self->ReportFormatter();
7526   $report->title('Prepared statements');
7527   $report->set_columns(
7528      { name => 'Rank',          right_justify => 1, },
7529      { name => 'Query ID',                          },
7530      { name => 'PREP',          right_justify => 1, },
7531      { name => 'PREP Response', right_justify => 1, },
7532      { name => 'EXEC',          right_justify => 1, },
7533      { name => 'EXEC Response', right_justify => 1, },
7534      { name => 'Item',                              },
7535   );
7536
7537   foreach my $item ( sort { $a->{rank} <=> $b->{rank} } @prepared ) {
7538      my $exec_rt  = sprintf('%10.4f', $item->{exec_r});
7539      my $exec_rtp = sprintf('%4.1f%%',$item->{exec_r}/($total_r || 1) * 100);
7540      my $prep_rt  = sprintf('%10.4f', $item->{prep_r});
7541      my $prep_rtp = sprintf('%4.1f%%',$item->{prep_r}/($total_r || 1) * 100);
7542      $report->add_line(
7543         $item->{rank},
7544         "0x$item->{id}",
7545         $item->{prep_cnt} || 0,
7546         "$prep_rt $prep_rtp",
7547         $item->{exec_cnt} || 0,
7548         "$exec_rt $exec_rtp",
7549         $item->{sample},
7550      );
7551   }
7552   return $report->get_report();
7553}
7554
7555sub make_global_header {
7556   my ( $self ) = @_;
7557   my @lines;
7558
7559   push @lines,
7560      sprintf $self->{num_format}, "Attribute", '', @{$self->global_headers()};
7561
7562   push @lines,
7563      sprintf $self->{num_format},
7564         (map { "=" x $_ } $self->label_width()),
7565         (map { " " x $_ } qw(3)),  # no pct column in global header
7566         (map { "=" x $_ } qw(7 7 7 7 7 7 7));
7567
7568   return @lines;
7569}
7570
7571sub make_event_header {
7572   my ( $self ) = @_;
7573
7574   return @{$self->{event_header_lines}} if $self->{event_header_lines};
7575
7576   my @lines;
7577   push @lines,
7578      sprintf $self->{num_format}, "Attribute", @{$self->event_headers()};
7579
7580   push @lines,
7581      sprintf $self->{num_format},
7582         map { "=" x $_ } ($self->label_width(), qw(3 7 7 7 7 7 7 7));
7583
7584   $self->{event_header_lines} = \@lines;
7585   return @lines;
7586}
7587
7588sub make_label {
7589   my ( $self, $val ) = @_;
7590   return '' unless $val;
7591
7592   $val =~ s/_/ /g;
7593
7594   if ( $val =~ m/^InnoDB/ ) {
7595      $val =~ s/^InnoDB //;
7596      $val = $val eq 'trx id' ? "InnoDB trxID"
7597           : substr($val, 0, $self->label_width());
7598   }
7599
7600   $val = $val eq 'user'            ? 'Users'
7601        : $val eq 'db'              ? 'Databases'
7602        : $val eq 'Query time'      ? 'Exec time'
7603        : $val eq 'host'            ? 'Hosts'
7604        : $val eq 'Error no'        ? 'Errors'
7605        : $val eq 'bytes'           ? 'Query size'
7606        : $val eq 'Tmp disk tables' ? 'Tmp disk tbl'
7607        : $val eq 'Tmp table sizes' ? 'Tmp tbl size'
7608        : substr($val, 0, $self->label_width);
7609
7610   return $val;
7611}
7612
7613sub bool_percents {
7614   my ( $self, $vals ) = @_;
7615   my $p_true  = percentage_of($vals->{sum},  $vals->{cnt});
7616   my $p_false = percentage_of(($vals->{cnt} - $vals->{sum}), $vals->{cnt});
7617   return $p_true, $p_false;
7618}
7619
7620sub format_string_list {
7621   my ( $self, $attrib, $vals, $class_cnt ) = @_;
7622
7623   if ( !exists $vals->{unq} ) {
7624      return ($vals->{cnt});
7625   }
7626
7627   my $show_all = $self->show_all();
7628
7629   my $cnt_for = $vals->{unq};
7630   if ( 1 == keys %$cnt_for ) {
7631      my ($str) = keys %$cnt_for;
7632      $str = substr($str, 0, LINE_LENGTH - 30) . '...'
7633         if length $str > LINE_LENGTH - 30;
7634      return $str;
7635   }
7636   my $line = '';
7637   my @top = sort { $cnt_for->{$b} <=> $cnt_for->{$a} || $a cmp $b }
7638                  keys %$cnt_for;
7639   my $i = 0;
7640   foreach my $str ( @top ) {
7641      my $print_str;
7642      if ( $str =~ m/(?:\d+\.){3}\d+/ ) {
7643         $print_str = $str;  # Do not shorten IP addresses.
7644      }
7645      elsif ( $self->{max_hostname_length} > 0 and length $str > $self->{max_hostname_length} ) {
7646         $print_str = substr($str, 0, $self->{max_hostname_length}) . '...';
7647      } else {
7648         $print_str = $str;
7649      }
7650      my $p = percentage_of($cnt_for->{$str}, $class_cnt);
7651      $print_str .= " ($cnt_for->{$str}/$p%)";
7652      my $trim_length = LINE_LENGTH;
7653      if ($self->{max_hostname_length} == 0 or $self->{max_hostname_length} > LINE_LENGTH) {
7654          $trim_length = $self->{max_hostname_length};
7655      }
7656      if ( $self->{max_line_length} > 0 and !$show_all->{$attrib} ) {
7657         last if (length $line) + (length $print_str)  > $self->{max_line_length} - 27;
7658      }
7659      $line .= "$print_str, ";
7660      $i++;
7661   }
7662
7663   $line =~ s/, $//;
7664
7665   if ( $i < @top ) {
7666      $line .= "... " . (@top - $i) . " more";
7667   }
7668
7669   return $line;
7670}
7671
7672sub sort_attribs {
7673   my ( $self, $ea ) = @_;
7674   my $attribs = $ea->get_attributes();
7675   return unless $attribs && @$attribs;
7676   PTDEBUG && _d("Sorting attribs:", @$attribs);
7677
7678   my @num_order = qw(
7679      Query_time
7680      Exec_orig_time
7681      Transmit_time
7682      Lock_time
7683      Rows_sent
7684      Rows_examined
7685      Rows_affected
7686      Rows_read
7687      Bytes_sent
7688      Merge_passes
7689      Tmp_tables
7690      Tmp_disk_tables
7691      Tmp_table_sizes
7692      bytes
7693   );
7694   my $i         = 0;
7695   my %num_order = map { $_ => $i++ } @num_order;
7696
7697   my (@num, @innodb, @bool, @string);
7698   ATTRIB:
7699   foreach my $attrib ( @$attribs ) {
7700      next if $self->{hidden_attrib}->{$attrib};
7701
7702      my $type = $ea->type_for($attrib) || 'string';
7703      if ( $type eq 'num' ) {
7704         if ( $attrib =~ m/^InnoDB_/ ) {
7705            push @innodb, $attrib;
7706         }
7707         else {
7708            push @num, $attrib;
7709         }
7710      }
7711      elsif ( $type eq 'bool' ) {
7712         push @bool, $attrib;
7713      }
7714      elsif ( $type eq 'string' ) {
7715         push @string, $attrib;
7716      }
7717      else {
7718         PTDEBUG && _d("Unknown attrib type:", $type, "for", $attrib);
7719      }
7720   }
7721
7722   @num    = sort { pref_sort($a, $num_order{$a}, $b, $num_order{$b}) } @num;
7723   @innodb = sort { uc $a cmp uc $b } @innodb;
7724   @bool   = sort { uc $a cmp uc $b } @bool;
7725   @string = sort { uc $a cmp uc $b } @string;
7726
7727   return {
7728      num     => \@num,
7729      innodb  => \@innodb,
7730      string  => \@string,
7731      bool    => \@bool,
7732   };
7733}
7734
7735sub pref_sort {
7736   my ( $attrib_a, $order_a, $attrib_b, $order_b ) = @_;
7737
7738   if ( !defined $order_a && !defined $order_b ) {
7739      return $attrib_a cmp $attrib_b;
7740   }
7741
7742   if ( defined $order_a && defined $order_b ) {
7743      return $order_a <=> $order_b;
7744   }
7745
7746   if ( !defined $order_a ) {
7747      return 1;
7748   }
7749   else {
7750      return -1;
7751   }
7752}
7753
7754sub tables_report {
7755   my ( $self, $tables_ref, $args_ref ) = @_;
7756   return '' unless @$tables_ref;
7757   my $q      = $self->Quoter();
7758   my $tables = "";
7759   my $mark   = $args_ref->{no_v_format} ? '' : '\G';
7760   foreach my $db_tbl ( @$tables_ref ) {
7761      my ( $db, $tbl ) = @$db_tbl;
7762      $tables .= '#    SHOW TABLE STATUS'
7763               . ($db ? " FROM `$db`" : '')
7764               . " LIKE '$tbl'${mark}\n";
7765      $tables .= "#    SHOW CREATE TABLE "
7766               . $q->quote(grep { $_ } @$db_tbl)
7767               . "${mark}\n";
7768   }
7769   return $tables ? "# Tables\n$tables" : "# No tables\n";
7770}
7771
7772sub explain_report {
7773   my ( $self, $query, $db ) = @_;
7774   return '' unless $query;
7775
7776   my $dbh = $self->{dbh};
7777   my $q   = $self->Quoter();
7778   my $qp  = $self->{QueryParser};
7779   return '' unless $dbh && $q && $qp;
7780
7781   my $explain = '';
7782   eval {
7783      if ( !$qp->has_derived_table($query) ) {
7784         if ( $db ) {
7785            PTDEBUG && _d($dbh, "USE", $db);
7786            $dbh->do("USE " . $q->quote($db));
7787         }
7788         my $sth;
7789         eval {
7790             $sth = $dbh->prepare("EXPLAIN /*!50100 PARTITIONS*/ $query");
7791             $sth->execute();
7792         };
7793         if ($EVAL_ERROR) { # MySQL 8.0+ doesn't support PARTITIONS
7794             $self->{no_partitions} = 1;
7795             $sth = $dbh->prepare("EXPLAIN $query");
7796             $sth->execute();
7797         }
7798         $sth->execute();
7799         my $i = 1;
7800         while ( my @row = $sth->fetchrow_array() ) {
7801            $explain .= "# *************************** $i. "
7802                      . "row ***************************\n";
7803            foreach my $j ( 0 .. $#row ) {
7804               # In some OSes/Perl versions, the filtered row can be reported with or without decimals.
7805               # Example, in Ubuntu 16.04 it is being printed as 100.00 while in Ubuntu 18.04 it is
7806               # being printed as 100.
7807               # To make it testeable, we need to have a consistent format across versions.
7808               my $value_format = $sth->{NAME}->[$j] eq 'filtered' ? "%.02f" : "%s";
7809               $explain .= sprintf "# %13s: $value_format\n", $sth->{NAME}->[$j],
7810                  defined $row[$j] ? $row[$j] : 'NULL';
7811            }
7812            $i++;  # next row number
7813         }
7814      }
7815   };
7816   if ( $EVAL_ERROR ) {
7817      PTDEBUG && _d("EXPLAIN failed:", $query, $EVAL_ERROR);
7818   }
7819   return $explain ? $explain : "# EXPLAIN failed: $EVAL_ERROR";
7820}
7821
7822sub format_time_range {
7823   my ( $self, $vals ) = @_;
7824   my $min = parse_timestamp($vals->{min} || '');
7825   my $max = parse_timestamp($vals->{max} || '');
7826
7827   if ( $min && $max && $min eq $max ) {
7828      return "all events occurred at $min";
7829   }
7830
7831   my ($min_day) = split(' ', $min) if $min;
7832   my ($max_day) = split(' ', $max) if $max;
7833   if ( ($min_day || '') eq ($max_day || '') ) {
7834      (undef, $max) = split(' ', $max);
7835   }
7836
7837   return $min && $max ? "$min to $max" : '';
7838}
7839
7840sub _d {
7841   my ($package, undef, $line) = caller 0;
7842   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
7843        map { defined $_ ? $_ : 'undef' }
7844        @_;
7845   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
7846}
7847
7848no Lmo;
78491;
7850}
7851# ###########################################################################
7852# End QueryReportFormatter package
7853# ###########################################################################
7854
7855# ###########################################################################
7856# JSONReportFormatter package
7857# This package is a copy without comments from the original.  The original
7858# with comments and its test file can be found in the Bazaar repository at,
7859#   lib/JSONReportFormatter.pm
7860#   t/lib/JSONReportFormatter.t
7861# See https://launchpad.net/percona-toolkit for more information.
7862# ###########################################################################
7863{
7864package JSONReportFormatter;
7865use Lmo;
7866
7867use List::Util   qw(sum);
7868use Transformers qw(make_checksum parse_timestamp);
7869
7870use constant PTDEBUG => $ENV{PTDEBUG} || 0;
7871
7872my $have_json = eval { require JSON };
7873
7874our $pretty_json = $ENV{PTTEST_PRETTY_JSON} || 0;
7875our $sorted_json = $ENV{PTTEST_PRETTY_JSON} || 0;
7876
7877
7878extends qw(QueryReportFormatter);
7879
7880has 'QueryRewriter' => (
7881   is       => 'ro',
7882   isa      => 'Object',
7883   required => 1,
7884);
7885
7886has 'QueryParser' => (
7887   is       => 'ro',
7888   isa      => 'Object',
7889   required => 1,
7890);
7891
7892has 'Quoter' => (
7893   is       => 'ro',
7894   isa      => 'Object',
7895   required => 1,
7896);
7897
7898has _json => (
7899   is       => 'ro',
7900   init_arg => undef,
7901   builder  => '_build_json',
7902);
7903
7904has 'max_query_length' => (
7905   is       => 'rw',
7906   isa      => 'Int',
7907   required => 0,
7908   default  => sub { return 10_000; }, # characters, not bytes
7909);
7910
7911has 'max_fingerprint_length' => (
7912   is       => 'rw',
7913   isa      => 'Int',
7914   required => 0,
7915   default  => sub { return 5_000; }, # characters, not bytes
7916);
7917
7918sub _build_json {
7919   return unless $have_json;
7920   return JSON->new->utf8
7921                   ->pretty($pretty_json)
7922                   ->canonical($sorted_json);
7923}
7924
7925sub encode_json {
7926   my ($self, $encode) = @_;
7927   if ( my $json = $self->_json ) {
7928      return $json->encode($encode);
7929   }
7930   else {
7931      return Transformers::encode_json($encode);
7932   }
7933}
7934
7935override [qw(rusage date hostname files header profile prepared)] => sub {
7936   return;
7937};
7938
7939override event_report => sub {
7940   my ($self, %args) = @_;
7941   return $self->event_report_values(%args);
7942};
7943
7944override query_report => sub {
7945   my ($self, %args) = @_;
7946   foreach my $arg ( qw(ea worst orderby groupby) ) {
7947      die "I need a $arg argument" unless defined $arg;
7948   }
7949   my $ea      = $args{ea};
7950   my $worst   = $args{worst};
7951   my $orderby = $args{orderby};
7952   my $groupby = $args{groupby};
7953
7954   my $results = $ea->results();
7955   my @attribs = @{$ea->get_attributes()};
7956
7957   my $q  = $self->Quoter;
7958   my $qr = $self->QueryRewriter;
7959
7960   my $global_data = {
7961      metrics => {},
7962      files   => $args{files},
7963      ($args{resume} && scalar keys %{$args{resume}} ? (resume  => $args{resume}) : ()),
7964   };
7965
7966   my $global_cnt = $results->{globals}->{$orderby}->{cnt} || 0;
7967   my $global_unq = scalar keys %{$results->{classes}};
7968
7969   my ($qps, $conc) = (0, 0);
7970   if ( $global_cnt && $results->{globals}->{ts}
7971        && ($results->{globals}->{ts}->{max} || '')
7972            gt ($results->{globals}->{ts}->{min} || '') )
7973   {
7974      eval {
7975         my $min  = parse_timestamp($results->{globals}->{ts}->{min});
7976         my $max  = parse_timestamp($results->{globals}->{ts}->{max});
7977         my $diff = unix_timestamp($max) - unix_timestamp($min);
7978         $qps     = $global_cnt / ($diff || 1);
7979         $conc    = $results->{globals}->{$orderby}->{sum} / $diff;
7980      };
7981   }
7982
7983   $global_data->{query_count}        = $global_cnt;
7984   $global_data->{unique_query_count} = $global_unq;
7985   $global_data->{queries_per_second} = $qps  if $qps;
7986   $global_data->{concurrency}        = $conc if $conc;
7987
7988   if ( exists $results->{globals}->{rate_limit} ) {
7989      my $rate_limit = $results->{globals}->{rate_limit}->{min} || '';
7990      my ($type, $limit) = $rate_limit =~ m/^(\w+):(\d+)$/;
7991      if ( $type && $limit ) {
7992         $global_data->{rate_limit} = {
7993            type  => $type,
7994            limit => int($limit),
7995         };
7996      }
7997      else {
7998         $global_data->{rate_limit}->{error} = "Invalid rate limit: $rate_limit";
7999      }
8000
8001      if (    ($results->{globals}->{rate_limit}->{min} || '')
8002           ne ($results->{globals}->{rate_limit}->{max} || '') ) {
8003         $global_data->{rate_limit}->{diff} = 1;
8004      }
8005   }
8006
8007   my %hidden_attrib = (
8008      arg         => 1,
8009      fingerprint => 1,
8010      pos_in_log  => 1,
8011      ts          => 1,
8012   );
8013
8014   foreach my $attrib ( grep { !$hidden_attrib{$_} } @attribs ) {
8015      my $type = $ea->type_for($attrib) || 'string';
8016      next if $type eq 'string';
8017      next unless exists $results->{globals}->{$attrib};
8018
8019      my $store   = $results->{globals}->{$attrib};
8020      my $metrics = $ea->stats()->{globals}->{$attrib};
8021      my $int     = $attrib =~ m/(?:time|wait)$/ ? 0 : 1;
8022
8023      my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;
8024
8025      if ( $type eq 'num' ) {
8026         foreach my $m ( qw(sum min max) ) {
8027            if ( $int ) {
8028               $global_data->{metrics}->{$real_attrib}->{$m}
8029                  = sprintf('%d', $store->{$m} || 0);
8030            }
8031            else {  # microsecond
8032               $global_data->{metrics}->{$real_attrib}->{$m}
8033                  = sprintf('%.6f',  $store->{$m} || 0);
8034            }
8035         }
8036         foreach my $m ( qw(pct_95 stddev median) ) {
8037            if ( $int ) {
8038               $global_data->{metrics}->{$real_attrib}->{$m}
8039                  = sprintf('%d', $metrics->{$m} || 0);
8040            }
8041            else {  # microsecond
8042               $global_data->{metrics}->{$real_attrib}->{$m}
8043                  = sprintf('%.6f',  $metrics->{$m} || 0);
8044            }
8045         }
8046         if ( $int ) {
8047            $global_data->{metrics}->{$real_attrib}->{avg}
8048               = sprintf('%d', $store->{sum} / $store->{cnt});
8049         }
8050         else {
8051            $global_data->{metrics}->{$real_attrib}->{avg}
8052               = sprintf('%.6f', $store->{sum} / $store->{cnt});
8053         }
8054      }
8055      elsif ( $type eq 'bool' ) {
8056         my $store = $results->{globals}->{$real_attrib};
8057         $global_data->{metrics}->{$real_attrib}->{cnt}
8058            = sprintf('%d', $store->{sum});
8059      }
8060   }
8061
8062
8063   my @classes;
8064   foreach my $worst_info ( @$worst ) {
8065      my $item   = $worst_info->[0];
8066      my $stats  = $ea->results->{classes}->{$item};
8067      my $sample = $ea->results->{samples}->{$item};
8068
8069      my $all_log_pos = $ea->{result_classes}->{$item}->{pos_in_log}->{all};
8070      my $times_seen  = sum values %$all_log_pos;
8071
8072      my $distill     = $groupby eq 'fingerprint' ? $qr->distill($sample->{arg})
8073                      :                             undef;
8074      my $fingerprint = substr($item, 0, $self->max_fingerprint_length);
8075      my $checksum    = make_checksum($item);
8076      my $class       = {
8077         checksum    => $checksum,
8078         fingerprint => $fingerprint,
8079         distillate  => $distill,
8080         attribute   => $groupby,
8081         query_count => $times_seen,
8082         $args{anon} ? () : (
8083            example     => {
8084               query      => substr($sample->{arg}, 0, $self->max_query_length),
8085               ts         => $sample->{ts} ? parse_timestamp($sample->{ts}) : undef,
8086               Query_time => $sample->{Query_time},
8087            },
8088         ),
8089      };
8090
8091      my %metrics;
8092      foreach my $attrib ( @attribs ) {
8093         my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;
8094         next if $real_attrib eq 'Rows_affected'
8095            && $distill && $distill =~ m/^(?:SELECT|SHOW|SET|ADMIN)/;
8096         $metrics{$real_attrib} = $ea->metrics(
8097            attrib => $attrib,
8098            where  => $item,
8099         );
8100      }
8101
8102      foreach my $attrib ( keys %metrics ) {
8103         if ( ! grep { $_ } values %{$metrics{$attrib}} ) {
8104            delete $metrics{$attrib};
8105            next;
8106         }
8107         delete $metrics{pos_in_log};
8108         delete $metrics{$attrib}->{cnt};
8109
8110         if ($attrib eq 'ts') {
8111            my $ts = delete $metrics{ts};
8112            foreach my $thing ( qw(min max) ) {
8113               next unless defined $ts && defined $ts->{$thing};
8114               $ts->{$thing} = parse_timestamp($ts->{$thing});
8115            }
8116            $class->{ts_min} = $ts->{min};
8117            $class->{ts_max} = $ts->{max};
8118         }
8119         else {
8120            my $type = $attrib eq 'Query_length' ? 'num' : $ea->type_for($attrib) || 'string';
8121            if ( $type eq 'string' ) {
8122               $metrics{$attrib} = { value => $metrics{$attrib}{max} };
8123            }
8124            elsif ( $type eq 'num' ) {
8125               foreach my $value ( values %{$metrics{$attrib}} ) {
8126                  next unless defined $value;
8127                  if ( $attrib =~ m/_(?:time|wait)$/ ) {
8128                     $value = sprintf('%.6f', $value);
8129                  }
8130                  else {
8131                     $value = sprintf('%d', $value);
8132                  }
8133               }
8134            }
8135            elsif ( $type eq 'bool' ) {
8136               $metrics{$attrib} = {
8137                  yes => sprintf('%d', $metrics{$attrib}->{sum}),
8138               };
8139            }
8140         }
8141      }
8142
8143      my @tables;
8144      if ( $groupby eq 'fingerprint' ) {
8145         my $default_db = $sample->{db}       ? $sample->{db}
8146                        : $stats->{db}->{unq} ? keys %{$stats->{db}->{unq}}
8147                        :                       undef;
8148         my @table_names = $self->QueryParser->extract_tables(
8149            query      => $sample->{arg} || '',
8150            default_db => $default_db,
8151            Quoter     => $q,
8152         );
8153         my $mark = $args{no_v_format} ? '' : '\G';
8154
8155         foreach my $db_tbl ( @table_names ) {
8156            my ( $db, $tbl ) = @$db_tbl;
8157            my $status
8158               = 'SHOW TABLE STATUS'
8159               . ($db ? " FROM `$db`" : '')
8160               . " LIKE '$tbl'${mark}";
8161            my $create
8162               = "SHOW CREATE TABLE "
8163               . $q->quote(grep { $_ } @$db_tbl)
8164               . ${mark};
8165            push @tables, { status => $status, create => $create };
8166         }
8167
8168         if ( !$args{anon} ) {
8169            if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
8170               if ( $item =~ m/^(?:insert|replace)/ ) {
8171               }
8172               else {
8173
8174               }
8175            }
8176            else {
8177               my $converted = $qr->convert_to_select(
8178                  $sample->{arg} || '',
8179               );
8180               if ( $converted && $converted =~ m/^[\(\s]*select/i ) {
8181                  $class->{example}->{as_select} = $converted;
8182               }
8183            }
8184         }
8185      }
8186
8187      my $vals = $stats->{Query_time}->{all};
8188      if ( defined $vals && scalar %$vals ) {
8189         my @buck_tens = $ea->buckets_of(10);
8190         my @distro = map { 0 } (0 .. 7);
8191         my @buckets = map { 0 } (0..999);
8192         map { $buckets[$_] = $vals->{$_} } keys %$vals;
8193         $vals = \@buckets;  # repoint vals from given hashref to our array
8194         map { $distro[$buck_tens[$_]] += $vals->[$_] } (1 .. @$vals - 1);
8195         $class->{histograms}->{Query_time} = \@distro;
8196      } # histogram
8197
8198      $class->{metrics} = \%metrics;
8199      if ( @tables ) {
8200         $class->{tables} = \@tables;
8201      }
8202      push @classes, $class;
8203   }
8204
8205   my $data = {
8206      global  => $global_data,
8207      classes => \@classes,
8208   };
8209   my $json = $self->encode_json($data);
8210   $json .= "\n" unless $json =~ /\n\Z/;
8211   return $json;
8212};
8213
8214no Lmo;
82151;
8216}
8217# ###########################################################################
8218# End JSONReportFormatter package
8219# ###########################################################################
8220
8221# ###########################################################################
8222# EventTimeline package
8223# This package is a copy without comments from the original.  The original
8224# with comments and its test file can be found in the Bazaar repository at,
8225#   lib/EventTimeline.pm
8226#   t/lib/EventTimeline.t
8227# See https://launchpad.net/percona-toolkit for more information.
8228# ###########################################################################
8229{
8230package EventTimeline;
8231
8232use strict;
8233use warnings FATAL => 'all';
8234use English qw(-no_match_vars);
8235use constant PTDEBUG => $ENV{PTDEBUG} || 0;
8236
8237Transformers->import(qw(parse_timestamp secs_to_time unix_timestamp));
8238
8239use constant KEY     => 0;
8240use constant CNT     => 1;
8241use constant ATT     => 2;
8242
8243sub new {
8244   my ( $class, %args ) = @_;
8245   foreach my $arg ( qw(groupby attributes) ) {
8246      die "I need a $arg argument" unless $args{$arg};
8247   }
8248
8249   my %is_groupby = map { $_ => 1 } @{$args{groupby}};
8250
8251   return bless {
8252      groupby    => $args{groupby},
8253      attributes => [ grep { !$is_groupby{$_} } @{$args{attributes}} ],
8254      results    => [],
8255   }, $class;
8256}
8257
8258sub reset_aggregated_data {
8259   my ( $self ) = @_;
8260   $self->{results} = [];
8261}
8262
8263sub aggregate {
8264   my ( $self, $event ) = @_;
8265   my $handler = $self->{handler};
8266   if ( !$handler ) {
8267      $handler = $self->make_handler($event);
8268      $self->{handler} = $handler;
8269   }
8270   return unless $handler;
8271   $handler->($event);
8272}
8273
8274sub results {
8275   my ( $self ) = @_;
8276   return $self->{results};
8277}
8278
8279sub make_handler {
8280   my ( $self, $event ) = @_;
8281
8282   my $float_re = qr{[+-]?(?:(?=\d|[.])\d*(?:[.])\d{0,})?(?:[E](?:[+-]?\d+)|)}i;
8283   my @lines; # lines of code for the subroutine
8284
8285   foreach my $attrib ( @{$self->{attributes}} ) {
8286      my ($val) = $event->{$attrib};
8287      next unless defined $val; # Can't decide type if it's undef.
8288
8289      my $type = $val  =~ m/^(?:\d+|$float_re)$/o ? 'num'
8290               : $val  =~ m/^(?:Yes|No)$/         ? 'bool'
8291               :                                    'string';
8292      PTDEBUG && _d('Type for', $attrib, 'is', $type, '(sample:', $val, ')');
8293      $self->{type_for}->{$attrib} = $type;
8294
8295      push @lines, (
8296         "\$val = \$event->{$attrib};",
8297         'defined $val && do {',
8298         "# type: $type",
8299         "\$store = \$last->[ATT]->{$attrib} ||= {};",
8300      );
8301
8302      if ( $type eq 'bool' ) {
8303         push @lines, q{$val = $val eq 'Yes' ? 1 : 0;};
8304         $type = 'num';
8305      }
8306      my $op   = $type eq 'num' ? '<' : 'lt';
8307      push @lines, (
8308         '$store->{min} = $val if !defined $store->{min} || $val '
8309            . $op . ' $store->{min};',
8310      );
8311      $op = ($type eq 'num') ? '>' : 'gt';
8312      push @lines, (
8313         '$store->{max} = $val if !defined $store->{max} || $val '
8314            . $op . ' $store->{max};',
8315      );
8316      if ( $type eq 'num' ) {
8317         push @lines, '$store->{sum} += $val;';
8318      }
8319      push @lines, '};';
8320   }
8321
8322   unshift @lines, (
8323      'sub {',
8324      'my ( $event ) = @_;',
8325      'my ($val, $last, $store);', # NOTE: define all variables here
8326      '$last = $results->[-1];',
8327      'if ( !$last || '
8328         . join(' || ',
8329            map { "\$last->[KEY]->[$_] ne (\$event->{$self->{groupby}->[$_]} || 0)" }
8330                (0 .. @{$self->{groupby}} -1))
8331         . ' ) {',
8332      '  $last = [['
8333         . join(', ',
8334            map { "(\$event->{$self->{groupby}->[$_]} || 0)" }
8335                (0 .. @{$self->{groupby}} -1))
8336         . '], 0, {} ];',
8337      '  push @$results, $last;',
8338      '}',
8339      '++$last->[CNT];',
8340   );
8341   push @lines, '}';
8342   my $results = $self->{results}; # Referred to by the eval
8343   my $code = join("\n", @lines);
8344   $self->{code} = $code;
8345
8346   PTDEBUG && _d('Timeline handler:', $code);
8347   my $sub = eval $code;
8348   die if $EVAL_ERROR;
8349   return $sub;
8350}
8351
8352sub report {
8353   my ( $self, $results, $callback ) = @_;
8354   $callback->("# " . ('#' x 72) . "\n");
8355   $callback->("# " . join(',', @{$self->{groupby}}) . " report\n");
8356   $callback->("# " . ('#' x 72) . "\n");
8357   foreach my $res ( @$results ) {
8358      my $t;
8359      my @vals;
8360      if ( ($t = $res->[ATT]->{ts}) && $t->{min} ) {
8361         my $min = parse_timestamp($t->{min});
8362         push @vals, $min;
8363         if ( $t->{max} && $t->{max} gt $t->{min} ) {
8364            my $max  = parse_timestamp($t->{max});
8365            my $diff = secs_to_time(unix_timestamp($max) - unix_timestamp($min));
8366            push @vals, $diff;
8367         }
8368         else {
8369            push @vals, '0:00';
8370         }
8371      }
8372      else {
8373         push @vals, ('', '');
8374      }
8375      $callback->(sprintf("# %19s %7s %3d %s\n", @vals, $res->[CNT], $res->[KEY]->[0]));
8376   }
8377}
8378
8379sub _d {
8380   my ($package, undef, $line) = caller 0;
8381   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
8382        map { defined $_ ? $_ : 'undef' }
8383        @_;
8384   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
8385}
8386
83871;
8388}
8389# ###########################################################################
8390# End EventTimeline package
8391# ###########################################################################
8392
8393# ###########################################################################
8394# QueryParser package
8395# This package is a copy without comments from the original.  The original
8396# with comments and its test file can be found in the Bazaar repository at,
8397#   lib/QueryParser.pm
8398#   t/lib/QueryParser.t
8399# See https://launchpad.net/percona-toolkit for more information.
8400# ###########################################################################
8401{
8402package QueryParser;
8403
8404use strict;
8405use warnings FATAL => 'all';
8406use English qw(-no_match_vars);
8407use constant PTDEBUG => $ENV{PTDEBUG} || 0;
8408
8409our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
8410our $tbl_regex = qr{
8411         \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
8412         \b\s*
8413         \(?                                   # Optional paren around tables
8414         ($tbl_ident
8415            (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
8416         )
8417      }xio;
8418our $has_derived = qr{
8419      \b(?:FROM|JOIN|,)
8420      \s*\(\s*SELECT
8421   }xi;
8422
8423our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
8424
8425our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
8426
8427sub new {
8428   my ( $class ) = @_;
8429   bless {}, $class;
8430}
8431
8432sub get_tables {
8433   my ( $self, $query ) = @_;
8434   return unless $query;
8435   PTDEBUG && _d('Getting tables for', $query);
8436
8437   my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
8438   if ( $ddl_stmt ) {
8439      PTDEBUG && _d('Special table type:', $ddl_stmt);
8440      $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
8441      if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
8442         PTDEBUG && _d('Query alters a database, not a table');
8443         return ();
8444      }
8445      if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
8446         my ($select) = $query =~ m/\b(SELECT\b.+)/is;
8447         PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
8448         return $self->get_tables($select);
8449      }
8450      my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
8451      PTDEBUG && _d('Matches table:', $tbl);
8452      return ($tbl);
8453   }
8454
8455   $query =~ s/(?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN|DELAYED)\s+/ /ig;
8456
8457   if ( $query =~ s/^\s*LOCK TABLES\s+//i ) {
8458      PTDEBUG && _d('Special table type: LOCK TABLES');
8459      $query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\s*//gi;
8460      PTDEBUG && _d('Locked tables:', $query);
8461      $query = "FROM $query";
8462   }
8463
8464   $query =~ s/\\["']//g;   # quoted strings
8465   $query =~ s/".*?"/?/sg;  # quoted strings
8466   $query =~ s/'.*?'/?/sg;  # quoted strings
8467
8468   if ( $query =~ m/\A\s*(?:INSERT|REPLACE)(?!\s+INTO)/i ) {
8469      $query =~ s/\A\s*((?:INSERT|REPLACE))\s+/$1 INTO /i;
8470   }
8471
8472   if ( $query =~ m/\A\s*LOAD DATA/i ) {
8473      my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
8474      return $tbl;
8475   }
8476
8477   my @tables;
8478   foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
8479      PTDEBUG && _d('Match tables:', $tbls);
8480
8481      next if $tbls =~ m/\ASELECT\b/i;
8482
8483      foreach my $tbl ( split(',', $tbls) ) {
8484         $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
8485
8486         if ( $tbl !~ m/[a-zA-Z]/ ) {
8487            PTDEBUG && _d('Skipping suspicious table name:', $tbl);
8488            next;
8489         }
8490
8491         push @tables, $tbl;
8492      }
8493   }
8494   return @tables;
8495}
8496
8497sub has_derived_table {
8498   my ( $self, $query ) = @_;
8499   my $match = $query =~ m/$has_derived/;
8500   PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
8501   return $match;
8502}
8503
8504sub get_aliases {
8505   my ( $self, $query, $list ) = @_;
8506
8507   my $result = {
8508      DATABASE => {},
8509      TABLE    => {},
8510   };
8511   return $result unless $query;
8512
8513   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
8514
8515   $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
8516
8517   my @tbl_refs;
8518   my ($tbl_refs, $from) = $query =~ m{
8519      (
8520         (FROM|INTO|UPDATE)\b\s*   # Keyword before table refs
8521         .+?                       # Table refs
8522      )
8523      (?:\s+|\z)                   # If the query does not end with the table
8524      (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
8525   }ix;
8526
8527   if ( $tbl_refs ) {
8528
8529      if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
8530         $tbl_refs =~ s/\([^\)]+\)\s*//;
8531      }
8532
8533      PTDEBUG && _d('tbl refs:', $tbl_refs);
8534
8535      my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
8536
8537      my $after_tbl  = qr/(?:,|JOIN|ON|USING|\z)/i;
8538
8539      $tbl_refs =~ s/ = /=/g;
8540
8541      while (
8542         $tbl_refs =~ m{
8543            $before_tbl\b\s*
8544               ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
8545            \s*$after_tbl
8546         }xgio )
8547      {
8548         my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
8549         PTDEBUG && _d('Match table:', $tbl_ref);
8550         push @tbl_refs, $tbl_ref;
8551         $alias = $self->trim_identifier($alias);
8552
8553         if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
8554            PTDEBUG && _d('Subquery', $tbl_ref);
8555            $result->{TABLE}->{$alias} = undef;
8556            next;
8557         }
8558
8559         my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
8560         $db  = $self->trim_identifier($db);
8561         $tbl = $self->trim_identifier($tbl);
8562         $result->{TABLE}->{$alias || $tbl} = $tbl;
8563         $result->{DATABASE}->{$tbl}        = $db if $db;
8564      }
8565   }
8566   else {
8567      PTDEBUG && _d("No tables ref in", $query);
8568   }
8569
8570   if ( $list ) {
8571      return \@tbl_refs;
8572   }
8573   else {
8574      return $result;
8575   }
8576}
8577
8578sub split {
8579   my ( $self, $query ) = @_;
8580   return unless $query;
8581   $query = $self->clean_query($query);
8582   PTDEBUG && _d('Splitting', $query);
8583
8584   my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
8585
8586   my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
8587
8588   my @statements;
8589   if ( @split_statements == 1 ) {
8590      push @statements, $query;
8591   }
8592   else {
8593      for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
8594         push @statements, $split_statements[$i].$split_statements[$i+1];
8595
8596         if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
8597            $statements[-2] .= pop @statements;
8598         }
8599      }
8600   }
8601
8602   PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
8603   return @statements;
8604}
8605
8606sub clean_query {
8607   my ( $self, $query ) = @_;
8608   return unless $query;
8609   $query =~ s!/\*.*?\*/! !g;  # Remove /* comment blocks */
8610   $query =~ s/^\s+//;         # Remove leading spaces
8611   $query =~ s/\s+$//;         # Remove trailing spaces
8612   $query =~ s/\s{2,}/ /g;     # Remove extra spaces
8613   return $query;
8614}
8615
8616sub split_subquery {
8617   my ( $self, $query ) = @_;
8618   return unless $query;
8619   $query = $self->clean_query($query);
8620   $query =~ s/;$//;
8621
8622   my @subqueries;
8623   my $sqno = 0;  # subquery number
8624   my $pos  = 0;
8625   while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
8626      $pos = pos($query);
8627      my $word = $1;
8628      PTDEBUG && _d($word, $sqno);
8629      if ( $word =~ m/^\(?SELECT\b/i ) {
8630         my $start_pos = $pos - length($word) - 1;
8631         if ( $start_pos ) {
8632            $sqno++;
8633            PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
8634            $subqueries[$sqno] = {
8635               start_pos => $start_pos,
8636               end_pos   => 0,
8637               len       => 0,
8638               words     => [$word],
8639               lp        => 1, # left parentheses
8640               rp        => 0, # right parentheses
8641               done      => 0,
8642            };
8643         }
8644         else {
8645            PTDEBUG && _d('Main SELECT at pos 0');
8646         }
8647      }
8648      else {
8649         next unless $sqno;  # next unless we're in a subquery
8650         PTDEBUG && _d('In subquery', $sqno);
8651         my $sq = $subqueries[$sqno];
8652         if ( $sq->{done} ) {
8653            PTDEBUG && _d('This subquery is done; SQL is for',
8654               ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
8655            next;
8656         }
8657         push @{$sq->{words}}, $word;
8658         my $lp = ($word =~ tr/\(//) || 0;
8659         my $rp = ($word =~ tr/\)//) || 0;
8660         PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
8661         if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
8662            my $end_pos = $pos - 1;
8663            PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
8664            $sq->{end_pos} = $end_pos;
8665            $sq->{len}     = $end_pos - $sq->{start_pos};
8666         }
8667      }
8668   }
8669
8670   for my $i ( 1..$#subqueries ) {
8671      my $sq = $subqueries[$i];
8672      next unless $sq;
8673      $sq->{sql} = join(' ', @{$sq->{words}});
8674      substr $query,
8675         $sq->{start_pos} + 1,  # +1 for (
8676         $sq->{len} - 1,        # -1 for )
8677         "__subquery_$i";
8678   }
8679
8680   return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
8681}
8682
8683sub query_type {
8684   my ( $self, $query, $qr ) = @_;
8685   my ($type, undef) = $qr->distill_verbs($query);
8686   my $rw;
8687   if ( $type =~ m/^SELECT\b/ ) {
8688      $rw = 'read';
8689   }
8690   elsif ( $type =~ m/^$data_manip_stmts\b/
8691           || $type =~ m/^$data_def_stmts\b/  ) {
8692      $rw = 'write'
8693   }
8694
8695   return {
8696      type => $type,
8697      rw   => $rw,
8698   }
8699}
8700
8701sub get_columns {
8702   my ( $self, $query ) = @_;
8703   my $cols = [];
8704   return $cols unless $query;
8705   my $cols_def;
8706
8707   if ( $query =~ m/^SELECT/i ) {
8708      $query =~ s/
8709         ^SELECT\s+
8710           (?:ALL
8711              |DISTINCT
8712              |DISTINCTROW
8713              |HIGH_PRIORITY
8714              |STRAIGHT_JOIN
8715              |SQL_SMALL_RESULT
8716              |SQL_BIG_RESULT
8717              |SQL_BUFFER_RESULT
8718              |SQL_CACHE
8719              |SQL_NO_CACHE
8720              |SQL_CALC_FOUND_ROWS
8721           )\s+
8722      /SELECT /xgi;
8723      ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
8724   }
8725   elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
8726      ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
8727   }
8728
8729   PTDEBUG && _d('Columns:', $cols_def);
8730   if ( $cols_def ) {
8731      @$cols = split(',', $cols_def);
8732      map {
8733         my $col = $_;
8734         $col = s/^\s+//g;
8735         $col = s/\s+$//g;
8736         $col;
8737      } @$cols;
8738   }
8739
8740   return $cols;
8741}
8742
8743sub parse {
8744   my ( $self, $query ) = @_;
8745   return unless $query;
8746   my $parsed = {};
8747
8748   $query =~ s/\n/ /g;
8749   $query = $self->clean_query($query);
8750
8751   $parsed->{query}   = $query,
8752   $parsed->{tables}  = $self->get_aliases($query, 1);
8753   $parsed->{columns} = $self->get_columns($query);
8754
8755   my ($type) = $query =~ m/^(\w+)/;
8756   $parsed->{type} = lc $type;
8757
8758
8759   $parsed->{sub_queries} = [];
8760
8761   return $parsed;
8762}
8763
8764sub extract_tables {
8765   my ( $self, %args ) = @_;
8766   my $query      = $args{query};
8767   my $default_db = $args{default_db};
8768   my $q          = $self->{Quoter} || $args{Quoter};
8769   return unless $query;
8770   PTDEBUG && _d('Extracting tables');
8771   my @tables;
8772   my %seen;
8773   foreach my $db_tbl ( $self->get_tables($query) ) {
8774      next unless $db_tbl;
8775      next if $seen{$db_tbl}++; # Unique-ify for issue 337.
8776      my ( $db, $tbl ) = $q->split_unquote($db_tbl);
8777      push @tables, [ $db || $default_db, $tbl ];
8778   }
8779   return @tables;
8780}
8781
8782sub trim_identifier {
8783   my ($self, $str) = @_;
8784   return unless defined $str;
8785   $str =~ s/`//g;
8786   $str =~ s/^\s+//;
8787   $str =~ s/\s+$//;
8788   return $str;
8789}
8790
8791sub _d {
8792   my ($package, undef, $line) = caller 0;
8793   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
8794        map { defined $_ ? $_ : 'undef' }
8795        @_;
8796   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
8797}
8798
87991;
8800}
8801# ###########################################################################
8802# End QueryParser package
8803# ###########################################################################
8804
8805# ###########################################################################
8806# TableParser package
8807# This package is a copy without comments from the original.  The original
8808# with comments and its test file can be found in the Bazaar repository at,
8809#   lib/TableParser.pm
8810#   t/lib/TableParser.t
8811# See https://launchpad.net/percona-toolkit for more information.
8812# ###########################################################################
8813{
8814package TableParser;
8815
8816use strict;
8817use warnings FATAL => 'all';
8818use English qw(-no_match_vars);
8819use constant PTDEBUG => $ENV{PTDEBUG} || 0;
8820
8821use Data::Dumper;
8822$Data::Dumper::Indent    = 1;
8823$Data::Dumper::Sortkeys  = 1;
8824$Data::Dumper::Quotekeys = 0;
8825
8826local $EVAL_ERROR;
8827eval {
8828   require Quoter;
8829};
8830
8831sub new {
8832   my ( $class, %args ) = @_;
8833   my $self = { %args };
8834   $self->{Quoter} ||= Quoter->new();
8835   return bless $self, $class;
8836}
8837
8838sub Quoter { shift->{Quoter} }
8839
8840sub get_create_table {
8841   my ( $self, $dbh, $db, $tbl ) = @_;
8842   die "I need a dbh parameter" unless $dbh;
8843   die "I need a db parameter"  unless $db;
8844   die "I need a tbl parameter" unless $tbl;
8845   my $q = $self->{Quoter};
8846
8847   my $new_sql_mode
8848      = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, }
8849      . q{@@SQL_MODE := '', }
8850      . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, }
8851      . q{@@SQL_QUOTE_SHOW_CREATE := 1 */};
8852
8853   my $old_sql_mode
8854      = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, }
8855      . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */};
8856
8857   PTDEBUG && _d($new_sql_mode);
8858   eval { $dbh->do($new_sql_mode); };
8859   PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
8860
8861   my $use_sql = 'USE ' . $q->quote($db);
8862   PTDEBUG && _d($dbh, $use_sql);
8863   $dbh->do($use_sql);
8864
8865   my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
8866   PTDEBUG && _d($show_sql);
8867   my $href;
8868   eval { $href = $dbh->selectrow_hashref($show_sql); };
8869   if ( my $e = $EVAL_ERROR ) {
8870      PTDEBUG && _d($old_sql_mode);
8871      $dbh->do($old_sql_mode);
8872
8873      die $e;
8874   }
8875
8876   PTDEBUG && _d($old_sql_mode);
8877   $dbh->do($old_sql_mode);
8878
8879   my ($key) = grep { m/create (?:table|view)/i } keys %$href;
8880   if ( !$key ) {
8881      die "Error: no 'Create Table' or 'Create View' in result set from "
8882         . "$show_sql: " . Dumper($href);
8883   }
8884
8885   return $href->{$key};
8886}
8887
8888sub parse {
8889   my ( $self, $ddl, $opts ) = @_;
8890   return unless $ddl;
8891
8892   if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) {
8893      $ddl = $self->ansi_to_legacy($ddl);
8894   }
8895   elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
8896      die "TableParser doesn't handle CREATE TABLE without quoting.";
8897   }
8898
8899   my ($name)     = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
8900   (undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
8901
8902   $ddl =~ s/(`[^`\n]+`)/\L$1/gm;
8903
8904   my $engine = $self->get_engine($ddl);
8905
8906   my @defs   = $ddl =~ m/^(\s+`.*?),?$/gm;
8907   my @cols   = map { $_ =~ m/`([^`]+)`/ } @defs;
8908   PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
8909
8910   my %def_for;
8911   @def_for{@cols} = @defs;
8912
8913   my (@nums, @null, @non_generated);
8914   my (%type_for, %is_nullable, %is_numeric, %is_autoinc, %is_generated);
8915   foreach my $col ( @cols ) {
8916      my $def = $def_for{$col};
8917
8918      $def =~ s/``//g;
8919
8920      my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
8921      die "Can't determine column type for $def" unless $type;
8922      $type_for{$col} = $type;
8923      if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
8924         push @nums, $col;
8925         $is_numeric{$col} = 1;
8926      }
8927      if ( $def !~ m/NOT NULL/ ) {
8928         push @null, $col;
8929         $is_nullable{$col} = 1;
8930      }
8931      if ( remove_quoted_text($def) =~ m/\WGENERATED\W/i ) {
8932          $is_generated{$col} = 1;
8933      } else {
8934          push @non_generated, $col;
8935      }
8936      $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
8937   }
8938
8939   my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
8940
8941   my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
8942
8943   return {
8944      name               => $name,
8945      cols               => \@cols,
8946      col_posn           => { map { $cols[$_] => $_ } 0..$#cols },
8947      is_col             => { map { $_ => 1 } @non_generated },
8948      null_cols          => \@null,
8949      is_nullable        => \%is_nullable,
8950      non_generated_cols => \@non_generated,
8951      is_autoinc         => \%is_autoinc,
8952      is_generated       => \%is_generated,
8953      clustered_key      => $clustered_key,
8954      keys               => $keys,
8955      defs               => \%def_for,
8956      numeric_cols       => \@nums,
8957      is_numeric         => \%is_numeric,
8958      engine             => $engine,
8959      type_for           => \%type_for,
8960      charset            => $charset,
8961   };
8962}
8963
8964sub remove_quoted_text {
8965   my ($string) = @_;
8966   $string =~ s/[^\\]`[^`]*[^\\]`//g;
8967   $string =~ s/[^\\]"[^"]*[^\\]"//g;
8968   $string =~ s/[^\\]'[^']*[^\\]'//g;
8969   return $string;
8970}
8971
8972sub sort_indexes {
8973   my ( $self, $tbl ) = @_;
8974
8975   my @indexes
8976      = sort {
8977         (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
8978         || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
8979         || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
8980         || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
8981      }
8982      grep {
8983         $tbl->{keys}->{$_}->{type} eq 'BTREE'
8984      }
8985      sort keys %{$tbl->{keys}};
8986
8987   PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
8988   return @indexes;
8989}
8990
8991sub find_best_index {
8992   my ( $self, $tbl, $index ) = @_;
8993   my $best;
8994   if ( $index ) {
8995      ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
8996   }
8997   if ( !$best ) {
8998      if ( $index ) {
8999         die "Index '$index' does not exist in table";
9000      }
9001      else {
9002         ($best) = $self->sort_indexes($tbl);
9003      }
9004   }
9005   PTDEBUG && _d('Best index found is', $best);
9006   return $best;
9007}
9008
9009sub find_possible_keys {
9010   my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
9011   return () unless $where;
9012   my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
9013      . ' WHERE ' . $where;
9014   PTDEBUG && _d($sql);
9015   my $expl = $dbh->selectrow_hashref($sql);
9016   $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
9017   if ( $expl->{possible_keys} ) {
9018      PTDEBUG && _d('possible_keys =', $expl->{possible_keys});
9019      my @candidates = split(',', $expl->{possible_keys});
9020      my %possible   = map { $_ => 1 } @candidates;
9021      if ( $expl->{key} ) {
9022         PTDEBUG && _d('MySQL chose', $expl->{key});
9023         unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
9024         PTDEBUG && _d('Before deduping:', join(', ', @candidates));
9025         my %seen;
9026         @candidates = grep { !$seen{$_}++ } @candidates;
9027      }
9028      PTDEBUG && _d('Final list:', join(', ', @candidates));
9029      return @candidates;
9030   }
9031   else {
9032      PTDEBUG && _d('No keys in possible_keys');
9033      return ();
9034   }
9035}
9036
9037sub check_table {
9038   my ( $self, %args ) = @_;
9039   my @required_args = qw(dbh db tbl);
9040   foreach my $arg ( @required_args ) {
9041      die "I need a $arg argument" unless $args{$arg};
9042   }
9043   my ($dbh, $db, $tbl) = @args{@required_args};
9044   my $q      = $self->{Quoter} || 'Quoter';
9045   my $db_tbl = $q->quote($db, $tbl);
9046   PTDEBUG && _d('Checking', $db_tbl);
9047
9048   $self->{check_table_error} = undef;
9049
9050   my $sql = "SHOW TABLES FROM " . $q->quote($db)
9051           . ' LIKE ' . $q->literal_like($tbl);
9052   PTDEBUG && _d($sql);
9053   my $row;
9054   eval {
9055      $row = $dbh->selectrow_arrayref($sql);
9056   };
9057   if ( my $e = $EVAL_ERROR ) {
9058      PTDEBUG && _d($e);
9059      $self->{check_table_error} = $e;
9060      return 0;
9061   }
9062   if ( !$row->[0] || $row->[0] ne $tbl ) {
9063      PTDEBUG && _d('Table does not exist');
9064      return 0;
9065   }
9066
9067   PTDEBUG && _d('Table', $db, $tbl, 'exists');
9068   return 1;
9069
9070}
9071
9072sub get_engine {
9073   my ( $self, $ddl, $opts ) = @_;
9074   my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
9075   PTDEBUG && _d('Storage engine:', $engine);
9076   return $engine || undef;
9077}
9078
9079sub get_keys {
9080   my ( $self, $ddl, $opts, $is_nullable ) = @_;
9081   my $engine        = $self->get_engine($ddl);
9082   my $keys          = {};
9083   my $clustered_key = undef;
9084
9085   KEY:
9086   foreach my $key ( $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm ) {
9087
9088      next KEY if $key =~ m/FOREIGN/;
9089
9090      my $key_ddl = $key;
9091      PTDEBUG && _d('Parsed key:', $key_ddl);
9092
9093      if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) {
9094         $key =~ s/USING HASH/USING BTREE/;
9095      }
9096
9097      my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
9098      my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
9099      $type = $type || $special || 'BTREE';
9100      my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
9101      my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
9102      my @cols;
9103      my @col_prefixes;
9104      foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
9105         my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
9106         push @cols, $name;
9107         push @col_prefixes, $prefix;
9108      }
9109      $name =~ s/`//g;
9110
9111      PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
9112
9113      $keys->{$name} = {
9114         name         => $name,
9115         type         => $type,
9116         colnames     => $cols,
9117         cols         => \@cols,
9118         col_prefixes => \@col_prefixes,
9119         is_unique    => $unique,
9120         is_nullable  => scalar(grep { $is_nullable->{$_} } @cols),
9121         is_col       => { map { $_ => 1 } @cols },
9122         ddl          => $key_ddl,
9123      };
9124
9125      if ( ($engine || '') =~ m/InnoDB/i && !$clustered_key ) {
9126         my $this_key = $keys->{$name};
9127         if ( $this_key->{name} eq 'PRIMARY' ) {
9128            $clustered_key = 'PRIMARY';
9129         }
9130         elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
9131            $clustered_key = $this_key->{name};
9132         }
9133         PTDEBUG && $clustered_key && _d('This key is the clustered key');
9134      }
9135   }
9136
9137   return $keys, $clustered_key;
9138}
9139
9140sub get_fks {
9141   my ( $self, $ddl, $opts ) = @_;
9142   my $q   = $self->{Quoter};
9143   my $fks = {};
9144
9145   foreach my $fk (
9146      $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
9147   {
9148      my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
9149      my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
9150      my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
9151
9152      my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
9153      my %parent_tbl = (tbl => $tbl);
9154      $parent_tbl{db} = $db if $db;
9155
9156      if ( $parent !~ m/\./ && $opts->{database} ) {
9157         $parent = $q->quote($opts->{database}) . ".$parent";
9158      }
9159
9160      $fks->{$name} = {
9161         name           => $name,
9162         colnames       => $cols,
9163         cols           => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
9164         parent_tbl     => \%parent_tbl,
9165         parent_tblname => $parent,
9166         parent_cols    => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
9167         parent_colnames=> $parent_cols,
9168         ddl            => $fk,
9169      };
9170   }
9171
9172   return $fks;
9173}
9174
9175sub remove_auto_increment {
9176   my ( $self, $ddl ) = @_;
9177   $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
9178   return $ddl;
9179}
9180
9181sub get_table_status {
9182   my ( $self, $dbh, $db, $like ) = @_;
9183   my $q = $self->{Quoter};
9184   my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
9185   my @params;
9186   if ( $like ) {
9187      $sql .= ' LIKE ?';
9188      push @params, $like;
9189   }
9190   PTDEBUG && _d($sql, @params);
9191   my $sth = $dbh->prepare($sql);
9192   eval { $sth->execute(@params); };
9193   if ($EVAL_ERROR) {
9194      PTDEBUG && _d($EVAL_ERROR);
9195      return;
9196   }
9197   my @tables = @{$sth->fetchall_arrayref({})};
9198   @tables = map {
9199      my %tbl; # Make a copy with lowercased keys
9200      @tbl{ map { lc $_ } keys %$_ } = values %$_;
9201      $tbl{engine} ||= $tbl{type} || $tbl{comment};
9202      delete $tbl{type};
9203      \%tbl;
9204   } @tables;
9205   return @tables;
9206}
9207
9208my $ansi_quote_re = qr/" [^"]* (?: "" [^"]* )* (?<=.) "/ismx;
9209sub ansi_to_legacy {
9210   my ($self, $ddl) = @_;
9211   $ddl =~ s/($ansi_quote_re)/ansi_quote_replace($1)/ge;
9212   return $ddl;
9213}
9214
9215sub ansi_quote_replace {
9216   my ($val) = @_;
9217   $val =~ s/^"|"$//g;
9218   $val =~ s/`/``/g;
9219   $val =~ s/""/"/g;
9220   return "`$val`";
9221}
9222
9223sub _d {
9224   my ($package, undef, $line) = caller 0;
9225   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
9226        map { defined $_ ? $_ : 'undef' }
9227        @_;
9228   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
9229}
9230
92311;
9232}
9233# ###########################################################################
9234# End TableParser package
9235# ###########################################################################
9236
9237# ###########################################################################
9238# QueryReview package
9239# This package is a copy without comments from the original.  The original
9240# with comments and its test file can be found in the Bazaar repository at,
9241#   lib/QueryReview.pm
9242#   t/lib/QueryReview.t
9243# See https://launchpad.net/percona-toolkit for more information.
9244# ###########################################################################
9245{
9246package QueryReview;
9247
9248use strict;
9249use warnings FATAL => 'all';
9250use English qw(-no_match_vars);
9251use constant PTDEBUG => $ENV{PTDEBUG} || 0;
9252
9253Transformers->import(qw(make_checksum parse_timestamp));
9254
9255my %basic_cols = map { $_ => 1 }
9256   qw(checksum fingerprint sample first_seen last_seen reviewed_by
9257      reviewed_on comments);
9258my %skip_cols  = map { $_ => 1 } qw(fingerprint sample checksum);
9259
9260sub new {
9261   my ( $class, %args ) = @_;
9262   foreach my $arg ( qw(dbh db_tbl tbl_struct quoter) ) {
9263      die "I need a $arg argument" unless $args{$arg};
9264   }
9265
9266   foreach my $col ( keys %basic_cols ) {
9267      die "Query review table $args{db_tbl} does not have a $col column"
9268         unless $args{tbl_struct}->{is_col}->{$col};
9269   }
9270
9271   my $now = defined $args{ts_default} ? $args{ts_default} : 'NOW()';
9272
9273   my $sql = <<"      SQL";
9274      INSERT INTO $args{db_tbl}
9275      (checksum, fingerprint, sample, first_seen, last_seen)
9276      VALUES(?, ?, ?, COALESCE(?, $now), COALESCE(?, $now))
9277      ON DUPLICATE KEY UPDATE
9278         first_seen = IF(
9279            first_seen IS NULL,
9280            COALESCE(?, $now),
9281            LEAST(first_seen, COALESCE(?, $now))),
9282         last_seen = IF(
9283            last_seen IS NULL,
9284            COALESCE(?, $now),
9285            GREATEST(last_seen, COALESCE(?, $now)))
9286      SQL
9287   PTDEBUG && _d('SQL to insert into review table:', $sql);
9288   my $insert_sth = $args{dbh}->prepare($sql);
9289
9290   my @review_cols = grep { !$skip_cols{$_} } @{$args{tbl_struct}->{cols}};
9291   $sql = "SELECT "
9292        . join(', ', map { $args{quoter}->quote($_) } @review_cols)
9293        . ", checksum AS checksum_conv FROM $args{db_tbl}"
9294        . " WHERE checksum=?";
9295   PTDEBUG && _d('SQL to select from review table:', $sql);
9296   my $select_sth = $args{dbh}->prepare($sql);
9297
9298   my $self = {
9299      dbh         => $args{dbh},
9300      db_tbl      => $args{db_tbl},
9301      insert_sth  => $insert_sth,
9302      select_sth  => $select_sth,
9303      tbl_struct  => $args{tbl_struct},
9304      quoter      => $args{quoter},
9305      ts_default  => $now,
9306   };
9307   return bless $self, $class;
9308}
9309
9310sub get_review_info {
9311   my ( $self, $id ) = @_;
9312   $self->{select_sth}->execute(make_checksum($id));
9313   my $review_vals = $self->{select_sth}->fetchall_arrayref({});
9314   if ( $review_vals && @$review_vals == 1 ) {
9315      return $review_vals->[0];
9316   }
9317   return undef;
9318}
9319
9320sub set_review_info {
9321   my ( $self, %args ) = @_;
9322   $self->{insert_sth}->execute(
9323      make_checksum($args{fingerprint}),
9324      @args{qw(fingerprint sample)},
9325      map { $args{$_} ? parse_timestamp($args{$_}) : undef }
9326         qw(first_seen last_seen first_seen first_seen last_seen last_seen));
9327}
9328
9329sub review_cols {
9330   my ( $self ) = @_;
9331   return grep { !$skip_cols{$_} } @{$self->{tbl_struct}->{cols}};
9332}
9333
9334sub _d {
9335   my ($package, undef, $line) = caller 0;
9336   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
9337        map { defined $_ ? $_ : 'undef' }
9338        @_;
9339   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
9340}
9341
93421;
9343}
9344# ###########################################################################
9345# End QueryReview package
9346# ###########################################################################
9347
9348# ###########################################################################
9349# QueryHistory package
9350# This package is a copy without comments from the original.  The original
9351# with comments and its test file can be found in the Bazaar repository at,
9352#   lib/QueryHistory.pm
9353#   t/lib/QueryHistory.t
9354# See https://launchpad.net/percona-toolkit for more information.
9355# ###########################################################################
9356{
9357package QueryHistory;
9358
9359use English qw(-no_match_vars);
9360use constant PTDEBUG => $ENV{PTDEBUG} || 0;
9361
9362use Lmo;
9363
9364use Quoter;
9365use Transformers qw(make_checksum parse_timestamp);
9366
9367has history_dbh => (
9368   is       => 'ro',
9369   required => 1,
9370);
9371
9372has history_sth => (
9373   is => 'rw',
9374);
9375
9376has history_metrics => (
9377   is => 'rw',
9378   isa => 'ArrayRef',
9379);
9380
9381has column_pattern => (
9382   is       => 'ro',
9383   isa      => 'Regexp',
9384   required => 1,
9385);
9386
9387has ts_default => (
9388   is      => 'ro',
9389   isa     => 'Str',
9390   default => sub { 'NOW()' },
9391);
9392
9393sub set_history_options {
9394   my ( $self, %args ) = @_;
9395   foreach my $arg ( qw(table tbl_struct) ) {
9396      die "I need a $arg argument" unless $args{$arg};
9397   }
9398
9399   my $col_pat = $self->column_pattern();
9400
9401   my @cols;
9402   my @metrics;
9403   foreach my $col ( @{$args{tbl_struct}->{cols}} ) {
9404      my ( $attr, $metric ) = $col =~ m/$col_pat/;
9405      next unless $attr && $metric;
9406
9407
9408      $attr = ucfirst $attr if $attr =~ m/_/;
9409      $attr = 'Filesort' if $attr eq 'filesort';
9410
9411      $attr =~ s/^Qc_hit/QC_Hit/;  # Qc_hit is really QC_Hit
9412      $attr =~ s/^Innodb/InnoDB/g; # Innodb is really InnoDB
9413      $attr =~ s/_io_/_IO_/g;      # io is really IO
9414
9415      push @cols, $col;
9416      push @metrics, [$attr, $metric];
9417   }
9418
9419   my $ts_default = $self->ts_default;
9420
9421   my $sql = "REPLACE INTO $args{table}("
9422      . join(', ',
9423         map { Quoter->quote($_) } ('checksum', 'sample', @cols))
9424      . ') VALUES (?, ?'
9425      . (@cols ? ', ' : '')  # issue 1265
9426      . join(', ', map {
9427         $_ eq 'ts_min' || $_ eq 'ts_max'
9428            ? "COALESCE(?, $ts_default)"
9429            : '?'
9430        } @cols) . ')';
9431   PTDEBUG && _d($sql);
9432
9433   $self->history_sth($self->history_dbh->prepare($sql));
9434   $self->history_metrics(\@metrics);
9435
9436   return;
9437}
9438
9439sub set_review_history {
9440   my ( $self, $id, $sample, %data ) = @_;
9441   foreach my $thing ( qw(min max) ) {
9442      next unless defined $data{ts} && defined $data{ts}->{$thing};
9443      $data{ts}->{$thing} = parse_timestamp($data{ts}->{$thing});
9444   }
9445   $self->history_sth->execute(
9446      make_checksum($id),
9447      $sample,
9448      map { $data{$_->[0]}->{$_->[1]} } @{$self->history_metrics});
9449}
9450
9451sub _d {
9452   my ($package, undef, $line) = caller 0;
9453   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
9454        map { defined $_ ? $_ : 'undef' }
9455        @_;
9456   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
9457}
9458
94591;
9460}
9461# ###########################################################################
9462# End QueryHistory package
9463# ###########################################################################
9464
9465# ###########################################################################
9466# Daemon package
9467# This package is a copy without comments from the original.  The original
9468# with comments and its test file can be found in the Bazaar repository at,
9469#   lib/Daemon.pm
9470#   t/lib/Daemon.t
9471# See https://launchpad.net/percona-toolkit for more information.
9472# ###########################################################################
9473{
9474package Daemon;
9475
9476use strict;
9477use warnings FATAL => 'all';
9478use English qw(-no_match_vars);
9479
9480use constant PTDEBUG => $ENV{PTDEBUG} || 0;
9481
9482use POSIX qw(setsid);
9483use Fcntl qw(:DEFAULT);
9484
9485sub new {
9486   my ($class, %args) = @_;
9487   my $self = {
9488      log_file       => $args{log_file},
9489      pid_file       => $args{pid_file},
9490      daemonize      => $args{daemonize},
9491      force_log_file => $args{force_log_file},
9492      parent_exit    => $args{parent_exit},
9493      pid_file_owner => 0,
9494   };
9495   return bless $self, $class;
9496}
9497
9498sub run {
9499   my ($self) = @_;
9500
9501   my $daemonize      = $self->{daemonize};
9502   my $pid_file       = $self->{pid_file};
9503   my $log_file       = $self->{log_file};
9504   my $force_log_file = $self->{force_log_file};
9505   my $parent_exit    = $self->{parent_exit};
9506
9507   PTDEBUG && _d('Starting daemon');
9508
9509   if ( $pid_file ) {
9510      eval {
9511         $self->_make_pid_file(
9512            pid      => $PID,  # parent's pid
9513            pid_file => $pid_file,
9514         );
9515      };
9516      die "$EVAL_ERROR\n" if $EVAL_ERROR;
9517      if ( !$daemonize ) {
9518         $self->{pid_file_owner} = $PID;  # parent's pid
9519      }
9520   }
9521
9522   if ( $daemonize ) {
9523      defined (my $child_pid = fork()) or die "Cannot fork: $OS_ERROR";
9524      if ( $child_pid ) {
9525         PTDEBUG && _d('Forked child', $child_pid);
9526         $parent_exit->($child_pid) if $parent_exit;
9527         exit 0;
9528      }
9529
9530      POSIX::setsid() or die "Cannot start a new session: $OS_ERROR";
9531      chdir '/'       or die "Cannot chdir to /: $OS_ERROR";
9532
9533      if ( $pid_file ) {
9534         $self->_update_pid_file(
9535            pid      => $PID,  # child's pid
9536            pid_file => $pid_file,
9537         );
9538         $self->{pid_file_owner} = $PID;
9539      }
9540   }
9541
9542   if ( $daemonize || $force_log_file ) {
9543      PTDEBUG && _d('Redirecting STDIN to /dev/null');
9544      close STDIN;
9545      open  STDIN, '/dev/null'
9546         or die "Cannot reopen STDIN to /dev/null: $OS_ERROR";
9547      if ( $log_file ) {
9548         PTDEBUG && _d('Redirecting STDOUT and STDERR to', $log_file);
9549         close STDOUT;
9550         open  STDOUT, '>>', $log_file
9551            or die "Cannot open log file $log_file: $OS_ERROR";
9552
9553         close STDERR;
9554         open  STDERR, ">&STDOUT"
9555            or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
9556      }
9557      else {
9558         if ( -t STDOUT ) {
9559            PTDEBUG && _d('No log file and STDOUT is a terminal;',
9560               'redirecting to /dev/null');
9561            close STDOUT;
9562            open  STDOUT, '>', '/dev/null'
9563               or die "Cannot reopen STDOUT to /dev/null: $OS_ERROR";
9564         }
9565         if ( -t STDERR ) {
9566            PTDEBUG && _d('No log file and STDERR is a terminal;',
9567               'redirecting to /dev/null');
9568            close STDERR;
9569            open  STDERR, '>', '/dev/null'
9570               or die "Cannot reopen STDERR to /dev/null: $OS_ERROR";
9571         }
9572      }
9573
9574      $OUTPUT_AUTOFLUSH = 1;
9575   }
9576
9577   PTDEBUG && _d('Daemon running');
9578   return;
9579}
9580
9581sub _make_pid_file {
9582   my ($self, %args) = @_;
9583   my @required_args = qw(pid pid_file);
9584   foreach my $arg ( @required_args ) {
9585      die "I need a $arg argument" unless $args{$arg};
9586   };
9587   my $pid      = $args{pid};
9588   my $pid_file = $args{pid_file};
9589
9590   eval {
9591      sysopen(PID_FH, $pid_file, O_RDWR|O_CREAT|O_EXCL) or die $OS_ERROR;
9592      print PID_FH $PID, "\n";
9593      close PID_FH;
9594   };
9595   if ( my $e = $EVAL_ERROR ) {
9596      if ( $e =~ m/file exists/i ) {
9597         my $old_pid = $self->_check_pid_file(
9598            pid_file => $pid_file,
9599            pid      => $PID,
9600         );
9601         if ( $old_pid ) {
9602            warn "Overwriting PID file $pid_file because PID $old_pid "
9603               . "is not running.\n";
9604         }
9605         $self->_update_pid_file(
9606            pid      => $PID,
9607            pid_file => $pid_file
9608         );
9609      }
9610      else {
9611         die "Error creating PID file $pid_file: $e\n";
9612      }
9613   }
9614
9615   return;
9616}
9617
9618sub _check_pid_file {
9619   my ($self, %args) = @_;
9620   my @required_args = qw(pid_file pid);
9621   foreach my $arg ( @required_args ) {
9622      die "I need a $arg argument" unless $args{$arg};
9623   };
9624   my $pid_file = $args{pid_file};
9625   my $pid      = $args{pid};
9626
9627   PTDEBUG && _d('Checking if PID in', $pid_file, 'is running');
9628
9629   if ( ! -f $pid_file ) {
9630      PTDEBUG && _d('PID file', $pid_file, 'does not exist');
9631      return;
9632   }
9633
9634   open my $fh, '<', $pid_file
9635      or die "Error opening $pid_file: $OS_ERROR";
9636   my $existing_pid = do { local $/; <$fh> };
9637   chomp($existing_pid) if $existing_pid;
9638   close $fh
9639      or die "Error closing $pid_file: $OS_ERROR";
9640
9641   if ( $existing_pid ) {
9642      if ( $existing_pid == $pid ) {
9643         warn "The current PID $pid already holds the PID file $pid_file\n";
9644         return;
9645      }
9646      else {
9647         PTDEBUG && _d('Checking if PID', $existing_pid, 'is running');
9648         my $pid_is_alive = kill 0, $existing_pid;
9649         if ( $pid_is_alive ) {
9650            die "PID file $pid_file exists and PID $existing_pid is running\n";
9651         }
9652      }
9653   }
9654   else {
9655      die "PID file $pid_file exists but it is empty.  Remove the file "
9656         . "if the process is no longer running.\n";
9657   }
9658
9659   return $existing_pid;
9660}
9661
9662sub _update_pid_file {
9663   my ($self, %args) = @_;
9664   my @required_args = qw(pid pid_file);
9665   foreach my $arg ( @required_args ) {
9666      die "I need a $arg argument" unless $args{$arg};
9667   };
9668   my $pid      = $args{pid};
9669   my $pid_file = $args{pid_file};
9670
9671   open my $fh, '>', $pid_file
9672      or die "Cannot open $pid_file: $OS_ERROR";
9673   print { $fh } $pid, "\n"
9674      or die "Cannot print to $pid_file: $OS_ERROR";
9675   close $fh
9676      or warn "Cannot close $pid_file: $OS_ERROR";
9677
9678   return;
9679}
9680
9681sub remove_pid_file {
9682   my ($self, $pid_file) = @_;
9683   $pid_file ||= $self->{pid_file};
9684   if ( $pid_file && -f $pid_file ) {
9685      unlink $self->{pid_file}
9686         or warn "Cannot remove PID file $pid_file: $OS_ERROR";
9687      PTDEBUG && _d('Removed PID file');
9688   }
9689   else {
9690      PTDEBUG && _d('No PID to remove');
9691   }
9692   return;
9693}
9694
9695sub DESTROY {
9696   my ($self) = @_;
9697
9698   if ( $self->{pid_file_owner} == $PID ) {
9699      $self->remove_pid_file();
9700   }
9701
9702   return;
9703}
9704
9705sub _d {
9706   my ($package, undef, $line) = caller 0;
9707   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
9708        map { defined $_ ? $_ : 'undef' }
9709        @_;
9710   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
9711}
9712
97131;
9714}
9715# ###########################################################################
9716# End Daemon package
9717# ###########################################################################
9718
9719# ###########################################################################
9720# BinaryLogParser package
9721# This package is a copy without comments from the original.  The original
9722# with comments and its test file can be found in the Bazaar repository at,
9723#   lib/BinaryLogParser.pm
9724#   t/lib/BinaryLogParser.t
9725# See https://launchpad.net/percona-toolkit for more information.
9726# ###########################################################################
9727{
9728package BinaryLogParser;
9729
9730use strict;
9731use warnings FATAL => 'all';
9732use English qw(-no_match_vars);
9733use constant PTDEBUG => $ENV{PTDEBUG} || 0;
9734
9735use Data::Dumper;
9736$Data::Dumper::Indent    = 1;
9737$Data::Dumper::Sortkeys  = 1;
9738$Data::Dumper::Quotekeys = 0;
9739
9740my $binlog_line_1 = qr/at (\d+)$/m;
9741my $binlog_line_2 = qr/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)\s+server\s+id\s+(\d+)\s+end_log_pos\s+(\d+)\s+(?:CRC32\s+0x[a-f0-9]{8}\s+)?(\S+)\s*([^\n]*)$/m;
9742my $binlog_line_2_rest = qr/thread_id=(\d+)\s+exec_time=(\d+)\s+error_code=(\d+)/m;
9743
9744sub new {
9745   my ( $class, %args ) = @_;
9746   my $self = {
9747      delim     => undef,
9748      delim_len => 0,
9749   };
9750   return bless $self, $class;
9751}
9752
9753
9754sub parse_event {
9755   my ( $self, %args ) = @_;
9756   my @required_args = qw(next_event tell);
9757   foreach my $arg ( @required_args ) {
9758      die "I need a $arg argument" unless $args{$arg};
9759   }
9760   my ($next_event, $tell) = @args{@required_args};
9761
9762   local $INPUT_RECORD_SEPARATOR = ";\n#";
9763   my $pos_in_log = $tell->();
9764   my $stmt;
9765   my ($delim, $delim_len) = ($self->{delim}, $self->{delim_len});
9766
9767   EVENT:
9768   while ( defined($stmt = $next_event->()) ) {
9769      my @properties = ('pos_in_log', $pos_in_log);
9770      my ($ts, $sid, $end, $type, $rest);
9771      $pos_in_log = $tell->();
9772      $stmt =~ s/;\n#?\Z//;
9773
9774      my ( $got_offset, $got_hdr );
9775      my $pos = 0;
9776      my $len = length($stmt);
9777      my $found_arg = 0;
9778      LINE:
9779      while ( $stmt =~ m/^(.*)$/mg ) { # /g requires scalar match.
9780         $pos     = pos($stmt);  # Be careful not to mess this up!
9781         my $line = $1;          # Necessary for /g and pos() to work.
9782         $line    =~ s/$delim// if $delim;
9783         PTDEBUG && _d($line);
9784
9785         if ( $line =~ m/^\/\*.+\*\/;/ ) {
9786            PTDEBUG && _d('Comment line');
9787            next LINE;
9788         }
9789
9790         if ( $line =~ m/^DELIMITER/m ) {
9791            my ( $del ) = $line =~ m/^DELIMITER (\S*)$/m;
9792            if ( $del ) {
9793               $self->{delim_len} = $delim_len = length $del;
9794               $self->{delim}     = $delim     = quotemeta $del;
9795               PTDEBUG && _d('delimiter:', $delim);
9796            }
9797            else {
9798               PTDEBUG && _d('Delimiter reset to ;');
9799               $self->{delim}     = $delim     = undef;
9800               $self->{delim_len} = $delim_len = 0;
9801            }
9802            next LINE;
9803         }
9804
9805         next LINE if $line =~ m/End of log file/;
9806
9807         if ( !$got_offset && (my ( $offset ) = $line =~ m/$binlog_line_1/m) ) {
9808            PTDEBUG && _d('Got the at offset line');
9809            push @properties, 'offset', $offset;
9810            $got_offset++;
9811         }
9812
9813         elsif ( !$got_hdr && $line =~ m/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)/ ) {
9814            ($ts, $sid, $end, $type, $rest) = $line =~ m/$binlog_line_2/m;
9815            PTDEBUG && _d('Got the header line; type:', $type, 'rest:', $rest);
9816            push @properties, 'cmd', 'Query', 'ts', $ts, 'server_id', $sid,
9817               'end_log_pos', $end;
9818            $got_hdr++;
9819         }
9820
9821         elsif ( $line =~ m/^(?:#|use |SET)/i ) {
9822
9823            if ( my ( $db ) = $line =~ m/^use ([^;]+)/ ) {
9824               PTDEBUG && _d("Got a default database:", $db);
9825               push @properties, 'db', $db;
9826            }
9827
9828            elsif ( my ($setting) = $line =~ m/^SET\s+([^;]*)/ ) {
9829               PTDEBUG && _d("Got some setting:", $setting);
9830               push @properties, map { s/\s+//; lc } split(/,|\s*=\s*/, $setting);
9831            }
9832
9833         }
9834         else {
9835            PTDEBUG && _d("Got the query/arg line at pos", $pos);
9836            $found_arg++;
9837            if ( $got_offset && $got_hdr ) {
9838               if ( $type eq 'Xid' ) {
9839                  my ($xid) = $rest =~ m/(\d+)/;
9840                  push @properties, 'Xid', $xid;
9841               }
9842               elsif ( $type eq 'Query' ) {
9843                  my ($i, $t, $c) = $rest =~ m/$binlog_line_2_rest/m;
9844                  push @properties, 'Thread_id', $i, 'Query_time', $t,
9845                                    'error_code', $c;
9846               }
9847               elsif ( $type eq 'Start:' ) {
9848                  PTDEBUG && _d("Binlog start");
9849               }
9850               else {
9851                  PTDEBUG && _d('Unknown event type:', $type);
9852                  next EVENT;
9853               }
9854            }
9855            else {
9856               PTDEBUG && _d("It's not a query/arg, it's just some SQL fluff");
9857               push @properties, 'cmd', 'Query', 'ts', undef;
9858            }
9859
9860            my $delim_len = ($pos == length($stmt) ? $delim_len : 0);
9861            my $arg = substr($stmt, $pos - length($line) - $delim_len);
9862
9863            $arg =~ s/$delim// if $delim; # Remove the delimiter.
9864
9865            if ( $arg =~ m/^DELIMITER/m ) {
9866               my ( $del ) = $arg =~ m/^DELIMITER (\S*)$/m;
9867               if ( $del ) {
9868                  $self->{delim_len} = $delim_len = length $del;
9869                  $self->{delim}     = $delim     = quotemeta $del;
9870                  PTDEBUG && _d('delimiter:', $delim);
9871               }
9872               else {
9873                  PTDEBUG && _d('Delimiter reset to ;');
9874                  $del       = ';';
9875                  $self->{delim}     = $delim     = undef;
9876                  $self->{delim_len} = $delim_len = 0;
9877               }
9878
9879               $arg =~ s/^DELIMITER.*$//m;  # Remove DELIMITER from arg.
9880            }
9881
9882            $arg =~ s/;$//gm;  # Ensure ending ; are gone.
9883            $arg =~ s/\s+$//;  # Remove trailing spaces and newlines.
9884
9885            push @properties, 'arg', $arg, 'bytes', length($arg);
9886            last LINE;
9887         }
9888      } # LINE
9889
9890      if ( $found_arg ) {
9891         PTDEBUG && _d('Properties of event:', Dumper(\@properties));
9892         my $event = { @properties };
9893         if ( $args{stats} ) {
9894            $args{stats}->{events_read}++;
9895            $args{stats}->{events_parsed}++;
9896         }
9897         return $event;
9898      }
9899      else {
9900         PTDEBUG && _d('Event had no arg');
9901      }
9902   } # EVENT
9903
9904   $args{oktorun}->(0) if $args{oktorun};
9905   return;
9906}
9907
9908sub _d {
9909   my ($package, undef, $line) = caller 0;
9910   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
9911        map { defined $_ ? $_ : 'undef' }
9912        @_;
9913   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
9914}
9915
99161;
9917}
9918# ###########################################################################
9919# End BinaryLogParser package
9920# ###########################################################################
9921
9922# ###########################################################################
9923# GeneralLogParser package
9924# This package is a copy without comments from the original.  The original
9925# with comments and its test file can be found in the Bazaar repository at,
9926#   lib/GeneralLogParser.pm
9927#   t/lib/GeneralLogParser.t
9928# See https://launchpad.net/percona-toolkit for more information.
9929# ###########################################################################
9930{
9931package GeneralLogParser;
9932
9933use strict;
9934use warnings FATAL => 'all';
9935use English qw(-no_match_vars);
9936use constant PTDEBUG => $ENV{PTDEBUG} || 0;
9937
9938use Data::Dumper;
9939$Data::Dumper::Indent    = 1;
9940$Data::Dumper::Sortkeys  = 1;
9941$Data::Dumper::Quotekeys = 0;
9942
9943sub new {
9944   my ( $class ) = @_;
9945   my $self = {
9946      pending => [],
9947      db_for  => {},
9948   };
9949   return bless $self, $class;
9950}
9951
9952my $genlog_line_1= qr{
9953   \A
9954   (?:(\d{6}\s+\d{1,2}:\d\d:\d\d|\d{4}-\d{1,2}-\d{1,2}T\d\d:\d\d:\d\d\.\d+(?:Z|[-+]?\d\d:\d\d)?))? # Timestamp
9955   \s+
9956   (?:\s*(\d+))                     # Thread ID
9957   \s
9958   (\w+)                            # Command
9959   \s+
9960   (.*)                             # Argument
9961   \Z
9962}xs;
9963
9964sub parse_event {
9965   my ( $self, %args ) = @_;
9966   my @required_args = qw(next_event tell);
9967   foreach my $arg ( @required_args ) {
9968      die "I need a $arg argument" unless $args{$arg};
9969   }
9970   my ($next_event, $tell) = @args{@required_args};
9971
9972   my $pending = $self->{pending};
9973   my $db_for  = $self->{db_for};
9974   my $line;
9975   my $pos_in_log = $tell->();
9976   LINE:
9977   while (
9978         defined($line = shift @$pending)
9979      or defined($line = $next_event->())
9980   ) {
9981      PTDEBUG && _d($line);
9982      my ($ts, $thread_id, $cmd, $arg) = $line =~ m/$genlog_line_1/;
9983      if ( !($thread_id && $cmd) ) {
9984         PTDEBUG && _d('Not start of general log event');
9985         next;
9986      }
9987      my @properties = ('pos_in_log', $pos_in_log, 'ts', $ts,
9988         'Thread_id', $thread_id);
9989
9990      $pos_in_log = $tell->();
9991
9992      @$pending = ();
9993      if ( $cmd eq 'Query' ) {
9994         my $done = 0;
9995         do {
9996            $line = $next_event->();
9997            if ( $line ) {
9998               my (undef, $next_thread_id, $next_cmd)
9999                  = $line =~ m/$genlog_line_1/;
10000               if ( $next_thread_id && $next_cmd ) {
10001                  PTDEBUG && _d('Event done');
10002                  $done = 1;
10003                  push @$pending, $line;
10004               }
10005               else {
10006                  PTDEBUG && _d('More arg:', $line);
10007                  $arg .= $line;
10008               }
10009            }
10010            else {
10011               PTDEBUG && _d('No more lines');
10012               $done = 1;
10013            }
10014         } until ( $done );
10015
10016         chomp $arg;
10017         push @properties, 'cmd', 'Query', 'arg', $arg;
10018         push @properties, 'bytes', length($properties[-1]);
10019         push @properties, 'db', $db_for->{$thread_id} if $db_for->{$thread_id};
10020      }
10021      else {
10022         push @properties, 'cmd', 'Admin';
10023
10024         if ( $cmd eq 'Connect' ) {
10025            if ( $arg =~ m/^Access denied/ ) {
10026               $cmd = $arg;
10027            }
10028            else {
10029               my ($user) = $arg =~ m/(\S+)/;
10030               my ($db)   = $arg =~ m/on (\S+)/;
10031               my $host;
10032               ($user, $host) = split(/@/, $user);
10033               PTDEBUG && _d('Connect', $user, '@', $host, 'on', $db);
10034
10035               push @properties, 'user', $user if $user;
10036               push @properties, 'host', $host if $host;
10037               push @properties, 'db',   $db   if $db;
10038               $db_for->{$thread_id} = $db;
10039            }
10040         }
10041         elsif ( $cmd eq 'Init' ) {
10042            $cmd = 'Init DB';
10043            $arg =~ s/^DB\s+//;
10044            my ($db) = $arg =~ /(\S+)/;
10045            PTDEBUG && _d('Init DB:', $db);
10046            push @properties, 'db',   $db   if $db;
10047            $db_for->{$thread_id} = $db;
10048         }
10049
10050         push @properties, 'arg', "administrator command: $cmd";
10051         push @properties, 'bytes', length($properties[-1]);
10052      }
10053
10054      push @properties, 'Query_time', 0;
10055
10056      PTDEBUG && _d('Properties of event:', Dumper(\@properties));
10057      my $event = { @properties };
10058      if ( $args{stats} ) {
10059         $args{stats}->{events_read}++;
10060         $args{stats}->{events_parsed}++;
10061      }
10062      return $event;
10063   } # LINE
10064
10065   @{$self->{pending}} = ();
10066   $args{oktorun}->(0) if $args{oktorun};
10067   return;
10068}
10069
10070sub _d {
10071   my ($package, undef, $line) = caller 0;
10072   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
10073        map { defined $_ ? $_ : 'undef' }
10074        @_;
10075   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
10076}
10077
100781;
10079}
10080# ###########################################################################
10081# End GeneralLogParser package
10082# ###########################################################################
10083
10084# ###########################################################################
10085# RawLogParser package
10086# This package is a copy without comments from the original.  The original
10087# with comments and its test file can be found in the Bazaar repository at,
10088#   lib/RawLogParser.pm
10089#   t/lib/RawLogParser.t
10090# See https://launchpad.net/percona-toolkit for more information.
10091# ###########################################################################
10092{
10093package RawLogParser;
10094
10095use strict;
10096use warnings FATAL => 'all';
10097use English qw(-no_match_vars);
10098use constant PTDEBUG => $ENV{PTDEBUG} || 0;
10099
10100use Data::Dumper;
10101$Data::Dumper::Indent    = 1;
10102$Data::Dumper::Sortkeys  = 1;
10103$Data::Dumper::Quotekeys = 0;
10104
10105sub new {
10106   my ( $class ) = @_;
10107   my $self = {
10108   };
10109   return bless $self, $class;
10110}
10111
10112sub parse_event {
10113   my ( $self, %args ) = @_;
10114   my @required_args = qw(next_event tell);
10115   foreach my $arg ( @required_args ) {
10116      die "I need a $arg argument" unless $args{$arg};
10117   }
10118   my ($next_event, $tell) = @args{@required_args};
10119
10120   my $line;
10121   my $pos_in_log = $tell->();
10122   LINE:
10123   while ( defined($line = $next_event->()) ) {
10124      PTDEBUG && _d($line);
10125      chomp($line);
10126      my @properties = (
10127         'pos_in_log', $pos_in_log,
10128         'cmd',        'Query',
10129         'bytes',      length($line),
10130         'Query_time', 0,
10131         'arg',        $line,
10132      );
10133
10134      $pos_in_log = $tell->();
10135
10136      PTDEBUG && _d('Properties of event:', Dumper(\@properties));
10137      my $event = { @properties };
10138      if ( $args{stats} ) {
10139         $args{stats}->{events_read}++;
10140         $args{stats}->{events_parsed}++;
10141      }
10142
10143      return $event;
10144   }
10145
10146   $args{oktorun}->(0) if $args{oktorun};
10147   return;
10148}
10149
10150sub _d {
10151   my ($package, undef, $line) = caller 0;
10152   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
10153        map { defined $_ ? $_ : 'undef' }
10154        @_;
10155   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
10156}
10157
101581;
10159}
10160# ###########################################################################
10161# End RawLogParser package
10162# ###########################################################################
10163
10164# ###########################################################################
10165# ProtocolParser package
10166# This package is a copy without comments from the original.  The original
10167# with comments and its test file can be found in the Bazaar repository at,
10168#   lib/ProtocolParser.pm
10169#   t/lib/ProtocolParser.t
10170# See https://launchpad.net/percona-toolkit for more information.
10171# ###########################################################################
10172{
10173package ProtocolParser;
10174
10175use strict;
10176use warnings FATAL => 'all';
10177use English qw(-no_match_vars);
10178use constant PTDEBUG => $ENV{PTDEBUG} || 0;
10179
10180use File::Basename qw(basename);
10181use File::Temp qw(tempfile);
10182
10183eval {
10184   require IO::Uncompress::Inflate; # yum: perl-IO-Compress-Zlib
10185   IO::Uncompress::Inflate->import(qw(inflate $InflateError));
10186};
10187
10188use Data::Dumper;
10189$Data::Dumper::Indent    = 1;
10190$Data::Dumper::Sortkeys  = 1;
10191$Data::Dumper::Quotekeys = 0;
10192
10193sub new {
10194   my ( $class, %args ) = @_;
10195
10196   my $self = {
10197      server      => $args{server},
10198      port        => $args{port},
10199      sessions    => {},
10200      o           => $args{o},
10201   };
10202
10203   return bless $self, $class;
10204}
10205
10206sub parse_event {
10207   my ( $self, %args ) = @_;
10208   my @required_args = qw(event);
10209   foreach my $arg ( @required_args ) {
10210      die "I need a $arg argument" unless $args{$arg};
10211   }
10212   my $packet = @args{@required_args};
10213
10214   if ( $self->{buffer} ) {
10215      my ($packet_from, $session) = $self->_get_session($packet);
10216      if ( $packet->{data_len} ) {
10217         if ( $packet_from eq 'client' ) {
10218            push @{$session->{client_packets}}, $packet;
10219            PTDEBUG && _d('Saved client packet');
10220         }
10221         else {
10222            push @{$session->{server_packets}}, $packet;
10223            PTDEBUG && _d('Saved server packet');
10224         }
10225      }
10226
10227      return unless ($packet_from eq 'client')
10228                    && ($packet->{fin} || $packet->{rst});
10229
10230      my $event;
10231      map {
10232         $event = $self->_parse_packet($_, $args{misc});
10233         $args{stats}->{events_parsed}++ if $args{stats};
10234      } sort { $a->{seq} <=> $b->{seq} }
10235      @{$session->{client_packets}};
10236
10237      map {
10238         $event = $self->_parse_packet($_, $args{misc});
10239         $args{stats}->{events_parsed}++ if $args{stats};
10240      } sort { $a->{seq} <=> $b->{seq} }
10241      @{$session->{server_packets}};
10242
10243      return $event;
10244   }
10245
10246   if ( $packet->{data_len} == 0 ) {
10247      PTDEBUG && _d('No TCP data');
10248      return;
10249   }
10250
10251   my $event = $self->_parse_packet($packet, $args{misc});
10252   $args{stats}->{events_parsed}++ if $args{stats};
10253   return $event;
10254}
10255
10256sub _parse_packet {
10257   my ( $self, $packet, $misc ) = @_;
10258
10259   my ($packet_from, $session) = $self->_get_session($packet);
10260   PTDEBUG && _d('State:', $session->{state});
10261
10262   push @{$session->{raw_packets}}, $packet->{raw_packet}
10263      unless $misc->{recurse};
10264
10265   if ( $session->{buff} ) {
10266      $session->{buff_left} -= $packet->{data_len};
10267      if ( $session->{buff_left} > 0 ) {
10268         PTDEBUG && _d('Added data to buff; expecting', $session->{buff_left},
10269            'more bytes');
10270         return;
10271      }
10272
10273      PTDEBUG && _d('Got all data; buff left:', $session->{buff_left});
10274      $packet->{data}       = $session->{buff} . $packet->{data};
10275      $packet->{data_len}  += length $session->{buff};
10276      $session->{buff}      = '';
10277      $session->{buff_left} = 0;
10278   }
10279
10280   $packet->{data} = pack('H*', $packet->{data}) unless $misc->{recurse};
10281   my $event;
10282   if ( $packet_from eq 'server' ) {
10283      $event = $self->_packet_from_server($packet, $session, $misc);
10284   }
10285   elsif ( $packet_from eq 'client' ) {
10286      $event = $self->_packet_from_client($packet, $session, $misc);
10287   }
10288   else {
10289      die 'Packet origin unknown';
10290   }
10291   PTDEBUG && _d('State:', $session->{state});
10292
10293   if ( $session->{out_of_order} ) {
10294      PTDEBUG && _d('Session packets are out of order');
10295      push @{$session->{packets}}, $packet;
10296      $session->{ts_min}
10297         = $packet->{ts} if $packet->{ts} lt ($session->{ts_min} || '');
10298      $session->{ts_max}
10299         = $packet->{ts} if $packet->{ts} gt ($session->{ts_max} || '');
10300      if ( $session->{have_all_packets} ) {
10301         PTDEBUG && _d('Have all packets; ordering and processing');
10302         delete $session->{out_of_order};
10303         delete $session->{have_all_packets};
10304         map {
10305            $event = $self->_parse_packet($_, { recurse => 1 });
10306         } sort { $a->{seq} <=> $b->{seq} } @{$session->{packets}};
10307      }
10308   }
10309
10310   PTDEBUG && _d('Done with packet; event:', Dumper($event));
10311   return $event;
10312}
10313
10314sub _get_session {
10315   my ( $self, $packet ) = @_;
10316
10317   my $src_host = "$packet->{src_host}:$packet->{src_port}";
10318   my $dst_host = "$packet->{dst_host}:$packet->{dst_port}";
10319
10320   if ( my $server = $self->{server} ) {  # Watch only the given server.
10321      $server .= ":$self->{port}";
10322      if ( $src_host ne $server && $dst_host ne $server ) {
10323         PTDEBUG && _d('Packet is not to or from', $server);
10324         return;
10325      }
10326   }
10327
10328   my $packet_from;
10329   my $client;
10330   if ( $src_host =~ m/:$self->{port}$/ ) {
10331      $packet_from = 'server';
10332      $client      = $dst_host;
10333   }
10334   elsif ( $dst_host =~ m/:$self->{port}$/ ) {
10335      $packet_from = 'client';
10336      $client      = $src_host;
10337   }
10338   else {
10339      warn 'Packet is not to or from server: ', Dumper($packet);
10340      return;
10341   }
10342   PTDEBUG && _d('Client:', $client);
10343
10344   if ( !exists $self->{sessions}->{$client} ) {
10345      PTDEBUG && _d('New session');
10346      $self->{sessions}->{$client} = {
10347         client      => $client,
10348         state       => undef,
10349         raw_packets => [],
10350      };
10351   };
10352   my $session = $self->{sessions}->{$client};
10353
10354   return $packet_from, $session;
10355}
10356
10357sub _packet_from_server {
10358   die "Don't call parent class _packet_from_server()";
10359}
10360
10361sub _packet_from_client {
10362   die "Don't call parent class _packet_from_client()";
10363}
10364
10365sub make_event {
10366   my ( $self, $session, $packet ) = @_;
10367   die "Event has no attributes" unless scalar keys %{$session->{attribs}};
10368   die "Query has no arg attribute" unless $session->{attribs}->{arg};
10369   my $start_request = $session->{start_request} || 0;
10370   my $start_reply   = $session->{start_reply}   || 0;
10371   my $end_reply     = $session->{end_reply}     || 0;
10372   PTDEBUG && _d('Request start:', $start_request,
10373      'reply start:', $start_reply, 'reply end:', $end_reply);
10374   my $event = {
10375      Query_time    => $self->timestamp_diff($start_request, $start_reply),
10376      Transmit_time => $self->timestamp_diff($start_reply, $end_reply),
10377   };
10378   @{$event}{keys %{$session->{attribs}}} = values %{$session->{attribs}};
10379   return $event;
10380}
10381
10382sub _get_errors_fh {
10383   my ( $self ) = @_;
10384   return $self->{errors_fh} if $self->{errors_fh};
10385
10386   my $exec = basename($0);
10387   my ($errors_fh, $filename);
10388   if ( $filename = $ENV{PERCONA_TOOLKIT_TCP_ERRORS_FILE} ) {
10389      open $errors_fh, ">", $filename
10390         or die "Cannot open $filename for writing (supplied from "
10391              . "PERCONA_TOOLKIT_TCP_ERRORS_FILE): $OS_ERROR";
10392   }
10393   else {
10394      ($errors_fh, $filename) = tempfile("/tmp/$exec-errors.XXXXXXX", UNLINK => 0);
10395   }
10396
10397   $self->{errors_file} = $filename;
10398   $self->{errors_fh}   = $errors_fh;
10399   return $errors_fh;
10400}
10401
10402sub fail_session {
10403   my ( $self, $session, $reason ) = @_;
10404   PTDEBUG && _d('Failed session', $session->{client}, 'because', $reason);
10405   delete $self->{sessions}->{$session->{client}};
10406
10407   return if $self->{_no_save_error};
10408
10409   my $errors_fh = $self->_get_errors_fh();
10410
10411   warn "TCP session $session->{client} had errors, will save them in $self->{errors_file}\n"
10412      unless $self->{_warned_for}->{$self->{errors_file}}++;
10413
10414   my $raw_packets = delete $session->{raw_packets};
10415   $session->{reason_for_failure} = $reason;
10416   my $session_dump = '# ' . Dumper($session);
10417   chomp $session_dump;
10418   $session_dump =~ s/\n/\n# /g;
10419   print $errors_fh join("\n", $session_dump, @$raw_packets), "\n";
10420   return;
10421}
10422
10423sub timestamp_diff {
10424   my ( $self, $start, $end ) = @_;
10425   return 0 unless $start && $end;
10426   my $sd = substr($start, 0, 11, '');
10427   my $ed = substr($end,   0, 11, '');
10428   my ( $sh, $sm, $ss ) = split(/:/, $start);
10429   my ( $eh, $em, $es ) = split(/:/, $end);
10430   my $esecs = ($eh * 3600 + $em * 60 + $es);
10431   my $ssecs = ($sh * 3600 + $sm * 60 + $ss);
10432   if ( $sd eq $ed ) {
10433      return sprintf '%.6f', $esecs - $ssecs;
10434   }
10435   else { # Assume only one day boundary has been crossed, no DST, etc
10436      return sprintf '%.6f', ( 86_400 - $ssecs ) + $esecs;
10437   }
10438}
10439
10440sub uncompress_data {
10441   my ( $self, $data, $len ) = @_;
10442   die "I need data" unless $data;
10443   die "I need a len argument" unless $len;
10444   die "I need a scalar reference to data" unless ref $data eq 'SCALAR';
10445   PTDEBUG && _d('Uncompressing data');
10446   our $InflateError;
10447
10448   my $comp_bin_data = pack('H*', $$data);
10449
10450   my $uncomp_bin_data = '';
10451   my $z = new IO::Uncompress::Inflate(
10452      \$comp_bin_data
10453   ) or die "IO::Uncompress::Inflate failed: $InflateError";
10454   my $status = $z->read(\$uncomp_bin_data, $len)
10455      or die "IO::Uncompress::Inflate failed: $InflateError";
10456
10457   my $uncomp_data = unpack('H*', $uncomp_bin_data);
10458
10459   return \$uncomp_data;
10460}
10461
10462sub _d {
10463   my ($package, undef, $line) = caller 0;
10464   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
10465        map { defined $_ ? $_ : 'undef' }
10466        @_;
10467   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
10468}
10469
104701;
10471}
10472# ###########################################################################
10473# End ProtocolParser package
10474# ###########################################################################
10475
10476# ###########################################################################
10477# MasterSlave package
10478# This package is a copy without comments from the original.  The original
10479# with comments and its test file can be found in the Bazaar repository at,
10480#   lib/MasterSlave.pm
10481#   t/lib/MasterSlave.t
10482# See https://launchpad.net/percona-toolkit for more information.
10483# ###########################################################################
10484{
10485package MasterSlave;
10486
10487use strict;
10488use warnings FATAL => 'all';
10489use English qw(-no_match_vars);
10490use constant PTDEBUG => $ENV{PTDEBUG} || 0;
10491
10492sub check_recursion_method {
10493   my ($methods) = @_;
10494   if ( @$methods != 1 ) {
10495      if ( grep({ !m/processlist|hosts/i } @$methods)
10496            && $methods->[0] !~ /^dsn=/i )
10497      {
10498         die  "Invalid combination of recursion methods: "
10499            . join(", ", map { defined($_) ? $_ : 'undef' } @$methods) . ". "
10500            . "Only hosts and processlist may be combined.\n"
10501      }
10502   }
10503   else {
10504      my ($method) = @$methods;
10505      die "Invalid recursion method: " . ( $method || 'undef' )
10506         unless $method && $method =~ m/^(?:processlist$|hosts$|none$|cluster$|dsn=)/i;
10507   }
10508}
10509
10510sub new {
10511   my ( $class, %args ) = @_;
10512   my @required_args = qw(OptionParser DSNParser Quoter);
10513   foreach my $arg ( @required_args ) {
10514      die "I need a $arg argument" unless $args{$arg};
10515   }
10516   my $self = {
10517      %args,
10518      replication_thread => {},
10519   };
10520   return bless $self, $class;
10521}
10522
10523sub get_slaves {
10524   my ($self, %args) = @_;
10525   my @required_args = qw(make_cxn);
10526   foreach my $arg ( @required_args ) {
10527      die "I need a $arg argument" unless $args{$arg};
10528   }
10529   my ($make_cxn) = @args{@required_args};
10530
10531   my $slaves  = [];
10532   my $dp      = $self->{DSNParser};
10533   my $methods = $self->_resolve_recursion_methods($args{dsn});
10534
10535   return $slaves unless @$methods;
10536
10537   if ( grep { m/processlist|hosts/i } @$methods ) {
10538      my @required_args = qw(dbh dsn);
10539      foreach my $arg ( @required_args ) {
10540         die "I need a $arg argument" unless $args{$arg};
10541      }
10542      my ($dbh, $dsn) = @args{@required_args};
10543      my $o = $self->{OptionParser};
10544
10545      $self->recurse_to_slaves(
10546         {  dbh            => $dbh,
10547            dsn            => $dsn,
10548            slave_user     => $o->got('slave-user') ? $o->get('slave-user') : '',
10549            slave_password => $o->got('slave-password') ? $o->get('slave-password') : '',
10550            callback  => sub {
10551               my ( $dsn, $dbh, $level, $parent ) = @_;
10552               return unless $level;
10553               PTDEBUG && _d('Found slave:', $dp->as_string($dsn));
10554               my $slave_dsn = $dsn;
10555               if ($o->got('slave-user')) {
10556                  $slave_dsn->{u} = $o->get('slave-user');
10557                  PTDEBUG && _d("Using slave user ".$o->get('slave-user')." on ".$slave_dsn->{h}.":".$slave_dsn->{P});
10558               }
10559               if ($o->got('slave-password')) {
10560                  $slave_dsn->{p} = $o->get('slave-password');
10561                  PTDEBUG && _d("Slave password set");
10562               }
10563               push @$slaves, $make_cxn->(dsn => $slave_dsn, dbh => $dbh);
10564               return;
10565            },
10566         }
10567      );
10568   } elsif ( $methods->[0] =~ m/^dsn=/i ) {
10569      (my $dsn_table_dsn = join ",", @$methods) =~ s/^dsn=//i;
10570      $slaves = $self->get_cxn_from_dsn_table(
10571         %args,
10572         dsn_table_dsn => $dsn_table_dsn,
10573      );
10574   }
10575   elsif ( $methods->[0] =~ m/none/i ) {
10576      PTDEBUG && _d('Not getting to slaves');
10577   }
10578   else {
10579      die "Unexpected recursion methods: @$methods";
10580   }
10581
10582   return $slaves;
10583}
10584
10585sub _resolve_recursion_methods {
10586   my ($self, $dsn) = @_;
10587   my $o = $self->{OptionParser};
10588   if ( $o->got('recursion-method') ) {
10589      return $o->get('recursion-method');
10590   }
10591   elsif ( $dsn && ($dsn->{P} || 3306) != 3306 ) {
10592      PTDEBUG && _d('Port number is non-standard; using only hosts method');
10593      return [qw(hosts)];
10594   }
10595   else {
10596      return $o->get('recursion-method');
10597   }
10598}
10599
10600sub recurse_to_slaves {
10601   my ( $self, $args, $level ) = @_;
10602   $level ||= 0;
10603   my $dp = $self->{DSNParser};
10604   my $recurse = $args->{recurse} || $self->{OptionParser}->get('recurse');
10605   my $dsn = $args->{dsn};
10606   my $slave_user = $args->{slave_user} || '';
10607   my $slave_password = $args->{slave_password} || '';
10608
10609   my $methods = $self->_resolve_recursion_methods($dsn);
10610   PTDEBUG && _d('Recursion methods:', @$methods);
10611   if ( lc($methods->[0]) eq 'none' ) {
10612      PTDEBUG && _d('Not recursing to slaves');
10613      return;
10614   }
10615
10616   my $slave_dsn = $dsn;
10617   if ($slave_user) {
10618      $slave_dsn->{u} = $slave_user;
10619      PTDEBUG && _d("Using slave user $slave_user on ".$slave_dsn->{h}.":".$slave_dsn->{P});
10620   }
10621   if ($slave_password) {
10622      $slave_dsn->{p} = $slave_password;
10623      PTDEBUG && _d("Slave password set");
10624   }
10625
10626   my $dbh;
10627   eval {
10628      $dbh = $args->{dbh} || $dp->get_dbh(
10629         $dp->get_cxn_params($slave_dsn), { AutoCommit => 1 });
10630      PTDEBUG && _d('Connected to', $dp->as_string($slave_dsn));
10631   };
10632   if ( $EVAL_ERROR ) {
10633      print STDERR "Cannot connect to ", $dp->as_string($slave_dsn), "\n"
10634         or die "Cannot print: $OS_ERROR";
10635      return;
10636   }
10637
10638   my $sql  = 'SELECT @@SERVER_ID';
10639   PTDEBUG && _d($sql);
10640   my ($id) = $dbh->selectrow_array($sql);
10641   PTDEBUG && _d('Working on server ID', $id);
10642   my $master_thinks_i_am = $dsn->{server_id};
10643   if ( !defined $id
10644       || ( defined $master_thinks_i_am && $master_thinks_i_am != $id )
10645       || $args->{server_ids_seen}->{$id}++
10646   ) {
10647      PTDEBUG && _d('Server ID seen, or not what master said');
10648      if ( $args->{skip_callback} ) {
10649         $args->{skip_callback}->($dsn, $dbh, $level, $args->{parent});
10650      }
10651      return;
10652   }
10653
10654   $args->{callback}->($dsn, $dbh, $level, $args->{parent});
10655
10656   if ( !defined $recurse || $level < $recurse ) {
10657
10658      my @slaves =
10659         grep { !$_->{master_id} || $_->{master_id} == $id } # Only my slaves.
10660         $self->find_slave_hosts($dp, $dbh, $dsn, $methods);
10661
10662      foreach my $slave ( @slaves ) {
10663         PTDEBUG && _d('Recursing from',
10664            $dp->as_string($dsn), 'to', $dp->as_string($slave));
10665         $self->recurse_to_slaves(
10666            { %$args, dsn => $slave, dbh => undef, parent => $dsn, slave_user => $slave_user, $slave_password => $slave_password }, $level + 1 );
10667      }
10668   }
10669}
10670
10671sub find_slave_hosts {
10672   my ( $self, $dsn_parser, $dbh, $dsn, $methods ) = @_;
10673
10674   PTDEBUG && _d('Looking for slaves on', $dsn_parser->as_string($dsn),
10675      'using methods', @$methods);
10676
10677   my @slaves;
10678   METHOD:
10679   foreach my $method ( @$methods ) {
10680      my $find_slaves = "_find_slaves_by_$method";
10681      PTDEBUG && _d('Finding slaves with', $find_slaves);
10682      @slaves = $self->$find_slaves($dsn_parser, $dbh, $dsn);
10683      last METHOD if @slaves;
10684   }
10685
10686   PTDEBUG && _d('Found', scalar(@slaves), 'slaves');
10687   return @slaves;
10688}
10689
10690sub _find_slaves_by_processlist {
10691   my ( $self, $dsn_parser, $dbh, $dsn ) = @_;
10692   my @connected_slaves = $self->get_connected_slaves($dbh);
10693   my @slaves = $self->_process_slaves_list($dsn_parser, $dsn, \@connected_slaves);
10694   return @slaves;
10695}
10696
10697sub _process_slaves_list {
10698   my ($self, $dsn_parser, $dsn, $connected_slaves) = @_;
10699   my @slaves = map  {
10700      my $slave        = $dsn_parser->parse("h=$_", $dsn);
10701      $slave->{source} = 'processlist';
10702      $slave;
10703   }
10704   grep { $_ }
10705   map  {
10706      my ( $host ) = $_->{host} =~ m/^(.*):\d+$/;
10707      if ( $host eq 'localhost' ) {
10708         $host = '127.0.0.1'; # Replication never uses sockets.
10709      }
10710      if ($host =~ m/::/) {
10711          $host = '['.$host.']';
10712      }
10713      $host;
10714   } @$connected_slaves;
10715
10716   return @slaves;
10717}
10718
10719sub _find_slaves_by_hosts {
10720   my ( $self, $dsn_parser, $dbh, $dsn ) = @_;
10721
10722   my @slaves;
10723   my $sql = 'SHOW SLAVE HOSTS';
10724   PTDEBUG && _d($dbh, $sql);
10725   @slaves = @{$dbh->selectall_arrayref($sql, { Slice => {} })};
10726
10727   if ( @slaves ) {
10728      PTDEBUG && _d('Found some SHOW SLAVE HOSTS info');
10729      @slaves = map {
10730         my %hash;
10731         @hash{ map { lc $_ } keys %$_ } = values %$_;
10732         my $spec = "h=$hash{host},P=$hash{port}"
10733            . ( $hash{user} ? ",u=$hash{user}" : '')
10734            . ( $hash{password} ? ",p=$hash{password}" : '');
10735         my $dsn           = $dsn_parser->parse($spec, $dsn);
10736         $dsn->{server_id} = $hash{server_id};
10737         $dsn->{master_id} = $hash{master_id};
10738         $dsn->{source}    = 'hosts';
10739         $dsn;
10740      } @slaves;
10741   }
10742
10743   return @slaves;
10744}
10745
10746sub get_connected_slaves {
10747   my ( $self, $dbh ) = @_;
10748
10749   my $show = "SHOW GRANTS FOR ";
10750   my $user = 'CURRENT_USER()';
10751   my $sql = $show . $user;
10752   PTDEBUG && _d($dbh, $sql);
10753
10754   my $proc;
10755   eval {
10756      $proc = grep {
10757         m/ALL PRIVILEGES.*?\*\.\*|PROCESS/
10758      } @{$dbh->selectcol_arrayref($sql)};
10759   };
10760   if ( $EVAL_ERROR ) {
10761
10762      if ( $EVAL_ERROR =~ m/no such grant defined for user/ ) {
10763         PTDEBUG && _d('Retrying SHOW GRANTS without host; error:',
10764            $EVAL_ERROR);
10765         ($user) = split('@', $user);
10766         $sql    = $show . $user;
10767         PTDEBUG && _d($sql);
10768         eval {
10769            $proc = grep {
10770               m/ALL PRIVILEGES.*?\*\.\*|PROCESS/
10771            } @{$dbh->selectcol_arrayref($sql)};
10772         };
10773      }
10774
10775      die "Failed to $sql: $EVAL_ERROR" if $EVAL_ERROR;
10776   }
10777   if ( !$proc ) {
10778      die "You do not have the PROCESS privilege";
10779   }
10780
10781   $sql = 'SHOW FULL PROCESSLIST';
10782   PTDEBUG && _d($dbh, $sql);
10783   grep { $_->{command} =~ m/Binlog Dump/i }
10784   map  { # Lowercase the column names
10785      my %hash;
10786      @hash{ map { lc $_ } keys %$_ } = values %$_;
10787      \%hash;
10788   }
10789   @{$dbh->selectall_arrayref($sql, { Slice => {} })};
10790}
10791
10792sub is_master_of {
10793   my ( $self, $master, $slave ) = @_;
10794   my $master_status = $self->get_master_status($master)
10795      or die "The server specified as a master is not a master";
10796   my $slave_status  = $self->get_slave_status($slave)
10797      or die "The server specified as a slave is not a slave";
10798   my @connected     = $self->get_connected_slaves($master)
10799      or die "The server specified as a master has no connected slaves";
10800   my (undef, $port) = $master->selectrow_array("SHOW VARIABLES LIKE 'port'");
10801
10802   if ( $port != $slave_status->{master_port} ) {
10803      die "The slave is connected to $slave_status->{master_port} "
10804         . "but the master's port is $port";
10805   }
10806
10807   if ( !grep { $slave_status->{master_user} eq $_->{user} } @connected ) {
10808      die "I don't see any slave I/O thread connected with user "
10809         . $slave_status->{master_user};
10810   }
10811
10812   if ( ($slave_status->{slave_io_state} || '')
10813      eq 'Waiting for master to send event' )
10814   {
10815      my ( $master_log_name, $master_log_num )
10816         = $master_status->{file} =~ m/^(.*?)\.0*([1-9][0-9]*)$/;
10817      my ( $slave_log_name, $slave_log_num )
10818         = $slave_status->{master_log_file} =~ m/^(.*?)\.0*([1-9][0-9]*)$/;
10819      if ( $master_log_name ne $slave_log_name
10820         || abs($master_log_num - $slave_log_num) > 1 )
10821      {
10822         die "The slave thinks it is reading from "
10823            . "$slave_status->{master_log_file},  but the "
10824            . "master is writing to $master_status->{file}";
10825      }
10826   }
10827   return 1;
10828}
10829
10830sub get_master_dsn {
10831   my ( $self, $dbh, $dsn, $dsn_parser ) = @_;
10832   my $master = $self->get_slave_status($dbh) or return undef;
10833   my $spec   = "h=$master->{master_host},P=$master->{master_port}";
10834   return       $dsn_parser->parse($spec, $dsn);
10835}
10836
10837sub get_slave_status {
10838   my ( $self, $dbh ) = @_;
10839
10840   if ( !$self->{not_a_slave}->{$dbh} ) {
10841      my $sth = $self->{sths}->{$dbh}->{SLAVE_STATUS}
10842            ||= $dbh->prepare('SHOW SLAVE STATUS');
10843      PTDEBUG && _d($dbh, 'SHOW SLAVE STATUS');
10844      $sth->execute();
10845      my ($sss_rows) = $sth->fetchall_arrayref({}); # Show Slave Status rows
10846
10847      my $ss;
10848      if ( $sss_rows && @$sss_rows ) {
10849          if (scalar @$sss_rows > 1) {
10850              if (!$self->{channel}) {
10851                  die 'This server returned more than one row for SHOW SLAVE STATUS but "channel" was not specified on the command line';
10852              }
10853              my $slave_use_channels;
10854              for my $row (@$sss_rows) {
10855                  $row = { map { lc($_) => $row->{$_} } keys %$row }; # lowercase the keys
10856                  if ($row->{channel_name}) {
10857                      $slave_use_channels = 1;
10858                  }
10859                  if ($row->{channel_name} eq $self->{channel}) {
10860                      $ss = $row;
10861                      last;
10862                  }
10863              }
10864              if (!$ss && $slave_use_channels) {
10865                 die 'This server is using replication channels but "channel" was not specified on the command line';
10866              }
10867          } else {
10868              if ($sss_rows->[0]->{channel_name} && $sss_rows->[0]->{channel_name} ne $self->{channel}) {
10869                  die 'This server is using replication channels but "channel" was not specified on the command line';
10870              } else {
10871                  $ss = $sss_rows->[0];
10872              }
10873          }
10874
10875          if ( $ss && %$ss ) {
10876             $ss = { map { lc($_) => $ss->{$_} } keys %$ss }; # lowercase the keys
10877             return $ss;
10878          }
10879          if (!$ss && $self->{channel}) {
10880              die "Specified channel name is invalid";
10881          }
10882      }
10883
10884      PTDEBUG && _d('This server returns nothing for SHOW SLAVE STATUS');
10885      $self->{not_a_slave}->{$dbh}++;
10886  }
10887}
10888
10889sub get_master_status {
10890   my ( $self, $dbh ) = @_;
10891
10892   if ( $self->{not_a_master}->{$dbh} ) {
10893      PTDEBUG && _d('Server on dbh', $dbh, 'is not a master');
10894      return;
10895   }
10896
10897   my $sth = $self->{sths}->{$dbh}->{MASTER_STATUS}
10898         ||= $dbh->prepare('SHOW MASTER STATUS');
10899   PTDEBUG && _d($dbh, 'SHOW MASTER STATUS');
10900   $sth->execute();
10901   my ($ms) = @{$sth->fetchall_arrayref({})};
10902   PTDEBUG && _d(
10903      $ms ? map { "$_=" . (defined $ms->{$_} ? $ms->{$_} : '') } keys %$ms
10904          : '');
10905
10906   if ( !$ms || scalar keys %$ms < 2 ) {
10907      PTDEBUG && _d('Server on dbh', $dbh, 'does not seem to be a master');
10908      $self->{not_a_master}->{$dbh}++;
10909   }
10910
10911  return { map { lc($_) => $ms->{$_} } keys %$ms }; # lowercase the keys
10912}
10913
10914sub wait_for_master {
10915   my ( $self, %args ) = @_;
10916   my @required_args = qw(master_status slave_dbh);
10917   foreach my $arg ( @required_args ) {
10918      die "I need a $arg argument" unless $args{$arg};
10919   }
10920   my ($master_status, $slave_dbh) = @args{@required_args};
10921   my $timeout       = $args{timeout} || 60;
10922
10923   my $result;
10924   my $waited;
10925   if ( $master_status ) {
10926      my $slave_status;
10927      eval {
10928          $slave_status = $self->get_slave_status($slave_dbh);
10929      };
10930      if ($EVAL_ERROR) {
10931          return {
10932              result => undef,
10933              waited => 0,
10934              error  =>'Wait for master: this is a multi-master slave but "channel" was not specified on the command line',
10935          };
10936      }
10937      my $server_version = VersionParser->new($slave_dbh);
10938      my $channel_sql = $server_version > '5.6' && $self->{channel} ? ", '$self->{channel}'" : '';
10939      my $sql = "SELECT MASTER_POS_WAIT('$master_status->{file}', $master_status->{position}, $timeout $channel_sql)";
10940      PTDEBUG && _d($slave_dbh, $sql);
10941      my $start = time;
10942      ($result) = $slave_dbh->selectrow_array($sql);
10943
10944      $waited = time - $start;
10945
10946      PTDEBUG && _d('Result of waiting:', $result);
10947      PTDEBUG && _d("Waited", $waited, "seconds");
10948   }
10949   else {
10950      PTDEBUG && _d('Not waiting: this server is not a master');
10951   }
10952
10953   return {
10954      result => $result,
10955      waited => $waited,
10956   };
10957}
10958
10959sub stop_slave {
10960   my ( $self, $dbh ) = @_;
10961   my $sth = $self->{sths}->{$dbh}->{STOP_SLAVE}
10962         ||= $dbh->prepare('STOP SLAVE');
10963   PTDEBUG && _d($dbh, $sth->{Statement});
10964   $sth->execute();
10965}
10966
10967sub start_slave {
10968   my ( $self, $dbh, $pos ) = @_;
10969   if ( $pos ) {
10970      my $sql = "START SLAVE UNTIL MASTER_LOG_FILE='$pos->{file}', "
10971              . "MASTER_LOG_POS=$pos->{position}";
10972      PTDEBUG && _d($dbh, $sql);
10973      $dbh->do($sql);
10974   }
10975   else {
10976      my $sth = $self->{sths}->{$dbh}->{START_SLAVE}
10977            ||= $dbh->prepare('START SLAVE');
10978      PTDEBUG && _d($dbh, $sth->{Statement});
10979      $sth->execute();
10980   }
10981}
10982
10983sub catchup_to_master {
10984   my ( $self, $slave, $master, $timeout ) = @_;
10985   $self->stop_slave($master);
10986   $self->stop_slave($slave);
10987   my $slave_status  = $self->get_slave_status($slave);
10988   my $slave_pos     = $self->repl_posn($slave_status);
10989   my $master_status = $self->get_master_status($master);
10990   my $master_pos    = $self->repl_posn($master_status);
10991   PTDEBUG && _d('Master position:', $self->pos_to_string($master_pos),
10992      'Slave position:', $self->pos_to_string($slave_pos));
10993
10994   my $result;
10995   if ( $self->pos_cmp($slave_pos, $master_pos) < 0 ) {
10996      PTDEBUG && _d('Waiting for slave to catch up to master');
10997      $self->start_slave($slave, $master_pos);
10998
10999      $result = $self->wait_for_master(
11000            master_status => $master_status,
11001            slave_dbh     => $slave,
11002            timeout       => $timeout,
11003            master_status => $master_status
11004      );
11005      if ($result->{error}) {
11006          die $result->{error};
11007      }
11008      if ( !defined $result->{result} ) {
11009         $slave_status = $self->get_slave_status($slave);
11010         if ( !$self->slave_is_running($slave_status) ) {
11011            PTDEBUG && _d('Master position:',
11012               $self->pos_to_string($master_pos),
11013               'Slave position:', $self->pos_to_string($slave_pos));
11014            $slave_pos = $self->repl_posn($slave_status);
11015            if ( $self->pos_cmp($slave_pos, $master_pos) != 0 ) {
11016               die "MASTER_POS_WAIT() returned NULL but slave has not "
11017                  . "caught up to master";
11018            }
11019            PTDEBUG && _d('Slave is caught up to master and stopped');
11020         }
11021         else {
11022            die "Slave has not caught up to master and it is still running";
11023         }
11024      }
11025   }
11026   else {
11027      PTDEBUG && _d("Slave is already caught up to master");
11028   }
11029
11030   return $result;
11031}
11032
11033sub catchup_to_same_pos {
11034   my ( $self, $s1_dbh, $s2_dbh ) = @_;
11035   $self->stop_slave($s1_dbh);
11036   $self->stop_slave($s2_dbh);
11037   my $s1_status = $self->get_slave_status($s1_dbh);
11038   my $s2_status = $self->get_slave_status($s2_dbh);
11039   my $s1_pos    = $self->repl_posn($s1_status);
11040   my $s2_pos    = $self->repl_posn($s2_status);
11041   if ( $self->pos_cmp($s1_pos, $s2_pos) < 0 ) {
11042      $self->start_slave($s1_dbh, $s2_pos);
11043   }
11044   elsif ( $self->pos_cmp($s2_pos, $s1_pos) < 0 ) {
11045      $self->start_slave($s2_dbh, $s1_pos);
11046   }
11047
11048   $s1_status = $self->get_slave_status($s1_dbh);
11049   $s2_status = $self->get_slave_status($s2_dbh);
11050   $s1_pos    = $self->repl_posn($s1_status);
11051   $s2_pos    = $self->repl_posn($s2_status);
11052
11053   if ( $self->slave_is_running($s1_status)
11054     || $self->slave_is_running($s2_status)
11055     || $self->pos_cmp($s1_pos, $s2_pos) != 0)
11056   {
11057      die "The servers aren't both stopped at the same position";
11058   }
11059
11060}
11061
11062sub slave_is_running {
11063   my ( $self, $slave_status ) = @_;
11064   return ($slave_status->{slave_sql_running} || 'No') eq 'Yes';
11065}
11066
11067sub has_slave_updates {
11068   my ( $self, $dbh ) = @_;
11069   my $sql = q{SHOW VARIABLES LIKE 'log_slave_updates'};
11070   PTDEBUG && _d($dbh, $sql);
11071   my ($name, $value) = $dbh->selectrow_array($sql);
11072   return $value && $value =~ m/^(1|ON)$/;
11073}
11074
11075sub repl_posn {
11076   my ( $self, $status ) = @_;
11077   if ( exists $status->{file} && exists $status->{position} ) {
11078      return {
11079         file     => $status->{file},
11080         position => $status->{position},
11081      };
11082   }
11083   else {
11084      return {
11085         file     => $status->{relay_master_log_file},
11086         position => $status->{exec_master_log_pos},
11087      };
11088   }
11089}
11090
11091sub get_slave_lag {
11092   my ( $self, $dbh ) = @_;
11093   my $stat = $self->get_slave_status($dbh);
11094   return unless $stat;  # server is not a slave
11095   return $stat->{seconds_behind_master};
11096}
11097
11098sub pos_cmp {
11099   my ( $self, $a, $b ) = @_;
11100   return $self->pos_to_string($a) cmp $self->pos_to_string($b);
11101}
11102
11103sub short_host {
11104   my ( $self, $dsn ) = @_;
11105   my ($host, $port);
11106   if ( $dsn->{master_host} ) {
11107      $host = $dsn->{master_host};
11108      $port = $dsn->{master_port};
11109   }
11110   else {
11111      $host = $dsn->{h};
11112      $port = $dsn->{P};
11113   }
11114   return ($host || '[default]') . ( ($port || 3306) == 3306 ? '' : ":$port" );
11115}
11116
11117sub is_replication_thread {
11118   my ( $self, $query, %args ) = @_;
11119   return unless $query;
11120
11121   my $type = lc($args{type} || 'all');
11122   die "Invalid type: $type"
11123      unless $type =~ m/^binlog_dump|slave_io|slave_sql|all$/i;
11124
11125   my $match = 0;
11126   if ( $type =~ m/binlog_dump|all/i ) {
11127      $match = 1
11128         if ($query->{Command} || $query->{command} || '') eq "Binlog Dump";
11129   }
11130   if ( !$match ) {
11131      if ( ($query->{User} || $query->{user} || '') eq "system user" ) {
11132         PTDEBUG && _d("Slave replication thread");
11133         if ( $type ne 'all' ) {
11134            my $state = $query->{State} || $query->{state} || '';
11135
11136            if ( $state =~ m/^init|end$/ ) {
11137               PTDEBUG && _d("Special state:", $state);
11138               $match = 1;
11139            }
11140            else {
11141               my ($slave_sql) = $state =~ m/
11142                  ^(Waiting\sfor\sthe\snext\sevent
11143                   |Reading\sevent\sfrom\sthe\srelay\slog
11144                   |Has\sread\sall\srelay\slog;\swaiting
11145                   |Making\stemp\sfile
11146                   |Waiting\sfor\sslave\smutex\son\sexit)/xi;
11147
11148               $match = $type eq 'slave_sql' &&  $slave_sql ? 1
11149                      : $type eq 'slave_io'  && !$slave_sql ? 1
11150                      :                                       0;
11151            }
11152         }
11153         else {
11154            $match = 1;
11155         }
11156      }
11157      else {
11158         PTDEBUG && _d('Not system user');
11159      }
11160
11161      if ( !defined $args{check_known_ids} || $args{check_known_ids} ) {
11162         my $id = $query->{Id} || $query->{id};
11163         if ( $match ) {
11164            $self->{replication_thread}->{$id} = 1;
11165         }
11166         else {
11167            if ( $self->{replication_thread}->{$id} ) {
11168               PTDEBUG && _d("Thread ID is a known replication thread ID");
11169               $match = 1;
11170            }
11171         }
11172      }
11173   }
11174
11175   PTDEBUG && _d('Matches', $type, 'replication thread:',
11176      ($match ? 'yes' : 'no'), '; match:', $match);
11177
11178   return $match;
11179}
11180
11181
11182sub get_replication_filters {
11183   my ( $self, %args ) = @_;
11184   my @required_args = qw(dbh);
11185   foreach my $arg ( @required_args ) {
11186      die "I need a $arg argument" unless $args{$arg};
11187   }
11188   my ($dbh) = @args{@required_args};
11189
11190   my %filters = ();
11191
11192   my $status = $self->get_master_status($dbh);
11193   if ( $status ) {
11194      map { $filters{$_} = $status->{$_} }
11195      grep { defined $status->{$_} && $status->{$_} ne '' }
11196      qw(
11197         binlog_do_db
11198         binlog_ignore_db
11199      );
11200   }
11201
11202   $status = $self->get_slave_status($dbh);
11203   if ( $status ) {
11204      map { $filters{$_} = $status->{$_} }
11205      grep { defined $status->{$_} && $status->{$_} ne '' }
11206      qw(
11207         replicate_do_db
11208         replicate_ignore_db
11209         replicate_do_table
11210         replicate_ignore_table
11211         replicate_wild_do_table
11212         replicate_wild_ignore_table
11213      );
11214
11215      my $sql = "SHOW VARIABLES LIKE 'slave_skip_errors'";
11216      PTDEBUG && _d($dbh, $sql);
11217      my $row = $dbh->selectrow_arrayref($sql);
11218      $filters{slave_skip_errors} = $row->[1] if $row->[1] && $row->[1] ne 'OFF';
11219   }
11220
11221   return \%filters;
11222}
11223
11224
11225sub pos_to_string {
11226   my ( $self, $pos ) = @_;
11227   my $fmt  = '%s/%020d';
11228   return sprintf($fmt, @{$pos}{qw(file position)});
11229}
11230
11231sub reset_known_replication_threads {
11232   my ( $self ) = @_;
11233   $self->{replication_thread} = {};
11234   return;
11235}
11236
11237sub get_cxn_from_dsn_table {
11238   my ($self, %args) = @_;
11239   my @required_args = qw(dsn_table_dsn make_cxn);
11240   foreach my $arg ( @required_args ) {
11241      die "I need a $arg argument" unless $args{$arg};
11242   }
11243   my ($dsn_table_dsn, $make_cxn) = @args{@required_args};
11244   PTDEBUG && _d('DSN table DSN:', $dsn_table_dsn);
11245
11246   my $dp = $self->{DSNParser};
11247   my $q  = $self->{Quoter};
11248
11249   my $dsn = $dp->parse($dsn_table_dsn);
11250   my $dsn_table;
11251   if ( $dsn->{D} && $dsn->{t} ) {
11252      $dsn_table = $q->quote($dsn->{D}, $dsn->{t});
11253   }
11254   elsif ( $dsn->{t} && $dsn->{t} =~ m/\./ ) {
11255      $dsn_table = $q->quote($q->split_unquote($dsn->{t}));
11256   }
11257   else {
11258      die "DSN table DSN does not specify a database (D) "
11259        . "or a database-qualified table (t)";
11260   }
11261
11262   my $dsn_tbl_cxn = $make_cxn->(dsn => $dsn);
11263   my $dbh         = $dsn_tbl_cxn->connect();
11264   my $sql         = "SELECT dsn FROM $dsn_table ORDER BY id";
11265   PTDEBUG && _d($sql);
11266   my $dsn_strings = $dbh->selectcol_arrayref($sql);
11267   my @cxn;
11268   if ( $dsn_strings ) {
11269      foreach my $dsn_string ( @$dsn_strings ) {
11270         PTDEBUG && _d('DSN from DSN table:', $dsn_string);
11271         push @cxn, $make_cxn->(dsn_string => $dsn_string);
11272      }
11273   }
11274   return \@cxn;
11275}
11276
11277sub _d {
11278   my ($package, undef, $line) = caller 0;
11279   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
11280        map { defined $_ ? $_ : 'undef' }
11281        @_;
11282   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
11283}
11284
112851;
11286}
11287# ###########################################################################
11288# End MasterSlave package
11289# ###########################################################################
11290
11291# ###########################################################################
11292# Progress package
11293# This package is a copy without comments from the original.  The original
11294# with comments and its test file can be found in the Bazaar repository at,
11295#   lib/Progress.pm
11296#   t/lib/Progress.t
11297# See https://launchpad.net/percona-toolkit for more information.
11298# ###########################################################################
11299{
11300package Progress;
11301
11302use strict;
11303use warnings FATAL => 'all';
11304use English qw(-no_match_vars);
11305use constant PTDEBUG => $ENV{PTDEBUG} || 0;
11306
11307sub new {
11308   my ( $class, %args ) = @_;
11309   foreach my $arg (qw(jobsize)) {
11310      die "I need a $arg argument" unless defined $args{$arg};
11311   }
11312   if ( (!$args{report} || !$args{interval}) ) {
11313      if ( $args{spec} && @{$args{spec}} == 2 ) {
11314         @args{qw(report interval)} = @{$args{spec}};
11315      }
11316      else {
11317         die "I need either report and interval arguments, or a spec";
11318      }
11319   }
11320
11321   my $name  = $args{name} || "Progress";
11322   $args{start} ||= time();
11323   my $self;
11324   $self = {
11325      last_reported => $args{start},
11326      fraction      => 0,       # How complete the job is
11327      callback      => sub {
11328         my ($fraction, $elapsed, $remaining) = @_;
11329         printf STDERR "$name: %3d%% %s remain\n",
11330            $fraction * 100,
11331            Transformers::secs_to_time($remaining);
11332      },
11333      %args,
11334   };
11335   return bless $self, $class;
11336}
11337
11338sub validate_spec {
11339   shift @_ if $_[0] eq 'Progress'; # Permit calling as Progress-> or Progress::
11340   my ( $spec ) = @_;
11341   if ( @$spec != 2 ) {
11342      die "spec array requires a two-part argument\n";
11343   }
11344   if ( $spec->[0] !~ m/^(?:percentage|time|iterations)$/ ) {
11345      die "spec array's first element must be one of "
11346        . "percentage,time,iterations\n";
11347   }
11348   if ( $spec->[1] !~ m/^\d+$/ ) {
11349      die "spec array's second element must be an integer\n";
11350   }
11351}
11352
11353sub set_callback {
11354   my ( $self, $callback ) = @_;
11355   $self->{callback} = $callback;
11356}
11357
11358sub start {
11359   my ( $self, $start ) = @_;
11360   $self->{start} = $self->{last_reported} = $start || time();
11361   $self->{first_report} = 0;
11362}
11363
11364sub update {
11365   my ( $self, $callback, %args ) = @_;
11366   my $jobsize   = $self->{jobsize};
11367   my $now    ||= $args{now} || time;
11368
11369   $self->{iterations}++; # How many updates have happened;
11370
11371   if ( !$self->{first_report} && $args{first_report} ) {
11372      $args{first_report}->();
11373      $self->{first_report} = 1;
11374   }
11375
11376   if ( $self->{report} eq 'time'
11377         && $self->{interval} > $now - $self->{last_reported}
11378   ) {
11379      return;
11380   }
11381   elsif ( $self->{report} eq 'iterations'
11382         && ($self->{iterations} - 1) % $self->{interval} > 0
11383   ) {
11384      return;
11385   }
11386   $self->{last_reported} = $now;
11387
11388   my $completed = $callback->();
11389   $self->{updates}++; # How many times we have run the update callback
11390
11391   return if $completed > $jobsize;
11392
11393   my $fraction = $completed > 0 ? $completed / $jobsize : 0;
11394
11395   if ( $self->{report} eq 'percentage'
11396         && $self->fraction_modulo($self->{fraction})
11397            >= $self->fraction_modulo($fraction)
11398   ) {
11399      $self->{fraction} = $fraction;
11400      return;
11401   }
11402   $self->{fraction} = $fraction;
11403
11404   my $elapsed   = $now - $self->{start};
11405   my $remaining = 0;
11406   my $eta       = $now;
11407   if ( $completed > 0 && $completed <= $jobsize && $elapsed > 0 ) {
11408      my $rate = $completed / $elapsed;
11409      if ( $rate > 0 ) {
11410         $remaining = ($jobsize - $completed) / $rate;
11411         $eta       = $now + int($remaining);
11412      }
11413   }
11414   $self->{callback}->($fraction, $elapsed, $remaining, $eta, $completed);
11415}
11416
11417sub fraction_modulo {
11418   my ( $self, $num ) = @_;
11419   $num *= 100; # Convert from fraction to percentage
11420   return sprintf('%d',
11421      sprintf('%d', $num / $self->{interval}) * $self->{interval});
11422}
11423
11424sub _d {
11425   my ($package, undef, $line) = caller 0;
11426   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
11427        map { defined $_ ? $_ : 'undef' }
11428        @_;
11429   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
11430}
11431
114321;
11433}
11434# ###########################################################################
11435# End Progress package
11436# ###########################################################################
11437
11438# ###########################################################################
11439# FileIterator package
11440# This package is a copy without comments from the original.  The original
11441# with comments and its test file can be found in the Bazaar repository at,
11442#   lib/FileIterator.pm
11443#   t/lib/FileIterator.t
11444# See https://launchpad.net/percona-toolkit for more information.
11445# ###########################################################################
11446{
11447package FileIterator;
11448
11449use strict;
11450use warnings FATAL => 'all';
11451use English qw(-no_match_vars);
11452use constant PTDEBUG => $ENV{PTDEBUG} || 0;
11453
11454sub new {
11455   my ( $class, %args ) = @_;
11456   my $self = {
11457      %args,
11458   };
11459   return bless $self, $class;
11460}
11461
11462sub get_file_itr {
11463   my ( $self, @filenames ) = @_;
11464
11465   my @final_filenames;
11466   FILENAME:
11467   foreach my $fn ( @filenames ) {
11468      if ( !defined $fn ) {
11469         warn "Skipping undefined filename";
11470         next FILENAME;
11471      }
11472      if ( $fn ne '-' ) {
11473         if ( !-e $fn || !-r $fn ) {
11474            warn "$fn does not exist or is not readable";
11475            next FILENAME;
11476         }
11477      }
11478      push @final_filenames, $fn;
11479   }
11480
11481   if ( !@filenames ) {
11482      push @final_filenames, '-';
11483      PTDEBUG && _d('Auto-adding "-" to the list of filenames');
11484   }
11485
11486   PTDEBUG && _d('Final filenames:', @final_filenames);
11487   return sub {
11488      while ( @final_filenames ) {
11489         my $fn = shift @final_filenames;
11490         PTDEBUG && _d('Filename:', $fn);
11491         if ( $fn eq '-' ) { # Magical STDIN filename.
11492            return (*STDIN, undef, undef);
11493         }
11494         open my $fh, '<', $fn or warn "Cannot open $fn: $OS_ERROR";
11495         if ( $fh ) {
11496            return ( $fh, $fn, -s $fn );
11497         }
11498      }
11499      return (); # Avoids $f being set to 0 in list context.
11500   };
11501}
11502
11503sub _d {
11504   my ($package, undef, $line) = caller 0;
11505   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
11506        map { defined $_ ? $_ : 'undef' }
11507        @_;
11508   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
11509}
11510
115111;
11512}
11513# ###########################################################################
11514# End FileIterator package
11515# ###########################################################################
11516
11517# ###########################################################################
11518# Runtime package
11519# This package is a copy without comments from the original.  The original
11520# with comments and its test file can be found in the Bazaar repository at,
11521#   lib/Runtime.pm
11522#   t/lib/Runtime.t
11523# See https://launchpad.net/percona-toolkit for more information.
11524# ###########################################################################
11525{
11526package Runtime;
11527
11528use strict;
11529use warnings FATAL => 'all';
11530use English qw(-no_match_vars);
11531use constant PTDEBUG => $ENV{PTDEBUG} || 0;
11532
11533sub new {
11534   my ( $class, %args ) = @_;
11535   my @required_args = qw(now);
11536   foreach my $arg ( @required_args ) {
11537      die "I need a $arg argument" unless exists $args{$arg};
11538   }
11539
11540   my $run_time = $args{run_time};
11541   if ( defined $run_time ) {
11542      die "run_time must be > 0" if $run_time <= 0;
11543   }
11544
11545   my $now = $args{now};
11546   die "now must be a callback" unless ref $now eq 'CODE';
11547
11548   my $self = {
11549      run_time   => $run_time,
11550      now        => $now,
11551      start_time => undef,
11552      end_time   => undef,
11553      time_left  => undef,
11554      stop       => 0,
11555   };
11556
11557   return bless $self, $class;
11558}
11559
11560sub time_left {
11561   my ( $self, %args ) = @_;
11562
11563   if ( $self->{stop} ) {
11564      PTDEBUG && _d("No time left because stop was called");
11565      return 0;
11566   }
11567
11568   my $now = $self->{now}->(%args);
11569   PTDEBUG && _d("Current time:", $now);
11570
11571   if ( !defined $self->{start_time} ) {
11572      $self->{start_time} = $now;
11573   }
11574
11575   return unless defined $now;
11576
11577   my $run_time = $self->{run_time};
11578   return unless defined $run_time;
11579
11580   if ( !$self->{end_time} ) {
11581      $self->{end_time} = $now + $run_time;
11582      PTDEBUG && _d("End time:", $self->{end_time});
11583   }
11584
11585   $self->{time_left} = $self->{end_time} - $now;
11586   PTDEBUG && _d("Time left:", $self->{time_left});
11587   return $self->{time_left};
11588}
11589
11590sub have_time {
11591   my ( $self, %args ) = @_;
11592   my $time_left = $self->time_left(%args);
11593   return 1 if !defined $time_left;  # run forever
11594   return $time_left <= 0 ? 0 : 1;   # <=0s means run time has elapsed
11595}
11596
11597sub time_elapsed {
11598   my ( $self, %args ) = @_;
11599
11600   my $start_time = $self->{start_time};
11601   return 0 unless $start_time;
11602
11603   my $now = $self->{now}->(%args);
11604   PTDEBUG && _d("Current time:", $now);
11605
11606   my $time_elapsed = $now - $start_time;
11607   PTDEBUG && _d("Time elapsed:", $time_elapsed);
11608   if ( $time_elapsed < 0 ) {
11609      warn "Current time $now is earlier than start time $start_time";
11610   }
11611   return $time_elapsed;
11612}
11613
11614sub reset {
11615   my ( $self ) = @_;
11616   $self->{start_time} = undef;
11617   $self->{end_time}   = undef;
11618   $self->{time_left}  = undef;
11619   $self->{stop}       = 0;
11620   PTDEBUG && _d("Reset run time");
11621   return;
11622}
11623
11624sub stop {
11625   my ( $self ) = @_;
11626   $self->{stop} = 1;
11627   return;
11628}
11629
11630sub start {
11631   my ( $self ) = @_;
11632   $self->{stop} = 0;
11633   return;
11634}
11635
11636sub _d {
11637   my ($package, undef, $line) = caller 0;
11638   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
11639        map { defined $_ ? $_ : 'undef' }
11640        @_;
11641   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
11642}
11643
116441;
11645}
11646# ###########################################################################
11647# End Runtime package
11648# ###########################################################################
11649
11650# ###########################################################################
11651# Pipeline package
11652# This package is a copy without comments from the original.  The original
11653# with comments and its test file can be found in the Bazaar repository at,
11654#   lib/Pipeline.pm
11655#   t/lib/Pipeline.t
11656# See https://launchpad.net/percona-toolkit for more information.
11657# ###########################################################################
11658{
11659package Pipeline;
11660
11661use strict;
11662use warnings FATAL => 'all';
11663use English qw(-no_match_vars);
11664use constant PTDEBUG => $ENV{PTDEBUG} || 0;
11665
11666use Data::Dumper;
11667$Data::Dumper::Indent    = 1;
11668$Data::Dumper::Sortkeys  = 1;
11669$Data::Dumper::Quotekeys = 0;
11670use Time::HiRes qw(time);
11671
11672sub new {
11673   my ( $class, %args ) = @_;
11674   my @required_args = qw();
11675   foreach my $arg ( @required_args ) {
11676      die "I need a $arg argument" unless defined $args{$arg};
11677   }
11678
11679   my $self = {
11680      instrument        => PTDEBUG,
11681      continue_on_error => 0,
11682
11683      %args,
11684
11685      procs           => [],  # coderefs for pipeline processes
11686      names           => [],  # names for each ^ pipeline proc
11687      instrumentation => {    # keyed on proc index in procs
11688         Pipeline => {
11689            time  => 0,
11690            calls => 0,
11691         },
11692      },
11693   };
11694   return bless $self, $class;
11695}
11696
11697sub add {
11698   my ( $self, %args ) = @_;
11699   my @required_args = qw(process name);
11700   foreach my $arg ( @required_args ) {
11701      die "I need a $arg argument" unless defined $args{$arg};
11702   }
11703   my ($process, $name) = @args{@required_args};
11704
11705   push @{$self->{procs}}, $process;
11706   push @{$self->{names}}, $name;
11707   $self->{retries}->{$name} = $args{retry_on_error} || 100;
11708   if ( $self->{instrument} ) {
11709      $self->{instrumentation}->{$name} = { time => 0, calls => 0 };
11710   }
11711   PTDEBUG && _d("Added pipeline process", $name);
11712
11713   return;
11714}
11715
11716sub processes {
11717   my ( $self ) = @_;
11718   return @{$self->{names}};
11719}
11720
11721sub execute {
11722   my ( $self, %args ) = @_;
11723
11724   die "Cannot execute pipeline because no process have been added"
11725      unless scalar @{$self->{procs}};
11726
11727   my $oktorun = $args{oktorun};
11728   die "I need an oktorun argument" unless $oktorun;
11729   die '$oktorun argument must be a reference' unless ref $oktorun;
11730
11731   my $pipeline_data = $args{pipeline_data} || {};
11732   $pipeline_data->{oktorun} = $oktorun;
11733
11734   my $stats = $args{stats};  # optional
11735
11736   PTDEBUG && _d("Pipeline starting at", time);
11737   my $instrument = $self->{instrument};
11738   my $processes  = $self->{procs};
11739   EVENT:
11740   while ( $$oktorun ) {
11741      my $procno  = 0;  # so we can see which proc if one causes an error
11742      my $output;
11743      eval {
11744         PIPELINE_PROCESS:
11745         while ( $procno < scalar @{$self->{procs}} ) {
11746            my $call_start = $instrument ? time : 0;
11747
11748            PTDEBUG && _d("Pipeline process", $self->{names}->[$procno]);
11749            $output = $processes->[$procno]->($pipeline_data);
11750
11751            if ( $instrument ) {
11752               my $call_end = time;
11753               my $call_t   = $call_end - $call_start;
11754               $self->{instrumentation}->{$self->{names}->[$procno]}->{time} += $call_t;
11755               $self->{instrumentation}->{$self->{names}->[$procno]}->{count}++;
11756               $self->{instrumentation}->{Pipeline}->{time} += $call_t;
11757               $self->{instrumentation}->{Pipeline}->{count}++;
11758            }
11759            if ( !$output ) {
11760               PTDEBUG && _d("Pipeline restarting early after",
11761                  $self->{names}->[$procno]);
11762               if ( $stats ) {
11763                  $stats->{"pipeline_restarted_after_"
11764                     .$self->{names}->[$procno]}++;
11765               }
11766               last PIPELINE_PROCESS;
11767            }
11768            $procno++;
11769         }
11770      };
11771      if ( $EVAL_ERROR ) {
11772         my $name = $self->{names}->[$procno] || "";
11773         my $msg  = "Pipeline process " . ($procno + 1)
11774                  . " ($name) caused an error: "
11775                  . $EVAL_ERROR;
11776         if ( !$self->{continue_on_error} ) {
11777            die $msg . "Terminating pipeline because --continue-on-error "
11778               . "is false.\n";
11779         }
11780         elsif ( defined $self->{retries}->{$name} ) {
11781            my $n = $self->{retries}->{$name};
11782            if ( $n ) {
11783               warn $msg . "Will retry pipeline process $procno ($name) "
11784                  . "$n more " . ($n > 1 ? "times" : "time") . ".\n";
11785               $self->{retries}->{$name}--;
11786            }
11787            else {
11788               die $msg . "Terminating pipeline because process $procno "
11789                  . "($name) caused too many errors.\n";
11790            }
11791         }
11792         else {
11793            warn $msg;
11794         }
11795      }
11796   }
11797
11798   PTDEBUG && _d("Pipeline stopped at", time);
11799   return;
11800}
11801
11802sub instrumentation {
11803   my ( $self ) = @_;
11804   return $self->{instrumentation};
11805}
11806
11807sub reset {
11808   my ( $self ) = @_;
11809   foreach my $proc_name ( @{$self->{names}} ) {
11810      if ( exists $self->{instrumentation}->{$proc_name} ) {
11811         $self->{instrumentation}->{$proc_name}->{calls} = 0;
11812         $self->{instrumentation}->{$proc_name}->{time}  = 0;
11813      }
11814   }
11815   $self->{instrumentation}->{Pipeline}->{calls} = 0;
11816   $self->{instrumentation}->{Pipeline}->{time}  = 0;
11817   return;
11818}
11819
11820sub _d {
11821   my ($package, undef, $line) = caller 0;
11822   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
11823        map { defined $_ ? $_ : 'undef' }
11824        @_;
11825   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
11826}
11827
118281;
11829}
11830# ###########################################################################
11831# End Pipeline package
11832# ###########################################################################
11833
11834# ###########################################################################
11835# HTTP::Micro package
11836# This package is a copy without comments from the original.  The original
11837# with comments and its test file can be found in the Bazaar repository at,
11838#   lib/HTTP/Micro.pm
11839#   t/lib/HTTP/Micro.t
11840# See https://launchpad.net/percona-toolkit for more information.
11841# ###########################################################################
11842{
11843package HTTP::Micro;
11844
11845our $VERSION = '0.01';
11846
11847use strict;
11848use warnings FATAL => 'all';
11849use English qw(-no_match_vars);
11850use Carp ();
11851
11852my @attributes;
11853BEGIN {
11854    @attributes = qw(agent timeout);
11855    no strict 'refs';
11856    for my $accessor ( @attributes ) {
11857        *{$accessor} = sub {
11858            @_ > 1 ? $_[0]->{$accessor} = $_[1] : $_[0]->{$accessor};
11859        };
11860    }
11861}
11862
11863sub new {
11864    my($class, %args) = @_;
11865    (my $agent = $class) =~ s{::}{-}g;
11866    my $self = {
11867        agent        => $agent . "/" . ($class->VERSION || 0),
11868        timeout      => 60,
11869    };
11870    for my $key ( @attributes ) {
11871        $self->{$key} = $args{$key} if exists $args{$key}
11872    }
11873    return bless $self, $class;
11874}
11875
11876my %DefaultPort = (
11877    http => 80,
11878    https => 443,
11879);
11880
11881sub request {
11882    my ($self, $method, $url, $args) = @_;
11883    @_ == 3 || (@_ == 4 && ref $args eq 'HASH')
11884      or Carp::croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/);
11885    $args ||= {}; # we keep some state in this during _request
11886
11887    my $response;
11888    for ( 0 .. 1 ) {
11889        $response = eval { $self->_request($method, $url, $args) };
11890        last unless $@ && $method eq 'GET'
11891            && $@ =~ m{^(?:Socket closed|Unexpected end)};
11892    }
11893
11894    if (my $e = "$@") {
11895        $response = {
11896            success => q{},
11897            status  => 599,
11898            reason  => 'Internal Exception',
11899            content => $e,
11900            headers => {
11901                'content-type'   => 'text/plain',
11902                'content-length' => length $e,
11903            }
11904        };
11905    }
11906    return $response;
11907}
11908
11909sub _request {
11910    my ($self, $method, $url, $args) = @_;
11911
11912    my ($scheme, $host, $port, $path_query) = $self->_split_url($url);
11913
11914    my $request = {
11915        method    => $method,
11916        scheme    => $scheme,
11917        host_port => ($port == $DefaultPort{$scheme} ? $host : "$host:$port"),
11918        uri       => $path_query,
11919        headers   => {},
11920    };
11921
11922    my $handle  = HTTP::Micro::Handle->new(timeout => $self->{timeout});
11923
11924    $handle->connect($scheme, $host, $port);
11925
11926    $self->_prepare_headers_and_cb($request, $args);
11927    $handle->write_request_header(@{$request}{qw/method uri headers/});
11928    $handle->write_content_body($request) if $request->{content};
11929
11930    my $response;
11931    do { $response = $handle->read_response_header }
11932        until (substr($response->{status},0,1) ne '1');
11933
11934    if (!($method eq 'HEAD' || $response->{status} =~ /^[23]04/)) {
11935        $response->{content} = '';
11936        $handle->read_content_body(sub { $_[1]->{content} .= $_[0] }, $response);
11937    }
11938
11939    $handle->close;
11940    $response->{success} = substr($response->{status},0,1) eq '2';
11941    return $response;
11942}
11943
11944sub _prepare_headers_and_cb {
11945    my ($self, $request, $args) = @_;
11946
11947    for ($args->{headers}) {
11948        next unless defined;
11949        while (my ($k, $v) = each %$_) {
11950            $request->{headers}{lc $k} = $v;
11951        }
11952    }
11953    $request->{headers}{'host'}         = $request->{host_port};
11954    $request->{headers}{'connection'}   = "close";
11955    $request->{headers}{'user-agent'} ||= $self->{agent};
11956
11957    if (defined $args->{content}) {
11958        $request->{headers}{'content-type'} ||= "application/octet-stream";
11959        utf8::downgrade($args->{content}, 1)
11960            or Carp::croak(q/Wide character in request message body/);
11961        $request->{headers}{'content-length'} = length $args->{content};
11962        $request->{content} = $args->{content};
11963    }
11964    return;
11965}
11966
11967sub _split_url {
11968    my $url = pop;
11969
11970    my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)>
11971      or Carp::croak(qq/Cannot parse URL: '$url'/);
11972
11973    $scheme     = lc $scheme;
11974    $path_query = "/$path_query" unless $path_query =~ m<\A/>;
11975
11976    my $host = (length($authority)) ? lc $authority : 'localhost';
11977       $host =~ s/\A[^@]*@//;   # userinfo
11978    my $port = do {
11979       $host =~ s/:([0-9]*)\z// && length $1
11980         ? $1
11981         : $DefaultPort{$scheme}
11982    };
11983
11984    return ($scheme, $host, $port, $path_query);
11985}
11986
11987} # HTTP::Micro
11988
11989{
11990   package HTTP::Micro::Handle;
11991
11992   use strict;
11993   use warnings FATAL => 'all';
11994   use English qw(-no_match_vars);
11995
11996   use Carp       qw(croak);
11997   use Errno      qw(EINTR EPIPE);
11998   use IO::Socket qw(SOCK_STREAM);
11999
12000   sub BUFSIZE () { 32768 }
12001
12002   my $Printable = sub {
12003       local $_ = shift;
12004       s/\r/\\r/g;
12005       s/\n/\\n/g;
12006       s/\t/\\t/g;
12007       s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge;
12008       $_;
12009   };
12010
12011   sub new {
12012       my ($class, %args) = @_;
12013       return bless {
12014           rbuf          => '',
12015           timeout       => 60,
12016           max_line_size => 16384,
12017           %args
12018       }, $class;
12019   }
12020
12021   my $ssl_verify_args = {
12022       check_cn         => "when_only",
12023       wildcards_in_alt => "anywhere",
12024       wildcards_in_cn  => "anywhere"
12025   };
12026
12027   sub connect {
12028       @_ == 4 || croak(q/Usage: $handle->connect(scheme, host, port)/);
12029       my ($self, $scheme, $host, $port) = @_;
12030
12031       if ( $scheme eq 'https' ) {
12032           eval "require IO::Socket::SSL"
12033               unless exists $INC{'IO/Socket/SSL.pm'};
12034           croak(qq/IO::Socket::SSL must be installed for https support\n/)
12035               unless $INC{'IO/Socket/SSL.pm'};
12036       }
12037       elsif ( $scheme ne 'http' ) {
12038         croak(qq/Unsupported URL scheme '$scheme'\n/);
12039       }
12040
12041       $self->{fh} = IO::Socket::INET->new(
12042           PeerHost  => $host,
12043           PeerPort  => $port,
12044           Proto     => 'tcp',
12045           Type      => SOCK_STREAM,
12046           Timeout   => $self->{timeout}
12047       ) or croak(qq/Could not connect to '$host:$port': $@/);
12048
12049       binmode($self->{fh})
12050         or croak(qq/Could not binmode() socket: '$!'/);
12051
12052       if ( $scheme eq 'https') {
12053           IO::Socket::SSL->start_SSL($self->{fh});
12054           ref($self->{fh}) eq 'IO::Socket::SSL'
12055               or die(qq/SSL connection failed for $host\n/);
12056           if ( $self->{fh}->can("verify_hostname") ) {
12057               $self->{fh}->verify_hostname( $host, $ssl_verify_args )
12058                  or die(qq/SSL certificate not valid for $host\n/);
12059           }
12060           else {
12061            my $fh = $self->{fh};
12062            _verify_hostname_of_cert($host, _peer_certificate($fh), $ssl_verify_args)
12063                  or die(qq/SSL certificate not valid for $host\n/);
12064            }
12065       }
12066
12067       $self->{host} = $host;
12068       $self->{port} = $port;
12069
12070       return $self;
12071   }
12072
12073   sub close {
12074       @_ == 1 || croak(q/Usage: $handle->close()/);
12075       my ($self) = @_;
12076       CORE::close($self->{fh})
12077         or croak(qq/Could not close socket: '$!'/);
12078   }
12079
12080   sub write {
12081       @_ == 2 || croak(q/Usage: $handle->write(buf)/);
12082       my ($self, $buf) = @_;
12083
12084       my $len = length $buf;
12085       my $off = 0;
12086
12087       local $SIG{PIPE} = 'IGNORE';
12088
12089       while () {
12090           $self->can_write
12091             or croak(q/Timed out while waiting for socket to become ready for writing/);
12092           my $r = syswrite($self->{fh}, $buf, $len, $off);
12093           if (defined $r) {
12094               $len -= $r;
12095               $off += $r;
12096               last unless $len > 0;
12097           }
12098           elsif ($! == EPIPE) {
12099               croak(qq/Socket closed by remote server: $!/);
12100           }
12101           elsif ($! != EINTR) {
12102               croak(qq/Could not write to socket: '$!'/);
12103           }
12104       }
12105       return $off;
12106   }
12107
12108   sub read {
12109       @_ == 2 || @_ == 3 || croak(q/Usage: $handle->read(len)/);
12110       my ($self, $len) = @_;
12111
12112       my $buf  = '';
12113       my $got = length $self->{rbuf};
12114
12115       if ($got) {
12116           my $take = ($got < $len) ? $got : $len;
12117           $buf  = substr($self->{rbuf}, 0, $take, '');
12118           $len -= $take;
12119       }
12120
12121       while ($len > 0) {
12122           $self->can_read
12123             or croak(q/Timed out while waiting for socket to become ready for reading/);
12124           my $r = sysread($self->{fh}, $buf, $len, length $buf);
12125           if (defined $r) {
12126               last unless $r;
12127               $len -= $r;
12128           }
12129           elsif ($! != EINTR) {
12130               croak(qq/Could not read from socket: '$!'/);
12131           }
12132       }
12133       if ($len) {
12134           croak(q/Unexpected end of stream/);
12135       }
12136       return $buf;
12137   }
12138
12139   sub readline {
12140       @_ == 1 || croak(q/Usage: $handle->readline()/);
12141       my ($self) = @_;
12142
12143       while () {
12144           if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) {
12145               return $1;
12146           }
12147           $self->can_read
12148             or croak(q/Timed out while waiting for socket to become ready for reading/);
12149           my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf});
12150           if (defined $r) {
12151               last unless $r;
12152           }
12153           elsif ($! != EINTR) {
12154               croak(qq/Could not read from socket: '$!'/);
12155           }
12156       }
12157       croak(q/Unexpected end of stream while looking for line/);
12158   }
12159
12160   sub read_header_lines {
12161       @_ == 1 || @_ == 2 || croak(q/Usage: $handle->read_header_lines([headers])/);
12162       my ($self, $headers) = @_;
12163       $headers ||= {};
12164       my $lines   = 0;
12165       my $val;
12166
12167       while () {
12168            my $line = $self->readline;
12169
12170            if ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) {
12171                my ($field_name) = lc $1;
12172                $val = \($headers->{$field_name} = $2);
12173            }
12174            elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) {
12175                $val
12176                  or croak(q/Unexpected header continuation line/);
12177                next unless length $1;
12178                $$val .= ' ' if length $$val;
12179                $$val .= $1;
12180            }
12181            elsif ($line =~ /\A \x0D?\x0A \z/x) {
12182               last;
12183            }
12184            else {
12185               croak(q/Malformed header line: / . $Printable->($line));
12186            }
12187       }
12188       return $headers;
12189   }
12190
12191   sub write_header_lines {
12192       (@_ == 2 && ref $_[1] eq 'HASH') || croak(q/Usage: $handle->write_header_lines(headers)/);
12193       my($self, $headers) = @_;
12194
12195       my $buf = '';
12196       while (my ($k, $v) = each %$headers) {
12197           my $field_name = lc $k;
12198            $field_name =~ /\A [\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]+ \z/x
12199               or croak(q/Invalid HTTP header field name: / . $Printable->($field_name));
12200            $field_name =~ s/\b(\w)/\u$1/g;
12201            $buf .= "$field_name: $v\x0D\x0A";
12202       }
12203       $buf .= "\x0D\x0A";
12204       return $self->write($buf);
12205   }
12206
12207   sub read_content_body {
12208       @_ == 3 || @_ == 4 || croak(q/Usage: $handle->read_content_body(callback, response, [read_length])/);
12209       my ($self, $cb, $response, $len) = @_;
12210       $len ||= $response->{headers}{'content-length'};
12211
12212       croak("No content-length in the returned response, and this "
12213           . "UA doesn't implement chunking") unless defined $len;
12214
12215       while ($len > 0) {
12216           my $read = ($len > BUFSIZE) ? BUFSIZE : $len;
12217           $cb->($self->read($read), $response);
12218           $len -= $read;
12219       }
12220
12221       return;
12222   }
12223
12224   sub write_content_body {
12225       @_ == 2 || croak(q/Usage: $handle->write_content_body(request)/);
12226       my ($self, $request) = @_;
12227       my ($len, $content_length) = (0, $request->{headers}{'content-length'});
12228
12229       $len += $self->write($request->{content});
12230
12231       $len == $content_length
12232         or croak(qq/Content-Length missmatch (got: $len expected: $content_length)/);
12233
12234       return $len;
12235   }
12236
12237   sub read_response_header {
12238       @_ == 1 || croak(q/Usage: $handle->read_response_header()/);
12239       my ($self) = @_;
12240
12241       my $line = $self->readline;
12242
12243       $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x
12244         or croak(q/Malformed Status-Line: / . $Printable->($line));
12245
12246       my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4);
12247
12248       return {
12249           status   => $status,
12250           reason   => $reason,
12251           headers  => $self->read_header_lines,
12252           protocol => $protocol,
12253       };
12254   }
12255
12256   sub write_request_header {
12257       @_ == 4 || croak(q/Usage: $handle->write_request_header(method, request_uri, headers)/);
12258       my ($self, $method, $request_uri, $headers) = @_;
12259
12260       return $self->write("$method $request_uri HTTP/1.1\x0D\x0A")
12261            + $self->write_header_lines($headers);
12262   }
12263
12264   sub _do_timeout {
12265       my ($self, $type, $timeout) = @_;
12266       $timeout = $self->{timeout}
12267           unless defined $timeout && $timeout >= 0;
12268
12269       my $fd = fileno $self->{fh};
12270       defined $fd && $fd >= 0
12271         or croak(q/select(2): 'Bad file descriptor'/);
12272
12273       my $initial = time;
12274       my $pending = $timeout;
12275       my $nfound;
12276
12277       vec(my $fdset = '', $fd, 1) = 1;
12278
12279       while () {
12280           $nfound = ($type eq 'read')
12281               ? select($fdset, undef, undef, $pending)
12282               : select(undef, $fdset, undef, $pending) ;
12283           if ($nfound == -1) {
12284               $! == EINTR
12285                 or croak(qq/select(2): '$!'/);
12286               redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0;
12287               $nfound = 0;
12288           }
12289           last;
12290       }
12291       $! = 0;
12292       return $nfound;
12293   }
12294
12295   sub can_read {
12296       @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_read([timeout])/);
12297       my $self = shift;
12298       return $self->_do_timeout('read', @_)
12299   }
12300
12301   sub can_write {
12302       @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_write([timeout])/);
12303       my $self = shift;
12304       return $self->_do_timeout('write', @_)
12305   }
12306}  # HTTP::Micro::Handle
12307
12308my $prog = <<'EOP';
12309BEGIN {
12310   if ( defined &IO::Socket::SSL::CAN_IPV6 ) {
12311      *CAN_IPV6 = \*IO::Socket::SSL::CAN_IPV6;
12312   }
12313   else {
12314      constant->import( CAN_IPV6 => '' );
12315   }
12316   my %const = (
12317      NID_CommonName => 13,
12318      GEN_DNS => 2,
12319      GEN_IPADD => 7,
12320   );
12321   while ( my ($name,$value) = each %const ) {
12322      no strict 'refs';
12323      *{$name} = UNIVERSAL::can( 'Net::SSLeay', $name ) || sub { $value };
12324   }
12325}
12326{
12327   use Carp qw(croak);
12328   my %dispatcher = (
12329      issuer =>  sub { Net::SSLeay::X509_NAME_oneline( Net::SSLeay::X509_get_issuer_name( shift )) },
12330      subject => sub { Net::SSLeay::X509_NAME_oneline( Net::SSLeay::X509_get_subject_name( shift )) },
12331   );
12332   if ( $Net::SSLeay::VERSION >= 1.30 ) {
12333      $dispatcher{commonName} = sub {
12334         my $cn = Net::SSLeay::X509_NAME_get_text_by_NID(
12335            Net::SSLeay::X509_get_subject_name( shift ), NID_CommonName);
12336         $cn =~s{\0$}{}; # work around Bug in Net::SSLeay <1.33
12337         $cn;
12338      }
12339   } else {
12340      $dispatcher{commonName} = sub {
12341         croak "you need at least Net::SSLeay version 1.30 for getting commonName"
12342      }
12343   }
12344
12345   if ( $Net::SSLeay::VERSION >= 1.33 ) {
12346      $dispatcher{subjectAltNames} = sub { Net::SSLeay::X509_get_subjectAltNames( shift ) };
12347   } else {
12348      $dispatcher{subjectAltNames} = sub {
12349         return;
12350      };
12351   }
12352
12353   $dispatcher{authority} = $dispatcher{issuer};
12354   $dispatcher{owner}     = $dispatcher{subject};
12355   $dispatcher{cn}        = $dispatcher{commonName};
12356
12357   sub _peer_certificate {
12358      my ($self, $field) = @_;
12359      my $ssl = $self->_get_ssl_object or return;
12360
12361      my $cert = ${*$self}{_SSL_certificate}
12362         ||= Net::SSLeay::get_peer_certificate($ssl)
12363         or return $self->error("Could not retrieve peer certificate");
12364
12365      if ($field) {
12366         my $sub = $dispatcher{$field} or croak
12367            "invalid argument for peer_certificate, valid are: ".join( " ",keys %dispatcher ).
12368            "\nMaybe you need to upgrade your Net::SSLeay";
12369         return $sub->($cert);
12370      } else {
12371         return $cert
12372      }
12373   }
12374
12375
12376   my %scheme = (
12377      ldap => {
12378         wildcards_in_cn    => 0,
12379         wildcards_in_alt => 'leftmost',
12380         check_cn         => 'always',
12381      },
12382      http => {
12383         wildcards_in_cn    => 'anywhere',
12384         wildcards_in_alt => 'anywhere',
12385         check_cn         => 'when_only',
12386      },
12387      smtp => {
12388         wildcards_in_cn    => 0,
12389         wildcards_in_alt => 0,
12390         check_cn         => 'always'
12391      },
12392      none => {}, # do not check
12393   );
12394
12395   $scheme{www}  = $scheme{http}; # alias
12396   $scheme{xmpp} = $scheme{http}; # rfc 3920
12397   $scheme{pop3} = $scheme{ldap}; # rfc 2595
12398   $scheme{imap} = $scheme{ldap}; # rfc 2595
12399   $scheme{acap} = $scheme{ldap}; # rfc 2595
12400   $scheme{nntp} = $scheme{ldap}; # rfc 4642
12401   $scheme{ftp}  = $scheme{http}; # rfc 4217
12402
12403
12404   sub _verify_hostname_of_cert {
12405      my $identity = shift;
12406      my $cert = shift;
12407      my $scheme = shift || 'none';
12408      if ( ! ref($scheme) ) {
12409         $scheme = $scheme{$scheme} or croak "scheme $scheme not defined";
12410      }
12411
12412      return 1 if ! %$scheme; # 'none'
12413
12414      my $commonName = $dispatcher{cn}->($cert);
12415      my @altNames   = $dispatcher{subjectAltNames}->($cert);
12416
12417      if ( my $sub = $scheme->{callback} ) {
12418         return $sub->($identity,$commonName,@altNames);
12419      }
12420
12421
12422      my $ipn;
12423      if ( CAN_IPV6 and $identity =~m{:} ) {
12424         $ipn = IO::Socket::SSL::inet_pton(IO::Socket::SSL::AF_INET6,$identity)
12425            or croak "'$identity' is not IPv6, but neither IPv4 nor hostname";
12426      } elsif ( $identity =~m{^\d+\.\d+\.\d+\.\d+$} ) {
12427         $ipn = IO::Socket::SSL::inet_aton( $identity ) or croak "'$identity' is not IPv4, but neither IPv6 nor hostname";
12428      } else {
12429         if ( $identity =~m{[^a-zA-Z0-9_.\-]} ) {
12430            $identity =~m{\0} and croak("name '$identity' has \\0 byte");
12431            $identity = IO::Socket::SSL::idn_to_ascii($identity) or
12432               croak "Warning: Given name '$identity' could not be converted to IDNA!";
12433         }
12434      }
12435
12436      my $check_name = sub {
12437         my ($name,$identity,$wtyp) = @_;
12438         $wtyp ||= '';
12439         my $pattern;
12440         if ( $wtyp eq 'anywhere' and $name =~m{^([a-zA-Z0-9_\-]*)\*(.+)} ) {
12441            $pattern = qr{^\Q$1\E[a-zA-Z0-9_\-]*\Q$2\E$}i;
12442         } elsif ( $wtyp eq 'leftmost' and $name =~m{^\*(\..+)$} ) {
12443            $pattern = qr{^[a-zA-Z0-9_\-]*\Q$1\E$}i;
12444         } else {
12445            $pattern = qr{^\Q$name\E$}i;
12446         }
12447         return $identity =~ $pattern;
12448      };
12449
12450      my $alt_dnsNames = 0;
12451      while (@altNames) {
12452         my ($type, $name) = splice (@altNames, 0, 2);
12453         if ( $ipn and $type == GEN_IPADD ) {
12454            return 1 if $ipn eq $name;
12455
12456         } elsif ( ! $ipn and $type == GEN_DNS ) {
12457            $name =~s/\s+$//; $name =~s/^\s+//;
12458            $alt_dnsNames++;
12459            $check_name->($name,$identity,$scheme->{wildcards_in_alt})
12460               and return 1;
12461         }
12462      }
12463
12464      if ( ! $ipn and (
12465         $scheme->{check_cn} eq 'always' or
12466         $scheme->{check_cn} eq 'when_only' and !$alt_dnsNames)) {
12467         $check_name->($commonName,$identity,$scheme->{wildcards_in_cn})
12468            and return 1;
12469      }
12470
12471      return 0; # no match
12472   }
12473}
12474EOP
12475
12476eval { require IO::Socket::SSL };
12477if ( $INC{"IO/Socket/SSL.pm"} ) {
12478   eval $prog;
12479   die $@ if $@;
12480}
12481
124821;
12483# ###########################################################################
12484# End HTTP::Micro package
12485# ###########################################################################
12486
12487# ###########################################################################
12488# VersionCheck package
12489# This package is a copy without comments from the original.  The original
12490# with comments and its test file can be found in the Bazaar repository at,
12491#   lib/VersionCheck.pm
12492#   t/lib/VersionCheck.t
12493# See https://launchpad.net/percona-toolkit for more information.
12494# ###########################################################################
12495{
12496package VersionCheck;
12497
12498
12499use strict;
12500use warnings FATAL => 'all';
12501use English qw(-no_match_vars);
12502
12503use constant PTDEBUG => $ENV{PTDEBUG} || 0;
12504
12505use Data::Dumper;
12506local $Data::Dumper::Indent    = 1;
12507local $Data::Dumper::Sortkeys  = 1;
12508local $Data::Dumper::Quotekeys = 0;
12509
12510use Digest::MD5 qw(md5_hex);
12511use Sys::Hostname qw(hostname);
12512use File::Basename qw();
12513use File::Spec;
12514use FindBin qw();
12515
12516eval {
12517   require Percona::Toolkit;
12518   require HTTP::Micro;
12519};
12520
12521my $home    = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
12522my @vc_dirs = (
12523   '/etc/percona',
12524   '/etc/percona-toolkit',
12525   '/tmp',
12526   "$home",
12527);
12528
12529{
12530   my $file    = 'percona-version-check';
12531
12532   sub version_check_file {
12533      foreach my $dir ( @vc_dirs ) {
12534         if ( -d $dir && -w $dir ) {
12535            PTDEBUG && _d('Version check file', $file, 'in', $dir);
12536            return $dir . '/' . $file;
12537         }
12538      }
12539      PTDEBUG && _d('Version check file', $file, 'in', $ENV{PWD});
12540      return $file;  # in the CWD
12541   }
12542}
12543
12544sub version_check_time_limit {
12545   return 60 * 60 * 24;  # one day
12546}
12547
12548
12549sub version_check {
12550   my (%args) = @_;
12551
12552   my $instances = $args{instances} || [];
12553   my $instances_to_check;
12554
12555   PTDEBUG && _d('FindBin::Bin:', $FindBin::Bin);
12556   if ( !$args{force} ) {
12557      if ( $FindBin::Bin
12558           && (-d "$FindBin::Bin/../.bzr"    ||
12559               -d "$FindBin::Bin/../../.bzr" ||
12560               -d "$FindBin::Bin/../.git"    ||
12561               -d "$FindBin::Bin/../../.git"
12562              )
12563         ) {
12564         PTDEBUG && _d("$FindBin::Bin/../.bzr disables --version-check");
12565         return;
12566      }
12567   }
12568
12569   eval {
12570      foreach my $instance ( @$instances ) {
12571         my ($name, $id) = get_instance_id($instance);
12572         $instance->{name} = $name;
12573         $instance->{id}   = $id;
12574      }
12575
12576      push @$instances, { name => 'system', id => 0 };
12577
12578      $instances_to_check = get_instances_to_check(
12579         instances => $instances,
12580         vc_file   => $args{vc_file},  # testing
12581         now       => $args{now},      # testing
12582      );
12583      PTDEBUG && _d(scalar @$instances_to_check, 'instances to check');
12584      return unless @$instances_to_check;
12585
12586      my $protocol = 'https';
12587      eval { require IO::Socket::SSL; };
12588      if ( $EVAL_ERROR ) {
12589         PTDEBUG && _d($EVAL_ERROR);
12590         PTDEBUG && _d("SSL not available, won't run version_check");
12591         return;
12592      }
12593      PTDEBUG && _d('Using', $protocol);
12594
12595      my $advice = pingback(
12596         instances => $instances_to_check,
12597         protocol  => $protocol,
12598         url       => $args{url}                       # testing
12599                   || $ENV{PERCONA_VERSION_CHECK_URL}  # testing
12600                   || "$protocol://v.percona.com",
12601      );
12602      if ( $advice ) {
12603         PTDEBUG && _d('Advice:', Dumper($advice));
12604         if ( scalar @$advice > 1) {
12605            print "\n# " . scalar @$advice . " software updates are "
12606               . "available:\n";
12607         }
12608         else {
12609            print "\n# A software update is available:\n";
12610         }
12611         print join("\n", map { "#   * $_" } @$advice), "\n\n";
12612      }
12613   };
12614   if ( $EVAL_ERROR ) {
12615      PTDEBUG && _d('Version check failed:', $EVAL_ERROR);
12616   }
12617
12618   if ( @$instances_to_check ) {
12619      eval {
12620         update_check_times(
12621            instances => $instances_to_check,
12622            vc_file   => $args{vc_file},  # testing
12623            now       => $args{now},      # testing
12624         );
12625      };
12626      if ( $EVAL_ERROR ) {
12627         PTDEBUG && _d('Error updating version check file:', $EVAL_ERROR);
12628      }
12629   }
12630
12631   if ( $ENV{PTDEBUG_VERSION_CHECK} ) {
12632      warn "Exiting because the PTDEBUG_VERSION_CHECK "
12633         . "environment variable is defined.\n";
12634      exit 255;
12635   }
12636
12637   return;
12638}
12639
12640sub get_instances_to_check {
12641   my (%args) = @_;
12642
12643   my $instances = $args{instances};
12644   my $now       = $args{now}     || int(time);
12645   my $vc_file   = $args{vc_file} || version_check_file();
12646
12647   if ( !-f $vc_file ) {
12648      PTDEBUG && _d('Version check file', $vc_file, 'does not exist;',
12649         'version checking all instances');
12650      return $instances;
12651   }
12652
12653   open my $fh, '<', $vc_file or die "Cannot open $vc_file: $OS_ERROR";
12654   chomp(my $file_contents = do { local $/ = undef; <$fh> });
12655   PTDEBUG && _d('Version check file', $vc_file, 'contents:', $file_contents);
12656   close $fh;
12657   my %last_check_time_for = $file_contents =~ /^([^,]+),(.+)$/mg;
12658
12659   my $check_time_limit = version_check_time_limit();
12660   my @instances_to_check;
12661   foreach my $instance ( @$instances ) {
12662      my $last_check_time = $last_check_time_for{ $instance->{id} };
12663      PTDEBUG && _d('Intsance', $instance->{id}, 'last checked',
12664         $last_check_time, 'now', $now, 'diff', $now - ($last_check_time || 0),
12665         'hours until next check',
12666         sprintf '%.2f',
12667            ($check_time_limit - ($now - ($last_check_time || 0))) / 3600);
12668      if ( !defined $last_check_time
12669           || ($now - $last_check_time) >= $check_time_limit ) {
12670         PTDEBUG && _d('Time to check', Dumper($instance));
12671         push @instances_to_check, $instance;
12672      }
12673   }
12674
12675   return \@instances_to_check;
12676}
12677
12678sub update_check_times {
12679   my (%args) = @_;
12680
12681   my $instances = $args{instances};
12682   my $now       = $args{now}     || int(time);
12683   my $vc_file   = $args{vc_file} || version_check_file();
12684   PTDEBUG && _d('Updating last check time:', $now);
12685
12686   my %all_instances = map {
12687      $_->{id} => { name => $_->{name}, ts => $now }
12688   } @$instances;
12689
12690   if ( -f $vc_file ) {
12691      open my $fh, '<', $vc_file or die "Cannot read $vc_file: $OS_ERROR";
12692      my $contents = do { local $/ = undef; <$fh> };
12693      close $fh;
12694
12695      foreach my $line ( split("\n", ($contents || '')) ) {
12696         my ($id, $ts) = split(',', $line);
12697         if ( !exists $all_instances{$id} ) {
12698            $all_instances{$id} = { ts => $ts };  # original ts, not updated
12699         }
12700      }
12701   }
12702
12703   open my $fh, '>', $vc_file or die "Cannot write to $vc_file: $OS_ERROR";
12704   foreach my $id ( sort keys %all_instances ) {
12705      PTDEBUG && _d('Updated:', $id, Dumper($all_instances{$id}));
12706      print { $fh } $id . ',' . $all_instances{$id}->{ts} . "\n";
12707   }
12708   close $fh;
12709
12710   return;
12711}
12712
12713sub get_instance_id {
12714   my ($instance) = @_;
12715
12716   my $dbh = $instance->{dbh};
12717   my $dsn = $instance->{dsn};
12718
12719   my $sql = q{SELECT CONCAT(@@hostname, @@port)};
12720   PTDEBUG && _d($sql);
12721   my ($name) = eval { $dbh->selectrow_array($sql) };
12722   if ( $EVAL_ERROR ) {
12723      PTDEBUG && _d($EVAL_ERROR);
12724      $sql = q{SELECT @@hostname};
12725      PTDEBUG && _d($sql);
12726      ($name) = eval { $dbh->selectrow_array($sql) };
12727      if ( $EVAL_ERROR ) {
12728         PTDEBUG && _d($EVAL_ERROR);
12729         $name = ($dsn->{h} || 'localhost') . ($dsn->{P} || 3306);
12730      }
12731      else {
12732         $sql = q{SHOW VARIABLES LIKE 'port'};
12733         PTDEBUG && _d($sql);
12734         my (undef, $port) = eval { $dbh->selectrow_array($sql) };
12735         PTDEBUG && _d('port:', $port);
12736         $name .= $port || '';
12737      }
12738   }
12739   my $id = md5_hex($name);
12740
12741   PTDEBUG && _d('MySQL instance:', $id, $name, Dumper($dsn));
12742
12743   return $name, $id;
12744}
12745
12746
12747sub get_uuid {
12748    my $uuid_file = '/.percona-toolkit.uuid';
12749    foreach my $dir (@vc_dirs) {
12750        my $filename = $dir.$uuid_file;
12751        my $uuid=_read_uuid($filename);
12752        return $uuid if $uuid;
12753    }
12754
12755    my $filename = $ENV{"HOME"} . $uuid_file;
12756    my $uuid = _generate_uuid();
12757
12758    open(my $fh, '>', $filename) or die "Could not open file '$filename' $!";
12759    print $fh $uuid;
12760    close $fh;
12761
12762    return $uuid;
12763}
12764
12765sub _generate_uuid {
12766    return sprintf+($}="%04x")."$}-$}-$}-$}-".$}x3,map rand 65537,0..7;
12767}
12768
12769sub _read_uuid {
12770    my $filename = shift;
12771    my $fh;
12772
12773    eval {
12774        open($fh, '<:encoding(UTF-8)', $filename);
12775    };
12776    return if ($EVAL_ERROR);
12777
12778    my $uuid;
12779    eval { $uuid = <$fh>; };
12780    return if ($EVAL_ERROR);
12781
12782    chomp $uuid;
12783    return $uuid;
12784}
12785
12786
12787sub pingback {
12788   my (%args) = @_;
12789   my @required_args = qw(url instances);
12790   foreach my $arg ( @required_args ) {
12791      die "I need a $arg arugment" unless $args{$arg};
12792   }
12793   my $url       = $args{url};
12794   my $instances = $args{instances};
12795
12796   my $ua = $args{ua} || HTTP::Micro->new( timeout => 3 );
12797
12798   my $response = $ua->request('GET', $url);
12799   PTDEBUG && _d('Server response:', Dumper($response));
12800   die "No response from GET $url"
12801      if !$response;
12802   die("GET on $url returned HTTP status $response->{status}; expected 200\n",
12803       ($response->{content} || '')) if $response->{status} != 200;
12804   die("GET on $url did not return any programs to check")
12805      if !$response->{content};
12806
12807   my $items = parse_server_response(
12808      response => $response->{content}
12809   );
12810   die "Failed to parse server requested programs: $response->{content}"
12811      if !scalar keys %$items;
12812
12813   my $versions = get_versions(
12814      items     => $items,
12815      instances => $instances,
12816   );
12817   die "Failed to get any program versions; should have at least gotten Perl"
12818      if !scalar keys %$versions;
12819
12820   my $client_content = encode_client_response(
12821      items      => $items,
12822      versions   => $versions,
12823      general_id => get_uuid(),
12824   );
12825
12826   my $client_response = {
12827      headers => { "X-Percona-Toolkit-Tool" => File::Basename::basename($0) },
12828      content => $client_content,
12829   };
12830   PTDEBUG && _d('Client response:', Dumper($client_response));
12831
12832   $response = $ua->request('POST', $url, $client_response);
12833   PTDEBUG && _d('Server suggestions:', Dumper($response));
12834   die "No response from POST $url $client_response"
12835      if !$response;
12836   die "POST $url returned HTTP status $response->{status}; expected 200"
12837      if $response->{status} != 200;
12838
12839   return unless $response->{content};
12840
12841   $items = parse_server_response(
12842      response   => $response->{content},
12843      split_vars => 0,
12844   );
12845   die "Failed to parse server suggestions: $response->{content}"
12846      if !scalar keys %$items;
12847   my @suggestions = map { $_->{vars} }
12848                     sort { $a->{item} cmp $b->{item} }
12849                     values %$items;
12850
12851   return \@suggestions;
12852}
12853
12854sub encode_client_response {
12855   my (%args) = @_;
12856   my @required_args = qw(items versions general_id);
12857   foreach my $arg ( @required_args ) {
12858      die "I need a $arg arugment" unless $args{$arg};
12859   }
12860   my ($items, $versions, $general_id) = @args{@required_args};
12861
12862   my @lines;
12863   foreach my $item ( sort keys %$items ) {
12864      next unless exists $versions->{$item};
12865      if ( ref($versions->{$item}) eq 'HASH' ) {
12866         my $mysql_versions = $versions->{$item};
12867         for my $id ( sort keys %$mysql_versions ) {
12868            push @lines, join(';', $id, $item, $mysql_versions->{$id});
12869         }
12870      }
12871      else {
12872         push @lines, join(';', $general_id, $item, $versions->{$item});
12873      }
12874   }
12875
12876   my $client_response = join("\n", @lines) . "\n";
12877   return $client_response;
12878}
12879
12880sub parse_server_response {
12881   my (%args) = @_;
12882   my @required_args = qw(response);
12883   foreach my $arg ( @required_args ) {
12884      die "I need a $arg arugment" unless $args{$arg};
12885   }
12886   my ($response) = @args{@required_args};
12887
12888   my %items = map {
12889      my ($item, $type, $vars) = split(";", $_);
12890      if ( !defined $args{split_vars} || $args{split_vars} ) {
12891         $vars = [ split(",", ($vars || '')) ];
12892      }
12893      $item => {
12894         item => $item,
12895         type => $type,
12896         vars => $vars,
12897      };
12898   } split("\n", $response);
12899
12900   PTDEBUG && _d('Items:', Dumper(\%items));
12901
12902   return \%items;
12903}
12904
12905my %sub_for_type = (
12906   os_version          => \&get_os_version,
12907   perl_version        => \&get_perl_version,
12908   perl_module_version => \&get_perl_module_version,
12909   mysql_variable      => \&get_mysql_variable,
12910);
12911
12912sub valid_item {
12913   my ($item) = @_;
12914   return unless $item;
12915   if ( !exists $sub_for_type{ $item->{type} } ) {
12916      PTDEBUG && _d('Invalid type:', $item->{type});
12917      return 0;
12918   }
12919   return 1;
12920}
12921
12922sub get_versions {
12923   my (%args) = @_;
12924   my @required_args = qw(items);
12925   foreach my $arg ( @required_args ) {
12926      die "I need a $arg arugment" unless $args{$arg};
12927   }
12928   my ($items) = @args{@required_args};
12929
12930   my %versions;
12931   foreach my $item ( values %$items ) {
12932      next unless valid_item($item);
12933      eval {
12934         my $version = $sub_for_type{ $item->{type} }->(
12935            item      => $item,
12936            instances => $args{instances},
12937         );
12938         if ( $version ) {
12939            chomp $version unless ref($version);
12940            $versions{$item->{item}} = $version;
12941         }
12942      };
12943      if ( $EVAL_ERROR ) {
12944         PTDEBUG && _d('Error getting version for', Dumper($item), $EVAL_ERROR);
12945      }
12946   }
12947
12948   return \%versions;
12949}
12950
12951
12952sub get_os_version {
12953   if ( $OSNAME eq 'MSWin32' ) {
12954      require Win32;
12955      return Win32::GetOSDisplayName();
12956   }
12957
12958  chomp(my $platform = `uname -s`);
12959  PTDEBUG && _d('platform:', $platform);
12960  return $OSNAME unless $platform;
12961
12962   chomp(my $lsb_release
12963            = `which lsb_release 2>/dev/null | awk '{print \$1}'` || '');
12964   PTDEBUG && _d('lsb_release:', $lsb_release);
12965
12966   my $release = "";
12967
12968   if ( $platform eq 'Linux' ) {
12969      if ( -f "/etc/fedora-release" ) {
12970         $release = `cat /etc/fedora-release`;
12971      }
12972      elsif ( -f "/etc/redhat-release" ) {
12973         $release = `cat /etc/redhat-release`;
12974      }
12975      elsif ( -f "/etc/system-release" ) {
12976         $release = `cat /etc/system-release`;
12977      }
12978      elsif ( $lsb_release ) {
12979         $release = `$lsb_release -ds`;
12980      }
12981      elsif ( -f "/etc/lsb-release" ) {
12982         $release = `grep DISTRIB_DESCRIPTION /etc/lsb-release`;
12983         $release =~ s/^\w+="([^"]+)".+/$1/;
12984      }
12985      elsif ( -f "/etc/debian_version" ) {
12986         chomp(my $rel = `cat /etc/debian_version`);
12987         $release = "Debian $rel";
12988         if ( -f "/etc/apt/sources.list" ) {
12989             chomp(my $code_name = `awk '/^deb/ {print \$3}' /etc/apt/sources.list | awk -F/ '{print \$1}'| awk 'BEGIN {FS="|"} {print \$1}' | sort | uniq -c | sort -rn | head -n1 | awk '{print \$2}'`);
12990             $release .= " ($code_name)" if $code_name;
12991         }
12992      }
12993      elsif ( -f "/etc/os-release" ) { # openSUSE
12994         chomp($release = `grep PRETTY_NAME /etc/os-release`);
12995         $release =~ s/^PRETTY_NAME="(.+)"$/$1/;
12996      }
12997      elsif ( `ls /etc/*release 2>/dev/null` ) {
12998         if ( `grep DISTRIB_DESCRIPTION /etc/*release 2>/dev/null` ) {
12999            $release = `grep DISTRIB_DESCRIPTION /etc/*release | head -n1`;
13000         }
13001         else {
13002            $release = `cat /etc/*release | head -n1`;
13003         }
13004      }
13005   }
13006   elsif ( $platform =~ m/(?:BSD|^Darwin)$/ ) {
13007      my $rel = `uname -r`;
13008      $release = "$platform $rel";
13009   }
13010   elsif ( $platform eq "SunOS" ) {
13011      my $rel = `head -n1 /etc/release` || `uname -r`;
13012      $release = "$platform $rel";
13013   }
13014
13015   if ( !$release ) {
13016      PTDEBUG && _d('Failed to get the release, using platform');
13017      $release = $platform;
13018   }
13019   chomp($release);
13020
13021   $release =~ s/^"|"$//g;
13022
13023   PTDEBUG && _d('OS version =', $release);
13024   return $release;
13025}
13026
13027sub get_perl_version {
13028   my (%args) = @_;
13029   my $item = $args{item};
13030   return unless $item;
13031
13032   my $version = sprintf '%vd', $PERL_VERSION;
13033   PTDEBUG && _d('Perl version', $version);
13034   return $version;
13035}
13036
13037sub get_perl_module_version {
13038   my (%args) = @_;
13039   my $item = $args{item};
13040   return unless $item;
13041
13042   my $var     = '$' . $item->{item} . '::VERSION';
13043   my $version = eval "use $item->{item}; $var;";
13044   PTDEBUG && _d('Perl version for', $var, '=', $version);
13045   return $version;
13046}
13047
13048sub get_mysql_variable {
13049   return get_from_mysql(
13050      show => 'VARIABLES',
13051      @_,
13052   );
13053}
13054
13055sub get_from_mysql {
13056   my (%args) = @_;
13057   my $show      = $args{show};
13058   my $item      = $args{item};
13059   my $instances = $args{instances};
13060   return unless $show && $item;
13061
13062   if ( !$instances || !@$instances ) {
13063      PTDEBUG && _d('Cannot check', $item,
13064         'because there are no MySQL instances');
13065      return;
13066   }
13067
13068   if ($item->{item} eq 'MySQL' && $item->{type} eq 'mysql_variable') {
13069      @{$item->{vars}} = grep { $_ eq 'version' || $_ eq 'version_comment' } @{$item->{vars}};
13070   }
13071
13072
13073   my @versions;
13074   my %version_for;
13075   foreach my $instance ( @$instances ) {
13076      next unless $instance->{id};  # special system instance has id=0
13077      my $dbh = $instance->{dbh};
13078      local $dbh->{FetchHashKeyName} = 'NAME_lc';
13079      my $sql = qq/SHOW $show/;
13080      PTDEBUG && _d($sql);
13081      my $rows = $dbh->selectall_hashref($sql, 'variable_name');
13082
13083      my @versions;
13084      foreach my $var ( @{$item->{vars}} ) {
13085         $var = lc($var);
13086         my $version = $rows->{$var}->{value};
13087         PTDEBUG && _d('MySQL version for', $item->{item}, '=', $version,
13088            'on', $instance->{name});
13089         push @versions, $version;
13090      }
13091      $version_for{ $instance->{id} } = join(' ', @versions);
13092   }
13093
13094   return \%version_for;
13095}
13096
13097sub _d {
13098   my ($package, undef, $line) = caller 0;
13099   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
13100        map { defined $_ ? $_ : 'undef' }
13101        @_;
13102   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
13103}
13104
131051;
13106}
13107# ###########################################################################
13108# End VersionCheck package
13109# ###########################################################################
13110
13111# ###########################################################################
13112# This is a combination of modules and programs in one -- a runnable module.
13113# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
13114# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
13115#
13116# Check at the end of this package for the call to main() which actually runs
13117# the program.
13118# ###########################################################################
13119package pt_query_digest;
13120
13121use strict;
13122use warnings FATAL => 'all';
13123use English qw(-no_match_vars);
13124use constant PTDEBUG => $ENV{PTDEBUG} || 0;
13125
13126use Time::Local  qw(timelocal);
13127use Time::HiRes  qw(time usleep);
13128use List::Util   qw(max);
13129use Scalar::Util qw(looks_like_number);
13130use POSIX        qw(signal_h);
13131use Data::Dumper;
13132
13133use Percona::Toolkit;
13134
13135$Data::Dumper::Indent    = 1;
13136$Data::Dumper::Sortkeys  = 1;
13137$Data::Dumper::Quotekeys = 0;
13138
13139$OUTPUT_AUTOFLUSH = 1;
13140
13141Transformers->import(qw(
13142   shorten
13143   micro_t
13144   percentage_of
13145   ts
13146   make_checksum
13147   any_unix_timestamp
13148   parse_timestamp
13149   unix_timestamp
13150   crc32
13151));
13152
13153use sigtrap 'handler', \&sig_int, 'normal-signals';
13154
13155# Global variables.  Only really essential variables should be here.
13156my $oktorun = 1;
13157my $ep_dbh;  # For --explain
13158my $ps_dbh;  # For Processlist
13159my $aux_dbh; # For --aux-dsn (--since/--until "MySQL expression")
13160
13161my $resume_file;
13162my $resume = {};
13163my $offset;
13164my $exit_status = 0;
13165
13166(my $tool = __PACKAGE__) =~ tr/_/-/;
13167
13168sub main {
13169   # Reset global vars, else tests will fail.
13170   local @ARGV  = @_;
13171   $oktorun     = 1;
13172   $resume      = {};
13173   $offset      = undef;
13174   $exit_status = 0;
13175
13176   # ##########################################################################
13177   # Get configuration information.
13178   # ##########################################################################
13179   my $o = new OptionParser();
13180   $o->get_specs();
13181   $o->get_opts();
13182
13183   my $dp = $o->DSNParser();
13184   $dp->prop('set-vars', $o->set_vars());
13185
13186   my $aux_dsn;
13187   for my $i (0..$#ARGV) {
13188       next if -e $ARGV[$i];
13189       $aux_dsn = $dp->parse(splice(@ARGV, $i, 1));
13190       last;
13191   }
13192
13193   # Frequently used options.
13194   my $review_dsn  = handle_special_defaults($o, 'review');
13195   my $history_dsn = handle_special_defaults($o, 'history');
13196
13197   my @groupby    = @{$o->get('group-by')};
13198   my @orderby;
13199   if ( (grep { $_ =~ m/genlog|GeneralLogParser|rawlog|RawLogParser/ } @{$o->get('type')})
13200        && !$o->got('order-by') ) {
13201      @orderby = 'Query_time:cnt';
13202   }
13203   else {
13204      @orderby = @{$o->get('order-by')};
13205   }
13206
13207   if ( !$o->get('help') ) {
13208      if ( $o->get('outliers')
13209         && grep { $_ !~ m/^\w+:[0-9.]+(?::[0-9.]+)?$/ } @{$o->get('outliers')}
13210      ) {
13211         $o->save_error('--outliers requires two or three colon-separated fields');
13212      }
13213      if ( $o->get('progress') ) {
13214         eval { Progress->validate_spec($o->get('progress')) };
13215         if ( $EVAL_ERROR ) {
13216            chomp $EVAL_ERROR;
13217            $o->save_error("--progress $EVAL_ERROR");
13218         }
13219      }
13220
13221      if ( my $patterns = $o->get('embedded-attributes') ) {
13222         $o->save_error("--embedded-attributes should be passed two "
13223           . "comma-separated patterns, got " . scalar(@$patterns) )
13224           unless scalar(@$patterns) == 2;
13225         for my $re (@$patterns) {
13226            no re 'eval';
13227            eval { qr/$re/ };
13228            if ( $EVAL_ERROR ) {
13229               $o->save_error("--embedded-attributes $EVAL_ERROR")
13230            }
13231         }
13232      }
13233   }
13234
13235   # Set an orderby for each groupby; use the default orderby if there
13236   # are more groupby than orderby attribs.
13237   my $default_orderby = $o->get_defaults()->{'order-by'};
13238   foreach my $i ( 0..$#groupby ) {
13239      $orderby[$i] ||= $default_orderby;
13240   }
13241   $o->set('order-by', \@orderby);
13242
13243   my $run_time_mode = lc $o->get('run-time-mode');
13244   my $run_time_interval;
13245   eval {
13246      $run_time_interval = verify_run_time(
13247         run_mode => $run_time_mode,
13248         run_time => $o->get('run-time'),
13249      );
13250   };
13251   if ( $EVAL_ERROR ) {
13252      chomp $EVAL_ERROR;
13253      $o->save_error($EVAL_ERROR);
13254   }
13255
13256   $o->usage_or_errors();
13257
13258   # ########################################################################
13259   # Common modules.
13260   # #######################################################################
13261   my $q  = new Quoter();
13262   my $qp = new QueryParser();
13263   my $qr = new QueryRewriter(QueryParser=>$qp, match_embedded_numbers => $o->get('preserve-embedded-numbers') ? 1 : 0);
13264   my %common_modules = (
13265      OptionParser  => $o,
13266      DSNParser     => $dp,
13267      Quoter        => $q,
13268      QueryParser   => $qp,
13269      QueryRewriter => $qr,
13270   );
13271
13272   # ########################################################################
13273   # Set up for --explain
13274   # ########################################################################
13275   if ( my $ep_dsn = $o->get('explain') ) {
13276      $ep_dbh = get_cxn(
13277         for          => '--explain',
13278         dsn          => $ep_dsn,
13279         OptionParser => $o,
13280         DSNParser    => $dp,
13281         opts         => { AutoCommit => 1 },
13282      );
13283      $ep_dbh->{InactiveDestroy}  = 1;  # Don't die on fork().
13284   }
13285
13286   # ########################################################################
13287   # Set up for --review.
13288   # ########################################################################
13289   my $qv;      # QueryReview
13290   my $qv_dbh;  # For QueryReview
13291
13292   my $tp = new TableParser(Quoter => $q);
13293   if ( $review_dsn ) {
13294      my %dsn_without_Dt = %$review_dsn;
13295      delete $dsn_without_Dt{D};
13296      delete $dsn_without_Dt{t};
13297
13298      $qv_dbh = get_cxn(
13299         for          => '--review',
13300         dsn          => \%dsn_without_Dt,
13301         OptionParser => $o,
13302         DSNParser    => $dp,
13303         opts         => { AutoCommit => 1 },
13304      );
13305      $qv_dbh->{InactiveDestroy}  = 1;  # Don't die on fork().
13306
13307      my @db_tbl = @{$review_dsn}{qw(D t)};
13308      my $db_tbl = $q->quote(@db_tbl);
13309
13310      my $create_review_sql = $o->read_para_after(
13311         __FILE__, qr/\bMAGIC_create_review_table\b/);
13312      $create_review_sql =~ s/\bquery_review\b/$db_tbl/;
13313
13314      create_review_tables(
13315          type             => 'review',
13316          dbh              => $qv_dbh,
13317          full_table       => $db_tbl,
13318          create_table_sql => $create_review_sql,
13319          create_table     => $o->get('create-review-table'),
13320          TableParser      => $tp,
13321      );
13322
13323      # Set up the new QueryReview object.
13324      my $struct = $tp->parse($tp->get_create_table($qv_dbh, @db_tbl));
13325      $qv = new QueryReview(
13326         dbh         => $qv_dbh,
13327         db_tbl      => $db_tbl,
13328         tbl_struct  => $struct,
13329         quoter      => $q,
13330      );
13331   }
13332
13333   # ########################################################################
13334   # Set up for --history.
13335   # ########################################################################
13336   my $qh;      # QueryHistory
13337   my $qh_dbh;
13338   if ( $history_dsn ) {
13339      my %dsn_without_Dt = %$history_dsn;
13340      delete $dsn_without_Dt{D};
13341      delete $dsn_without_Dt{t};
13342      my $qh_dbh = get_cxn(
13343         for          => '--history',
13344         dsn          => \%dsn_without_Dt,
13345         OptionParser => $o,
13346         DSNParser    => $dp,
13347         opts         => { AutoCommit => 1 },
13348      );
13349      $qh_dbh->{InactiveDestroy}  = 1;  # Don't die on fork().
13350
13351      my @hdb_tbl = @{$history_dsn}{qw(D t)};
13352      my $hdb_tbl = $q->quote(@hdb_tbl);
13353
13354      my $create_history_sql = $o->read_para_after(
13355         __FILE__, qr/\bMAGIC_create_history_table\b/);
13356      $create_history_sql =~ s/\bquery_history\b/$hdb_tbl/;
13357
13358      create_review_tables(
13359         type             => 'history',
13360         dbh              => $qh_dbh,
13361         full_table       => $hdb_tbl,
13362         create_table_sql => $create_history_sql,
13363         create_table     => $o->get('create-history-table'),
13364         TableParser      => $tp,
13365      );
13366
13367      my $tbl = $tp->parse($tp->get_create_table($qh_dbh, @hdb_tbl));
13368      my $pat = $o->read_para_after(__FILE__, qr/\bMAGIC_history_columns\b/);
13369      $pat    =~ s/\s+//g;
13370      $pat    = qr/^(.*?)_($pat)$/;
13371
13372      $qh = QueryHistory->new(
13373                history_dbh    => $qh_dbh,
13374                column_pattern => $pat,
13375            );
13376      # And tell the QueryReview that it has more work to do.
13377      $qh->set_history_options(
13378         table      => $hdb_tbl,
13379         tbl_struct => $tbl,
13380      );
13381   }
13382
13383   # ########################################################################
13384   # Create all the pipeline processes that do all the work: get input,
13385   # parse events, manage runtime, switch iterations, aggregate, etc.
13386   # ########################################################################
13387
13388   # These four vars are passed to print_reports().
13389   my @ea;         # EventAggregator objs
13390   my @tl;         # EventTimeline obj
13391   my @read_files; # file names that have been parsed
13392   my %stats;      # various stats/counters used in some procs
13393
13394   # The pipeline data hashref is passed to each proc.  Procs use this to
13395   # pass data through the pipeline.  The most importat data is the event.
13396   # Other data includes in the next_event callback, time and iters left,
13397   # etc.  This hashref is accessed inside a proc via the $args arg.
13398   my $pipeline_data = {
13399      iter  => 1,
13400      stats => \%stats,
13401   };
13402
13403   my $pipeline = new Pipeline(
13404      continue_on_error => $o->get('continue-on-error'),
13405   );
13406
13407   # ########################################################################
13408   # Procs before the terminator are, in general, responsible for getting
13409   # and event that procs after the terminator process before aggregation
13410   # at the end of the pipeline.  Therefore, these pre-terminator procs
13411   # should not assume an event exists.  If one does, they should let the
13412   # pipeline continue.  Only the terminator proc terminates the pipeline.
13413   # ########################################################################
13414
13415   { # prep
13416      $pipeline->add(
13417         name    => 'prep',
13418         process => sub {
13419            my ( $args ) = @_;
13420            # Stuff you'd like to do to make sure pipeline data is prepped
13421            # and ready to go...
13422
13423            $args->{event} = undef;  # remove event from previous pass
13424
13425            return $args;
13426         },
13427      );
13428   } # prep
13429
13430   { # input
13431      my $fi        = FileIterator->new();
13432      my $next_file = $fi->get_file_itr(@ARGV);
13433      my $input_fh; # the current input fh
13434      my $pr;       # Progress obj for ^
13435
13436      $pipeline->add(
13437         name    => 'input',
13438         process => sub {
13439            my ( $args ) = @_;
13440
13441            # Only get the next file when there's no fh or no more events in
13442            # the current fh.  This allows us to do collect-and-report cycles
13443            # (i.e. iterations) on huge files.  This doesn't apply to infinite
13444            # inputs because they don't set more_events false.
13445            if ( !$args->{input_fh} || !$args->{more_events} ) {
13446
13447               # Close the current file.
13448               if ( $args->{input_fh} ) {
13449                  close $args->{input_fh}
13450                     or die "Cannot close input fh: $OS_ERROR";
13451               }
13452
13453               # Open the next file.
13454               my ($fh, $filename, $filesize) = $next_file->();
13455               if ( $fh ) {
13456                  my $fileno = fileno $fh;
13457                  if ($fileno == 0) {
13458                     print "Reading from STDIN ...\n";
13459                  }
13460                  PTDEBUG && _d('Reading', $filename);
13461                  PTDEBUG && _d('File size:', $filesize);
13462                  # catch if user is trying to use an uncoverted (raw) binlog  # issue 1377888
13463                  if ( $filename &&  $o->get('type')->[0] eq 'binlog') {
13464                     if (is_raw_binlog($filename)) {
13465                        warn "Binlog file $filename must first be converted to text format using mysqlbinlog";
13466                        return 1;
13467                     }
13468                  }
13469                  push @read_files, { name => ($filename || "STDIN"), size => $filesize };
13470
13471                  # Read the file offset for --resume.
13472                  if ( ($resume_file = $o->get('resume')) && $filename ) {
13473                     if ( -s $resume_file ) {
13474                        open my $resume_fh, "<", $resume_file
13475                           or die "Cannot open $resume_file: $OS_ERROR";
13476                        my $resume_offset = do { local $/; <$resume_fh> };
13477                        close $resume_fh
13478                           or die "Error close $resume_file: $OS_ERROR";
13479                        chomp($resume_offset) if $resume_offset;
13480                        if ( looks_like_number($resume_offset) ) {
13481                           PTDEBUG && _d('Resuming at offset', $resume_offset);
13482                           $resume->{simple} = 1;
13483                           seek $fh, $resume_offset, 0
13484                              or die "Error seeking to $resume_offset in "
13485                                 . "$resume_file: $OS_ERROR";
13486                           warn "# Resuming $filename from offset "
13487                              . "$resume_offset (file size: $filesize)...\n";
13488                        }
13489                        else {
13490                           $resume->{simple} = 0;  # enhanced resume file
13491                           map {
13492                              my $line = $_;
13493                              chomp $line;
13494                              my ($key, $value) = split('=', $line);
13495                              if ( !$key
13496                                   || !defined $value
13497                                   || !looks_like_number($value)
13498                                   || $value < 0 )
13499                              {
13500                                 $exit_status = 1;
13501                                 warn "Invalid line in --resume $resume_file: $line\n";
13502                                 $oktorun = 0;
13503                                 return;
13504                              }
13505                              $resume->{$key} = $value;
13506                           } split("\n", $resume_offset);
13507                           if ( $resume->{end_offset} &&
13508                                 $resume->{end_offset} <=
13509                                ($resume->{stop_offset} || 0) )
13510                           {
13511                              close $args->{input_fh} if $args->{input_fh};
13512                              $args->{input_fh}    = undef;
13513                              $args->{more_events} = 0;
13514                              $oktorun = 0;
13515                              $resume_file = '';
13516                              warn "# Not resuming $filename because "
13517                                 . "end_offset $resume->{end_offset} is "
13518                                 . "less than or equal to stop_offset "
13519                                 . ($resume->{stop_offset} || 0) . "\n";
13520                           }
13521                           else {
13522                              $resume_offset = $resume->{stop_offset}
13523                                            || $resume->{start_offset}
13524                                            || 0;
13525                              seek $fh, $resume_offset, 0
13526                                 or die "Error seeking to $resume_offset in "
13527                                    . "$resume_file: $OS_ERROR";
13528                              warn "# Resuming $filename from offset "
13529                                . "$resume_offset to "
13530                                . ($resume->{end_offset} ? $resume->{end_offset}
13531                                                         : "end of file")
13532                                . " (file size: $filesize)...\n";
13533                           }
13534                        }
13535                     }
13536                     else {
13537                        warn "# Resuming $filename from offset 0 because "
13538                           . "resume file $filename does not exist "
13539                           . "(file size: $filesize)...\n";
13540                        $resume->{simple}       = 0;
13541                        $resume->{start_offset} = 0;
13542                     }
13543                  }
13544
13545                  # Create callback to read next event.  Some inputs, like
13546                  # Processlist, may use something else but most next_event.
13547                  if ( my $read_time = $o->get('read-timeout') ) {
13548                     $args->{next_event}
13549                        = sub { return read_timeout($fh, $read_time); };
13550                  }
13551                  else {
13552                     $args->{next_event} = sub { return <$fh>; };
13553                  }
13554                  $args->{filename}    = $filename;
13555                  $args->{input_fh}    = $fh;
13556                  $args->{tell}        = sub {
13557                     $offset = tell $fh;  # update global $offset
13558                     if ( $args->{filename} ) {
13559                        $args->{pos_for}->{$args->{filename}} = $offset;
13560                     }
13561                     return $offset;  # legacy: return global $offset
13562                  };
13563                  $args->{more_events} = 1;
13564
13565                  # Reset in case we read two logs out of order by time.
13566                  $args->{past_since} = 0 if $o->get('since');
13567                  $args->{at_until}   = 0 if $o->get('until');
13568
13569                  # Make a progress reporter, one per file.
13570                  if ( $o->get('progress') && $filename && -e $filename ) {
13571                     $pr = new Progress(
13572                        jobsize => $filesize,
13573                        spec    => $o->get('progress'),
13574                        name    => $filename,
13575                     );
13576                  }
13577               }
13578               else {
13579                  PTDEBUG && _d("No more input");
13580                  # This will cause terminator proc to terminate the pipeline.
13581                  $args->{input_fh}    = undef;
13582                  $args->{more_events} = 0;
13583               }
13584            }
13585            elsif ( $resume->{end_offset}
13586                    && $offset >= $resume->{end_offset} ) {
13587               PTDEBUG && _d('Offset', $offset, 'at end_offset',
13588                  $resume->{end_offset});
13589               close $args->{input_fh} if $args->{input_fh};
13590               $args->{input_fh}    = undef;
13591               $args->{more_events} = 0;
13592            }
13593            else {
13594               $pr->update($args->{tell}) if $pr;
13595            }
13596            return $args;
13597         },
13598      );
13599   } # input
13600
13601   my $ps_dsn;
13602   my @parsers;
13603   { # event
13604      my $misc;
13605      if ( $ps_dsn = $o->get('processlist') ) {
13606         my $ms = new MasterSlave(
13607            OptionParser => $o,
13608            DSNParser    => $dp,
13609            Quoter       => $q,
13610         );
13611         my $pl = new Processlist(
13612            interval    => $o->get('interval') * 1_000_000,
13613            MasterSlave => $ms
13614         );
13615         my ( $sth, $cxn );
13616         my $cur_server = 'processlist';
13617         my $cur_time   = 0;
13618
13619         if ( $o->get('ask-pass') ) {
13620            $ps_dsn->{p} = OptionParser::prompt_noecho("Enter password for "
13621               . "--processlist: ");
13622            $o->get('processlist', $ps_dsn);
13623         }
13624
13625         my $code = sub {
13626            my $err;
13627            do {
13628               eval { $sth->execute; };
13629               $err = $EVAL_ERROR;
13630               if ( $err ) { # Try to reconnect when there's an error.
13631                  eval {
13632                     if ( !$ps_dbh || !$ps_dbh->ping ) {
13633                        PTDEBUG && _d('Getting a dbh from', $cur_server);
13634                        $ps_dbh = $dp->get_dbh(
13635                           $dp->get_cxn_params($o->get($cur_server)), {AutoCommit => 1});
13636                        $ps_dbh->{InactiveDestroy}  = 1;  # Don't die on fork().
13637                     }
13638                     $cur_time = time();
13639                     $sth      = $ps_dbh->prepare('SHOW FULL PROCESSLIST');
13640                     $cxn      = $ps_dbh->{mysql_thread_id};
13641                     $sth->execute();
13642                  };
13643                  $err = $EVAL_ERROR;
13644                  if ( $err ) {
13645                     warn $err;
13646                     sleep 1;
13647                  }
13648               }
13649            } until ( $sth && !$err );
13650
13651            return [ grep { $_->[0] != $cxn } @{ $sth->fetchall_arrayref(); } ];
13652         };
13653
13654         $pipeline->add(
13655            name    => ref $pl,
13656            process => sub {
13657               my ( $args ) = @_;
13658               my $event = $pl->parse_event(code => $code);
13659               if ( $event ) {
13660                  sanitize_event($event);
13661                  $args->{event} = $event;
13662               }
13663               return $args;
13664            },
13665         );
13666      }  # get events from processlist
13667      else {
13668         my %alias_for = (
13669            slowlog   => ['SlowLogParser'],
13670            binlog    => ['BinaryLogParser'],
13671            genlog    => ['GeneralLogParser'],
13672            tcpdump   => ['TcpdumpParser','MySQLProtocolParser'],
13673            rawlog    => ['RawLogParser'],
13674         );
13675         my $type = $o->get('type');
13676         $type    = $alias_for{$type->[0]} if $alias_for{$type->[0]};
13677
13678         my ($server, $port);
13679         if ( my $watch_server = $o->get('watch-server') ) {
13680            # This should match all combinations of HOST and PORT except
13681            # "host-name.port" because "host.mysql" could be either
13682            # host "host" and port "mysql" or just host "host.mysql"
13683            # (e.g. if someone added "127.1 host.mysql" to etc/hosts).
13684            # So host-name* requires a colon between it and a port.
13685            ($server, $port) = $watch_server
13686                  =~ m/^((?:\d+\.\d+\.\d+\.\d+|[\w\.\-]+\w))(?:[\:\.](\S+))?/;
13687            PTDEBUG && _d('Watch server', $server, 'port', $port);
13688         }
13689
13690         foreach my $module ( @$type ) {
13691            my $parser;
13692            eval {
13693               $parser = $module->new(
13694                  server => $server,
13695                  port   => $port,
13696                  o      => $o,
13697               );
13698            };
13699            if ( $EVAL_ERROR ) {
13700               if ( $EVAL_ERROR =~ m/perhaps you forgot to load/ ) {
13701                  # There is no module to handle --type, so wrong --type
13702                  die "'$module' is not a valid input type. "
13703                     . "Please check the documentation for --type.\n";
13704               }
13705               die "Failed to load $module module: $EVAL_ERROR";
13706            }
13707            push @parsers, $parser;
13708
13709            $pipeline->add(
13710               name    => ref $parser,
13711               process => sub {
13712                  my ( $args ) = @_;
13713                  if ( $args->{input_fh} ) {
13714                     my $event = $parser->parse_event(
13715                        event       => $args->{event},
13716                        next_event  => $args->{next_event},
13717                        tell        => $args->{tell},
13718                        misc        => $args->{misc},
13719                        oktorun     => sub { $args->{more_events} = $_[0]; },
13720                        stats       => $args->{stats},
13721                     );
13722                     if ( $event ) {
13723                        sanitize_event($event);
13724                        $args->{event} = $event;
13725                        return $args;
13726                     }
13727                     PTDEBUG && _d("No more events, input EOF");
13728                     return;  # next input
13729                  }
13730                  # No input, let pipeline run so the last report is printed.
13731                  return $args;
13732               },
13733            );
13734         }
13735      }  # get events from log file
13736
13737      if ( my $patterns = $o->get('embedded-attributes') ) {
13738         $misc->{embed}   = qr/$patterns->[0]/;
13739         $misc->{capture} = qr/$patterns->[1]/;
13740         PTDEBUG && _d('Patterns for embedded attributes:', $misc->{embed},
13741               $misc->{capture});
13742      }
13743      $pipeline_data->{misc} = $misc;
13744   } # event
13745
13746   { # runtime
13747      my $now_callback;
13748      if ( $run_time_mode eq 'clock' ) {
13749         $now_callback = sub { return time; };
13750      }
13751      elsif ( $run_time_mode eq 'event' ) {
13752         $now_callback = sub {
13753            my ( %args ) = @_;
13754            my $event = $args{event};
13755            return unless $event && $event->{ts};
13756            PTDEBUG && _d("Log time:", $event->{ts});
13757            return unix_timestamp(parse_timestamp($event->{ts}));
13758         };
13759      }
13760      else {
13761         $now_callback = sub { return; };
13762      }
13763      $pipeline_data->{Runtime} = new Runtime(
13764         now      => $now_callback,
13765         run_time => $o->get('run-time'),
13766      );
13767
13768      $pipeline->add(
13769         name    => 'runtime',
13770         process => sub {
13771            my ( $args ) = @_;
13772            if ( $run_time_mode eq 'interval' ) {
13773               my $event = $args->{event};
13774               return $args unless $event && $event->{ts};
13775
13776               my $ts = $args->{unix_ts}
13777                  = unix_timestamp(parse_timestamp($event->{ts}));
13778
13779               if ( !$args->{next_ts_interval} ) {
13780                  # We need to figure out what interval we're in and what
13781                  # interval is next.  So first we need to parse the ts.
13782                  if ( my($y, $m, $d, $h, $i, $s)
13783                        = $args->{event}->{ts} =~ m/^$Transformers::mysql_ts$/ ) {
13784                     my $rt = $o->get('run-time');
13785                     if ( $run_time_interval == 60 ) {
13786                        PTDEBUG && _d("Run-time interval in seconds");
13787                        my $this_minute = unix_timestamp(parse_timestamp(
13788                           "$y$m$d $h:$i:00"));
13789                        do { $this_minute += $rt } until $this_minute > $ts;
13790                        $args->{next_ts_interval} = $this_minute;
13791                     }
13792                     elsif ( $run_time_interval == 3600 ) {
13793                        PTDEBUG && _d("Run-time interval in minutes");
13794                        my $this_hour = unix_timestamp(parse_timestamp(
13795                           "$y$m$d $h:00:00"));
13796                        do { $this_hour += $rt } until $this_hour > $ts;
13797                        $args->{next_ts_interval} = $this_hour;
13798                     }
13799                     elsif ( $run_time_interval == 86400 ) {
13800                        PTDEBUG && _d("Run-time interval in days");
13801                        my $this_day = unix_timestamp(parse_timestamp(
13802                           "$y$m$d 00:00:00"));
13803                        $args->{next_ts_interval} = $this_day + $rt;
13804                     }
13805                     else {
13806                        die "Invalid run-time interval: $run_time_interval";
13807                     }
13808                     PTDEBUG && _d("First ts interval:",
13809                        $args->{next_ts_interval});
13810                  }
13811                  else {
13812                     PTDEBUG && _d("Failed to parse MySQL ts:",
13813                        $args->{event}->{ts});
13814                  }
13815               }
13816            }
13817            else {
13818               # Clock and event run-time modes need to check the time.
13819               $args->{time_left}
13820                  = $args->{Runtime}->time_left(event=>$args->{event});
13821            }
13822
13823            return $args;
13824         },
13825      );
13826   } # runtime
13827
13828   # Filter early for --since and --until.
13829   # If --since or --until is a MySQL expression, then any_unix_timestamp()
13830   # will need this callback to execute the expression.  We don't know what
13831   # type of time value the user gave, so we'll create the callback in any case.
13832   if ( $o->get('since') || $o->get('until') ) {
13833      if ( $aux_dsn ) {
13834         $aux_dbh = get_cxn(
13835            for          => '--aux',
13836            dsn          => $aux_dsn,
13837            OptionParser => $o,
13838            DSNParser    => $dp,
13839            opts         => { AutoCommit => 1 }
13840         );
13841         $aux_dbh->{InactiveDestroy}  = 1;  # Don't die on fork().
13842      }
13843      $aux_dbh ||= $qv_dbh || $qh_dbh || $ps_dbh || $ep_dbh;
13844      PTDEBUG && _d('aux dbh:', $aux_dbh);
13845
13846      my $time_callback = sub {
13847         my ( $exp ) = @_;
13848         return unless $aux_dbh;
13849         my $sql = "SELECT UNIX_TIMESTAMP($exp)";
13850         PTDEBUG && _d($sql);
13851         return $aux_dbh->selectall_arrayref($sql)->[0]->[0];
13852      };
13853      if ( $o->get('since') ) {
13854         my $since = any_unix_timestamp($o->get('since'), $time_callback);
13855         die "Invalid --since value" unless $since;
13856
13857         $pipeline->add(
13858            name    => 'since',
13859            process => sub {
13860               my ( $args ) = @_;
13861               my $event = $args->{event};
13862               return $args unless $event;
13863               if ( $args->{past_since} ) {
13864                  PTDEBUG && _d('Already past --since');
13865                  return $args;
13866               }
13867               if ( $event->{ts} ) {
13868                  my $ts = any_unix_timestamp($event->{ts}, $time_callback);
13869                  if ( ($ts || 0) >= $since ) {
13870                     PTDEBUG && _d('Event is at or past --since');
13871                     $args->{past_since} = 1;
13872                     return $args;
13873                  }
13874               }
13875               PTDEBUG && _d('Event is before --since (or ts unknown)');
13876               return;  # next event
13877            },
13878         );
13879      }
13880      if ( $o->get('until') ) {
13881         my $until = any_unix_timestamp($o->get('until'), $time_callback);
13882         die "Invalid --until value" unless $until;
13883         $pipeline->add(
13884            name    => 'until',
13885            process => sub {
13886               my ( $args ) = @_;
13887               my $event = $args->{event};
13888               return $args unless $event;
13889               if ( $args->{at_until} ) {
13890                  PTDEBUG && _d('Already past --until');
13891                  return;
13892               }
13893               if ( $event->{ts} ) {
13894                  my $ts = any_unix_timestamp($event->{ts}, $time_callback);
13895                  if ( ($ts || 0) >= $until ) {
13896                     PTDEBUG && _d('Event at or after --until');
13897                     $args->{at_until} = 1;
13898                     return;
13899                  }
13900               }
13901               PTDEBUG && _d('Event is before --until (or ts unknown)');
13902               return $args;
13903            },
13904         );
13905      }
13906   } # since/until
13907
13908   { # iteration
13909      $pipeline->add(
13910         # This is a critical proc: if we die here, we probably need
13911         # to stop, else an infinite loop can develop:
13912         # https://bugs.launchpad.net/percona-toolkit/+bug/888114
13913         # We'll retry twice in case the problem is just one bad
13914         # query class, or something like that.
13915         retry_on_error => 2,
13916         name           => 'iteration',
13917         process        => sub {
13918            my ( $args ) = @_;
13919
13920            # Start the (next) iteration.
13921            if ( !$args->{iter_start} ) {
13922               my $iter_start = $args->{iter_start} = time;
13923               PTDEBUG && _d('Iteration', $args->{iter},
13924                  'started at', ts($iter_start));
13925
13926               if ( PTDEBUG ) {
13927                  _d("\n# Iteration $args->{iter} started at ",
13928                     ts($iter_start), "\n");
13929               }
13930            }
13931
13932            # Determine if we should stop the current iteration.
13933            # If we do, then we report events collected during this
13934            # iter, then reset and increment for the next iter.
13935            my $report    = 0;
13936            my $time_left = $args->{time_left};
13937            if ( !$args->{more_events}
13938                 || defined $time_left && $time_left <= 0 ) {
13939               PTDEBUG && _d("Runtime elapsed or no more events, reporting");
13940               $report = 1;
13941            }
13942            elsif ( $run_time_mode eq 'interval'
13943                    && $args->{next_ts_interval}
13944                    && $args->{unix_ts} >= $args->{next_ts_interval} ) {
13945               PTDEBUG && _d("Event is in the next interval, reporting");
13946
13947               # Get the next ts interval based on the current log ts.
13948               # Log ts can make big jumps, so just += $rt might not
13949               # set the next ts interval at a time past the current
13950               # log ts.
13951               my $rt = $o->get('run-time');
13952               do {
13953                  $args->{next_ts_interval} += $rt;
13954               } until $args->{next_ts_interval} >= $args->{unix_ts};
13955
13956               $report = 1;
13957            }
13958
13959            if ( $report ) {
13960               PTDEBUG && _d("Iteration", $args->{iter}, "stopped at",ts(time));
13961
13962               save_resume_offset(
13963                  last_event_offset => $parsers[0]->{last_event_offset},
13964               );
13965
13966               # Get this before calling print_reports() because that sub
13967               # resets each ea and we may need this later for stats.
13968               my $n_events_aggregated = $ea[0]->events_processed();
13969
13970               if ( $n_events_aggregated ) {
13971                  print_reports(
13972                     eas             => \@ea,
13973                     tls             => \@tl,
13974                     groupby         => \@groupby,
13975                     orderby         => \@orderby,
13976                     files           => \@read_files,
13977                     Pipeline        => $pipeline,
13978                     QueryReview     => $qv,
13979                     QueryHistory    => $qh,
13980                     %common_modules,
13981                  );
13982               }
13983               else {
13984                  if ( $o->get('output') eq 'report' ) {
13985                     print "\n# No events processed.\n";
13986                  }
13987               }
13988
13989               if ( PTDEBUG ) {
13990                  if ( keys %stats ) {
13991                     my $report = new ReportFormatter(
13992                        line_width => 74,
13993                     );
13994                     $report->set_columns(
13995                        { name => 'Statistic',                  },
13996                        { name => 'Count',    right_justify => 1 },
13997                        { name => '%/Events', right_justify => 1 },
13998                     );
13999
14000                     # Have to add this one manually because currently
14001                     # EventAggregator::aggregate() doesn't know about stats.
14002                     # It's the same thing as events_processed() though.
14003                     $stats{events_aggregated} = $n_events_aggregated;
14004
14005                     # Save value else events_read will be reset during the
14006                     # foreach loop below and mess up percentage_of().
14007                     my $n_events_read = $stats{events_read} || 0;
14008
14009                     my %stats_sort_order = (
14010                        events_read       => 1,
14011                        events_parsed     => 2,
14012                        events_aggregated => 3,
14013                     );
14014                     my @stats = sort {
14015                           QueryReportFormatter::pref_sort(
14016                              $a, $stats_sort_order{$a},
14017                              $b, $stats_sort_order{$b})
14018                     } keys %stats;
14019                     foreach my $stat ( @stats ) {
14020                        $report->add_line(
14021                           $stat,
14022                           $stats{$stat} || 0,
14023                           percentage_of(
14024                              $stats{$stat} || 0,
14025                              $n_events_read,
14026                              p => 2),
14027                        );
14028                        $stats{$stat} = 0;  # Reset for next iteration.
14029                     }
14030                     print STDERR "\n" . $report->get_report();
14031                  }
14032                  else {
14033                     print STDERR "\n# No statistics values.\n";
14034                  }
14035               }
14036
14037               # Decrement iters_left after finishing an iter because in the
14038               # default case, 1 iter, if we decr when the iter starts, then
14039               # terminator will think there's no iters left before the one
14040               # iter has finished.
14041               if ( my $max_iters = $o->get('iterations') ) {
14042                  $args->{iters_left} = $max_iters - $args->{iter};
14043                  PTDEBUG && _d($args->{iters_left}, "iterations left");
14044               }
14045
14046               # Next iteration.
14047               $args->{iter}++;
14048               $args->{iter_start} = undef;
14049
14050               # Runtime is per-iteration, so reset it, and reset time_left
14051               # else terminator will think runtime has elapsed when really
14052               # we may just be between iters.
14053               $args->{Runtime}->reset();
14054               $args->{time_left} = undef;
14055            }
14056
14057            # Continue the pipeline even if we reported and went to the next
14058            # iter because there could be an event in the pipeline that is
14059            # the first in the next/new iter.
14060            return $args;
14061         },
14062      );
14063   } # iteration
14064
14065   { # terminator
14066      $pipeline->add(
14067         name    => 'terminator',
14068         process => sub {
14069            my ( $args ) = @_;
14070
14071            # The first sure-fire state that terminates the pipeline is
14072            # having no more input.
14073            if ( !$args->{input_fh} ) {
14074               PTDEBUG && _d("No more input, terminating pipeline");
14075
14076               # This shouldn't happen, but I want to know if it does.
14077               warn "There's an event in the pipeline but no current input: "
14078                     . Dumper($args)
14079                  if $args->{event};
14080
14081               $oktorun = 0;  # 2. terminate pipeline
14082               return;        # 1. exit pipeline early
14083            }
14084
14085            # The second sure-first state is having no more iterations.
14086            my $iters_left = $args->{iters_left};
14087            if ( defined $iters_left && $iters_left <= 0 ) {
14088               PTDEBUG && _d("No more iterations, terminating pipeline");
14089               $oktorun = 0;  # 2. terminate pipeline
14090               return;        # 1. exit pipeline early
14091            }
14092
14093            # There's time or iters left so keep running.
14094            if ( $args->{event} ) {
14095               PTDEBUG && _d("Event in pipeline, continuing");
14096               return $args;
14097            }
14098            else {
14099               PTDEBUG && _d("No event in pipeline, get next event");
14100               return;
14101            }
14102         },
14103      );
14104   } # terminator
14105
14106   # ########################################################################
14107   # All pipeline processes after the terminator expect an event
14108   # (i.e. that $args->{event} exists and is a valid event).
14109   # ########################################################################
14110
14111   if ( grep { $_ eq 'fingerprint' } @groupby ) {
14112      $pipeline->add(
14113         name    => 'fingerprint',
14114         process => sub {
14115            my ( $args ) = @_;
14116            my $event = $args->{event};
14117            # Skip events which do not have the groupby attribute.
14118            my $groupby_val = $event->{arg};
14119            return unless $groupby_val;
14120            $event->{fingerprint} = $qr->fingerprint($groupby_val);
14121            return $args;
14122         },
14123      );
14124   }
14125
14126   # Make subs which map attrib aliases to their primary attrib.
14127   foreach my $alt_attrib ( @{$o->get('attribute-aliases')} ) {
14128      $pipeline->add(
14129         name    => 'attribute aliases',
14130         process => make_alt_attrib($alt_attrib),
14131      );
14132   }
14133
14134   # Carry attribs forward for --inherit-attributes.
14135   my $inherited_attribs = $o->get('inherit-attributes');
14136   if ( @$inherited_attribs ) {
14137      my $last_val = {};
14138      $pipeline->add(
14139         name    => 'inherit attributes',
14140         process => sub {
14141            my ( $args ) = @_;
14142            my $event = $args->{event};
14143            foreach my $attrib ( @$inherited_attribs ) {
14144               if ( defined $event->{$attrib} ) {
14145                  # Event has val for this attrib; save it as the last val.
14146                  $last_val->{$attrib} = $event->{$attrib};
14147               }
14148               else {
14149                  # Inherit last val for this attrib (if there was a last val).
14150                  $event->{$attrib} = $last_val->{$attrib}
14151                     if defined $last_val->{$attrib};
14152               }
14153            }
14154            return $args;
14155         },
14156      );
14157   }
14158
14159   { # variations
14160      my @variations = @{$o->get('variations')};
14161      if ( @variations ) {
14162         $pipeline->add(
14163            name    => 'variations',
14164            process => sub {
14165               my ( $args ) = @_;
14166               my $event = $args->{event};
14167               foreach my $attrib ( @variations ) {
14168                  my $checksum = crc32($event->{$attrib});
14169                  $event->{"${attrib}_crc"} = $checksum if defined $checksum;
14170               }
14171               return $args;
14172            },
14173         );
14174      }
14175   } # variations
14176
14177   if ( grep { $_ eq 'tables' } @groupby ) {
14178      $pipeline->add(
14179         name    => 'tables',
14180         process => sub {
14181            my ( $args ) = @_;
14182            my $event = $args->{event};
14183            my $group_by_val = $event->{arg};
14184            return unless defined $group_by_val;
14185            $event->{tables} = [
14186               map {
14187                  # Canonicalize and add the db name in front
14188                  $_ =~ s/`//g;
14189                  if ( $_ !~ m/\./
14190                       && (my $db = $event->{db} || $event->{Schema}) ) {
14191                     $_ = "$db.$_";
14192                  }
14193                  $_;
14194               }
14195               $qp->get_tables($group_by_val)
14196            ];
14197            return $args;
14198         },
14199      );
14200   }
14201
14202   { # distill
14203      my %distill_args;
14204      if ( grep { $_ eq 'distill' } @groupby ) {
14205         $pipeline->add(
14206            name    => 'distill',
14207            process => sub {
14208               my ( $args ) = @_;
14209               my $event = $args->{event};
14210               my $group_by_val = $event->{arg};
14211               return unless defined $group_by_val;
14212               $event->{distill} = $qr->distill($group_by_val, %distill_args);
14213               PTDEBUG && !$event->{distill} && _d('Cannot distill',
14214                  $event->{arg});
14215               return $args;
14216            },
14217         );
14218      }
14219   } # distill
14220
14221   # Former --zero-admin
14222   $pipeline->add(
14223      name    => 'zero admin',
14224      process => sub {
14225         my ( $args ) = @_;
14226         my $event = $args->{event};
14227         if ( $event->{arg} && $event->{arg} =~ m/^administrator/ ) {
14228            $event->{Rows_sent}     = 0 if exists $event->{Rows_sent};
14229            $event->{Rows_examined} = 0 if exists $event->{Rows_examined};
14230            $event->{Rows_read}     = 0 if exists $event->{Rows_read};
14231            $event->{Rows_affected} = 0 if exists $event->{Rows_affected};
14232         }
14233         return $args;
14234      },
14235   );
14236   # zero admin
14237
14238   # Filter after special attributes, like fingerprint, tables,
14239   # distill, etc., have been created.
14240   if ( $o->get('filter') ) {
14241      my $filter = $o->get('filter');
14242      if ( -f $filter && -r $filter ) {
14243         PTDEBUG && _d('Reading file', $filter, 'for --filter code');
14244         open my $fh, "<", $filter or die "Cannot open $filter: $OS_ERROR";
14245         $filter = do { local $/ = undef; <$fh> };
14246         close $fh;
14247      }
14248      else {
14249         $filter = "( $filter )";  # issue 565
14250      }
14251      my $code = 'sub { my ( $args ) = @_; my $event = $args->{event}; '
14252               . "$filter && return \$args; };";
14253      PTDEBUG && _d('--filter code:', $code);
14254      my $sub = eval $code
14255         or die "Error compiling --filter code: $code\n$EVAL_ERROR";
14256
14257      $pipeline->add(
14258         name    => 'filter',
14259         process => $sub,
14260      );
14261   } # filter
14262
14263   if ( $o->got('sample') ) {
14264      my $group_by_val = $groupby[0];
14265      my $num_samples  = $o->get('sample');
14266      if ( $group_by_val ) {
14267         my %seen;
14268         $pipeline->add(
14269            name    => 'sample',
14270            process => sub {
14271               my ( $args ) = @_;
14272               my $event = $args->{event};
14273               if ( ++$seen{$event->{$group_by_val}} <= $num_samples ) {
14274                  PTDEBUG && _d("--sample permits event",
14275                     $event->{$group_by_val});
14276                  return $args;
14277               }
14278               PTDEBUG && _d("--sample rejects event", $event->{$group_by_val});
14279               return;
14280            },
14281         );
14282      }
14283   } # sample
14284
14285   if ( $o->get('output') =~ /slowlog/i ) {
14286      my $w = new SlowLogWriter();
14287      my $field = $o->get('output') eq 'secure-slowlog' ? 'fingerprint' : '';
14288      $pipeline->add(
14289         name    => '--output slowlog',
14290         process => sub {
14291            my ( $args ) = @_;
14292            my $event = $args->{event};
14293            PTDEBUG && _d('callback: --output slowlog');
14294            $w->write(*STDOUT, $event, $field);
14295            return $args;
14296         },
14297      );
14298   } # print
14299
14300   # Combine "# Log_slow_rate_type: query  Log_slow_rate_limit: 2"
14301   # as rate_limit=>'query:2'.
14302   $pipeline->add(
14303      name    => 'rate limit',
14304      process => sub {
14305         my ( $args ) = @_;
14306         my $event = $args->{event};
14307         PTDEBUG && _d('callback: rate limit');
14308         if ( my $limit = $event->{Log_slow_rate_limit} ) {
14309            $event->{rate_limit} = ($event->{Log_slow_rate_type} || 'session') . ":$limit";
14310            delete $event->{Log_slow_rate_limit};
14311            delete $event->{Log_slow_rate_type};
14312         }
14313         return $args;
14314      },
14315   );
14316
14317
14318   # Finally, add aggregator obj for each groupby attrib to the callbacks.
14319   # These aggregating objs should be the last pipeline processes.
14320   foreach my $i ( 0..$#groupby  ) {
14321      my $groupby = $groupby[$i];
14322
14323      # This shouldn't happen.
14324      die "No --order-by value for --group-by $groupby" unless $orderby[$i];
14325
14326      my ( $orderby_attrib, $orderby_func ) = split(/:/, $orderby[$i]);
14327
14328      # Create an EventAggregator for this groupby attrib and
14329      # add it to callbacks.
14330      my $type_for = {
14331         val           => 'string',
14332         key_print     => 'string',
14333         Status_code   => 'string',
14334         Statement_id  => 'string',
14335         Error_no      => 'string',
14336         Last_errno    => 'string',
14337         Thread_id     => 'string',
14338         InnoDB_trx_id => 'string',
14339         host          => 'string',
14340         ip            => 'string',
14341         port          => 'string',
14342         Killed        => 'bool',
14343         rate_limit    => 'string',
14344      };
14345
14346      my $ea = new EventAggregator(
14347         groupby           => $groupby,
14348         attributes        => { },
14349         worst             => $orderby_attrib,
14350         attrib_limit      => $o->get('attribute-value-limit'),
14351         ignore_attributes => $o->get('ignore-attributes'),
14352         type_for          => $type_for,
14353      );
14354      push @ea, $ea;
14355
14356      $pipeline->add(
14357         name    => "aggregate $groupby",
14358         process => sub {
14359            my ( $args ) = @_;
14360            $ea->aggregate($args->{event});
14361            return $args;
14362         },
14363      );
14364
14365      # If user wants a timeline report, too, then create an EventTimeline
14366      # aggregator for this groupby attrib and add it to the callbacks, too.
14367      if ( $o->get('timeline') ) {
14368         my $tl = new EventTimeline(
14369            groupby    => [$groupby],
14370            attributes => [qw(Query_time ts)],
14371         );
14372         push @tl, $tl;
14373
14374         $pipeline->add(
14375            name    => "timeline $groupby",
14376            process => sub {
14377               my ( $args ) = @_;
14378               $tl->aggregate($args->{event});
14379               return $args;
14380            },
14381         );
14382      }
14383   } # aggregate
14384
14385   # ########################################################################
14386   # Daemonize now that everything is setup and ready to work.
14387   # ########################################################################
14388   my $daemon = Daemon->new(
14389      daemonize => $o->get('daemonize'),
14390      pid_file  => $o->get('pid'),
14391      log_file  => $o->get('log'),
14392   );
14393   $daemon->run();
14394
14395   # ########################################################################
14396   # Do the version-check
14397   # ########################################################################
14398   if ( $o->get('version-check') && (!$o->has('quiet') || !$o->get('quiet')) ) {
14399      VersionCheck::version_check(
14400         force     => $o->got('version-check'),
14401         instances => [
14402            ($qv_dbh ? { dbh => $qv_dbh, dsn => $review_dsn  } : ()),
14403            ($qh_dbh ? { dbh => $qh_dbh, dsn => $history_dsn } : ()),
14404            ($ps_dbh ? { dbh => $ps_dbh, dsn => $ps_dsn      } : ()),
14405         ],
14406      );
14407   }
14408
14409   # ##########################################################################
14410   # Parse the input.
14411   # ##########################################################################
14412
14413   # Pump the pipeline until either no more input, or we're interrupted by
14414   # CTRL-C, or--this shouldn't happen--the pipeline causes an error.  All
14415   # work happens inside the pipeline via the procs we created above.
14416   eval {
14417      $pipeline->execute(
14418         oktorun       => \$oktorun,
14419         pipeline_data => $pipeline_data,
14420         stats         => \%stats,
14421      );
14422   };
14423   if ( $EVAL_ERROR ) {
14424      warn "The pipeline caused an error: $EVAL_ERROR";
14425   }
14426   PTDEBUG && _d("Pipeline data:", Dumper($pipeline_data));
14427
14428   save_resume_offset(
14429      last_event_offset => $parsers[0]->{last_event_offset},
14430   );
14431
14432   # Disconnect all open $dbh's
14433   map {
14434      $dp->disconnect($_);
14435      PTDEBUG && _d('Disconnected dbh', $_);
14436   }
14437   grep { $_ }
14438   ($qv_dbh, $qh_dbh, $ps_dbh, $ep_dbh, $aux_dbh);
14439
14440   return $exit_status;
14441} # End main()
14442
14443# ############################################################################
14444# Subroutines.
14445# ############################################################################
14446
14447sub create_review_tables {
14448   my ( %args ) = @_;
14449   my @required_args = qw(dbh full_table TableParser type);
14450   foreach my $arg ( @required_args ) {
14451      die "I need a $arg argument" unless $args{$arg};
14452   }
14453   my $create_table_sql = $args{create_table_sql};
14454   my ($dbh, $full_table, $tp, $type) = @args{@required_args};
14455
14456   PTDEBUG && _d('Checking --review table', $full_table);
14457
14458   # If the repl db doesn't exit, auto-create it, maybe.
14459   my ($db, $tbl)  = Quoter->split_unquote($full_table);
14460   my $show_db_sql = qq{SHOW DATABASES LIKE '$db'};
14461   PTDEBUG && _d($show_db_sql);
14462   my @db_exists = $dbh->selectrow_array($show_db_sql);
14463   if ( !@db_exists && !$args{create_table} ) {
14464      die "--$type database $db does not exist and "
14465        . "--no-create-$type-table was specified.  You need "
14466        . "to create the database.\n";
14467   }
14468   else {
14469      # Even if the db already exists, do this in case it does not exist
14470      # on a slave.
14471      my $create_db_sql
14472         = "CREATE DATABASE IF NOT EXISTS "
14473         . Quoter->quote($db)
14474         . " /* $tool */";
14475      PTDEBUG && _d($create_db_sql);
14476      eval {
14477         $dbh->do($create_db_sql);
14478      };
14479      if ( $EVAL_ERROR && !@db_exists ) {
14480         warn $EVAL_ERROR;
14481         die "--$type database $db does not exist and it cannot be "
14482            . "created automatically.  You need to create the database.\n";
14483      }
14484   }
14485
14486   # USE the correct db
14487   my $sql = "USE " . Quoter->quote($db);
14488   PTDEBUG && _d($sql);
14489   $dbh->do($sql);
14490
14491   # Check if the table exists; if not, create it, maybe.
14492   my $tbl_exists = $tp->check_table(
14493      dbh => $dbh,
14494      db  => $db,
14495      tbl => $tbl,
14496   );
14497
14498   PTDEBUG && _d('Table exists: ', $tbl_exists ? 'yes' : 'no');
14499
14500   if ( !$tbl_exists && !$args{create_table} ) {
14501      die "Table $full_table does not exist and "
14502        . "--no-create-$type-table was specified.  "
14503        . "You need to create the table.\n";
14504   }
14505   else {
14506      PTDEBUG && _d($dbh, $create_table_sql);
14507      eval {
14508         $dbh->do($create_table_sql);
14509      };
14510      if ( $EVAL_ERROR && !$args{create_table} ) {
14511         warn $EVAL_ERROR;
14512         die "--$type history table $full_table does not exist and it cannot be "
14513           . "created automatically.  You need to create the table.\n"
14514      }
14515   }
14516}
14517
14518# TODO: This sub is poorly named since it does more than print reports:
14519# it aggregates, reports, does QueryReview stuff, etc.
14520sub print_reports {
14521   my ( %args ) = @_;
14522   my @required_args = qw(eas OptionParser);
14523   foreach my $arg ( @required_args ) {
14524      die "I need a $arg argument" unless $args{$arg};
14525   }
14526
14527   my ($o, $qv, $pipeline) = @args{qw(OptionParser QueryReview Pipeline)};
14528   my ($eas, $tls, $stats) = @args{qw(eas tls stats)};
14529   my $qh                  = $args{QueryHistory};
14530
14531   my @reports = @{$o->get('report-format')};
14532   my @groupby = @{$args{groupby}};
14533   my @orderby = @{$args{orderby}};
14534
14535   my $show_all = $o->get('show-all');
14536
14537   for my $i ( 0..$#groupby ) {
14538      if ( $o->get('report') || $qv || $qh ) {
14539         $eas->[$i]->calculate_statistical_metrics();
14540      }
14541
14542      my ($orderby_attrib, $orderby_func) = split(/:/, $orderby[$i]);
14543      $orderby_attrib = check_orderby_attrib($orderby_attrib, $eas->[$i], $o);
14544      PTDEBUG && _d('Doing reports for groupby', $groupby[$i], 'orderby',
14545         $orderby_attrib, $orderby_func);
14546
14547      my ($worst, $other) = get_worst_queries(
14548         OptionParser   => $o,
14549         ea             => $eas->[$i],
14550         orderby_attrib => $orderby_attrib,
14551         orderby_func   => $orderby_func,
14552         limit          => $o->get('limit')->[$i] || '95%:20',
14553         outliers       => $o->get('outliers')->[$i],
14554      );
14555
14556      if ( $o->get('report') ) {
14557         # XXX There's a bug here: --expected-range '','' will cause
14558         # Use of uninitialized value in numeric lt (<)
14559         # This bug is intentionally left unfixed at the moment because
14560         # we exploit it to test a more serious bug: an infinite loop:
14561         # https://bugs.launchpad.net/percona-toolkit/+bug/888114
14562         my $expected_range = $o->get('expected-range');
14563         my $explain_why    = $expected_range
14564                            && (   @$worst < $expected_range->[0]
14565                                || @$worst > $expected_range->[1]);
14566
14567         # Print a header for this groupby/class if we're doing the
14568         # standard query report and there's more than one class or
14569         # there's one class but it's not the normal class grouped
14570         # by fingerprint.
14571         my $print_header = 0;
14572         if ( (grep { $_ eq 'query_report'; } @{$o->get('report-format')})
14573              && (@groupby > 1 || $groupby[$i] ne 'fingerprint') ) {
14574            $print_header = 1;
14575         }
14576
14577         my $report_class = $o->get('output') =~ m/^json/i
14578                          ? 'JSONReportFormatter'
14579                          : 'QueryReportFormatter';
14580         my $qrf = $report_class->new(
14581            dbh                 => $ep_dbh,
14582            QueryReview         => $args{QueryReview},
14583            QueryRewriter       => $args{QueryRewriter},
14584            OptionParser        => $args{OptionParser},
14585            QueryParser         => $args{QueryParser},
14586            Quoter              => $args{Quoter},
14587            show_all            => $show_all,
14588            max_hostname_length => $o->get('max-hostname-length'),
14589            max_line_length     => $o->get('max-line-length'),
14590         );
14591
14592         $qrf->print_reports(
14593            reports      => \@reports,
14594            ea           => $eas->[$i],
14595            worst        => $worst,
14596            other        => $other,
14597            orderby      => $orderby_attrib,
14598            groupby      => $groupby[$i],
14599            print_header => $print_header,
14600            explain_why  => $explain_why,
14601            files        => $args{files},
14602            log_type     => $o->get('type')->[0],
14603            no_v_format  => !$o->get('vertical-format'),
14604            variations   => $o->get('variations'),
14605            group        => { map { $_=>1 } qw(rusage date hostname files header) },
14606            resume       => $resume,
14607            anon         => $o->get('output') eq 'json-anon',
14608         );
14609      }
14610
14611      if ( $qv ) {  # query review
14612         update_query_review_table(
14613            ea           => $eas->[$i],
14614            worst        => $worst,
14615            QueryReview  => $qv,
14616         );
14617      }
14618      if ( $qh ) { # query history
14619         update_query_history_table(
14620            ea           => $eas->[$i],
14621            worst        => $worst,
14622            QueryHistory => $qh,
14623         );
14624      }
14625
14626      if ( $o->get('timeline') ) {  # --timeline
14627         $tls->[$i]->report($tls->[$i]->results(), sub { print @_ });
14628         $tls->[$i]->reset_aggregated_data();
14629      }
14630
14631      $eas->[$i]->reset_aggregated_data();  # Reset for next iteration.
14632
14633      # Print header report only once.  So remove it from the
14634      # list of reports after the first groupby's reports.
14635      if ( $i == 0 ) {
14636         @reports = grep { $_ ne 'header' } @reports;
14637      }
14638
14639   } # Each groupby
14640
14641   if ( PTDEBUG ) {
14642      my $report = new ReportFormatter(
14643         line_width => 74,
14644      );
14645      $report->set_columns(
14646         { name => 'Process'                   },
14647         { name => 'Time',  right_justify => 1 },
14648         { name => 'Count', right_justify => 1 },
14649      );
14650      $report->title('Pipeline profile');
14651      my $instrument = $pipeline->instrumentation;
14652      my $total_time = $instrument->{Pipeline};
14653      foreach my $process_name ( $pipeline->processes() ) {
14654         my $t    = $instrument->{$process_name}->{time} || 0;
14655         my $tp   = sprintf('%.2f %4.1f%%', $t, $t / ($total_time || 1) * 100);
14656         $report->add_line($process_name, $tp,
14657            $instrument->{$process_name}->{count} || 0);
14658      }
14659      # Reset profile for next iteration.
14660      $pipeline->reset();
14661
14662      _d($report->get_report());
14663   }
14664
14665   return;
14666}
14667
14668# Catches signals so we can exit gracefully.
14669sub sig_int {
14670   my ( $signal ) = @_;
14671   if ( $oktorun ) {
14672      print STDERR "# Caught SIG$signal.\n";
14673      $oktorun = 0;
14674   }
14675   else {
14676      print STDERR "# Exiting on SIG$signal.\n";
14677      save_resume_offset();
14678      exit(1);
14679   }
14680}
14681
14682# Handle the special defaults for --review & --history
14683sub handle_special_defaults {
14684   my ($o, $opt) = @_;
14685   my $dsn = $o->get($opt);
14686   return unless $dsn;
14687
14688   my $para = $o->read_para_after(
14689      __FILE__, qr/MAGIC_default_${opt}_table/);
14690   my ($default_table) = $para =~ m/default table is C<([^>]+)>/;
14691   die "Error parsing special default for --$opt"
14692      unless $default_table;
14693   my ($D, $t) = Quoter->split_unquote($default_table);
14694   $dsn->{D} ||= $D;
14695   $dsn->{t} ||= $t;
14696
14697   return $dsn;
14698}
14699
14700sub make_alt_attrib {
14701   my ( $alt_attrib ) = @_;
14702   my @alts   = split('\|', $alt_attrib);
14703   my $attrib = shift @alts;
14704   PTDEBUG && _d('Primary attrib:', $attrib, 'aliases:', @alts);
14705   my @lines;
14706   push @lines,
14707      'sub { my ( $args ) = @_; ',
14708      'my $event = $args->{event}; ',
14709      "if ( exists \$event->{'$attrib'} ) { ",
14710      (map { "delete \$event->{'$_'}; "; } @alts),
14711      'return $args; }',
14712      # Primary attrib doesn't exist; look for alts
14713      (map {
14714         "if ( exists \$event->{'$_'} ) { "
14715         . "\$event->{'$attrib'} = \$event->{'$_'}; "
14716         . "delete \$event->{'$_'}; "
14717         . 'return $args; }';
14718      } @alts),
14719      'return $args; }';
14720   PTDEBUG && _d('attrib alias sub for', $attrib, ':', @lines);
14721   my $sub = eval join("\n", @lines);
14722   die if $EVAL_ERROR;
14723   return $sub;
14724}
14725
14726# Checks that the orderby attrib exists in the ea, returns the default
14727# orderby attrib if not.
14728sub check_orderby_attrib {
14729   my ( $orderby_attrib, $ea, $o ) = @_;
14730
14731   if ( !$ea->type_for($orderby_attrib) && $orderby_attrib ne 'Query_time' ) {
14732      my $default_orderby = $o->get_defaults()->{'order-by'};
14733
14734      # Print the notice only if the query report is being printed, too.
14735      if ( grep { $_ eq 'query_report' } @{$o->get('report-format')} ) {
14736         print "--order-by attribute $orderby_attrib doesn't exist, "
14737            . "using $default_orderby\n";
14738      }
14739
14740      # Fall back to the default orderby attrib.
14741      ( $orderby_attrib, undef ) = split(/:/, $default_orderby);
14742   }
14743
14744   PTDEBUG && _d('orderby attrib:', $orderby_attrib);
14745   return $orderby_attrib;
14746}
14747
14748# Read the fh and timeout after t seconds.
14749sub read_timeout {
14750   my ( $fh, $t ) = @_;
14751   return unless $fh;
14752   $t ||= 0;  # will reset alarm and cause read to wait forever
14753
14754   # Set the SIGALRM handler.
14755   my $mask   = POSIX::SigSet->new(&POSIX::SIGALRM);
14756   my $action = POSIX::SigAction->new(
14757      sub {
14758         # This sub is called when a SIGALRM is received.
14759         die 'read timeout';
14760      },
14761      $mask,
14762   );
14763   my $oldaction = POSIX::SigAction->new();
14764   sigaction(&POSIX::SIGALRM, $action, $oldaction);
14765
14766   my $res;
14767   eval {
14768      alarm $t;
14769      $res = <$fh>;
14770      alarm 0;
14771   };
14772   if ( $EVAL_ERROR ) {
14773      PTDEBUG && _d('Read error:', $EVAL_ERROR);
14774      die $EVAL_ERROR unless $EVAL_ERROR =~ m/read timeout/;
14775      $oktorun = 0;
14776      $res     = undef;  # res is a blank string after a timeout
14777   }
14778   return $res;
14779}
14780
14781sub get_cxn {
14782   my ( %args ) = @_;
14783   my @required_args = qw(dsn OptionParser DSNParser);
14784   foreach my $arg ( @required_args ) {
14785      die "I need a $arg argument" unless $args{$arg};
14786   }
14787   my ($dsn, $o, $dp) = @args{@required_args};
14788
14789   if ( $o->get('ask-pass') ) {
14790      $dsn->{p} = OptionParser::prompt_noecho("Enter password "
14791         . ($args{for} ? "for $args{for}: " : ": "));
14792   }
14793
14794   my $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), $args{opts});
14795   PTDEBUG && _d('Connected dbh', $dbh);
14796   return $dbh;
14797}
14798
14799sub get_worst_queries {
14800   my ( %args ) = @_;
14801   my $o              = $args{OptionParser};
14802   my $ea             = $args{ea};
14803   my $orderby_attrib = $args{orderby_attrib};
14804   my $orderby_func   = $args{orderby_func};
14805   my $limit          = $args{limit};
14806   my $outliers       = $args{outliers};
14807
14808   # We don't report on all queries, just the worst, i.e. the top
14809   # however many.
14810   my ($total, $count);
14811   if ( $limit =~ m/^\d+$/ ) {
14812      $count = $limit;
14813   }
14814   else {
14815      # It's a percentage, so grab as many as needed to get to
14816      # that % of the file.
14817      ($total, $count) = $limit =~ m/(\d+)/g;
14818      $total *= ($ea->results->{globals}->{$orderby_attrib}->{sum} || 0) / 100;
14819   }
14820   my %top_spec = (
14821      attrib  => $orderby_attrib,
14822      orderby => $orderby_func || 'cnt',
14823      total   => $total,
14824      count   => $count,
14825   );
14826   if ( $args{outliers} ) {
14827      @top_spec{qw(ol_attrib ol_limit ol_freq)}
14828         = split(/:/, $args{outliers});
14829   }
14830
14831   # The queries that will be reported.
14832   return $ea->top_events(%top_spec);
14833}
14834
14835sub update_query_review_table {
14836   my ( %args ) = @_;
14837   foreach my $arg ( qw(ea worst QueryReview) ) {
14838      die "I need a $arg argument" unless $args{$arg};
14839   }
14840   my $ea    = $args{ea};
14841   my $worst = $args{worst};
14842   my $qv    = $args{QueryReview};
14843
14844   my $attribs = $ea->get_attributes();
14845
14846   PTDEBUG && _d('Updating query review tables');
14847
14848   foreach my $worst_info ( @$worst ) {
14849      my $item        = $worst_info->[0];
14850      my $stats       = $ea->results->{classes}->{$item};
14851      my $sample      = $ea->results->{samples}->{$item};
14852      my $review_vals = $qv->get_review_info($item);
14853      $qv->set_review_info(
14854         fingerprint => $item,
14855         sample      => $sample->{arg} || '',
14856         first_seen  => $stats->{ts}->{min},
14857         last_seen   => $stats->{ts}->{max}
14858      );
14859   }
14860
14861   return;
14862}
14863
14864sub update_query_history_table {
14865   my ( %args ) = @_;
14866   foreach my $arg ( qw(ea worst QueryHistory) ) {
14867      die "I need a $arg argument" unless $args{$arg};
14868   }
14869   my $ea    = $args{ea};
14870   my $worst = $args{worst};
14871   my $qh    = $args{QueryHistory};
14872
14873   my $attribs = $ea->get_attributes();
14874
14875   PTDEBUG && _d('Updating query review tables');
14876
14877   foreach my $worst_info ( @$worst ) {
14878      my $item        = $worst_info->[0];
14879      my $sample      = $ea->results->{samples}->{$item};
14880
14881      my %history;
14882      foreach my $attrib ( @$attribs ) {
14883         $history{$attrib} = $ea->metrics(
14884            attrib => $attrib,
14885            where  => $item,
14886         );
14887      }
14888      $qh->set_review_history(
14889         $item, $sample->{arg} || '', %history);
14890   }
14891
14892   return;
14893}
14894
14895
14896# Sub: verify_run_time
14897#   Verify that the given run mode and run time are valid.  If the run mode
14898#   is "interval", the time boundary (in seconds) for the run time is returned
14899#   if valid.  Else, undef is returned because modes "clock" and "event" have
14900#   no boundaries that need to be verified.  In any case the sub will die if
14901#   something is invalid, so the caller should eval their call.  The eval
14902#   error message is suitable for <OptionParser::save_error()>.
14903#
14904# Parameters:
14905#   %args - Arguments
14906#
14907# Required Arguments:
14908#   run_mode - Name of run mode (e.g. "clock", "event" or "interval")
14909#   run_time - Run time in seconds
14910#
14911# Returns:
14912#   Time boundary in seconds if run mode and time are valid; dies if
14913#   they are not.  Time boundary is undef except for interval run mode.
14914sub verify_run_time {
14915   my ( %args ) = @_;
14916   my $run_mode = lc $args{run_mode};
14917   my $run_time = defined $args{run_time} ? lc $args{run_time} : undef;
14918   PTDEBUG && _d("Verifying run time mode", $run_mode, "and time", $run_time);
14919
14920   die "Invalid --run-time-mode: $run_mode\n"
14921      unless $run_mode =~ m/clock|event|interval/;
14922
14923   if ( defined $run_time && $run_time < 0 ) {
14924      die "--run-time must be greater than zero\n";
14925   }
14926
14927   my $boundary;
14928   if ( $run_mode eq 'interval' ) {
14929      if ( !defined $run_time || $run_time <= 0 ) {
14930         die "--run-time must be greater than zero for "
14931            . "--run-time-mode $run_mode\n";
14932      }
14933
14934      if ( $run_time > 86400 ) {  # 1 day
14935         # Make sure run time is a whole day and not something like 25h.
14936         if ( $run_time % 86400 ) {
14937            die "Invalid --run-time argument for --run-time-mode $run_mode; "
14938            . "see documentation.\n"
14939         }
14940         $boundary = $run_time;
14941      }
14942      else {
14943         # If run time is sub-minute (some amount of seconds), it should
14944         # divide evenly into minute boundaries.  If it's sub-minute
14945         # (some amount of minutes), it should divide evenly into hour
14946         # boundaries.  If it's sub-hour, it should divide eventy into
14947         # day boundaries.
14948         $boundary = $run_time <= 60   ? 60     # seconds divide into minutes
14949                   : $run_time <= 3600 ? 3600   # minutes divide into hours
14950                   :                     86400; # hours divide into days
14951         if ( $boundary % $run_time ) {
14952            die "Invalid --run-time argument for --run-time-mode $run_mode; "
14953               . "see documentation.\n"
14954         }
14955      }
14956   }
14957
14958   return $boundary;
14959}
14960
14961sub save_resume_offset {
14962   my (%args) = @_;
14963   my $last_event_offset = $args{last_event_offset};
14964
14965   if ( !$resume_file || !$offset ) {
14966      PTDEBUG && _d('Not saving resume offset because there is no '
14967         . 'resume file or offset:', $resume_file, $offset);
14968      return;
14969   }
14970
14971   PTDEBUG && _d('Saving resume at offset', $offset, 'to', $resume_file);
14972   open my $resume_fh, '>', $resume_file
14973      or die "Error opening $resume_file: $OS_ERROR";
14974
14975   if ( $resume->{simple} ) {
14976      print { $resume_fh } $offset, "\n";
14977      warn "\n# Saved resume file offset $offset to $resume_file\n";
14978   }
14979   else {
14980      # 2.2.3+ enhanced resume file
14981      $resume->{stop_offset} = defined $last_event_offset ? $last_event_offset
14982                             :                              $offset;
14983      foreach my $key ( sort keys %$resume ) {
14984         next if $key eq 'simple';
14985         print { $resume_fh } "$key=$resume->{$key}\n";
14986      }
14987      warn "\n# Saved resume file stop_offset $resume->{stop_offset} to "
14988         . "$resume_file\n";
14989   }
14990
14991   close $resume_fh
14992      or die "Error close $resume_file: $OS_ERROR";
14993
14994   return;
14995}
14996
14997sub sanitize_event {
14998   my ($event) = @_;
14999
15000   # Quoted and unquoted values should be treated the same
15001   # https://bugs.launchpad.net/percona-toolkit/+bug/1176010
15002   if ( $event->{db} ) {
15003      $event->{db} =~ s/^`//;
15004      $event->{db} =~ s/`$//;
15005   }
15006   if ( $event->{Schema} ) {
15007      $event->{Schema} =~ s/^`//;
15008      $event->{Schema} =~ s/`$//;
15009   }
15010
15011   return;
15012}
15013
15014# make an effort to check if file is a raw binlog
15015# (i.e. was not converted to text using mysqlbinlog)
15016sub is_raw_binlog {
15017   my $filename = shift;
15018
15019   return -B $filename;
15020}
15021
15022sub _d {
15023   my ($package, undef, $line) = caller 0;
15024   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
15025        map { defined $_ ? $_ : 'undef' }
15026        @_;
15027   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
15028}
15029
15030# ############################################################################
15031# Run the program.
15032# ############################################################################
15033if ( !caller ) { exit main(@ARGV); }
15034
150351; # Because this is a module as well as a script.
15036
15037# #############################################################################
15038# Documentation.
15039# #############################################################################
15040
15041=pod
15042
15043=head1 NAME
15044
15045pt-query-digest - Analyze MySQL queries from logs, processlist, and tcpdump.
15046
15047=head1 SYNOPSIS
15048
15049Usage: pt-query-digest [OPTIONS] [FILES] [DSN]
15050
15051pt-query-digest analyzes MySQL queries from slow, general, and binary log
15052files.  It can also analyze queries from C<SHOW PROCESSLIST> and MySQL
15053protocol data from tcpdump.  By default, queries are grouped by fingerprint
15054and reported in descending order of query time (i.e. the slowest queries
15055first).  If no C<FILES> are given, the tool reads C<STDIN>.  The optional
15056C<DSN> is used for certain options like L<"--since"> and L<"--until">.
15057
15058Report the slowest queries from C<slow.log>:
15059
15060   pt-query-digest slow.log
15061
15062Report the slowest queries from the processlist on host1:
15063
15064   pt-query-digest --processlist h=host1
15065
15066Capture MySQL protocol data with tcppdump, then report the slowest queries:
15067
15068   tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 > mysql.tcp.txt
15069
15070   pt-query-digest --type tcpdump mysql.tcp.txt
15071
15072Save query data from C<slow.log> to host2 for later review and trend analysis:
15073
15074   pt-query-digest --review h=host2 --no-report slow.log
15075
15076=head1 RISKS
15077
15078Percona Toolkit is mature, proven in the real world, and well tested,
15079but all database tools can pose a risk to the system and the database
15080server.  Before using this tool, please:
15081
15082=over
15083
15084=item * Read the tool's documentation
15085
15086=item * Review the tool's known L<"BUGS">
15087
15088=item * Test the tool on a non-production server
15089
15090=item * Backup your production server and verify the backups
15091
15092=back
15093
15094=head1 DESCRIPTION
15095
15096pt-query-digest is a sophisticated but easy to use tool for analyzing
15097MySQL queries.  It can analyze queries from MySQL slow, general, and binary
15098logs. (Binary logs must first be converted to text, see L<"--type">).
15099It can also use C<SHOW PROCESSLIST> and MySQL protocol data from tcpdump.
15100By default, the tool reports which queries are the slowest, and therefore
15101the most important to optimize.  More complex and custom-tailored reports
15102can be created by using options like L<"--group-by">, L<"--filter">, and
15103L<"--embedded-attributes">.
15104
15105Query analysis is a best-practice that should be done frequently.  To
15106make this easier, pt-query-digest has two features: query review
15107(L<"--review">) and query history (L<"--history">).  When the L<"--review">
15108option is used, all unique queries are saved to a database.  When the
15109tool is ran again with L<"--review">, queries marked as reviewed in
15110the database are not printed in the report.  This highlights new queries
15111that need to be reviewed.  When the L<"--history"> option is used,
15112query metrics (query time, lock time, etc.) for each unique query are
15113saved to database.  Each time the tool is ran with L<"--history">, the
15114more historical data is saved which can be used to trend and analyze
15115query performance over time.
15116
15117=head1 ATTRIBUTES
15118
15119pt-query-digest works on events, which are a collection of key-value pairs
15120called attributes.  You'll recognize most of the attributes right away:
15121C<Query_time>, C<Lock_time>, and so on.  You can just look at a slow log
15122and see them.  However, there are some that don't exist in the slow log,
15123and slow logs may actually include different kinds of attributes (for example,
15124you may have a server with the Percona patches).
15125
15126See L<"ATTRIBUTES REFERENCE"> near the end of this documentation for a list
15127of common and L<"--type"> specific attributes.  A familiarity with these
15128attributes is necessary for working with L<"--filter">,
15129L<"--ignore-attributes">, and other attribute-related options.
15130
15131With creative use of L<"--filter">, you can create new attributes derived
15132from existing attributes.  For example, to create an attribute called
15133C<Row_ratio> for examining the ratio of C<Rows_sent> to C<Rows_examined>,
15134specify a filter like:
15135
15136  --filter '($event->{Row_ratio} = $event->{Rows_sent} / ($event->{Rows_examined})) && 1'
15137
15138The C<&& 1> trick is needed to create a valid one-line syntax that is always
15139true, even if the assignment happens to evaluate false.  The new attribute will
15140automatically appears in the output:
15141
15142  # Row ratio        1.00    0.00      1    0.50      1    0.71    0.50
15143
15144Attributes created this way can be specified for L<"--order-by"> or any
15145option that requires an attribute.
15146
15147=head1 OUTPUT
15148
15149The default L<"--output"> is a query analysis report.  The L<"--[no]report">
15150option controls whether or not this report is printed.  Sometimes you may
15151want to parse all the queries but suppress the report, for example when using
15152L<"--review"> or L<"--history">.
15153
15154There is one paragraph for each class of query analyzed.  A "class" of queries
15155all have the same value for the L<"--group-by"> attribute which is
15156C<fingerprint> by default.  (See L<"ATTRIBUTES">.)  A fingerprint is an
15157abstracted version of the query text with literals removed, whitespace
15158collapsed, and so forth.  The report is formatted so it's easy to paste into
15159emails without wrapping, and all non-query lines begin with a comment, so you
15160can save it to a .sql file and open it in your favorite syntax-highlighting
15161text editor.  There is a response-time profile at the beginning.
15162
15163The output described here is controlled by L<"--report-format">.
15164That option allows you to specify what to print and in what order.
15165The default output in the default order is described here.
15166
15167The report, by default, begins with a paragraph about the entire analysis run
15168The information is very similar to what you'll see for each class of queries in
15169the log, but it doesn't have some information that would be too expensive to
15170keep globally for the analysis.  It also has some statistics about the code's
15171execution itself, such as the CPU and memory usage, the local date and time
15172of the run, and a list of input file read/parsed.
15173
15174Following this is the response-time profile over the events.  This is a
15175highly summarized view of the unique events in the detailed query report
15176that follows.  It contains the following columns:
15177
15178 Column        Meaning
15179 ============  ==========================================================
15180 Rank          The query's rank within the entire set of queries analyzed
15181 Query ID      The query's fingerprint
15182 Response time The total response time, and percentage of overall total
15183 Calls         The number of times this query was executed
15184 R/Call        The mean response time per execution
15185 V/M           The Variance-to-mean ratio of response time
15186 Item          The distilled query
15187
15188A final line whose rank is shown as MISC contains aggregate statistics on the
15189queries that were not included in the report, due to options such as
15190L<"--limit"> and L<"--outliers">.  For details on the variance-to-mean ratio,
15191please see http://en.wikipedia.org/wiki/Index_of_dispersion.
15192
15193Next, the detailed query report is printed.  Each query appears in a paragraph.
15194Here is a sample, slightly reformatted so 'perldoc' will not wrap lines in a
15195terminal.  The following will all be one paragraph, but we'll break it up for
15196commentary.
15197
15198 # Query 2: 0.01 QPS, 0.02x conc, ID 0xFDEA8D2993C9CAF3 at byte 160665
15199
15200This line identifies the sequential number of the query in the sort order
15201specified by L<"--order-by">.  Then there's the queries per second, and the
15202approximate concurrency for this query (calculated as a function of the timespan
15203and total Query_time).  Next there's a query ID.  This ID is a hex version of
15204the query's checksum in the database, if you're using L<"--review">.  You can
15205select the reviewed query's details from the database with a query like C<SELECT
15206.... WHERE checksum=0xFDEA8D2993C9CAF3>.
15207
15208If you are investigating the report and want to print out every sample of a
15209particular query, then the following L<"--filter"> may be helpful:
15210
15211   pt-query-digest slow.log           \
15212      --no-report                     \
15213      --output slowlog                \
15214      --filter '$event->{fingerprint} \
15215           && make_checksum($event->{fingerprint}) eq "FDEA8D2993C9CAF3"'
15216
15217Notice that you must remove the C<0x> prefix from the checksum.
15218
15219Finally, in case you want to find a sample of the query in the log file, there's
15220the byte offset where you can look.  (This is not always accurate, due to some
15221anomalies in the slow log format, but it's usually right.)  The position
15222refers to the worst sample, which we'll see more about below.
15223
15224Next is the table of metrics about this class of queries.
15225
15226 #           pct   total    min    max     avg     95%  stddev  median
15227 # Count       0       2
15228 # Exec time  13   1105s   552s   554s    553s    554s      2s    553s
15229 # Lock time   0   216us   99us  117us   108us   117us    12us   108us
15230 # Rows sent  20   6.26M  3.13M  3.13M   3.13M   3.13M   12.73   3.13M
15231 # Rows exam   0   6.26M  3.13M  3.13M   3.13M   3.13M   12.73   3.13M
15232
15233The first line is column headers for the table.  The percentage is the percent
15234of the total for the whole analysis run, and the total is the actual value of
15235the specified metric.  For example, in this case we can see that the query
15236executed 2 times, which is 13% of the total number of queries in the file.  The
15237min, max and avg columns are self-explanatory.  The 95% column shows the 95th
15238percentile; 95% of the values are less than or equal to this value.  The
15239standard deviation shows you how tightly grouped the values are.  The standard
15240deviation and median are both calculated from the 95th percentile, discarding
15241the extremely large values.
15242
15243The stddev, median and 95th percentile statistics are approximate.  Exact
15244statistics require keeping every value seen, sorting, and doing some
15245calculations on them.  This uses a lot of memory.  To avoid this, we keep 1000
15246buckets, each of them 5% bigger than the one before, ranging from .000001 up to
15247a very big number.  When we see a value we increment the bucket into which it
15248falls.  Thus we have fixed memory per class of queries.  The drawback is the
15249imprecision, which typically falls in the 5 percent range.
15250
15251Next we have statistics on the users, databases and time range for the query.
15252
15253 # Users       1   user1
15254 # Databases   2     db1(1), db2(1)
15255 # Time range 2008-11-26 04:55:18 to 2008-11-27 00:15:15
15256
15257The users and databases are shown as a count of distinct values, followed by the
15258values.  If there's only one, it's shown alone; if there are many, we show each
15259of the most frequent ones, followed by the number of times it appears.
15260
15261 # Query_time distribution
15262 #   1us
15263 #  10us
15264 # 100us
15265 #   1ms
15266 #  10ms  #####
15267 # 100ms  ####################
15268 #    1s  ##########
15269 #  10s+
15270
15271The execution times show a logarithmic chart of time clustering.  Each query
15272goes into one of the "buckets" and is counted up.  The buckets are powers of
15273ten.  The first bucket is all values in the "single microsecond range" -- that
15274is, less than 10us.  The second is "tens of microseconds," which is from 10us
15275up to (but not including) 100us; and so on.  The charted attribute can be
15276changed by specifying L<"--report-histogram"> but is limited to time-based
15277attributes.
15278
15279 # Tables
15280 #    SHOW TABLE STATUS LIKE 'table1'\G
15281 #    SHOW CREATE TABLE `table1`\G
15282 # EXPLAIN
15283 SELECT * FROM table1\G
15284
15285This section is a convenience: if you're trying to optimize the queries you see
15286in the slow log, you probably want to examine the table structure and size.
15287These are copy-and-paste-ready commands to do that.
15288
15289Finally, we see a sample of the queries in this class of query.  This is not a
15290random sample.  It is the query that performed the worst, according to the sort
15291order given by L<"--order-by">.  You will normally see a commented C<# EXPLAIN>
15292line just before it, so you can copy-paste the query to examine its EXPLAIN
15293plan. But for non-SELECT queries that isn't possible to do, so the tool tries to
15294transform the query into a roughly equivalent SELECT query, and adds that below.
15295
15296If you want to find this sample event in the log, use the offset mentioned
15297above, and something like the following:
15298
15299  tail -c +<offset> /path/to/file | head
15300
15301See also L<"--report-format">.
15302
15303=head1 QUERY REVIEW
15304
15305A query L<"--review"> is the process of storing all the query fingerprints
15306analyzed.  This has several benefits:
15307
15308=over
15309
15310=item *
15311
15312You can add metadata to classes of queries, such as marking them for follow-up,
15313adding notes to queries, or marking them with an issue ID for your issue
15314tracking system.
15315
15316=item *
15317
15318You can refer to the stored values on subsequent runs so you'll know whether
15319you've seen a query before.  This can help you cut down on duplicated work.
15320
15321=item *
15322
15323You can store historical data such as the row count, query times, and generally
15324anything you can see in the report.
15325
15326=back
15327
15328To use this feature, you run pt-query-digest with the L<"--review"> option.  It
15329will store the fingerprints and other information into the table you specify.
15330Next time you run it with the same option, it will do the following:
15331
15332=over
15333
15334=item *
15335
15336It won't show you queries you've already reviewed.  A query is considered to be
15337already reviewed if you've set a value for the C<reviewed_by> column.  (If you
15338want to see queries you've already reviewed, use the L<"--report-all"> option.)
15339
15340=item *
15341
15342Queries that you've reviewed, and don't appear in the output, will cause gaps in
15343the query number sequence in the first line of each paragraph.  And the value
15344you've specified for L<"--limit"> will still be honored.  So if you've reviewed all
15345queries in the top 10 and you ask for the top 10, you won't see anything in the
15346output.
15347
15348=item *
15349
15350If you want to see the queries you've already reviewed, you can specify
15351L<"--report-all">.  Then you'll see the normal analysis output, but you'll
15352also see the information from the review table, just below the execution time
15353graph.  For example,
15354
15355  # Review information
15356  #      comments: really bad IN() subquery, fix soon!
15357  #    first_seen: 2008-12-01 11:48:57
15358  #   jira_ticket: 1933
15359  #     last_seen: 2008-12-18 11:49:07
15360  #      priority: high
15361  #   reviewed_by: xaprb
15362  #   reviewed_on: 2008-12-18 15:03:11
15363
15364This metadata is useful because, as you analyze your queries, you get
15365your comments integrated right into the report.
15366
15367=back
15368
15369=head1 FINGERPRINTS
15370
15371A query fingerprint is the abstracted form of a query, which makes it possible
15372to group similar queries together.  Abstracting a query removes literal values,
15373normalizes whitespace, and so on.  For example, consider these two queries:
15374
15375  SELECT name, password FROM user WHERE id='12823';
15376  select name,   password from user
15377     where id=5;
15378
15379Both of those queries will fingerprint to
15380
15381  select name, password from user where id=?
15382
15383Once the query's fingerprint is known, we can then talk about a query as though
15384it represents all similar queries.
15385
15386What C<pt-query-digest> does is analogous to a GROUP BY statement in SQL.  (But
15387note that "multiple columns" doesn't define a multi-column grouping; it defines
15388multiple reports!) If your command-line looks like this,
15389
15390  pt-query-digest               \
15391      --group-by fingerprint    \
15392      --order-by Query_time:sum \
15393      --limit 10                \
15394      slow.log
15395
15396The corresponding pseudo-SQL looks like this:
15397
15398  SELECT WORST(query BY Query_time), SUM(Query_time), ...
15399  FROM /path/to/slow.log
15400  GROUP BY FINGERPRINT(query)
15401  ORDER BY SUM(Query_time) DESC
15402  LIMIT 10
15403
15404You can also use the value C<distill>, which is a kind of super-fingerprint.
15405See L<"--group-by"> for more.
15406
15407Query fingerprinting accommodates many special cases, which have proven
15408necessary in the real world.  For example, an C<IN> list with 5 literals
15409is really equivalent to one with 4 literals, so lists of literals are
15410collapsed to a single one.  If you find something that is not fingerprinted
15411properly, please submit a bug report with a reproducible test case.
15412
15413Here is a list of transformations during fingerprinting, which might not
15414be exhaustive:
15415
15416=over
15417
15418=item *
15419
15420Group all SELECT queries from mysqldump together, even if they are against
15421different tables.  The same applies to all queries from pt-table-checksum.
15422
15423=item *
15424
15425Shorten multi-value INSERT statements to a single VALUES() list.
15426
15427=item *
15428
15429Strip comments.
15430
15431=item *
15432
15433Abstract the databases in USE statements, so all USE statements are grouped
15434together.
15435
15436=item *
15437
15438Replace all literals, such as quoted strings.  For efficiency, the code that
15439replaces literal numbers is somewhat non-selective, and might replace some
15440things as numbers when they really are not.  Hexadecimal literals are also
15441replaced.  NULL is treated as a literal.  Numbers embedded in identifiers are
15442also replaced, so tables named similarly will be fingerprinted to the same
15443values (e.g. users_2009 and users_2010 will fingerprint identically).
15444
15445=item *
15446
15447Collapse all whitespace into a single space.
15448
15449=item *
15450
15451Lowercase the entire query.
15452
15453=item *
15454
15455Replace all literals inside of IN() and VALUES() lists with a single
15456placeholder, regardless of cardinality.
15457
15458=item *
15459
15460Collapse multiple identical UNION queries into a single one.
15461
15462=back
15463
15464=head1 OPTIONS
15465
15466This tool accepts additional command-line arguments.  Refer to the
15467L<"SYNOPSIS"> and usage information for details.
15468
15469=over
15470
15471=item --ask-pass
15472
15473Prompt for a password when connecting to MySQL.
15474
15475=item --attribute-aliases
15476
15477type: array; default: db|Schema
15478
15479List of attribute|alias,etc.
15480
15481Certain attributes have multiple names, like db and Schema.  If an event does
15482not have the primary attribute, pt-query-digest looks for an alias attribute.
15483If it finds an alias, it creates the primary attribute with the alias
15484attribute's value and removes the alias attribute.
15485
15486If the event has the primary attribute, all alias attributes are deleted.
15487
15488This helps simplify event attributes so that, for example, there will not
15489be report lines for both db and Schema.
15490
15491=item --attribute-value-limit
15492
15493type: int; default: 0
15494
15495A sanity limit for attribute values.
15496
15497This option deals with bugs in slow logging functionality that causes large
15498values for attributes.  If the attribute's value is bigger than this, the
15499last-seen value for that class of query is used instead.
15500Disabled by default.
15501
15502=item --charset
15503
15504short form: -A; type: string
15505
15506Default character set.  If the value is utf8, sets Perl's binmode on
15507STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and
15508runs SET NAMES UTF8 after connecting to MySQL.  Any other value sets
15509binmode on STDOUT without the utf8 layer, and runs SET NAMES after
15510connecting to MySQL.
15511
15512=item --config
15513
15514type: Array
15515
15516Read this comma-separated list of config files; if specified, this must be the
15517first option on the command line.
15518
15519=item --[no]continue-on-error
15520
15521default: yes
15522
15523Continue parsing even if there is an error.  The tool will not continue
15524forever: it stops once any process causes 100 errors, in which case there
15525is probably a bug in the tool or the input is invalid.
15526
15527=item --[no]create-history-table
15528
15529default: yes
15530
15531Create the L<"--history"> table if it does not exist.
15532
15533This option causes the table specified by L<"--history"> to be created
15534with the default structure shown in the documentation for L<"--history">.
15535
15536=item --[no]create-review-table
15537
15538default: yes
15539
15540Create the L<"--review"> table if it does not exist.
15541
15542This option causes the table specified by L<"--review"> to be created
15543with the default structure shown in the documentation for L<"--review">.
15544
15545=item --daemonize
15546
15547Fork to the background and detach from the shell.  POSIX
15548operating systems only.
15549
15550=item --database
15551
15552short form: -D; type: string
15553
15554Connect to this database.
15555
15556=item --defaults-file
15557
15558short form: -F; type: string
15559
15560Only read mysql options from the given file.  You must give an absolute pathname.
15561
15562=item --embedded-attributes
15563
15564type: array
15565
15566Two Perl regex patterns to capture pseudo-attributes embedded in queries.
15567
15568Embedded attributes might be special attribute-value pairs that you've hidden
15569in comments.  The first regex should match the entire set of attributes (in
15570case there are multiple).  The second regex should match and capture
15571attribute-value pairs from the first regex.
15572
15573For example, suppose your query looks like the following:
15574
15575  SELECT * from users -- file: /login.php, line: 493;
15576
15577You might run pt-query-digest with the following option:
15578
15579  pt-query-digest --embedded-attributes ' -- .*','(\w+): ([^\,]+)'
15580
15581The first regular expression captures the whole comment:
15582
15583  " -- file: /login.php, line: 493;"
15584
15585The second one splits it into attribute-value pairs and adds them to the event:
15586
15587   ATTRIBUTE  VALUE
15588   =========  ==========
15589   file       /login.php
15590   line       493
15591
15592B<NOTE>: All commas in the regex patterns must be escaped with \ otherwise
15593the pattern will break.
15594
15595=item --expected-range
15596
15597type: array; default: 5,10
15598
15599Explain items when there are more or fewer than expected.
15600
15601Defines the number of items expected to be seen in the report given by
15602L<"--[no]report">, as controlled by L<"--limit"> and L<"--outliers">.  If
15603there  are more or fewer items in the report, each one will explain why it was
15604included.
15605
15606=item --explain
15607
15608type: DSN
15609
15610Run EXPLAIN for the sample query with this DSN and print results.
15611
15612This works only when L<"--group-by"> includes fingerprint.  It causes
15613pt-query-digest to run EXPLAIN and include the output into the report.  For
15614safety, queries that appear to have a subquery that EXPLAIN will execute won't
15615be EXPLAINed.  Those are typically "derived table" queries of the form
15616
15617  select ... from ( select .... ) der;
15618
15619The EXPLAIN results are printed as a full vertical format in the event report,
15620which appears at the end of each event report in vertical style
15621(C<\G>) just like MySQL prints it.
15622
15623=item --filter
15624
15625type: string
15626
15627Discard events for which this Perl code doesn't return true.
15628
15629This option is a string of Perl code or a file containing Perl code that gets
15630compiled into a subroutine with one argument: $event.  This is a hashref.
15631If the given value is a readable file, then pt-query-digest reads the entire
15632file and uses its contents as the code.  The file should not contain
15633a shebang (#!/usr/bin/perl) line.
15634
15635If the code returns true, the chain of callbacks continues; otherwise it ends.
15636The code is the last statement in the subroutine other than C<return $event>.
15637The subroutine template is:
15638
15639  sub { $event = shift; filter && return $event; }
15640
15641Filters given on the command line are wrapped inside parentheses like like
15642C<( filter )>.  For complex, multi-line filters, you must put the code inside
15643a file so it will not be wrapped inside parentheses.  Either way, the filter
15644must produce syntactically valid code given the template.  For example, an
15645if-else branch given on the command line would not be valid:
15646
15647  --filter 'if () { } else { }'  # WRONG
15648
15649Since it's given on the command line, the if-else branch would be wrapped inside
15650parentheses which is not syntactically valid.  So to accomplish something more
15651complex like this would require putting the code in a file, for example
15652filter.txt:
15653
15654  my $event_ok; if (...) { $event_ok=1; } else { $event_ok=0; } $event_ok
15655
15656Then specify C<--filter filter.txt> to read the code from filter.txt.
15657
15658If the filter code won't compile, pt-query-digest will die with an error.
15659If the filter code does compile, an error may still occur at runtime if the
15660code tries to do something wrong (like pattern match an undefined value).
15661pt-query-digest does not provide any safeguards so code carefully!
15662
15663An example filter that discards everything but SELECT statements:
15664
15665  --filter '$event->{arg} =~ m/^select/i'
15666
15667This is compiled into a subroutine like the following:
15668
15669  sub { $event = shift; ( $event->{arg} =~ m/^select/i ) && return $event; }
15670
15671It is permissible for the code to have side effects (to alter C<$event>).
15672
15673See L<"ATTRIBUTES REFERENCE"> for a list of common and L<"--type"> specific
15674attributes.
15675
15676Here are more examples of filter code:
15677
15678=over
15679
15680=item Host/IP matches domain.com
15681
15682--filter '($event->{host} || $event->{ip} || "") =~ m/domain.com/'
15683
15684Sometimes MySQL logs the host where the IP is expected.  Therefore, we
15685check both.
15686
15687=item User matches john
15688
15689--filter '($event->{user} || "") =~ m/john/'
15690
15691=item More than 1 warning
15692
15693--filter '($event->{Warning_count} || 0) > 1'
15694
15695=item Query does full table scan or full join
15696
15697--filter '(($event->{Full_scan} || "") eq "Yes") || (($event->{Full_join} || "") eq "Yes")'
15698
15699=item Query was not served from query cache
15700
15701--filter '($event->{QC_Hit} || "") eq "No"'
15702
15703=item Query is 1 MB or larger
15704
15705--filter '$event->{bytes} >= 1_048_576'
15706
15707=back
15708
15709Since L<"--filter"> allows you to alter C<$event>, you can use it to do other
15710things, like create new attributes.  See L<"ATTRIBUTES"> for an example.
15711
15712=item --group-by
15713
15714type: Array; default: fingerprint
15715
15716Which attribute of the events to group by.
15717
15718In general, you can group queries into classes based on any attribute of the
15719query, such as C<user> or C<db>, which will by default show you which users
15720and which databases get the most C<Query_time>.  The default attribute,
15721C<fingerprint>, groups similar, abstracted queries into classes; see below
15722and see also L<"FINGERPRINTS">.
15723
15724A report is printed for each L<"--group-by"> value (unless C<--no-report> is
15725given).  Therefore, C<--group-by user,db> means "report on queries with the
15726same user and report on queries with the same db"; it does not mean "report
15727on queries with the same user and db."  See also L<"OUTPUT">.
15728
15729Every value must have a corresponding value in the same position in
15730L<"--order-by">.  However, adding values to L<"--group-by"> will automatically
15731add values to L<"--order-by">, for your convenience.
15732
15733There are several magical values that cause some extra data mining to happen
15734before the grouping takes place:
15735
15736=over
15737
15738=item fingerprint
15739
15740This causes events to be fingerprinted to abstract queries into
15741a canonical form, which is then used to group events together into a class.
15742See L<"FINGERPRINTS"> for more about fingerprinting.
15743
15744=item tables
15745
15746This causes events to be inspected for what appear to be tables, and
15747then aggregated by that.  Note that a query that contains two or more tables
15748will be counted as many times as there are tables; so a join against two tables
15749will count the Query_time against both tables.
15750
15751=item distill
15752
15753This is a sort of super-fingerprint that collapses queries down
15754into a suggestion of what they do, such as C<INSERT SELECT table1 table2>.
15755
15756=back
15757
15758=item --help
15759
15760Show help and exit.
15761
15762=item --history
15763
15764type: DSN
15765
15766Save metrics for each query class in the given table.  pt-query-digest saves
15767query metrics (query time, lock time, etc.) to this table so you can see how
15768query classes change over time.
15769
15770=for comment ignore-pt-internal-value
15771MAGIC_default_history_table
15772
15773The default table is C<percona_schema.query_history>.  Specify database
15774(D) and table (t) DSN options to override the default.  The database and
15775table are automatically created unless C<--no-create-history-table>
15776is specified (see L<"--[no]create-history-table">).
15777
15778pt-query-digest inspects the columns in the table.  The table must have at
15779least the following columns:
15780
15781  CREATE TABLE query_review_history (
15782    checksum     CHAR(32) NOT NULL,
15783    sample       TEXT NOT NULL
15784  );
15785
15786Any columns not mentioned above are inspected to see if they follow a certain
15787naming convention.  The column is special if the name ends with an underscore
15788followed by any of these values:
15789
15790=for comment ignore-pt-internal-value
15791MAGIC_history_columns
15792
15793  pct|avg|cnt|sum|min|max|pct_95|stddev|median|rank
15794
15795If the column ends with one of those values, then the prefix is interpreted as
15796the event attribute to store in that column, and the suffix is interpreted as
15797the metric to be stored.  For example, a column named C<Query_time_min> will be
15798used to store the minimum C<Query_time> for the class of events.
15799
15800The table should also have a primary key, but that is up to you, depending on
15801how you want to store the historical data.  We suggest adding ts_min and ts_max
15802columns and making them part of the primary key along with the checksum.  But
15803you could also just add a ts_min column and make it a DATE type, so you'd get
15804one row per class of queries per day.
15805
15806The following table definition is used for L<"--[no]create-history-table">:
15807
15808=for comment ignore-pt-internal-value
15809MAGIC_create_history_table
15810
15811 CREATE TABLE IF NOT EXISTS query_history (
15812   checksum             CHAR(32) NOT NULL,
15813   sample               TEXT NOT NULL,
15814   ts_min               DATETIME,
15815   ts_max               DATETIME,
15816   ts_cnt               FLOAT,
15817   Query_time_sum       FLOAT,
15818   Query_time_min       FLOAT,
15819   Query_time_max       FLOAT,
15820   Query_time_pct_95    FLOAT,
15821   Query_time_stddev    FLOAT,
15822   Query_time_median    FLOAT,
15823   Lock_time_sum        FLOAT,
15824   Lock_time_min        FLOAT,
15825   Lock_time_max        FLOAT,
15826   Lock_time_pct_95     FLOAT,
15827   Lock_time_stddev     FLOAT,
15828   Lock_time_median     FLOAT,
15829   Rows_sent_sum        FLOAT,
15830   Rows_sent_min        FLOAT,
15831   Rows_sent_max        FLOAT,
15832   Rows_sent_pct_95     FLOAT,
15833   Rows_sent_stddev     FLOAT,
15834   Rows_sent_median     FLOAT,
15835   Rows_examined_sum    FLOAT,
15836   Rows_examined_min    FLOAT,
15837   Rows_examined_max    FLOAT,
15838   Rows_examined_pct_95 FLOAT,
15839   Rows_examined_stddev FLOAT,
15840   Rows_examined_median FLOAT,
15841   -- Percona extended slowlog attributes
15842   -- http://www.percona.com/docs/wiki/patches:slow_extended
15843   Rows_affected_sum             FLOAT,
15844   Rows_affected_min             FLOAT,
15845   Rows_affected_max             FLOAT,
15846   Rows_affected_pct_95          FLOAT,
15847   Rows_affected_stddev          FLOAT,
15848   Rows_affected_median          FLOAT,
15849   Rows_read_sum                 FLOAT,
15850   Rows_read_min                 FLOAT,
15851   Rows_read_max                 FLOAT,
15852   Rows_read_pct_95              FLOAT,
15853   Rows_read_stddev              FLOAT,
15854   Rows_read_median              FLOAT,
15855   Merge_passes_sum              FLOAT,
15856   Merge_passes_min              FLOAT,
15857   Merge_passes_max              FLOAT,
15858   Merge_passes_pct_95           FLOAT,
15859   Merge_passes_stddev           FLOAT,
15860   Merge_passes_median           FLOAT,
15861   InnoDB_IO_r_ops_min           FLOAT,
15862   InnoDB_IO_r_ops_max           FLOAT,
15863   InnoDB_IO_r_ops_pct_95        FLOAT,
15864   InnoDB_IO_r_ops_stddev        FLOAT,
15865   InnoDB_IO_r_ops_median        FLOAT,
15866   InnoDB_IO_r_bytes_min         FLOAT,
15867   InnoDB_IO_r_bytes_max         FLOAT,
15868   InnoDB_IO_r_bytes_pct_95      FLOAT,
15869   InnoDB_IO_r_bytes_stddev      FLOAT,
15870   InnoDB_IO_r_bytes_median      FLOAT,
15871   InnoDB_IO_r_wait_min          FLOAT,
15872   InnoDB_IO_r_wait_max          FLOAT,
15873   InnoDB_IO_r_wait_pct_95       FLOAT,
15874   InnoDB_IO_r_wait_stddev       FLOAT,
15875   InnoDB_IO_r_wait_median       FLOAT,
15876   InnoDB_rec_lock_wait_min      FLOAT,
15877   InnoDB_rec_lock_wait_max      FLOAT,
15878   InnoDB_rec_lock_wait_pct_95   FLOAT,
15879   InnoDB_rec_lock_wait_stddev   FLOAT,
15880   InnoDB_rec_lock_wait_median   FLOAT,
15881   InnoDB_queue_wait_min         FLOAT,
15882   InnoDB_queue_wait_max         FLOAT,
15883   InnoDB_queue_wait_pct_95      FLOAT,
15884   InnoDB_queue_wait_stddev      FLOAT,
15885   InnoDB_queue_wait_median      FLOAT,
15886   InnoDB_pages_distinct_min     FLOAT,
15887   InnoDB_pages_distinct_max     FLOAT,
15888   InnoDB_pages_distinct_pct_95  FLOAT,
15889   InnoDB_pages_distinct_stddev  FLOAT,
15890   InnoDB_pages_distinct_median  FLOAT,
15891   -- Boolean (Yes/No) attributes.  Only the cnt and sum are needed
15892   -- for these.  cnt is how many times is attribute was recorded,
15893   -- and sum is how many of those times the value was Yes.  So
15894   -- sum/cnt * 100 equals the percentage of recorded times that
15895   -- the value was Yes.
15896   QC_Hit_cnt          FLOAT,
15897   QC_Hit_sum          FLOAT,
15898   Full_scan_cnt       FLOAT,
15899   Full_scan_sum       FLOAT,
15900   Full_join_cnt       FLOAT,
15901   Full_join_sum       FLOAT,
15902   Tmp_table_cnt       FLOAT,
15903   Tmp_table_sum       FLOAT,
15904   Tmp_table_on_disk_cnt FLOAT,
15905   Tmp_table_on_disk_sum FLOAT,
15906   Filesort_cnt          FLOAT,
15907   Filesort_sum          FLOAT,
15908   Filesort_on_disk_cnt  FLOAT,
15909   Filesort_on_disk_sum  FLOAT,
15910   PRIMARY KEY(checksum, ts_min, ts_max)
15911 );
15912
15913Note that we store the count (cnt) for the ts attribute only; it will be
15914redundant to store this for other attributes.
15915
15916Starting from Percona Toolkit 3.0.11, the checksum function has been updated to use 32 chars in the MD5 sum.
15917This causes the checksum field in the history table will have a different value than in the previous versions of the tool.
15918
15919=item --host
15920
15921short form: -h; type: string
15922
15923Connect to host.
15924
15925=item --ignore-attributes
15926
15927type: array; default: arg, cmd, insert_id, ip, port, Thread_id, timestamp, exptime, flags, key, res, val, server_id, offset, end_log_pos, Xid
15928
15929Do not aggregate these attributes.  Some attributes are not query metrics
15930but metadata which doesn't need to be (or can't be) aggregated.
15931
15932=item --inherit-attributes
15933
15934type: array; default: db,ts
15935
15936If missing, inherit these attributes from the last event that had them.
15937
15938This option sets which attributes are inherited or carried forward to events
15939which do not have them.  For example, if one event has the db attribute equal
15940to "foo", but the next event doesn't have the db attribute, then it inherits
15941"foo" for its db attribute.
15942
15943=item --interval
15944
15945type: float; default: .1
15946
15947How frequently to poll the processlist, in seconds.
15948
15949=item --iterations
15950
15951type: int; default: 1
15952
15953How many times to iterate through the collect-and-report cycle.  If 0, iterate
15954to infinity.  Each iteration runs for L<"--run-time"> amount of time.  An
15955iteration is usually determined by an amount of time and a report is printed
15956when that amount of time elapses.  With L<"--run-time-mode"> C<interval>,
15957an interval is instead determined by the interval time you specify with
15958L<"--run-time">.  See L<"--run-time"> and L<"--run-time-mode"> for more
15959information.
15960
15961=item --limit
15962
15963type: Array; default: 95%:20
15964
15965Limit output to the given percentage or count.
15966
15967If the argument is an integer, report only the top N worst queries.  If the
15968argument is an integer followed by the C<%> sign, report that percentage of the
15969worst queries.  If the percentage is followed by a colon and another integer,
15970report the top percentage or the number specified by that integer, whichever
15971comes first.
15972
15973The value is actually a comma-separated array of values, one for each item in
15974L<"--group-by">.  If you don't specify a value for any of those items, the
15975default is the top 95%.
15976
15977See also L<"--outliers">.
15978
15979=item --log
15980
15981type: string
15982
15983Print all output to this file when daemonized.
15984
15985=item --max-hostname-length
15986
15987type: int; default: 10
15988
15989Trim host names in reports to this length. 0=Do not trim host names.
15990
15991=item --max-line-length
15992
15993type: int; default: 74
15994
15995Trim lines to this length. 0=Do not trim lines.
15996
15997=item --order-by
15998
15999type: Array; default: Query_time:sum
16000
16001Sort events by this attribute and aggregate function.
16002
16003This is a comma-separated list of order-by expressions, one for each
16004L<"--group-by"> attribute.  The default C<Query_time:sum> is used for
16005L<"--group-by"> attributes without explicitly given L<"--order-by"> attributes
16006(that is, if you specify more L<"--group-by"> attributes than corresponding
16007L<"--order-by"> attributes).  The syntax is C<attribute:aggregate>.  See
16008L<"ATTRIBUTES"> for valid attributes.  Valid aggregates are:
16009
16010   Aggregate Meaning
16011   ========= ============================
16012   sum       Sum/total attribute value
16013   min       Minimum attribute value
16014   max       Maximum attribute value
16015   cnt       Frequency/count of the query
16016
16017For example, the default C<Query_time:sum> means that queries in the
16018query analysis report will be ordered (sorted) by their total query execution
16019time ("Exec time").  C<Query_time:max> orders the queries by their
16020maximum query execution time, so the query with the single largest
16021C<Query_time> will be list first.  C<cnt> refers more to the frequency
16022of the query as a whole, how often it appears; "Count" is its corresponding
16023line in the query analysis report.  So any attribute and C<cnt> should yield
16024the same report wherein queries are sorted by the number of times they
16025appear.
16026
16027When parsing general logs (L<"--type"> C<genlog>), the default L<"--order-by">
16028becomes C<Query_time:cnt>.  General logs do not report query times so only
16029the C<cnt> aggregate makes sense because all query times are zero.
16030
16031If you specify an attribute that doesn't exist in the events, then
16032pt-query-digest falls back to the default C<Query_time:sum> and prints a notice
16033at the beginning of the report for each query class.  You can create attributes
16034with L<"--filter"> and order by them; see L<"ATTRIBUTES"> for an example.
16035
16036=item --outliers
16037
16038type: array; default: Query_time:1:10
16039
16040Report outliers by attribute:percentile:count.
16041
16042The syntax of this option is a comma-separated list of colon-delimited strings.
16043The first field is the attribute by which an outlier is defined.  The second is
16044a number that is compared to the attribute's 95th percentile.  The third is
16045optional, and is compared to the attribute's cnt aggregate.  Queries that pass
16046this specification are added to the report, regardless of any limits you
16047specified in L<"--limit">.
16048
16049For example, to report queries whose 95th percentile Query_time is at least 60
16050seconds and which are seen at least 5 times, use the following argument:
16051
16052  --outliers Query_time:60:5
16053
16054You can specify an --outliers option for each value in L<"--group-by">.
16055
16056=item --output
16057
16058type: string; default: report
16059
16060How to format and print the query analysis results.  Accepted values are:
16061
16062   VALUE          FORMAT
16063   =======        ==============================
16064   report         Standard query analysis report
16065   slowlog        MySQL slow log
16066   json           JSON, on array per query class
16067   json-anon      JSON without example queries
16068   secure-slowlog JSON without example queries
16069
16070The entire C<report> output can be disabled by specifying C<--no-report>
16071(see L<"--[no]report">), and its sections can be disabled or rearranged
16072by specifying L<"--report-format">.
16073
16074C<json> output was introduced in 2.2.1 and is still in development,
16075so the data structure may change in future versions.
16076
16077=item --password
16078
16079short form: -p; type: string
16080
16081Password to use when connecting.
16082If password contains commas they must be escaped with a backslash: "exam\,ple"
16083
16084=item --pid
16085
16086type: string
16087
16088Create the given PID file.  The tool won't start if the PID file already
16089exists and the PID it contains is different than the current PID.  However,
16090if the PID file exists and the PID it contains is no longer running, the
16091tool will overwrite the PID file with the current PID.  The PID file is
16092removed automatically when the tool exits.
16093
16094=item --port
16095
16096short form: -P; type: int
16097
16098Port number to use for connection.
16099
16100=item --preserve-embedded-numbers
16101
16102Preserve numbers in database/table names when fingerprinting queries.
16103The standar fingeprint method replaces numbers in db/tables names, making
16104a query like 'SELECT * FROM db1.table2' to be figerprinted as 'SELECT * FROM db?.table?'.
16105This option changes that behaviour and the fingerprint will become
16106'SELECT * FROM db1.table2'.
16107
16108=item --processlist
16109
16110type: DSN
16111
16112Poll this DSN's processlist for queries, with L<"--interval"> sleep between.
16113
16114If the connection fails, pt-query-digest tries to reopen it once per second.
16115
16116=item --progress
16117
16118type: array; default: time,30
16119
16120Print progress reports to STDERR.  The value is a comma-separated list with two
16121parts.  The first part can be percentage, time, or iterations; the second part
16122specifies how often an update should be printed, in percentage, seconds, or
16123number of iterations.
16124
16125=item --read-timeout
16126
16127type: time; default: 0
16128
16129Wait this long for an event from the input; 0 to wait forever.
16130
16131This option sets the maximum time to wait for an event from the input.  It
16132applies to all types of input except L<"--processlist">.  If an
16133event is not received after the specified time, the script stops reading the
16134input and prints its reports.  If L<"--iterations"> is 0 or greater than
161351, the next iteration will begin, else the script will exit.
16136
16137This option requires the Perl POSIX module.
16138
16139=item --[no]report
16140
16141default: yes
16142
16143Print query analysis reports for each L<"--group-by"> attribute.  This is
16144the standard slow log analysis functionality.  See L<"OUTPUT"> for the
16145description of what this does and what the results look like.
16146
16147If you don't need a report (for example, when using L<"--review"> or
16148L<"--history">), it is best to specify C<--no-report> because this allows
16149the tool to skip some expensive operations.
16150
16151=item --report-all
16152
16153Report all queries, even ones that have been reviewed.  This only affects
16154the C<report> L<"--output"> when using L<"--review">.  Otherwise, all
16155queries are always printed.
16156
16157=item --report-format
16158
16159type: Array; default: rusage,date,hostname,files,header,profile,query_report,prepared
16160
16161Print these sections of the query analysis report.
16162
16163  SECTION      PRINTS
16164  ============ ======================================================
16165  rusage       CPU times and memory usage reported by ps
16166  date         Current local date and time
16167  hostname     Hostname of machine on which pt-query-digest was run
16168  files        Input files read/parse
16169  header       Summary of the entire analysis run
16170  profile      Compact table of queries for an overview of the report
16171  query_report Detailed information about each unique query
16172  prepared     Prepared statements
16173
16174The sections are printed in the order specified.  The rusage, date, files and
16175header sections are grouped together if specified together; other sections are
16176separated by blank lines.
16177
16178See L<"OUTPUT"> for more information on the various parts of the query report.
16179
16180=item --report-histogram
16181
16182type: string; default: Query_time
16183
16184Chart the distribution of this attribute's values.
16185
16186The distribution chart is limited to time-based attributes, so charting
16187C<Rows_examined>, for example, will produce a useless chart.  Charts look
16188like:
16189
16190  # Query_time distribution
16191  #   1us
16192  #  10us
16193  # 100us
16194  #   1ms
16195  #  10ms  ###########################
16196  # 100ms  ########################################################
16197  #    1s  ########
16198  #  10s+
16199
16200See L<"OUTPUT"> for more information.
16201
16202=item --resume
16203
16204type: string
16205
16206If specified, the tool writes the last file offset, if there is one,
16207to the given filename. When ran again with the same value for this option,
16208the tool reads the last file offset from the file, seeks to that position
16209in the log, and resumes parsing events from that point onward.
16210
16211=item --review
16212
16213type: DSN
16214
16215Save query classes for later review, and don't report already reviewed classes.
16216
16217=for comment ignore-pt-internal-value
16218MAGIC_default_review_table
16219
16220The default table is C<percona_schema.query_review>.  Specify database
16221(D) and table (t) DSN options to override the default.  The database and
16222table are automatically created unless C<--no-create-review-table>
16223is specified (see L<"--[no]create-review-table">).
16224
16225If the table was created manually, it must have at least the following columns.
16226You can add more columns for your own special purposes, but they won't be used
16227by pt-query-digest.
16228
16229=for comment ignore-pt-internal-value
16230MAGIC_create_review_table:
16231
16232  CREATE TABLE IF NOT EXISTS query_review (
16233     checksum     CHAR(32) NOT NULL PRIMARY KEY,
16234     fingerprint  TEXT NOT NULL,
16235     sample       TEXT NOT NULL,
16236     first_seen   DATETIME,
16237     last_seen    DATETIME,
16238     reviewed_by  VARCHAR(20),
16239     reviewed_on  DATETIME,
16240     comments     TEXT
16241  )
16242
16243The columns are:
16244
16245  COLUMN       MEANING
16246  ===========  ====================================================
16247  checksum     A 64-bit checksum of the query fingerprint
16248  fingerprint  The abstracted version of the query; its primary key
16249  sample       The query text of a sample of the class of queries
16250  first_seen   The smallest timestamp of this class of queries
16251  last_seen    The largest timestamp of this class of queries
16252  reviewed_by  Initially NULL; if set, query is skipped thereafter
16253  reviewed_on  Initially NULL; not assigned any special meaning
16254  comments     Initially NULL; not assigned any special meaning
16255
16256Note that the C<fingerprint> column is the true primary key for a class of
16257queries.  The C<checksum> is just a cryptographic hash of this value, which
16258provides a shorter value that is very likely to also be unique.
16259
16260After parsing and aggregating events, your table should contain a row for each
16261fingerprint.  This option depends on C<--group-by fingerprint> (which is the
16262default).  It will not work otherwise.
16263
16264=item --run-time
16265
16266type: time
16267
16268How long to run for each L<"--iterations">.  The default is to run forever
16269(you can interrupt with CTRL-C).  Because L<"--iterations"> defaults to 1,
16270if you only specify L<"--run-time">, pt-query-digest runs for that amount of
16271time and then exits.  The two options are specified together to do
16272collect-and-report cycles.  For example, specifying L<"--iterations"> C<4>
16273L<"--run-time"> C<15m> with a continuous input (like STDIN or
16274L<"--processlist">) will cause pt-query-digest to run for 1 hour
16275(15 minutes x 4), reporting four times, once at each 15 minute interval.
16276
16277=item --run-time-mode
16278
16279type: string; default: clock
16280
16281Set what the value of L<"--run-time"> operates on.  Following are the possible
16282values for this option:
16283
16284=over
16285
16286=item clock
16287
16288L<"--run-time"> specifies an amount of real clock time during which the tool
16289should run for each L<"--iterations">.
16290
16291=item event
16292
16293L<"--run-time"> specifies an amount of log time.  Log time is determined by
16294timestamps in the log.  The first timestamp seen is remembered, and each
16295timestamp after that is compared to the first to determine how much log time
16296has passed.  For example, if the first timestamp seen is C<12:00:00> and the
16297next is C<12:01:30>, that is 1 minute and 30 seconds of log time.  The tool
16298will read events until the log time is greater than or equal to the specified
16299L<"--run-time"> value.
16300
16301Since timestamps in logs are not always printed, or not always printed
16302frequently, this mode varies in accuracy.
16303
16304=item interval
16305
16306L<"--run-time"> specifies interval boundaries of log time into which events
16307are divided and reports are generated.  This mode is different from the
16308others because it doesn't specify how long to run.  The value of
16309L<"--run-time"> must be an interval that divides evenly into minutes, hours
16310or days.  For example, C<5m> divides evenly into hours (60/5=12, so 12
163115 minutes intervals per hour) but C<7m> does not (60/7=8.6).
16312
16313Specifying C<--run-time-mode interval --run-time 30m --iterations 0> is
16314similar to specifying C<--run-time-mode clock --run-time 30m --iterations 0>.
16315In the latter case, pt-query-digest will run forever, producing reports every
1631630 minutes, but this only works effectively with  continuous inputs like
16317STDIN and the processlist.  For fixed inputs, like log files, the former
16318example produces multiple reports by dividing the log into 30 minutes
16319intervals based on timestamps.
16320
16321Intervals are calculated from the zeroth second/minute/hour in which a
16322timestamp occurs, not from whatever time it specifies.  For example,
16323with 30 minute intervals and a timestamp of C<12:10:30>, the interval
16324is I<not> C<12:10:30> to C<12:40:30>, it is C<12:00:00> to C<12:29:59>.
16325Or, with 1 hour intervals, it is C<12:00:00> to C<12:59:59>.
16326When a new timestamp exceeds the interval, a report is printed, and the
16327next interval is recalculated based on the new timestamp.
16328
16329Since L<"--iterations"> is 1 by default, you probably want to specify
16330a new value else pt-query-digest will only get and report on the first
16331interval from the log since 1 interval = 1 iteration.  If you want to
16332get and report every interval in a log, specify L<"--iterations"> C<0>.
16333
16334=back
16335
16336=item --sample
16337
16338type: int
16339
16340Filter out all but the first N occurrences of each query.  The queries are
16341filtered on the first value in L<"--group-by">, so by default, this will filter
16342by query fingerprint.  For example, C<--sample 2> will permit two sample queries
16343for each fingerprint.  Useful in conjunction with C<--output slowlog> to print
16344the queries.  You probably want to set C<--no-report> to avoid the overhead of
16345aggregating and reporting if you're just using this to print out samples of
16346queries.  A complete example:
16347
16348  pt-query-digest --sample 2 --no-report --output slowlog slow.log
16349
16350=item --slave-user
16351
16352type: string
16353
16354Sets the user to be used to connect to the slaves.
16355This parameter allows you to have a different user with less privileges on the
16356slaves but that user must exist on all slaves.
16357
16358=item --slave-password
16359
16360type: string
16361
16362Sets the password to be used to connect to the slaves.
16363It can be used with --slave-user and the password for the user must be the same
16364on all slaves.
16365
16366=item --set-vars
16367
16368type: Array
16369
16370Set the MySQL variables in this comma-separated list of C<variable=value> pairs.
16371
16372By default, the tool sets:
16373
16374=for comment ignore-pt-internal-value
16375MAGIC_set_vars
16376
16377   wait_timeout=10000
16378
16379Variables specified on the command line override these defaults.  For
16380example, specifying C<--set-vars wait_timeout=500> overrides the defaultvalue of C<10000>.
16381
16382The tool prints a warning and continues if a variable cannot be set.
16383
16384=item --show-all
16385
16386type: Hash
16387
16388Show all values for these attributes.
16389
16390By default pt-query-digest only shows as many of an attribute's value that
16391fit on a single line.  This option allows you to specify attributes for which
16392all values will be shown (line width is ignored).  This only works for
16393attributes with string values like user, host, db, etc.  Multiple attributes
16394can be specified, comma-separated.
16395
16396=item --since
16397
16398type: string
16399
16400Parse only queries newer than this value (parse queries since this date).
16401
16402This option allows you to ignore queries older than a certain value and parse
16403only those queries which are more recent than the value.  The value can be
16404several types:
16405
16406  * Simple time value N with optional suffix: N[shmd], where
16407    s=seconds, h=hours, m=minutes, d=days (default s if no suffix
16408    given); this is like saying "since N[shmd] ago"
16409  * Full date with optional hours:minutes:seconds:
16410    YYYY-MM-DD [HH:MM:SS]
16411  * Short, MySQL-style date:
16412    YYMMDD [HH:MM:SS]
16413  * Any time expression evaluated by MySQL:
16414    CURRENT_DATE - INTERVAL 7 DAY
16415
16416If you give a MySQL time expression, and you have not also specified a DSN
16417for L<"--explain">, L<"--processlist">, or L<"--review">, then you must specify
16418a DSN on the command line so that pt-query-digest can connect to MySQL to
16419evaluate the expression.
16420
16421The MySQL time expression is wrapped inside a query like
16422"SELECT UNIX_TIMESTAMP(<expression>)", so be sure that the expression is
16423valid inside this query.  For example, do not use UNIX_TIMESTAMP() because
16424UNIX_TIMESTAMP(UNIX_TIMESTAMP()) returns 0.
16425
16426Events are assumed to be in chronological: older events at the beginning of
16427the log and newer events at the end of the log.  L<"--since"> is strict: it
16428ignores all queries until one is found that is new enough.  Therefore, if
16429the query events are not consistently timestamped, some may be ignored which
16430are actually new enough.
16431
16432See also L<"--until">.
16433
16434=item --socket
16435
16436short form: -S; type: string
16437
16438Socket file to use for connection.
16439
16440=item --timeline
16441
16442Show a timeline of events.
16443
16444This option makes pt-query-digest print another kind of report: a timeline of
16445the events.  Each query is still grouped and aggregate into classes according to
16446L<"--group-by">, but then they are printed in chronological order.  The timeline
16447report prints out the timestamp, interval, count and value of each classes.
16448
16449If all you want is the timeline report, then specify C<--no-report> to
16450suppress the default query analysis report.  Otherwise, the timeline report
16451will be printed at the end before the response-time profile
16452(see L<"--report-format"> and L<"OUTPUT">).
16453
16454For example, this:
16455
16456  pt-query-digest /path/to/log --group-by distill --timeline
16457
16458will print something like:
16459
16460  # ########################################################
16461  # distill report
16462  # ########################################################
16463  # 2009-07-25 11:19:27 1+00:00:01   2 SELECT foo
16464  # 2009-07-27 11:19:30      00:01   2 SELECT bar
16465  # 2009-07-27 11:30:00 1+06:30:00   2 SELECT foo
16466
16467=item --type
16468
16469type: Array; default: slowlog
16470
16471The type of input to parse.  The permitted types are
16472
16473=over
16474
16475=item binlog
16476
16477Parse a binary log file that has first been converted to text using mysqlbinlog.
16478
16479For example:
16480
16481   mysqlbinlog mysql-bin.000441 > mysql-bin.000441.txt
16482
16483   pt-query-digest --type binlog mysql-bin.000441.txt
16484
16485=item genlog
16486
16487Parse a MySQL general log file.  General logs lack a lot of L<"ATTRIBUTES">,
16488notably C<Query_time>.  The default L<"--order-by"> for general logs
16489changes to C<Query_time:cnt>.
16490
16491=item slowlog
16492
16493Parse a log file in any variation of MySQL slow log format.
16494
16495=item tcpdump
16496
16497Inspect network packets and decode the MySQL client protocol, extracting queries
16498and responses from it.
16499
16500pt-query-digest does not actually watch the network (i.e. it does NOT "sniff
16501packets").  Instead, it's just parsing the output of tcpdump.  You are
16502responsible for generating this output; pt-query-digest does not do it for you.
16503Then you send this to pt-query-digest as you would any log file: as files on the
16504command line or to STDIN.
16505
16506The parser expects the input to be formatted with the following options: C<-x -n
16507-q -tttt>.  For example, if you want to capture output from your local machine,
16508you can do something like the following (the port must come last on FreeBSD):
16509
16510  tcpdump -s 65535 -x -nn -q -tttt -i any -c 1000 port 3306 \
16511    > mysql.tcp.txt
16512  pt-query-digest --type tcpdump mysql.tcp.txt
16513
16514The other tcpdump parameters, such as -s, -c, and -i, are up to you.  Just make
16515sure the output looks like this (there is a line break in the first line to
16516avoid man-page problems):
16517
16518  2009-04-12 09:50:16.804849 IP 127.0.0.1.42167
16519         > 127.0.0.1.3306: tcp 37
16520      0x0000:  4508 0059 6eb2 4000 4006 cde2 7f00 0001
16521      0x0010:  ....
16522
16523Remember tcpdump has a handy -c option to stop after it captures some number of
16524packets!  That's very useful for testing your tcpdump command.  Note that
16525tcpdump can't capture traffic on a Unix socket.  Read
16526L<http://bugs.mysql.com/bug.php?id=31577> if you're confused about this.
16527
16528Devananda Van Der Veen explained on the MySQL Performance Blog how to capture
16529traffic without dropping packets on busy servers.  Dropped packets cause
16530pt-query-digest to miss the response to a request, then see the response to a
16531later request and assign the wrong execution time to the query.  You can change
16532the filter to something like the following to help capture a subset of the
16533queries.  (See L<http://www.mysqlperformanceblog.com/?p=6092> for details.)
16534
16535  tcpdump -i any -s 65535 -x -n -q -tttt \
16536     'port 3306 and tcp[1] & 7 == 2 and tcp[3] & 7 == 2'
16537
16538All MySQL servers running on port 3306 are automatically detected in the
16539tcpdump output.  Therefore, if the tcpdump out contains packets from
16540multiple servers on port 3306 (for example, 10.0.0.1:3306, 10.0.0.2:3306,
16541etc.), all packets/queries from all these servers will be analyzed
16542together as if they were one server.
16543
16544If you're analyzing traffic for a MySQL server that is not running on port
165453306, see L<"--watch-server">.
16546
16547Also note that pt-query-digest may fail to report the database for queries
16548when parsing tcpdump output.  The database is discovered only in the initial
16549connect events for a new client or when <USE db> is executed.  If the tcpdump
16550output contains neither of these, then pt-query-digest cannot discover the
16551database.
16552
16553Server-side prepared statements are supported.  SSL-encrypted traffic cannot be
16554inspected and decoded.
16555
16556=item rawlog
16557
16558Raw logs are not MySQL logs but simple text files with one SQL statement
16559per line, like:
16560
16561  SELECT c FROM t WHERE id=1
16562  /* Hello, world! */ SELECT * FROM t2 LIMIT 1
16563  INSERT INTO t (a, b) VALUES ('foo', 'bar')
16564  INSERT INTO t SELECT * FROM monkeys
16565
16566Since raw logs do not have any metrics, many options and features of
16567pt-query-digest do not work with them.
16568
16569One use case for raw logs is ranking queries by count when the only
16570information available is a list of queries, from polling C<SHOW PROCESSLIST>
16571for example.
16572
16573=back
16574
16575=item --until
16576
16577type: string
16578
16579Parse only queries older than this value (parse queries until this date).
16580
16581This option allows you to ignore queries newer than a certain value and parse
16582only those queries which are older than the value.  The value can be one of
16583the same types listed for L<"--since">.
16584
16585Unlike L<"--since">, L<"--until"> is not strict: all queries are parsed until
16586one has a timestamp that is equal to or greater than L<"--until">.  Then
16587all subsequent queries are ignored.
16588
16589=item --user
16590
16591short form: -u; type: string
16592
16593User for login if not current user.
16594
16595=item --variations
16596
16597type: Array
16598
16599Report the number of variations in these attributes' values.
16600
16601Variations show how many distinct values an attribute had within a class.
16602The usual value for this option is C<arg> which shows how many distinct queries
16603were in the class.  This can be useful to determine a query's cacheability.
16604
16605Distinct values are determined by CRC32 checksums of the attributes' values.
16606These checksums are reported in the query report for attributes specified by
16607this option, like:
16608
16609  # arg crc      109 (1/25%), 144 (1/25%)... 2 more
16610
16611In that class there were 4 distinct queries.  The checksums of the first two
16612variations are shown, and each one occurred once (or, 25% of the time).
16613
16614The counts of distinct variations is approximate because only 1,000 variations
16615are saved.  The mod (%) 1000 of the full CRC32 checksum is saved, so some
16616distinct checksums are treated as equal.
16617
16618=item --version
16619
16620Show version and exit.
16621
16622=item --[no]version-check
16623
16624default: yes
16625
16626Check for the latest version of Percona Toolkit, MySQL, and other programs.
16627
16628This is a standard "check for updates automatically" feature, with two
16629additional features.  First, the tool checks its own version and also the
16630versions of the following software: operating system, Percona Monitoring and
16631Management (PMM), MySQL, Perl, MySQL driver for Perl (DBD::mysql), and
16632Percona Toolkit. Second, it checks for and warns about versions with known
16633problems. For example, MySQL 5.5.25 had a critical bug and was re-released
16634as 5.5.25a.
16635
16636A secure connection to Percona’s Version Check database server is done to
16637perform these checks. Each request is logged by the server, including software
16638version numbers and unique ID of the checked system. The ID is generated by the
16639Percona Toolkit installation script or when the Version Check database call is
16640done for the first time.
16641
16642Any updates or known problems are printed to STDOUT before the tool's normal
16643output.  This feature should never interfere with the normal operation of the
16644tool.
16645
16646For more information, visit L<https://www.percona.com/doc/percona-toolkit/LATEST/version-check.html>.
16647
16648=item --[no]vertical-format
16649
16650default: yes
16651
16652Output a trailing "\G" in the reported SQL queries.
16653
16654This makes the mysql client display the result using vertical format.
16655Non-native MySQL clients like phpMyAdmin do not support this.
16656
16657=item --watch-server
16658
16659type: string
16660
16661This option tells pt-query-digest which server IP address and port (like
16662"10.0.0.1:3306") to watch when parsing tcpdump (for L<"--type"> tcpdump);
16663all other servers are ignored.  If you don't specify it,
16664pt-query-digest watches all servers by looking for any IP address using port
166653306 or "mysql".  If you're watching a server with a non-standard port, this
16666won't work, so you must specify the IP address and port to watch.
16667
16668If you want to watch a mix of servers, some running on standard port 3306
16669and some running on non-standard ports, you need to create separate
16670tcpdump outputs for the non-standard port servers and then specify this
16671option for each.  At present pt-query-digest cannot auto-detect servers on
16672port 3306 and also be told to watch a server on a non-standard port.
16673
16674=back
16675
16676=head1 DSN OPTIONS
16677
16678These DSN options are used to create a DSN.  Each option is given like
16679C<option=value>.  The options are case-sensitive, so P and p are not the
16680same option.  There cannot be whitespace before or after the C<=> and
16681if the value contains whitespace it must be quoted.  DSN options are
16682comma-separated.  See the L<percona-toolkit> manpage for full details.
16683
16684=over
16685
16686=item * A
16687
16688dsn: charset; copy: yes
16689
16690Default character set.
16691
16692=item * D
16693
16694dsn: database; copy: yes
16695
16696Default database to use when connecting to MySQL.
16697
16698=item * F
16699
16700dsn: mysql_read_default_file; copy: yes
16701
16702Only read default options from the given file.
16703
16704=item * h
16705
16706dsn: host; copy: yes
16707
16708Connect to host.
16709
16710=item * p
16711
16712dsn: password; copy: yes
16713
16714Password to use when connecting.
16715If password contains commas they must be escaped with a backslash: "exam\,ple"
16716
16717=item * P
16718
16719dsn: port; copy: yes
16720
16721Port number to use for connection.
16722
16723=item * S
16724
16725dsn: mysql_socket; copy: yes
16726
16727Socket file to use for connection.
16728
16729=item * t
16730
16731The L<"--review"> or L<"--history"> table.
16732
16733=item * u
16734
16735dsn: user; copy: yes
16736
16737User for login if not current user.
16738
16739=back
16740
16741=head1 ENVIRONMENT
16742
16743The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
16744To enable debugging and capture all output to a file, run the tool like:
16745
16746   PTDEBUG=1 pt-query-digest ... > FILE 2>&1
16747
16748Be careful: debugging output is voluminous and can generate several megabytes
16749of output.
16750
16751=head1 SYSTEM REQUIREMENTS
16752
16753You need Perl, DBI, DBD::mysql, and some core packages that ought to be
16754installed in any reasonably new version of Perl.
16755
16756=head1 BUGS
16757
16758For a list of known bugs, see L<http://www.percona.com/bugs/pt-query-digest>.
16759
16760Please report bugs at L<https://jira.percona.com/projects/PT>.
16761Include the following information in your bug report:
16762
16763=over
16764
16765=item * Complete command-line used to run the tool
16766
16767=item * Tool L<"--version">
16768
16769=item * MySQL version of all servers involved
16770
16771=item * Output from the tool including STDERR
16772
16773=item * Input files (log/dump/config files, etc.)
16774
16775=back
16776
16777If possible, include debugging output by running the tool with C<PTDEBUG>;
16778see L<"ENVIRONMENT">.
16779
16780=head1 DOWNLOADING
16781
16782Visit L<http://www.percona.com/software/percona-toolkit/> to download the
16783latest release of Percona Toolkit.  Or, get the latest release from the
16784command line:
16785
16786   wget percona.com/get/percona-toolkit.tar.gz
16787
16788   wget percona.com/get/percona-toolkit.rpm
16789
16790   wget percona.com/get/percona-toolkit.deb
16791
16792You can also get individual tools from the latest release:
16793
16794   wget percona.com/get/TOOL
16795
16796Replace C<TOOL> with the name of any tool.
16797
16798=head1 ATTRIBUTES REFERENCE
16799
16800Events may have the following attributes.  If writing a L<"--filter">,
16801be sure to check that an attribute is defined in each event before
16802using it, else the filter code may crash the tool with a
16803"use of uninitialized value" error.
16804
16805You can dump event attributes for any input like:
16806
16807  $ pt-query-digest                  \
16808      slow.log                       \
16809      --filter 'print Dumper $event' \
16810      --no-report                    \
16811      --sample 1
16812
16813That will produce a lot of output with "attribute => value" pairs like:
16814
16815   $VAR1 = {
16816     Query_time => '0.033384',
16817     Rows_examined => '0',
16818     Rows_sent => '0',
16819     Thread_id => '10',
16820     Tmp_table => 'No',
16821     Tmp_table_on_disk => 'No',
16822     arg => 'SELECT col FROM tbl WHERE id=5',
16823     bytes => 103,
16824     cmd => 'Query',
16825     db => 'db1',
16826     fingerprint => 'select col from tbl where id=?',
16827     host => '',
16828     pos_in_log => 1334,
16829     ts => '071218 11:48:27',
16830     user => '[SQL_SLAVE]'
16831   };
16832
16833=head2 COMMON
16834
16835These attribute are common to all input L<"--type"> and L<"--processlist">,
16836except where noted.
16837
16838=over
16839
16840=item arg
16841
16842The query text, or the command for admin commands like C<Ping>.
16843
16844=item bytes
16845
16846The byte length of the C<arg>.
16847
16848=item cmd
16849
16850"Query" or "Admin".
16851
16852=item db
16853
16854The current database.  The value comes from USE database statements.
16855By default, C<Schema> is an alias which is automatically
16856changed to C<db>; see L<"--attribute-aliases">.
16857
16858=item fingerprint
16859
16860An abstracted form of the query.  See L<"FINGERPRINTS">.
16861
16862=item host
16863
16864Client host which executed the query.
16865
16866=item pos_in_log
16867
16868The byte offset of the event in the log or tcpdump,
16869except for L<"--processlist">.
16870
16871=item Query_time
16872
16873The total time the query took, including lock time.
16874
16875=item ts
16876
16877The timestamp of when the query ended.
16878
16879=back
16880
16881=head2 SLOW, GENERAL, AND BINARY LOGS
16882
16883Events have all available attributes from the log file.  Therefore, you only
16884need to look at the log file to see which events are available, but remember:
16885not all events have the same attributes.
16886
16887Percona Server adds many attributes to the slow log; see
16888http://www.percona.com/docs/wiki/patches:slow_extended for more information.
16889
16890=head2 TCPDUMP
16891
16892These attributes are available when parsing L<"--type"> tcpdump.
16893
16894=over
16895
16896=item Error_no
16897
16898The MySQL error number if the query caused an error.
16899
16900=item ip
16901
16902The client's IP address.  Certain log files may also contain this attribute.
16903
16904=item No_good_index_used
16905
16906Yes or No if no good index existed for the query (flag set by server).
16907
16908=item No_index_used
16909
16910Yes or No if the query did not use any index (flag set by server).
16911
16912=item port
16913
16914The client's port number.
16915
16916=item Warning_count
16917
16918The number of warnings, as otherwise shown by C<SHOW WARNINGS>.
16919
16920=back
16921
16922=head2 PROCESSLIST
16923
16924If using L<"--processlist">, an C<id> attribute is available for
16925the process ID, in addition to the common attributes.
16926
16927=head1 AUTHORS
16928
16929Baron Schwartz, Daniel Nichter, and Brian Fraser
16930
16931=head1 ABOUT PERCONA TOOLKIT
16932
16933This tool is part of Percona Toolkit, a collection of advanced command-line
16934tools for MySQL developed by Percona.  Percona Toolkit was forked from two
16935projects in June, 2011: Maatkit and Aspersa.  Those projects were created by
16936Baron Schwartz and primarily developed by him and Daniel Nichter.  Visit
16937L<http://www.percona.com/software/> to learn about other free, open-source
16938software from Percona.
16939
16940=head1 COPYRIGHT, LICENSE, AND WARRANTY
16941
16942This program is copyright 2008-2018 Percona LLC and/or its affiliates.
16943
16944THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
16945WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
16946MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16947
16948This program is free software; you can redistribute it and/or modify it under
16949the terms of the GNU General Public License as published by the Free Software
16950Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
16951systems, you can issue `man perlgpl' or `man perlartistic' to read these
16952licenses.
16953
16954You should have received a copy of the GNU General Public License along with
16955this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16956Place, Suite 330, Boston, MA  02111-1307  USA.
16957
16958=head1 VERSION
16959
16960pt-query-digest 3.3.0
16961
16962=cut
16963