1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
2# Feedback and improvements are welcome.
3#
4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
7#
8# This program is free software; you can redistribute it and/or modify it under
9# the terms of the GNU General Public License as published by the Free Software
10# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
11# systems, you can issue `man perlgpl' or `man perlartistic' to read these
12# licenses.
13#
14# You should have received a copy of the GNU General Public License along with
15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16# Place, Suite 330, Boston, MA  02111-1307  USA.
17# ###########################################################################
18# TableParser package
19# ###########################################################################
20{
21# Package: TableParser
22# TableParser parses SHOW CREATE TABLE.
23#
24# Several subs in this module require either a $ddl or $tbl param.
25#
26# $tbl is the return value from the sub below, parse().
27#
28# And some subs have an optional $opts param which is a hashref of options.
29package TableParser;
30
31use strict;
32use warnings FATAL => 'all';
33use English qw(-no_match_vars);
34use constant PTDEBUG => $ENV{PTDEBUG} || 0;
35
36use Data::Dumper;
37$Data::Dumper::Indent    = 1;
38$Data::Dumper::Sortkeys  = 1;
39$Data::Dumper::Quotekeys = 0;
40
41local $EVAL_ERROR;
42eval {
43   require Quoter;
44};
45
46sub new {
47   my ( $class, %args ) = @_;
48   my $self = { %args };
49   $self->{Quoter} ||= Quoter->new();
50   return bless $self, $class;
51}
52
53sub Quoter { shift->{Quoter} }
54
55sub get_create_table {
56   my ( $self, $dbh, $db, $tbl ) = @_;
57   die "I need a dbh parameter" unless $dbh;
58   die "I need a db parameter"  unless $db;
59   die "I need a tbl parameter" unless $tbl;
60   my $q = $self->{Quoter};
61
62   # To ensure a consistent output, we save the current (old) SQL mode,
63   # then set it to the new SQL mode that what we need, which is the
64   # default sql_mode=''.  When done, even if an error occurs, we restore
65   # the old SQL mode.  The main thing is that we do not want ANSI_QUOTES
66   # because there's code all throughout the tools that expect backtick `
67   # quoted idents, not double-quote " quoted idents.  For example:
68   # https://bugs.launchpad.net/percona-toolkit/+bug/1058285
69   my $new_sql_mode
70      = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, }
71      . q{@@SQL_MODE := '', }
72      . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, }
73      . q{@@SQL_QUOTE_SHOW_CREATE := 1 */};
74
75   my $old_sql_mode
76      = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, }
77      . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */};
78
79   # Set new SQL mode.
80   PTDEBUG && _d($new_sql_mode);
81   eval { $dbh->do($new_sql_mode); };
82   PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
83
84   # Must USE the tbl's db because some bug with SHOW CREATE TABLE on a
85   # view when the current db isn't the view's db causes MySQL to crash.
86   my $use_sql = 'USE ' . $q->quote($db);
87   PTDEBUG && _d($dbh, $use_sql);
88   $dbh->do($use_sql);
89
90   my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
91   PTDEBUG && _d($show_sql);
92   my $href;
93   eval { $href = $dbh->selectrow_hashref($show_sql); };
94   if ( my $e = $EVAL_ERROR ) {
95      # Restore old SQL mode.
96      PTDEBUG && _d($old_sql_mode);
97      $dbh->do($old_sql_mode);
98
99      die $e;
100   }
101
102   # Restore old SQL mode.
103   PTDEBUG && _d($old_sql_mode);
104   $dbh->do($old_sql_mode);
105
106   # SHOW CREATE TABLE has at least 2 columns like:
107   # mysql> show create table city\G
108   # *************************** 1. row ***************************
109   #        Table: city
110   # Create Table: CREATE TABLE `city` (
111   #   `city_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT,
112   #   ...
113   # We want the second column.
114   my ($key) = grep { m/create (?:table|view)/i } keys %$href;
115   if ( !$key ) {
116      die "Error: no 'Create Table' or 'Create View' in result set from "
117         . "$show_sql: " . Dumper($href);
118   }
119
120   return $href->{$key};
121}
122
123# Sub: parse
124#   Parse SHOW CREATE TABLE.
125#
126# Returns:
127#   Hashref of table structure
128sub parse {
129   my ( $self, $ddl, $opts ) = @_;
130   return unless $ddl;
131
132   # If ANSI_QUOTES is enabled, we can't parse. But we can translate ANSI_QUOTES
133   # into legacy quoting with backticks. The rules are: an identifier is
134   # surrounded with the quote characters, and embedded quote characters are
135   # doubled.
136   if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) {
137      $ddl = $self->ansi_to_legacy($ddl);
138   }
139   elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
140      die "TableParser doesn't handle CREATE TABLE without quoting.";
141   }
142
143   my ($name)     = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
144   (undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
145
146   # Lowercase identifiers to avoid issues with case-sensitivity in Perl.
147   # (Bug #1910276).
148   $ddl =~ s/(`[^`\n]+`)/\L$1/gm;
149
150   my $engine = $self->get_engine($ddl);
151
152   my @defs   = $ddl =~ m/^(\s+`.*?),?$/gm;
153   my @cols   = map { $_ =~ m/`([^`]+)`/ } @defs;
154   PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
155
156   # Save the column definitions *exactly*
157   my %def_for;
158   @def_for{@cols} = @defs;
159
160   # Find column types, whether numeric, whether nullable, whether
161   # auto-increment.
162   my (@nums, @null, @non_generated);
163   my (%type_for, %is_nullable, %is_numeric, %is_autoinc, %is_generated);
164   foreach my $col ( @cols ) {
165      my $def = $def_for{$col};
166
167      # Remove literal backticks (``) because they're superfluous for parsing
168      # the col.
169      # https://bugs.launchpad.net/percona-toolkit/+bug/1462904
170      $def =~ s/``//g;
171
172      my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
173      die "Can't determine column type for $def" unless $type;
174      $type_for{$col} = $type;
175      if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
176         push @nums, $col;
177         $is_numeric{$col} = 1;
178      }
179      if ( $def !~ m/NOT NULL/ ) {
180         push @null, $col;
181         $is_nullable{$col} = 1;
182      }
183      if ( remove_quoted_text($def) =~ m/\WGENERATED\W/i ) {
184          $is_generated{$col} = 1;
185      } else {
186          push @non_generated, $col;
187      }
188      $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
189   }
190
191   # TODO: passing is_nullable this way is just a quick hack. Ultimately,
192   # we probably should decompose this sub further, taking out the block
193   # above that parses col props like nullability, auto_inc, type, etc.
194   my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
195
196   my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
197
198   return {
199      name               => $name,
200      cols               => \@cols,
201      col_posn           => { map { $cols[$_] => $_ } 0..$#cols },
202      is_col             => { map { $_ => 1 } @non_generated },
203      null_cols          => \@null,
204      is_nullable        => \%is_nullable,
205      non_generated_cols => \@non_generated,
206      is_autoinc         => \%is_autoinc,
207      is_generated       => \%is_generated,
208      clustered_key      => $clustered_key,
209      keys               => $keys,
210      defs               => \%def_for,
211      numeric_cols       => \@nums,
212      is_numeric         => \%is_numeric,
213      engine             => $engine,
214      type_for           => \%type_for,
215      charset            => $charset,
216   };
217}
218
219sub remove_quoted_text {
220   my ($string) = @_;
221   $string =~ s/[^\\]`[^`]*[^\\]`//g;
222   $string =~ s/[^\\]"[^"]*[^\\]"//g;
223   $string =~ s/[^\\]'[^']*[^\\]'//g;
224   return $string;
225}
226
227# Sorts indexes in this order: PRIMARY, unique, non-nullable, any (shortest
228# first, alphabetical).  Only BTREE indexes are considered.
229# TODO: consider length as # of bytes instead of # of columns.
230sub sort_indexes {
231   my ( $self, $tbl ) = @_;
232
233   my @indexes
234      = sort {
235         (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
236         || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
237         || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
238         || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
239      }
240      grep {
241         $tbl->{keys}->{$_}->{type} eq 'BTREE'
242      }
243      sort keys %{$tbl->{keys}};
244
245   PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
246   return @indexes;
247}
248
249# Finds the 'best' index; if the user specifies one, dies if it's not in the
250# table.
251sub find_best_index {
252   my ( $self, $tbl, $index ) = @_;
253   my $best;
254   if ( $index ) {
255      ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
256   }
257   if ( !$best ) {
258      if ( $index ) {
259         # The user specified an index, so we can't choose our own.
260         die "Index '$index' does not exist in table";
261      }
262      else {
263         # Try to pick the best index.
264         # TODO: eliminate indexes that have column prefixes.
265         ($best) = $self->sort_indexes($tbl);
266      }
267   }
268   PTDEBUG && _d('Best index found is', $best);
269   return $best;
270}
271
272# Takes a dbh, database, table, quoter, and WHERE clause, and reports the
273# indexes MySQL thinks are best for EXPLAIN SELECT * FROM that table.  If no
274# WHERE, just returns an empty list.  If no possible_keys, returns empty list,
275# even if 'key' is not null.  Only adds 'key' to the list if it's included in
276# possible_keys.
277sub find_possible_keys {
278   my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
279   return () unless $where;
280   my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
281      . ' WHERE ' . $where;
282   PTDEBUG && _d($sql);
283   my $expl = $dbh->selectrow_hashref($sql);
284   # Normalize columns to lowercase
285   $expl = { map { lc($_) => $expl->{$_} } keys %$expl };
286   if ( $expl->{possible_keys} ) {
287      PTDEBUG && _d('possible_keys =', $expl->{possible_keys});
288      my @candidates = split(',', $expl->{possible_keys});
289      my %possible   = map { $_ => 1 } @candidates;
290      if ( $expl->{key} ) {
291         PTDEBUG && _d('MySQL chose', $expl->{key});
292         unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
293         PTDEBUG && _d('Before deduping:', join(', ', @candidates));
294         my %seen;
295         @candidates = grep { !$seen{$_}++ } @candidates;
296      }
297      PTDEBUG && _d('Final list:', join(', ', @candidates));
298      return @candidates;
299   }
300   else {
301      PTDEBUG && _d('No keys in possible_keys');
302      return ();
303   }
304}
305
306# Required args:
307#   * dbh  dbh: active dbh
308#   * db   scalar: database name to check
309#   * tbl  scalar: table name to check
310# Optional args:
311#   * all_privs  bool: check for all privs (select,insert,update,delete)
312# Returns: bool
313# Can die: no
314# check_table() checks the given table for certain criteria and returns
315# true if all criteria are found, else it returns false.  The existence
316# of the table is always checked; if no optional args are given, then this
317# is the only check.  Any error causes a false return value (e.g. if the
318# table is crashed).
319sub check_table {
320   my ( $self, %args ) = @_;
321   my @required_args = qw(dbh db tbl);
322   foreach my $arg ( @required_args ) {
323      die "I need a $arg argument" unless $args{$arg};
324   }
325   my ($dbh, $db, $tbl) = @args{@required_args};
326   my $q      = $self->{Quoter} || 'Quoter';
327   my $db_tbl = $q->quote($db, $tbl);
328   PTDEBUG && _d('Checking', $db_tbl);
329
330   $self->{check_table_error} = undef;
331
332   my $sql = "SHOW TABLES FROM " . $q->quote($db)
333           . ' LIKE ' . $q->literal_like($tbl);
334   PTDEBUG && _d($sql);
335   my $row;
336   eval {
337      $row = $dbh->selectrow_arrayref($sql);
338   };
339   if ( my $e = $EVAL_ERROR ) {
340      PTDEBUG && _d($e);
341      $self->{check_table_error} = $e;
342      return 0;
343   }
344   if ( !$row->[0] || $row->[0] ne $tbl ) {
345      PTDEBUG && _d('Table does not exist');
346      return 0;
347   }
348
349   PTDEBUG && _d('Table', $db, $tbl, 'exists');
350   return 1;
351
352   # No more privs check:
353   # https://bugs.launchpad.net/percona-toolkit/+bug/1036747
354}
355
356sub get_engine {
357   my ( $self, $ddl, $opts ) = @_;
358   my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
359   PTDEBUG && _d('Storage engine:', $engine);
360   return $engine || undef;
361}
362
363# $ddl is a SHOW CREATE TABLE returned from get_create_table().
364# The general format of a key is
365# [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`).
366# Returns a hashref of keys and their properties and the clustered key (if
367# the engine is InnoDB):
368#   {
369#     key => {
370#       type         => BTREE, FULLTEXT or  SPATIAL
371#       name         => column name, like: "foo_key"
372#       colnames     => original col def string, like: "(`a`,`b`)"
373#       cols         => arrayref containing the col names, like: [qw(a b)]
374#       col_prefixes => arrayref containing any col prefixes (parallels cols)
375#       is_unique    => 1 if the col is UNIQUE or PRIMARY
376#       is_nullable  => true (> 0) if one or more col can be NULL
377#       is_col       => hashref with key for each col=>1
378#       ddl          => original key def string
379#     },
380#   },
381#   'PRIMARY',   # clustered key
382#
383# Foreign keys are ignored; use get_fks() instead.
384sub get_keys {
385   my ( $self, $ddl, $opts, $is_nullable ) = @_;
386   my $engine        = $self->get_engine($ddl);
387   my $keys          = {};
388   my $clustered_key = undef;
389
390   KEY:
391   foreach my $key ( $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm ) {
392
393      # If you want foreign keys, use get_fks() below.
394      next KEY if $key =~ m/FOREIGN/;
395
396      my $key_ddl = $key;
397      PTDEBUG && _d('Parsed key:', $key_ddl);
398
399      # Make allowances for HASH bugs in SHOW CREATE TABLE.  A non-MEMORY table
400      # will report its index as USING HASH even when this is not supported.
401      # The true type should be BTREE.  See
402      # http://bugs.mysql.com/bug.php?id=22632
403      # If ANSI quoting is in effect, we may not know the engine at all.
404      if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) {
405         $key =~ s/USING HASH/USING BTREE/;
406      }
407
408      # Determine index type
409      my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
410      my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
411      $type = $type || $special || 'BTREE';
412      my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
413      my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
414      my @cols;
415      my @col_prefixes;
416      foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
417         # Parse columns of index including potential column prefixes
418         # E.g.: `a`,`b`(20)
419         my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
420         push @cols, $name;
421         push @col_prefixes, $prefix;
422      }
423      $name =~ s/`//g;
424
425      PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
426
427      $keys->{$name} = {
428         name         => $name,
429         type         => $type,
430         colnames     => $cols,
431         cols         => \@cols,
432         col_prefixes => \@col_prefixes,
433         is_unique    => $unique,
434         is_nullable  => scalar(grep { $is_nullable->{$_} } @cols),
435         is_col       => { map { $_ => 1 } @cols },
436         ddl          => $key_ddl,
437      };
438
439      # Find clustered key (issue 295).
440      if ( ($engine || '') =~ m/InnoDB/i && !$clustered_key ) {
441         my $this_key = $keys->{$name};
442         if ( $this_key->{name} eq 'PRIMARY' ) {
443            $clustered_key = 'PRIMARY';
444         }
445         elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
446            $clustered_key = $this_key->{name};
447         }
448         PTDEBUG && $clustered_key && _d('This key is the clustered key');
449      }
450   }
451
452   return $keys, $clustered_key;
453}
454
455# Like get_keys() above but only returns a hash of foreign keys.
456sub get_fks {
457   my ( $self, $ddl, $opts ) = @_;
458   my $q   = $self->{Quoter};
459   my $fks = {};
460
461   foreach my $fk (
462      $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
463   {
464      my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
465      my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
466      my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
467
468      my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
469      my %parent_tbl = (tbl => $tbl);
470      $parent_tbl{db} = $db if $db;
471
472      if ( $parent !~ m/\./ && $opts->{database} ) {
473         $parent = $q->quote($opts->{database}) . ".$parent";
474      }
475
476      $fks->{$name} = {
477         name           => $name,
478         colnames       => $cols,
479         cols           => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
480         parent_tbl     => \%parent_tbl,
481         parent_tblname => $parent,
482         parent_cols    => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
483         parent_colnames=> $parent_cols,
484         ddl            => $fk,
485      };
486   }
487
488   return $fks;
489}
490
491# Removes the AUTO_INCREMENT property from the end of SHOW CREATE TABLE.  A
492# sample:
493# ) ENGINE=InnoDB AUTO_INCREMENT=201 DEFAULT CHARSET=utf8;
494sub remove_auto_increment {
495   my ( $self, $ddl ) = @_;
496   $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
497   return $ddl;
498}
499
500sub get_table_status {
501   my ( $self, $dbh, $db, $like ) = @_;
502   my $q = $self->{Quoter};
503   my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
504   my @params;
505   if ( $like ) {
506      $sql .= ' LIKE ?';
507      push @params, $like;
508   }
509   PTDEBUG && _d($sql, @params);
510   my $sth = $dbh->prepare($sql);
511   eval { $sth->execute(@params); };
512   if ($EVAL_ERROR) {
513      PTDEBUG && _d($EVAL_ERROR);
514      return;
515   }
516   my @tables = @{$sth->fetchall_arrayref({})};
517   @tables = map {
518      my %tbl; # Make a copy with lowercased keys
519      @tbl{ map { lc $_ } keys %$_ } = values %$_;
520      $tbl{engine} ||= $tbl{type} || $tbl{comment};
521      delete $tbl{type};
522      \%tbl;
523   } @tables;
524   return @tables;
525}
526
527# Translates ANSI quoting around SHOW CREATE TABLE (specifically this query's
528# output, not an arbitrary query) into legacy backtick-quoting.
529# DOESNT WORK: my $ansi_quote_re = qr/"(?:(?!(?<!")").)*"/;
530# DOESNT WORK: my $ansi_quote_re = qr/" [^\\"]* (?: (?:\\.|"") [^\\"]* )* "/ismx;
531my $ansi_quote_re = qr/" [^"]* (?: "" [^"]* )* (?<=.) "/ismx;
532sub ansi_to_legacy {
533   my ($self, $ddl) = @_;
534   $ddl =~ s/($ansi_quote_re)/ansi_quote_replace($1)/ge;
535   return $ddl;
536}
537
538# Translates a single string from ANSI quoting into legacy quoting by
539# un-doubling embedded double-double quotes, doubling backticks, and replacing
540# the delimiters.
541sub ansi_quote_replace {
542   my ($val) = @_;
543   $val =~ s/^"|"$//g;
544   $val =~ s/`/``/g;
545   $val =~ s/""/"/g;
546   return "`$val`";
547}
548
549sub _d {
550   my ($package, undef, $line) = caller 0;
551   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
552        map { defined $_ ? $_ : 'undef' }
553        @_;
554   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
555}
556
5571;
558}
559# ###########################################################################
560# End TableParser package
561# ###########################################################################
562