1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd. 2# Feedback and improvements are welcome. 3# 4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 7# 8# This program is free software; you can redistribute it and/or modify it under 9# the terms of the GNU General Public License as published by the Free Software 10# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar 11# systems, you can issue `man perlgpl' or `man perlartistic' to read these 12# licenses. 13# 14# You should have received a copy of the GNU General Public License along with 15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16# Place, Suite 330, Boston, MA 02111-1307 USA. 17# ########################################################################### 18# TableParser package 19# ########################################################################### 20{ 21# Package: TableParser 22# TableParser parses SHOW CREATE TABLE. 23# 24# Several subs in this module require either a $ddl or $tbl param. 25# 26# $tbl is the return value from the sub below, parse(). 27# 28# And some subs have an optional $opts param which is a hashref of options. 29package TableParser; 30 31use strict; 32use warnings FATAL => 'all'; 33use English qw(-no_match_vars); 34use constant PTDEBUG => $ENV{PTDEBUG} || 0; 35 36use Data::Dumper; 37$Data::Dumper::Indent = 1; 38$Data::Dumper::Sortkeys = 1; 39$Data::Dumper::Quotekeys = 0; 40 41local $EVAL_ERROR; 42eval { 43 require Quoter; 44}; 45 46sub new { 47 my ( $class, %args ) = @_; 48 my $self = { %args }; 49 $self->{Quoter} ||= Quoter->new(); 50 return bless $self, $class; 51} 52 53sub Quoter { shift->{Quoter} } 54 55sub get_create_table { 56 my ( $self, $dbh, $db, $tbl ) = @_; 57 die "I need a dbh parameter" unless $dbh; 58 die "I need a db parameter" unless $db; 59 die "I need a tbl parameter" unless $tbl; 60 my $q = $self->{Quoter}; 61 62 # To ensure a consistent output, we save the current (old) SQL mode, 63 # then set it to the new SQL mode that what we need, which is the 64 # default sql_mode=''. When done, even if an error occurs, we restore 65 # the old SQL mode. The main thing is that we do not want ANSI_QUOTES 66 # because there's code all throughout the tools that expect backtick ` 67 # quoted idents, not double-quote " quoted idents. For example: 68 # https://bugs.launchpad.net/percona-toolkit/+bug/1058285 69 my $new_sql_mode 70 = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, } 71 . q{@@SQL_MODE := '', } 72 . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, } 73 . q{@@SQL_QUOTE_SHOW_CREATE := 1 */}; 74 75 my $old_sql_mode 76 = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, } 77 . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */}; 78 79 # Set new SQL mode. 80 PTDEBUG && _d($new_sql_mode); 81 eval { $dbh->do($new_sql_mode); }; 82 PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); 83 84 # Must USE the tbl's db because some bug with SHOW CREATE TABLE on a 85 # view when the current db isn't the view's db causes MySQL to crash. 86 my $use_sql = 'USE ' . $q->quote($db); 87 PTDEBUG && _d($dbh, $use_sql); 88 $dbh->do($use_sql); 89 90 my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl); 91 PTDEBUG && _d($show_sql); 92 my $href; 93 eval { $href = $dbh->selectrow_hashref($show_sql); }; 94 if ( my $e = $EVAL_ERROR ) { 95 # Restore old SQL mode. 96 PTDEBUG && _d($old_sql_mode); 97 $dbh->do($old_sql_mode); 98 99 die $e; 100 } 101 102 # Restore old SQL mode. 103 PTDEBUG && _d($old_sql_mode); 104 $dbh->do($old_sql_mode); 105 106 # SHOW CREATE TABLE has at least 2 columns like: 107 # mysql> show create table city\G 108 # *************************** 1. row *************************** 109 # Table: city 110 # Create Table: CREATE TABLE `city` ( 111 # `city_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, 112 # ... 113 # We want the second column. 114 my ($key) = grep { m/create (?:table|view)/i } keys %$href; 115 if ( !$key ) { 116 die "Error: no 'Create Table' or 'Create View' in result set from " 117 . "$show_sql: " . Dumper($href); 118 } 119 120 return $href->{$key}; 121} 122 123# Sub: parse 124# Parse SHOW CREATE TABLE. 125# 126# Returns: 127# Hashref of table structure 128sub parse { 129 my ( $self, $ddl, $opts ) = @_; 130 return unless $ddl; 131 132 # If ANSI_QUOTES is enabled, we can't parse. But we can translate ANSI_QUOTES 133 # into legacy quoting with backticks. The rules are: an identifier is 134 # surrounded with the quote characters, and embedded quote characters are 135 # doubled. 136 if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) { 137 $ddl = $self->ansi_to_legacy($ddl); 138 } 139 elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) { 140 die "TableParser doesn't handle CREATE TABLE without quoting."; 141 } 142 143 my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/; 144 (undef, $name) = $self->{Quoter}->split_unquote($name) if $name; 145 146 # Lowercase identifiers to avoid issues with case-sensitivity in Perl. 147 # (Bug #1910276). 148 $ddl =~ s/(`[^`\n]+`)/\L$1/gm; 149 150 my $engine = $self->get_engine($ddl); 151 152 my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; 153 my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; 154 PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); 155 156 # Save the column definitions *exactly* 157 my %def_for; 158 @def_for{@cols} = @defs; 159 160 # Find column types, whether numeric, whether nullable, whether 161 # auto-increment. 162 my (@nums, @null, @non_generated); 163 my (%type_for, %is_nullable, %is_numeric, %is_autoinc, %is_generated); 164 foreach my $col ( @cols ) { 165 my $def = $def_for{$col}; 166 167 # Remove literal backticks (``) because they're superfluous for parsing 168 # the col. 169 # https://bugs.launchpad.net/percona-toolkit/+bug/1462904 170 $def =~ s/``//g; 171 172 my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/; 173 die "Can't determine column type for $def" unless $type; 174 $type_for{$col} = $type; 175 if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) { 176 push @nums, $col; 177 $is_numeric{$col} = 1; 178 } 179 if ( $def !~ m/NOT NULL/ ) { 180 push @null, $col; 181 $is_nullable{$col} = 1; 182 } 183 if ( remove_quoted_text($def) =~ m/\WGENERATED\W/i ) { 184 $is_generated{$col} = 1; 185 } else { 186 push @non_generated, $col; 187 } 188 $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0; 189 } 190 191 # TODO: passing is_nullable this way is just a quick hack. Ultimately, 192 # we probably should decompose this sub further, taking out the block 193 # above that parses col props like nullability, auto_inc, type, etc. 194 my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable); 195 196 my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/; 197 198 return { 199 name => $name, 200 cols => \@cols, 201 col_posn => { map { $cols[$_] => $_ } 0..$#cols }, 202 is_col => { map { $_ => 1 } @non_generated }, 203 null_cols => \@null, 204 is_nullable => \%is_nullable, 205 non_generated_cols => \@non_generated, 206 is_autoinc => \%is_autoinc, 207 is_generated => \%is_generated, 208 clustered_key => $clustered_key, 209 keys => $keys, 210 defs => \%def_for, 211 numeric_cols => \@nums, 212 is_numeric => \%is_numeric, 213 engine => $engine, 214 type_for => \%type_for, 215 charset => $charset, 216 }; 217} 218 219sub remove_quoted_text { 220 my ($string) = @_; 221 $string =~ s/[^\\]`[^`]*[^\\]`//g; 222 $string =~ s/[^\\]"[^"]*[^\\]"//g; 223 $string =~ s/[^\\]'[^']*[^\\]'//g; 224 return $string; 225} 226 227# Sorts indexes in this order: PRIMARY, unique, non-nullable, any (shortest 228# first, alphabetical). Only BTREE indexes are considered. 229# TODO: consider length as # of bytes instead of # of columns. 230sub sort_indexes { 231 my ( $self, $tbl ) = @_; 232 233 my @indexes 234 = sort { 235 (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY')) 236 || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} ) 237 || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} ) 238 || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) ) 239 } 240 grep { 241 $tbl->{keys}->{$_}->{type} eq 'BTREE' 242 } 243 sort keys %{$tbl->{keys}}; 244 245 PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes)); 246 return @indexes; 247} 248 249# Finds the 'best' index; if the user specifies one, dies if it's not in the 250# table. 251sub find_best_index { 252 my ( $self, $tbl, $index ) = @_; 253 my $best; 254 if ( $index ) { 255 ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}}; 256 } 257 if ( !$best ) { 258 if ( $index ) { 259 # The user specified an index, so we can't choose our own. 260 die "Index '$index' does not exist in table"; 261 } 262 else { 263 # Try to pick the best index. 264 # TODO: eliminate indexes that have column prefixes. 265 ($best) = $self->sort_indexes($tbl); 266 } 267 } 268 PTDEBUG && _d('Best index found is', $best); 269 return $best; 270} 271 272# Takes a dbh, database, table, quoter, and WHERE clause, and reports the 273# indexes MySQL thinks are best for EXPLAIN SELECT * FROM that table. If no 274# WHERE, just returns an empty list. If no possible_keys, returns empty list, 275# even if 'key' is not null. Only adds 'key' to the list if it's included in 276# possible_keys. 277sub find_possible_keys { 278 my ( $self, $dbh, $database, $table, $quoter, $where ) = @_; 279 return () unless $where; 280 my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table) 281 . ' WHERE ' . $where; 282 PTDEBUG && _d($sql); 283 my $expl = $dbh->selectrow_hashref($sql); 284 # Normalize columns to lowercase 285 $expl = { map { lc($_) => $expl->{$_} } keys %$expl }; 286 if ( $expl->{possible_keys} ) { 287 PTDEBUG && _d('possible_keys =', $expl->{possible_keys}); 288 my @candidates = split(',', $expl->{possible_keys}); 289 my %possible = map { $_ => 1 } @candidates; 290 if ( $expl->{key} ) { 291 PTDEBUG && _d('MySQL chose', $expl->{key}); 292 unshift @candidates, grep { $possible{$_} } split(',', $expl->{key}); 293 PTDEBUG && _d('Before deduping:', join(', ', @candidates)); 294 my %seen; 295 @candidates = grep { !$seen{$_}++ } @candidates; 296 } 297 PTDEBUG && _d('Final list:', join(', ', @candidates)); 298 return @candidates; 299 } 300 else { 301 PTDEBUG && _d('No keys in possible_keys'); 302 return (); 303 } 304} 305 306# Required args: 307# * dbh dbh: active dbh 308# * db scalar: database name to check 309# * tbl scalar: table name to check 310# Optional args: 311# * all_privs bool: check for all privs (select,insert,update,delete) 312# Returns: bool 313# Can die: no 314# check_table() checks the given table for certain criteria and returns 315# true if all criteria are found, else it returns false. The existence 316# of the table is always checked; if no optional args are given, then this 317# is the only check. Any error causes a false return value (e.g. if the 318# table is crashed). 319sub check_table { 320 my ( $self, %args ) = @_; 321 my @required_args = qw(dbh db tbl); 322 foreach my $arg ( @required_args ) { 323 die "I need a $arg argument" unless $args{$arg}; 324 } 325 my ($dbh, $db, $tbl) = @args{@required_args}; 326 my $q = $self->{Quoter} || 'Quoter'; 327 my $db_tbl = $q->quote($db, $tbl); 328 PTDEBUG && _d('Checking', $db_tbl); 329 330 $self->{check_table_error} = undef; 331 332 my $sql = "SHOW TABLES FROM " . $q->quote($db) 333 . ' LIKE ' . $q->literal_like($tbl); 334 PTDEBUG && _d($sql); 335 my $row; 336 eval { 337 $row = $dbh->selectrow_arrayref($sql); 338 }; 339 if ( my $e = $EVAL_ERROR ) { 340 PTDEBUG && _d($e); 341 $self->{check_table_error} = $e; 342 return 0; 343 } 344 if ( !$row->[0] || $row->[0] ne $tbl ) { 345 PTDEBUG && _d('Table does not exist'); 346 return 0; 347 } 348 349 PTDEBUG && _d('Table', $db, $tbl, 'exists'); 350 return 1; 351 352 # No more privs check: 353 # https://bugs.launchpad.net/percona-toolkit/+bug/1036747 354} 355 356sub get_engine { 357 my ( $self, $ddl, $opts ) = @_; 358 my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; 359 PTDEBUG && _d('Storage engine:', $engine); 360 return $engine || undef; 361} 362 363# $ddl is a SHOW CREATE TABLE returned from get_create_table(). 364# The general format of a key is 365# [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`). 366# Returns a hashref of keys and their properties and the clustered key (if 367# the engine is InnoDB): 368# { 369# key => { 370# type => BTREE, FULLTEXT or SPATIAL 371# name => column name, like: "foo_key" 372# colnames => original col def string, like: "(`a`,`b`)" 373# cols => arrayref containing the col names, like: [qw(a b)] 374# col_prefixes => arrayref containing any col prefixes (parallels cols) 375# is_unique => 1 if the col is UNIQUE or PRIMARY 376# is_nullable => true (> 0) if one or more col can be NULL 377# is_col => hashref with key for each col=>1 378# ddl => original key def string 379# }, 380# }, 381# 'PRIMARY', # clustered key 382# 383# Foreign keys are ignored; use get_fks() instead. 384sub get_keys { 385 my ( $self, $ddl, $opts, $is_nullable ) = @_; 386 my $engine = $self->get_engine($ddl); 387 my $keys = {}; 388 my $clustered_key = undef; 389 390 KEY: 391 foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { 392 393 # If you want foreign keys, use get_fks() below. 394 next KEY if $key =~ m/FOREIGN/; 395 396 my $key_ddl = $key; 397 PTDEBUG && _d('Parsed key:', $key_ddl); 398 399 # Make allowances for HASH bugs in SHOW CREATE TABLE. A non-MEMORY table 400 # will report its index as USING HASH even when this is not supported. 401 # The true type should be BTREE. See 402 # http://bugs.mysql.com/bug.php?id=22632 403 # If ANSI quoting is in effect, we may not know the engine at all. 404 if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) { 405 $key =~ s/USING HASH/USING BTREE/; 406 } 407 408 # Determine index type 409 my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/; 410 my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; 411 $type = $type || $special || 'BTREE'; 412 my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; 413 my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0; 414 my @cols; 415 my @col_prefixes; 416 foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) { 417 # Parse columns of index including potential column prefixes 418 # E.g.: `a`,`b`(20) 419 my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/; 420 push @cols, $name; 421 push @col_prefixes, $prefix; 422 } 423 $name =~ s/`//g; 424 425 PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols)); 426 427 $keys->{$name} = { 428 name => $name, 429 type => $type, 430 colnames => $cols, 431 cols => \@cols, 432 col_prefixes => \@col_prefixes, 433 is_unique => $unique, 434 is_nullable => scalar(grep { $is_nullable->{$_} } @cols), 435 is_col => { map { $_ => 1 } @cols }, 436 ddl => $key_ddl, 437 }; 438 439 # Find clustered key (issue 295). 440 if ( ($engine || '') =~ m/InnoDB/i && !$clustered_key ) { 441 my $this_key = $keys->{$name}; 442 if ( $this_key->{name} eq 'PRIMARY' ) { 443 $clustered_key = 'PRIMARY'; 444 } 445 elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) { 446 $clustered_key = $this_key->{name}; 447 } 448 PTDEBUG && $clustered_key && _d('This key is the clustered key'); 449 } 450 } 451 452 return $keys, $clustered_key; 453} 454 455# Like get_keys() above but only returns a hash of foreign keys. 456sub get_fks { 457 my ( $self, $ddl, $opts ) = @_; 458 my $q = $self->{Quoter}; 459 my $fks = {}; 460 461 foreach my $fk ( 462 $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg ) 463 { 464 my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/; 465 my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/; 466 my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/; 467 468 my ($db, $tbl) = $q->split_unquote($parent, $opts->{database}); 469 my %parent_tbl = (tbl => $tbl); 470 $parent_tbl{db} = $db if $db; 471 472 if ( $parent !~ m/\./ && $opts->{database} ) { 473 $parent = $q->quote($opts->{database}) . ".$parent"; 474 } 475 476 $fks->{$name} = { 477 name => $name, 478 colnames => $cols, 479 cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ], 480 parent_tbl => \%parent_tbl, 481 parent_tblname => $parent, 482 parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ], 483 parent_colnames=> $parent_cols, 484 ddl => $fk, 485 }; 486 } 487 488 return $fks; 489} 490 491# Removes the AUTO_INCREMENT property from the end of SHOW CREATE TABLE. A 492# sample: 493# ) ENGINE=InnoDB AUTO_INCREMENT=201 DEFAULT CHARSET=utf8; 494sub remove_auto_increment { 495 my ( $self, $ddl ) = @_; 496 $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m; 497 return $ddl; 498} 499 500sub get_table_status { 501 my ( $self, $dbh, $db, $like ) = @_; 502 my $q = $self->{Quoter}; 503 my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db); 504 my @params; 505 if ( $like ) { 506 $sql .= ' LIKE ?'; 507 push @params, $like; 508 } 509 PTDEBUG && _d($sql, @params); 510 my $sth = $dbh->prepare($sql); 511 eval { $sth->execute(@params); }; 512 if ($EVAL_ERROR) { 513 PTDEBUG && _d($EVAL_ERROR); 514 return; 515 } 516 my @tables = @{$sth->fetchall_arrayref({})}; 517 @tables = map { 518 my %tbl; # Make a copy with lowercased keys 519 @tbl{ map { lc $_ } keys %$_ } = values %$_; 520 $tbl{engine} ||= $tbl{type} || $tbl{comment}; 521 delete $tbl{type}; 522 \%tbl; 523 } @tables; 524 return @tables; 525} 526 527# Translates ANSI quoting around SHOW CREATE TABLE (specifically this query's 528# output, not an arbitrary query) into legacy backtick-quoting. 529# DOESNT WORK: my $ansi_quote_re = qr/"(?:(?!(?<!")").)*"/; 530# DOESNT WORK: my $ansi_quote_re = qr/" [^\\"]* (?: (?:\\.|"") [^\\"]* )* "/ismx; 531my $ansi_quote_re = qr/" [^"]* (?: "" [^"]* )* (?<=.) "/ismx; 532sub ansi_to_legacy { 533 my ($self, $ddl) = @_; 534 $ddl =~ s/($ansi_quote_re)/ansi_quote_replace($1)/ge; 535 return $ddl; 536} 537 538# Translates a single string from ANSI quoting into legacy quoting by 539# un-doubling embedded double-double quotes, doubling backticks, and replacing 540# the delimiters. 541sub ansi_quote_replace { 542 my ($val) = @_; 543 $val =~ s/^"|"$//g; 544 $val =~ s/`/``/g; 545 $val =~ s/""/"/g; 546 return "`$val`"; 547} 548 549sub _d { 550 my ($package, undef, $line) = caller 0; 551 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } 552 map { defined $_ ? $_ : 'undef' } 553 @_; 554 print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; 555} 556 5571; 558} 559# ########################################################################### 560# End TableParser package 561# ########################################################################### 562