1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd. 2# Feedback and improvements are welcome. 3# 4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 7# 8# This program is free software; you can redistribute it and/or modify it under 9# the terms of the GNU General Public License as published by the Free Software 10# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar 11# systems, you can issue `man perlgpl' or `man perlartistic' to read these 12# licenses. 13# 14# You should have received a copy of the GNU General Public License along with 15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16# Place, Suite 330, Boston, MA 02111-1307 USA. 17# ########################################################################### 18# TableNibbler package 19# ########################################################################### 20{ 21# Package: TableNibbler 22# TableNibbler determines how to nibble a table by chunks of rows. 23package TableNibbler; 24 25use strict; 26use warnings FATAL => 'all'; 27use English qw(-no_match_vars); 28use constant PTDEBUG => $ENV{PTDEBUG} || 0; 29 30sub new { 31 my ( $class, %args ) = @_; 32 my @required_args = qw(TableParser Quoter); 33 foreach my $arg ( @required_args ) { 34 die "I need a $arg argument" unless $args{$arg}; 35 } 36 my $self = { %args }; 37 return bless $self, $class; 38} 39 40# Arguments are as follows: 41# * tbl_struct Hashref returned from TableParser::parse(). 42# * cols Arrayref of columns to SELECT from the table 43# * index Which index to ascend; optional. 44# * n_index_cols The number of left-most index columns to use. 45# * asc_only Whether to ascend strictly, that is, the WHERE clause for 46# the asc_stmt will fetch the next row > the given arguments. 47# The option is to fetch the row >=, which could loop 48# infinitely. Default is false. 49# 50# Returns a hashref of 51# * cols: columns in the select stmt, with required extras appended 52# * index: index chosen to ascend 53# * where: WHERE clause 54# * slice: col ordinals to pull from a row that will satisfy ? placeholders 55# * scols: ditto, but column names instead of ordinals 56# 57# In other words, 58# $first = $dbh->prepare <....>; 59# $next = $dbh->prepare <....>; 60# $row = $first->fetchrow_arrayref(); 61# $row = $next->fetchrow_arrayref(@{$row}[@slice]); 62sub generate_asc_stmt { 63 my ( $self, %args ) = @_; 64 my @required_args = qw(tbl_struct index); 65 foreach my $arg ( @required_args ) { 66 die "I need a $arg argument" unless defined $args{$arg}; 67 } 68 my ($tbl_struct, $index) = @args{@required_args}; 69 my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}; 70 my $q = $self->{Quoter}; 71 72 # This shouldn't happen. TableSyncNibble shouldn't call us with 73 # a nonexistent index. 74 die "Index '$index' does not exist in table" 75 unless exists $tbl_struct->{keys}->{$index}; 76 PTDEBUG && _d('Will ascend index', $index); 77 78 # These are the columns we'll ascend. 79 my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; 80 if ( $args{asc_first} ) { 81 PTDEBUG && _d('Ascending only first column'); 82 @asc_cols = $asc_cols[0]; 83 } 84 elsif ( my $n = $args{n_index_cols} ) { 85 $n = scalar @asc_cols if $n > @asc_cols; 86 PTDEBUG && _d('Ascending only first', $n, 'columns'); 87 @asc_cols = @asc_cols[0..($n-1)]; 88 } 89 PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); 90 91 # We found the columns by name, now find their positions for use as 92 # array slices, and make sure they are included in the SELECT list. 93 my @asc_slice; 94 my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; 95 foreach my $col ( @asc_cols ) { 96 if ( !exists $col_posn{$col} ) { 97 push @cols, $col; 98 $col_posn{$col} = $#cols; 99 } 100 push @asc_slice, $col_posn{$col}; 101 } 102 PTDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice)); 103 104 my $asc_stmt = { 105 cols => \@cols, 106 index => $index, 107 where => '', 108 slice => [], 109 scols => [], 110 }; 111 112 # ########################################################################## 113 # Figure out how to ascend the index by building a possibly complicated 114 # WHERE clause that will define a range beginning with a row retrieved by 115 # asc_stmt. If asc_only is given, the row's lower end should not include 116 # the row. 117 # ########################################################################## 118 if ( @asc_slice ) { 119 my $cmp_where; 120 foreach my $cmp ( qw(< <= >= >) ) { 121 # Generate all 4 types, then choose the right one. 122 $cmp_where = $self->generate_cmp_where( 123 type => $cmp, 124 slice => \@asc_slice, 125 cols => \@cols, 126 quoter => $q, 127 is_nullable => $tbl_struct->{is_nullable}, 128 type_for => $tbl_struct->{type_for}, 129 ); 130 $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where}; 131 } 132 my $cmp = $args{asc_only} ? '>' : '>='; 133 $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp}; 134 $asc_stmt->{slice} = $cmp_where->{slice}; 135 $asc_stmt->{scols} = $cmp_where->{scols}; 136 } 137 138 return $asc_stmt; 139} 140 141# Generates a multi-column version of a WHERE statement. It can generate >, 142# >=, < and <= versions. 143# Assuming >= and a non-NULLable two-column index, the WHERE clause should look 144# like this: 145# WHERE (col1 > ?) OR (col1 = ? AND col2 >= ?) 146# Ascending-only and nullable require variations on this. The general 147# pattern is (>), (= >), (= = >), (= = = >=). 148sub generate_cmp_where { 149 my ( $self, %args ) = @_; 150 foreach my $arg ( qw(type slice cols is_nullable) ) { 151 die "I need a $arg arg" unless defined $args{$arg}; 152 } 153 my @slice = @{$args{slice}}; 154 my @cols = @{$args{cols}}; 155 my $is_nullable = $args{is_nullable}; 156 my $type_for = $args{type_for}; 157 my $type = $args{type}; 158 my $q = $self->{Quoter}; 159 160 (my $cmp = $type) =~ s/=//; 161 162 my @r_slice; # Resulting slice columns, by ordinal 163 my @r_scols; # Ditto, by name 164 165 my @clauses; 166 foreach my $i ( 0 .. $#slice ) { 167 my @clause; 168 169 # Most of the clauses should be strict equality. 170 foreach my $j ( 0 .. $i - 1 ) { 171 my $ord = $slice[$j]; 172 my $col = $cols[$ord]; 173 my $quo = $q->quote($col); 174 my $val = ($col && ($type_for->{$col} || '')) eq 'enum' ? "CAST(? AS UNSIGNED)" : "?"; 175 if ( $is_nullable->{$col} ) { 176 push @clause, "(($val IS NULL AND $quo IS NULL) OR ($quo = $val))"; 177 push @r_slice, $ord, $ord; 178 push @r_scols, $col, $col; 179 } 180 else { 181 push @clause, "$quo = $val"; 182 push @r_slice, $ord; 183 push @r_scols, $col; 184 } 185 } 186 187 # The last clause in each parenthesized group should be > or <, unless 188 # it's the very last of the whole WHERE clause and we are doing "or 189 # equal," when it should be >= or <=. 190 my $ord = $slice[$i]; 191 my $col = $cols[$ord]; 192 my $quo = $q->quote($col); 193 my $end = $i == $#slice; # Last clause of the whole group. 194 my $val = ($col && ($type_for->{$col} || '')) eq 'enum' ? "CAST(? AS UNSIGNED)" : "?"; 195 if ( $is_nullable->{$col} ) { 196 if ( $type =~ m/=/ && $end ) { 197 push @clause, "($val IS NULL OR $quo $type $val)"; 198 } 199 elsif ( $type =~ m/>/ ) { 200 push @clause, "($val IS NULL AND $quo IS NOT NULL) OR ($quo $cmp $val)"; 201 } 202 else { # If $type =~ m/</ ) { 203 push @clauses, "(($val IS NOT NULL AND $quo IS NULL) OR ($quo $cmp $val))"; 204 } 205 push @r_slice, $ord, $ord; 206 push @r_scols, $col, $col; 207 } 208 else { 209 push @r_slice, $ord; 210 push @r_scols, $col; 211 push @clause, ($type =~ m/=/ && $end ? "$quo $type $val" : "$quo $cmp $val"); 212 } 213 214 # Add the clause to the larger WHERE clause. 215 push @clauses, '(' . join(' AND ', @clause) . ')' if @clause; 216 } 217 my $result = '(' . join(' OR ', @clauses) . ')'; 218 my $where = { 219 slice => \@r_slice, 220 scols => \@r_scols, 221 where => $result, 222 }; 223 return $where; 224} 225 226# Figure out how to delete rows. DELETE requires either an index or all 227# columns. For that reason you should call this before calling 228# generate_asc_stmt(), so you know what columns you'll need to fetch from the 229# table. Arguments: 230# * tbl_struct 231# * cols 232# * index 233# These are the same as the arguments to generate_asc_stmt(). Return value is 234# similar too. 235sub generate_del_stmt { 236 my ( $self, %args ) = @_; 237 238 my $tbl = $args{tbl_struct}; 239 my @cols = $args{cols} ? @{$args{cols}} : (); 240 my $tp = $self->{TableParser}; 241 my $q = $self->{Quoter}; 242 243 my @del_cols; 244 my @del_slice; 245 246 # ########################################################################## 247 # Detect the best or preferred index to use for the WHERE clause needed to 248 # delete the rows. 249 # ########################################################################## 250 my $index = $tp->find_best_index($tbl, $args{index}); 251 die "Cannot find an ascendable index in table" unless $index; 252 253 # These are the columns needed for the DELETE statement's WHERE clause. 254 if ( $index && $tbl->{keys}->{$index}->{is_unique}) { 255 @del_cols = @{$tbl->{keys}->{$index}->{cols}}; 256 } 257 else { 258 @del_cols = @{$tbl->{cols}}; 259 } 260 PTDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols)); 261 262 # We found the columns by name, now find their positions for use as 263 # array slices, and make sure they are included in the SELECT list. 264 my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; 265 foreach my $col ( @del_cols ) { 266 if ( !exists $col_posn{$col} ) { 267 push @cols, $col; 268 $col_posn{$col} = $#cols; 269 } 270 push @del_slice, $col_posn{$col}; 271 } 272 PTDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice)); 273 274 my $del_stmt = { 275 cols => \@cols, 276 index => $index, 277 where => '', 278 slice => [], 279 scols => [], 280 }; 281 282 # ########################################################################## 283 # Figure out how to target a single row with a WHERE clause. 284 # ########################################################################## 285 my @clauses; 286 foreach my $i ( 0 .. $#del_slice ) { 287 my $ord = $del_slice[$i]; 288 my $col = $cols[$ord]; 289 my $quo = $q->quote($col); 290 if ( $tbl->{is_nullable}->{$col} ) { 291 push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))"; 292 push @{$del_stmt->{slice}}, $ord, $ord; 293 push @{$del_stmt->{scols}}, $col, $col; 294 } 295 else { 296 push @clauses, "$quo = ?"; 297 push @{$del_stmt->{slice}}, $ord; 298 push @{$del_stmt->{scols}}, $col; 299 } 300 } 301 302 $del_stmt->{where} = '(' . join(' AND ', @clauses) . ')'; 303 304 return $del_stmt; 305} 306 307# Design an INSERT statement. This actually does very little; it just maps 308# the columns you know you'll get from the SELECT statement onto the columns 309# in the INSERT statement, returning only those that exist in both sets. 310# Arguments: 311# ins_tbl hashref returned by TableParser::parse() for the INSERT table 312# sel_cols arrayref of columns to SELECT from the SELECT table 313# Returns a hashref: 314# cols => arrayref of columns for INSERT 315# slice => arrayref of sel_cols indices corresponding to the INSERT cols 316# The cols array is used to construct the INSERT's INTO clause like: 317# INSERT INTO ins_tbl (@cols) VALUES ... 318# The slice array is used like: 319# $row = $sel_sth->fetchrow_arrayref(); 320# $ins_sth->execute(@{$row}[@slice]); 321# For example, if we select columns (a, b, c) but the insert table only 322# has columns (a, c), then the return hashref will be: 323# cols => [a, c] 324# slice => [0, 2] 325# Therefore, the select statement will return an array with 3 elements 326# (one for each column), but the insert statement will slice this array 327# to get only the elements/columns it needs. 328sub generate_ins_stmt { 329 my ( $self, %args ) = @_; 330 foreach my $arg ( qw(ins_tbl sel_cols) ) { 331 die "I need a $arg argument" unless $args{$arg}; 332 } 333 my $ins_tbl = $args{ins_tbl}; 334 my @sel_cols = @{$args{sel_cols}}; 335 336 die "You didn't specify any SELECT columns" unless @sel_cols; 337 338 my @ins_cols; 339 my @ins_slice; 340 for my $i ( 0..$#sel_cols ) { 341 next unless $ins_tbl->{is_col}->{$sel_cols[$i]}; 342 push @ins_cols, $sel_cols[$i]; 343 push @ins_slice, $i; 344 } 345 346 return { 347 cols => \@ins_cols, 348 slice => \@ins_slice, 349 }; 350} 351 352sub _d { 353 my ($package, undef, $line) = caller 0; 354 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } 355 map { defined $_ ? $_ : 'undef' } 356 @_; 357 print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; 358} 359 3601; 361} 362# ########################################################################### 363# End TableNibbler package 364# ########################################################################### 365