1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
2# Feedback and improvements are welcome.
3#
4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
7#
8# This program is free software; you can redistribute it and/or modify it under
9# the terms of the GNU General Public License as published by the Free Software
10# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
11# systems, you can issue `man perlgpl' or `man perlartistic' to read these
12# licenses.
13#
14# You should have received a copy of the GNU General Public License along with
15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16# Place, Suite 330, Boston, MA  02111-1307  USA.
17# ###########################################################################
18# TableNibbler package
19# ###########################################################################
20{
21# Package: TableNibbler
22# TableNibbler determines how to nibble a table by chunks of rows.
23package TableNibbler;
24
25use strict;
26use warnings FATAL => 'all';
27use English qw(-no_match_vars);
28use constant PTDEBUG => $ENV{PTDEBUG} || 0;
29
30sub new {
31   my ( $class, %args ) = @_;
32   my @required_args = qw(TableParser Quoter);
33   foreach my $arg ( @required_args ) {
34      die "I need a $arg argument" unless $args{$arg};
35   }
36   my $self = { %args };
37   return bless $self, $class;
38}
39
40# Arguments are as follows:
41# * tbl_struct    Hashref returned from TableParser::parse().
42# * cols          Arrayref of columns to SELECT from the table
43# * index         Which index to ascend; optional.
44# * n_index_cols  The number of left-most index columns to use.
45# * asc_only      Whether to ascend strictly, that is, the WHERE clause for
46#                 the asc_stmt will fetch the next row > the given arguments.
47#                 The option is to fetch the row >=, which could loop
48#                 infinitely.  Default is false.
49#
50# Returns a hashref of
51#   * cols:  columns in the select stmt, with required extras appended
52#   * index: index chosen to ascend
53#   * where: WHERE clause
54#   * slice: col ordinals to pull from a row that will satisfy ? placeholders
55#   * scols: ditto, but column names instead of ordinals
56#
57# In other words,
58#   $first = $dbh->prepare <....>;
59#   $next  = $dbh->prepare <....>;
60#   $row = $first->fetchrow_arrayref();
61#   $row = $next->fetchrow_arrayref(@{$row}[@slice]);
62sub generate_asc_stmt {
63   my ( $self, %args ) = @_;
64   my @required_args = qw(tbl_struct index);
65   foreach my $arg ( @required_args ) {
66      die "I need a $arg argument" unless defined $args{$arg};
67   }
68   my ($tbl_struct, $index) = @args{@required_args};
69   my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
70   my $q    = $self->{Quoter};
71
72   # This shouldn't happen.  TableSyncNibble shouldn't call us with
73   # a nonexistent index.
74   die "Index '$index' does not exist in table"
75      unless exists $tbl_struct->{keys}->{$index};
76   PTDEBUG && _d('Will ascend index', $index);
77
78   # These are the columns we'll ascend.
79   my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
80   if ( $args{asc_first} ) {
81      PTDEBUG && _d('Ascending only first column');
82      @asc_cols = $asc_cols[0];
83   }
84   elsif ( my $n = $args{n_index_cols} ) {
85      $n = scalar @asc_cols if $n > @asc_cols;
86      PTDEBUG && _d('Ascending only first', $n, 'columns');
87      @asc_cols = @asc_cols[0..($n-1)];
88   }
89   PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
90
91   # We found the columns by name, now find their positions for use as
92   # array slices, and make sure they are included in the SELECT list.
93   my @asc_slice;
94   my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
95   foreach my $col ( @asc_cols ) {
96      if ( !exists $col_posn{$col} ) {
97         push @cols, $col;
98         $col_posn{$col} = $#cols;
99      }
100      push @asc_slice, $col_posn{$col};
101   }
102   PTDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice));
103
104   my $asc_stmt = {
105      cols  => \@cols,
106      index => $index,
107      where => '',
108      slice => [],
109      scols => [],
110   };
111
112   # ##########################################################################
113   # Figure out how to ascend the index by building a possibly complicated
114   # WHERE clause that will define a range beginning with a row retrieved by
115   # asc_stmt.  If asc_only is given, the row's lower end should not include
116   # the row.
117   # ##########################################################################
118   if ( @asc_slice ) {
119      my $cmp_where;
120      foreach my $cmp ( qw(< <= >= >) ) {
121         # Generate all 4 types, then choose the right one.
122         $cmp_where = $self->generate_cmp_where(
123            type        => $cmp,
124            slice       => \@asc_slice,
125            cols        => \@cols,
126            quoter      => $q,
127            is_nullable => $tbl_struct->{is_nullable},
128            type_for    => $tbl_struct->{type_for},
129         );
130         $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where};
131      }
132      my $cmp = $args{asc_only} ? '>' : '>=';
133      $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp};
134      $asc_stmt->{slice} = $cmp_where->{slice};
135      $asc_stmt->{scols} = $cmp_where->{scols};
136   }
137
138   return $asc_stmt;
139}
140
141# Generates a multi-column version of a WHERE statement.  It can generate >,
142# >=, < and <= versions.
143# Assuming >= and a non-NULLable two-column index, the WHERE clause should look
144# like this:
145# WHERE (col1 > ?) OR (col1 = ? AND col2 >= ?)
146# Ascending-only and nullable require variations on this.  The general
147# pattern is (>), (= >), (= = >), (= = = >=).
148sub generate_cmp_where {
149   my ( $self, %args ) = @_;
150   foreach my $arg ( qw(type slice cols is_nullable) ) {
151      die "I need a $arg arg" unless defined $args{$arg};
152   }
153   my @slice       = @{$args{slice}};
154   my @cols        = @{$args{cols}};
155   my $is_nullable = $args{is_nullable};
156   my $type_for    = $args{type_for};
157   my $type        = $args{type};
158   my $q           = $self->{Quoter};
159
160   (my $cmp = $type) =~ s/=//;
161
162   my @r_slice;    # Resulting slice columns, by ordinal
163   my @r_scols;    # Ditto, by name
164
165   my @clauses;
166   foreach my $i ( 0 .. $#slice ) {
167      my @clause;
168
169      # Most of the clauses should be strict equality.
170      foreach my $j ( 0 .. $i - 1 ) {
171         my $ord = $slice[$j];
172         my $col = $cols[$ord];
173         my $quo = $q->quote($col);
174         my $val = ($col && ($type_for->{$col} || '')) eq 'enum' ? "CAST(? AS UNSIGNED)" : "?";
175         if ( $is_nullable->{$col} ) {
176            push @clause, "(($val IS NULL AND $quo IS NULL) OR ($quo = $val))";
177            push @r_slice, $ord, $ord;
178            push @r_scols, $col, $col;
179         }
180         else {
181            push @clause, "$quo = $val";
182            push @r_slice, $ord;
183            push @r_scols, $col;
184         }
185      }
186
187      # The last clause in each parenthesized group should be > or <, unless
188      # it's the very last of the whole WHERE clause and we are doing "or
189      # equal," when it should be >= or <=.
190      my $ord = $slice[$i];
191      my $col = $cols[$ord];
192      my $quo = $q->quote($col);
193      my $end = $i == $#slice; # Last clause of the whole group.
194      my $val = ($col && ($type_for->{$col} || '')) eq 'enum' ? "CAST(? AS UNSIGNED)" : "?";
195      if ( $is_nullable->{$col} ) {
196         if ( $type =~ m/=/ && $end ) {
197            push @clause, "($val IS NULL OR $quo $type $val)";
198         }
199         elsif ( $type =~ m/>/ ) {
200            push @clause, "($val IS NULL AND $quo IS NOT NULL) OR ($quo $cmp $val)";
201         }
202         else { # If $type =~ m/</ ) {
203            push @clauses, "(($val IS NOT NULL AND $quo IS NULL) OR ($quo $cmp $val))";
204         }
205         push @r_slice, $ord, $ord;
206         push @r_scols, $col, $col;
207      }
208      else {
209         push @r_slice, $ord;
210         push @r_scols, $col;
211         push @clause, ($type =~ m/=/ && $end ? "$quo $type $val" : "$quo $cmp $val");
212      }
213
214      # Add the clause to the larger WHERE clause.
215      push @clauses, '(' . join(' AND ', @clause) . ')' if @clause;
216   }
217   my $result = '(' . join(' OR ', @clauses) . ')';
218   my $where = {
219      slice => \@r_slice,
220      scols => \@r_scols,
221      where => $result,
222   };
223   return $where;
224}
225
226# Figure out how to delete rows. DELETE requires either an index or all
227# columns.  For that reason you should call this before calling
228# generate_asc_stmt(), so you know what columns you'll need to fetch from the
229# table.  Arguments:
230#   * tbl_struct
231#   * cols
232#   * index
233# These are the same as the arguments to generate_asc_stmt().  Return value is
234# similar too.
235sub generate_del_stmt {
236   my ( $self, %args ) = @_;
237
238   my $tbl  = $args{tbl_struct};
239   my @cols = $args{cols} ? @{$args{cols}} : ();
240   my $tp   = $self->{TableParser};
241   my $q    = $self->{Quoter};
242
243   my @del_cols;
244   my @del_slice;
245
246   # ##########################################################################
247   # Detect the best or preferred index to use for the WHERE clause needed to
248   # delete the rows.
249   # ##########################################################################
250   my $index = $tp->find_best_index($tbl, $args{index});
251   die "Cannot find an ascendable index in table" unless $index;
252
253   # These are the columns needed for the DELETE statement's WHERE clause.
254   if ( $index && $tbl->{keys}->{$index}->{is_unique}) {
255      @del_cols = @{$tbl->{keys}->{$index}->{cols}};
256   }
257   else {
258      @del_cols = @{$tbl->{cols}};
259   }
260   PTDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols));
261
262   # We found the columns by name, now find their positions for use as
263   # array slices, and make sure they are included in the SELECT list.
264   my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
265   foreach my $col ( @del_cols ) {
266      if ( !exists $col_posn{$col} ) {
267         push @cols, $col;
268         $col_posn{$col} = $#cols;
269      }
270      push @del_slice, $col_posn{$col};
271   }
272   PTDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice));
273
274   my $del_stmt = {
275      cols  => \@cols,
276      index => $index,
277      where => '',
278      slice => [],
279      scols => [],
280   };
281
282   # ##########################################################################
283   # Figure out how to target a single row with a WHERE clause.
284   # ##########################################################################
285   my @clauses;
286   foreach my $i ( 0 .. $#del_slice ) {
287      my $ord = $del_slice[$i];
288      my $col = $cols[$ord];
289      my $quo = $q->quote($col);
290      if ( $tbl->{is_nullable}->{$col} ) {
291         push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
292         push @{$del_stmt->{slice}}, $ord, $ord;
293         push @{$del_stmt->{scols}}, $col, $col;
294      }
295      else {
296         push @clauses, "$quo = ?";
297         push @{$del_stmt->{slice}}, $ord;
298         push @{$del_stmt->{scols}}, $col;
299      }
300   }
301
302   $del_stmt->{where} = '(' . join(' AND ', @clauses) . ')';
303
304   return $del_stmt;
305}
306
307# Design an INSERT statement.  This actually does very little; it just maps
308# the columns you know you'll get from the SELECT statement onto the columns
309# in the INSERT statement, returning only those that exist in both sets.
310# Arguments:
311#    ins_tbl   hashref returned by TableParser::parse() for the INSERT table
312#    sel_cols  arrayref of columns to SELECT from the SELECT table
313# Returns a hashref:
314#    cols  => arrayref of columns for INSERT
315#    slice => arrayref of sel_cols indices corresponding to the INSERT cols
316# The cols array is used to construct the INSERT's INTO clause like:
317#    INSERT INTO ins_tbl (@cols) VALUES ...
318# The slice array is used like:
319#    $row = $sel_sth->fetchrow_arrayref();
320#    $ins_sth->execute(@{$row}[@slice]);
321# For example, if we select columns (a, b, c) but the insert table only
322# has columns (a, c), then the return hashref will be:
323#    cols  => [a, c]
324#    slice => [0, 2]
325# Therefore, the select statement will return an array with 3 elements
326# (one for each column), but the insert statement will slice this array
327# to get only the elements/columns it needs.
328sub generate_ins_stmt {
329   my ( $self, %args ) = @_;
330   foreach my $arg ( qw(ins_tbl sel_cols) ) {
331      die "I need a $arg argument" unless $args{$arg};
332   }
333   my $ins_tbl  = $args{ins_tbl};
334   my @sel_cols = @{$args{sel_cols}};
335
336   die "You didn't specify any SELECT columns" unless @sel_cols;
337
338   my @ins_cols;
339   my @ins_slice;
340   for my $i ( 0..$#sel_cols ) {
341      next unless $ins_tbl->{is_col}->{$sel_cols[$i]};
342      push @ins_cols, $sel_cols[$i];
343      push @ins_slice, $i;
344   }
345
346   return {
347      cols  => \@ins_cols,
348      slice => \@ins_slice,
349   };
350}
351
352sub _d {
353   my ($package, undef, $line) = caller 0;
354   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
355        map { defined $_ ? $_ : 'undef' }
356        @_;
357   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
358}
359
3601;
361}
362# ###########################################################################
363# End TableNibbler package
364# ###########################################################################
365