1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
2# Feedback and improvements are welcome.
3#
4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
7#
8# This program is free software; you can redistribute it and/or modify it under
9# the terms of the GNU General Public License as published by the Free Software
10# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
11# systems, you can issue `man perlgpl' or `man perlartistic' to read these
12# licenses.
13#
14# You should have received a copy of the GNU General Public License along with
15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16# Place, Suite 330, Boston, MA  02111-1307  USA.
17# ###########################################################################
18# RowDiff package
19# ###########################################################################
20{
21# Package: RowDiff
22# RowDiff compares two sets of rows to find ones that are different.
23package RowDiff;
24
25use strict;
26use warnings FATAL => 'all';
27use English qw(-no_match_vars);
28use constant PTDEBUG => $ENV{PTDEBUG} || 0;
29
30# Required args:
31#   * dbh           obj: dbh used for collation-specific string comparisons
32# Optional args:
33#   * same_row      Callback when rows are identical
34#   * not_in_left   Callback when right row is not in the left
35#   * not_in_right  Callback when left row is not in the right
36#   * key_cmp       Callback when a column value differs
37#   * done          Callback that stops compare_sets() if it returns true
38#   * trf           Callback to transform numeric values before comparison
39sub new {
40   my ( $class, %args ) = @_;
41   die "I need a dbh" unless $args{dbh};
42   my $self = { %args };
43   return bless $self, $class;
44}
45
46# Arguments:
47#   * left_sth    obj: sth
48#   * right_sth   obj: sth
49#   * syncer      obj: TableSync* module
50#   * tbl_struct  hashref: table struct from TableParser::parser()
51# Iterates through two sets of rows and finds differences.  Calls various
52# methods on the $syncer object when it finds differences, passing these
53# args and hashrefs to the differing rows ($lr and $rr).
54sub compare_sets {
55   my ( $self, %args ) = @_;
56   my @required_args = qw(left_sth right_sth syncer tbl_struct);
57   foreach my $arg ( @required_args ) {
58      die "I need a $arg argument" unless defined $args{$arg};
59   }
60   my $left_sth   = $args{left_sth};
61   my $right_sth  = $args{right_sth};
62   my $syncer     = $args{syncer};
63   my $tbl_struct = $args{tbl_struct};
64
65   my ($lr, $rr);    # Current row from the left/right sths.
66   $args{key_cols} = $syncer->key_cols();  # for key_cmp()
67
68   # We have to manually track if the left or right sth is done
69   # fetching rows because sth->{Active} is always true with
70   # DBD::mysql v3. And we cannot simply while ( $lr || $rr )
71   # because in the case where left and right have the same key,
72   # we do this:
73   #    $lr = $rr = undef; # Fetch another row from each side.
74   # Unsetting both $lr and $rr there would cause while () to
75   # terminate. (And while ( $lr && $rr ) is not what we want
76   # either.) Furthermore, we need to avoid trying to fetch more
77   # rows if there are none to fetch because doing this would
78   # cause a DBI error ("fetch without execute"). That's why we
79   # make these checks:
80   #    if ( !$lr && !$left_done )
81   #    if ( !$rr && !$right_done )
82   # If you make changes here, be sure to test both RowDiff.t
83   # and RowDiff-custom.t. Look inside the later to see what
84   # is custom about it.
85   my $left_done  = 0;
86   my $right_done = 0;
87   my $done       = $self->{done};
88
89   do {
90      if ( !$lr && !$left_done ) {
91         PTDEBUG && _d('Fetching row from left');
92         eval { $lr = $left_sth->fetchrow_hashref(); };
93         PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
94         $left_done = !$lr || $EVAL_ERROR ? 1 : 0;
95      }
96      elsif ( PTDEBUG ) {
97         _d('Left still has rows');
98      }
99
100      if ( !$rr && !$right_done ) {
101         PTDEBUG && _d('Fetching row from right');
102         eval { $rr = $right_sth->fetchrow_hashref(); };
103         PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
104         $right_done = !$rr || $EVAL_ERROR ? 1 : 0;
105      }
106      elsif ( PTDEBUG ) {
107         _d('Right still has rows');
108      }
109
110      my $cmp;
111      if ( $lr && $rr ) {
112         $cmp = $self->key_cmp(%args, lr => $lr, rr => $rr);
113         PTDEBUG && _d('Key comparison on left and right:', $cmp);
114      }
115      if ( $lr || $rr ) {
116         # If the current row is the "same row" on both sides, meaning the two
117         # rows have the same key, check the contents of the row to see if
118         # they're the same.
119         if ( $lr && $rr && defined $cmp && $cmp == 0 ) {
120            PTDEBUG && _d('Left and right have the same key');
121            $syncer->same_row(%args, lr => $lr, rr => $rr);
122            $self->{same_row}->(%args, lr => $lr, rr => $rr)
123               if $self->{same_row};
124            $lr = $rr = undef; # Fetch another row from each side.
125         }
126         # The row in the left doesn't exist in the right.
127         elsif ( !$rr || ( defined $cmp && $cmp < 0 ) ) {
128            PTDEBUG && _d('Left is not in right');
129            $syncer->not_in_right(%args, lr => $lr, rr => $rr);
130            $self->{not_in_right}->(%args, lr => $lr, rr => $rr)
131               if $self->{not_in_right};
132            $lr = undef;
133         }
134         # Symmetric to the above.
135         else {
136            PTDEBUG && _d('Right is not in left');
137            $syncer->not_in_left(%args, lr => $lr, rr => $rr);
138            $self->{not_in_left}->(%args, lr => $lr, rr => $rr)
139               if $self->{not_in_left};
140            $rr = undef;
141         }
142      }
143      $left_done = $right_done = 1 if $done && $done->(%args);
144   } while ( !($left_done && $right_done) );
145   PTDEBUG && _d('No more rows');
146   $syncer->done_with_rows();
147}
148
149# Compare two rows to determine how they should be ordered.  NULL sorts before
150# defined values in MySQL, so I consider undef "less than." Numbers are easy to
151# compare.  Otherwise string comparison is tricky.  This function must match
152# MySQL exactly or the merge algorithm runs off the rails, so when in doubt I
153# ask MySQL to compare strings for me.  I can handle numbers and "normal" latin1
154# characters without asking MySQL.  See
155# http://dev.mysql.com/doc/refman/5.0/en/charset-literal.html.  $r1 and $r2 are
156# row hashrefs.  $key_cols is an arrayref of the key columns to compare.  $tbl is the
157# structure returned by TableParser.  The result matches Perl's cmp or <=>
158# operators:
159# 1 cmp 0 =>  1
160# 1 cmp 1 =>  0
161# 1 cmp 2 => -1
162# TODO: must generate the comparator function dynamically for speed, so we don't
163# have to check the type of columns constantly
164sub key_cmp {
165   my ( $self, %args ) = @_;
166   my @required_args = qw(lr rr key_cols tbl_struct);
167   foreach my $arg ( @required_args ) {
168      die "I need a $arg argument" unless exists $args{$arg};
169   }
170   my ($lr, $rr, $key_cols, $tbl_struct) = @args{@required_args};
171   PTDEBUG && _d('Comparing keys using columns:', join(',', @$key_cols));
172
173   # Optional callbacks.
174   my $callback = $self->{key_cmp};
175   my $trf      = $self->{trf};
176
177   foreach my $col ( @$key_cols ) {
178      my $l = $lr->{$col};
179      my $r = $rr->{$col};
180      if ( !defined $l || !defined $r ) {
181         PTDEBUG && _d($col, 'is not defined in both rows');
182         return defined $l ? 1 : defined $r ? -1 : 0;
183      }
184      else {
185         if ( $tbl_struct->{is_numeric}->{$col} ) {   # Numeric column
186            PTDEBUG && _d($col, 'is numeric');
187            ($l, $r) = $trf->($l, $r, $tbl_struct, $col) if $trf;
188            my $cmp = $l <=> $r;
189            if ( $cmp ) {
190               PTDEBUG && _d('Column', $col, 'differs:', $l, '!=', $r);
191               $callback->($col, $l, $r) if $callback;
192               return $cmp;
193            }
194         }
195         # Do case-sensitive cmp, expecting most will be eq.  If that fails, try
196         # a case-insensitive cmp if possible; otherwise ask MySQL how to sort.
197         elsif ( $l ne $r ) {
198            my $cmp;
199            my $coll = $tbl_struct->{collation_for}->{$col};
200            if ( $coll && ( $coll ne 'latin1_swedish_ci'
201                           || $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/) )
202            {
203               PTDEBUG && _d('Comparing', $col, 'via MySQL');
204               $cmp = $self->db_cmp($coll, $l, $r);
205            }
206            else {
207               PTDEBUG && _d('Comparing', $col, 'in lowercase');
208               $cmp = lc $l cmp lc $r;
209            }
210            if ( $cmp ) {
211               PTDEBUG && _d('Column', $col, 'differs:', $l, 'ne', $r);
212               $callback->($col, $l, $r) if $callback;
213               return $cmp;
214            }
215         }
216      }
217   }
218   return 0;
219}
220
221sub db_cmp {
222   my ( $self, $collation, $l, $r ) = @_;
223   if ( !$self->{sth}->{$collation} ) {
224      if ( !$self->{charset_for} ) {
225         PTDEBUG && _d('Fetching collations from MySQL');
226         my @collations = @{$self->{dbh}->selectall_arrayref(
227            'SHOW COLLATION', {Slice => { collation => 1, charset => 1 }})};
228         foreach my $collation ( @collations ) {
229            $self->{charset_for}->{$collation->{collation}}
230               = $collation->{charset};
231         }
232      }
233      my $sql = "SELECT STRCMP(_$self->{charset_for}->{$collation}? COLLATE $collation, "
234         . "_$self->{charset_for}->{$collation}? COLLATE $collation) AS res";
235      PTDEBUG && _d($sql);
236      $self->{sth}->{$collation} = $self->{dbh}->prepare($sql);
237   }
238   my $sth = $self->{sth}->{$collation};
239   $sth->execute($l, $r);
240   return $sth->fetchall_arrayref()->[0]->[0];
241}
242
243sub _d {
244   my ($package, undef, $line) = caller 0;
245   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
246        map { defined $_ ? $_ : 'undef' }
247        @_;
248   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
249}
250
2511;
252}
253# ###########################################################################
254# End RowDiff package
255# ###########################################################################
256