1# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd. 2# Feedback and improvements are welcome. 3# 4# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 5# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 6# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 7# 8# This program is free software; you can redistribute it and/or modify it under 9# the terms of the GNU General Public License as published by the Free Software 10# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar 11# systems, you can issue `man perlgpl' or `man perlartistic' to read these 12# licenses. 13# 14# You should have received a copy of the GNU General Public License along with 15# this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16# Place, Suite 330, Boston, MA 02111-1307 USA. 17# ########################################################################### 18# RowDiff package 19# ########################################################################### 20{ 21# Package: RowDiff 22# RowDiff compares two sets of rows to find ones that are different. 23package RowDiff; 24 25use strict; 26use warnings FATAL => 'all'; 27use English qw(-no_match_vars); 28use constant PTDEBUG => $ENV{PTDEBUG} || 0; 29 30# Required args: 31# * dbh obj: dbh used for collation-specific string comparisons 32# Optional args: 33# * same_row Callback when rows are identical 34# * not_in_left Callback when right row is not in the left 35# * not_in_right Callback when left row is not in the right 36# * key_cmp Callback when a column value differs 37# * done Callback that stops compare_sets() if it returns true 38# * trf Callback to transform numeric values before comparison 39sub new { 40 my ( $class, %args ) = @_; 41 die "I need a dbh" unless $args{dbh}; 42 my $self = { %args }; 43 return bless $self, $class; 44} 45 46# Arguments: 47# * left_sth obj: sth 48# * right_sth obj: sth 49# * syncer obj: TableSync* module 50# * tbl_struct hashref: table struct from TableParser::parser() 51# Iterates through two sets of rows and finds differences. Calls various 52# methods on the $syncer object when it finds differences, passing these 53# args and hashrefs to the differing rows ($lr and $rr). 54sub compare_sets { 55 my ( $self, %args ) = @_; 56 my @required_args = qw(left_sth right_sth syncer tbl_struct); 57 foreach my $arg ( @required_args ) { 58 die "I need a $arg argument" unless defined $args{$arg}; 59 } 60 my $left_sth = $args{left_sth}; 61 my $right_sth = $args{right_sth}; 62 my $syncer = $args{syncer}; 63 my $tbl_struct = $args{tbl_struct}; 64 65 my ($lr, $rr); # Current row from the left/right sths. 66 $args{key_cols} = $syncer->key_cols(); # for key_cmp() 67 68 # We have to manually track if the left or right sth is done 69 # fetching rows because sth->{Active} is always true with 70 # DBD::mysql v3. And we cannot simply while ( $lr || $rr ) 71 # because in the case where left and right have the same key, 72 # we do this: 73 # $lr = $rr = undef; # Fetch another row from each side. 74 # Unsetting both $lr and $rr there would cause while () to 75 # terminate. (And while ( $lr && $rr ) is not what we want 76 # either.) Furthermore, we need to avoid trying to fetch more 77 # rows if there are none to fetch because doing this would 78 # cause a DBI error ("fetch without execute"). That's why we 79 # make these checks: 80 # if ( !$lr && !$left_done ) 81 # if ( !$rr && !$right_done ) 82 # If you make changes here, be sure to test both RowDiff.t 83 # and RowDiff-custom.t. Look inside the later to see what 84 # is custom about it. 85 my $left_done = 0; 86 my $right_done = 0; 87 my $done = $self->{done}; 88 89 do { 90 if ( !$lr && !$left_done ) { 91 PTDEBUG && _d('Fetching row from left'); 92 eval { $lr = $left_sth->fetchrow_hashref(); }; 93 PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); 94 $left_done = !$lr || $EVAL_ERROR ? 1 : 0; 95 } 96 elsif ( PTDEBUG ) { 97 _d('Left still has rows'); 98 } 99 100 if ( !$rr && !$right_done ) { 101 PTDEBUG && _d('Fetching row from right'); 102 eval { $rr = $right_sth->fetchrow_hashref(); }; 103 PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); 104 $right_done = !$rr || $EVAL_ERROR ? 1 : 0; 105 } 106 elsif ( PTDEBUG ) { 107 _d('Right still has rows'); 108 } 109 110 my $cmp; 111 if ( $lr && $rr ) { 112 $cmp = $self->key_cmp(%args, lr => $lr, rr => $rr); 113 PTDEBUG && _d('Key comparison on left and right:', $cmp); 114 } 115 if ( $lr || $rr ) { 116 # If the current row is the "same row" on both sides, meaning the two 117 # rows have the same key, check the contents of the row to see if 118 # they're the same. 119 if ( $lr && $rr && defined $cmp && $cmp == 0 ) { 120 PTDEBUG && _d('Left and right have the same key'); 121 $syncer->same_row(%args, lr => $lr, rr => $rr); 122 $self->{same_row}->(%args, lr => $lr, rr => $rr) 123 if $self->{same_row}; 124 $lr = $rr = undef; # Fetch another row from each side. 125 } 126 # The row in the left doesn't exist in the right. 127 elsif ( !$rr || ( defined $cmp && $cmp < 0 ) ) { 128 PTDEBUG && _d('Left is not in right'); 129 $syncer->not_in_right(%args, lr => $lr, rr => $rr); 130 $self->{not_in_right}->(%args, lr => $lr, rr => $rr) 131 if $self->{not_in_right}; 132 $lr = undef; 133 } 134 # Symmetric to the above. 135 else { 136 PTDEBUG && _d('Right is not in left'); 137 $syncer->not_in_left(%args, lr => $lr, rr => $rr); 138 $self->{not_in_left}->(%args, lr => $lr, rr => $rr) 139 if $self->{not_in_left}; 140 $rr = undef; 141 } 142 } 143 $left_done = $right_done = 1 if $done && $done->(%args); 144 } while ( !($left_done && $right_done) ); 145 PTDEBUG && _d('No more rows'); 146 $syncer->done_with_rows(); 147} 148 149# Compare two rows to determine how they should be ordered. NULL sorts before 150# defined values in MySQL, so I consider undef "less than." Numbers are easy to 151# compare. Otherwise string comparison is tricky. This function must match 152# MySQL exactly or the merge algorithm runs off the rails, so when in doubt I 153# ask MySQL to compare strings for me. I can handle numbers and "normal" latin1 154# characters without asking MySQL. See 155# http://dev.mysql.com/doc/refman/5.0/en/charset-literal.html. $r1 and $r2 are 156# row hashrefs. $key_cols is an arrayref of the key columns to compare. $tbl is the 157# structure returned by TableParser. The result matches Perl's cmp or <=> 158# operators: 159# 1 cmp 0 => 1 160# 1 cmp 1 => 0 161# 1 cmp 2 => -1 162# TODO: must generate the comparator function dynamically for speed, so we don't 163# have to check the type of columns constantly 164sub key_cmp { 165 my ( $self, %args ) = @_; 166 my @required_args = qw(lr rr key_cols tbl_struct); 167 foreach my $arg ( @required_args ) { 168 die "I need a $arg argument" unless exists $args{$arg}; 169 } 170 my ($lr, $rr, $key_cols, $tbl_struct) = @args{@required_args}; 171 PTDEBUG && _d('Comparing keys using columns:', join(',', @$key_cols)); 172 173 # Optional callbacks. 174 my $callback = $self->{key_cmp}; 175 my $trf = $self->{trf}; 176 177 foreach my $col ( @$key_cols ) { 178 my $l = $lr->{$col}; 179 my $r = $rr->{$col}; 180 if ( !defined $l || !defined $r ) { 181 PTDEBUG && _d($col, 'is not defined in both rows'); 182 return defined $l ? 1 : defined $r ? -1 : 0; 183 } 184 else { 185 if ( $tbl_struct->{is_numeric}->{$col} ) { # Numeric column 186 PTDEBUG && _d($col, 'is numeric'); 187 ($l, $r) = $trf->($l, $r, $tbl_struct, $col) if $trf; 188 my $cmp = $l <=> $r; 189 if ( $cmp ) { 190 PTDEBUG && _d('Column', $col, 'differs:', $l, '!=', $r); 191 $callback->($col, $l, $r) if $callback; 192 return $cmp; 193 } 194 } 195 # Do case-sensitive cmp, expecting most will be eq. If that fails, try 196 # a case-insensitive cmp if possible; otherwise ask MySQL how to sort. 197 elsif ( $l ne $r ) { 198 my $cmp; 199 my $coll = $tbl_struct->{collation_for}->{$col}; 200 if ( $coll && ( $coll ne 'latin1_swedish_ci' 201 || $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/) ) 202 { 203 PTDEBUG && _d('Comparing', $col, 'via MySQL'); 204 $cmp = $self->db_cmp($coll, $l, $r); 205 } 206 else { 207 PTDEBUG && _d('Comparing', $col, 'in lowercase'); 208 $cmp = lc $l cmp lc $r; 209 } 210 if ( $cmp ) { 211 PTDEBUG && _d('Column', $col, 'differs:', $l, 'ne', $r); 212 $callback->($col, $l, $r) if $callback; 213 return $cmp; 214 } 215 } 216 } 217 } 218 return 0; 219} 220 221sub db_cmp { 222 my ( $self, $collation, $l, $r ) = @_; 223 if ( !$self->{sth}->{$collation} ) { 224 if ( !$self->{charset_for} ) { 225 PTDEBUG && _d('Fetching collations from MySQL'); 226 my @collations = @{$self->{dbh}->selectall_arrayref( 227 'SHOW COLLATION', {Slice => { collation => 1, charset => 1 }})}; 228 foreach my $collation ( @collations ) { 229 $self->{charset_for}->{$collation->{collation}} 230 = $collation->{charset}; 231 } 232 } 233 my $sql = "SELECT STRCMP(_$self->{charset_for}->{$collation}? COLLATE $collation, " 234 . "_$self->{charset_for}->{$collation}? COLLATE $collation) AS res"; 235 PTDEBUG && _d($sql); 236 $self->{sth}->{$collation} = $self->{dbh}->prepare($sql); 237 } 238 my $sth = $self->{sth}->{$collation}; 239 $sth->execute($l, $r); 240 return $sth->fetchall_arrayref()->[0]->[0]; 241} 242 243sub _d { 244 my ($package, undef, $line) = caller 0; 245 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } 246 map { defined $_ ? $_ : 'undef' } 247 @_; 248 print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; 249} 250 2511; 252} 253# ########################################################################### 254# End RowDiff package 255# ########################################################################### 256