1#!/usr/bin/perl
2
3BEGIN {
4   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
5      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
6   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
7};
8
9use strict;
10use warnings FATAL => 'all';
11use English qw(-no_match_vars);
12use Test::More;
13
14use TableParser;
15use TableChunker;
16use Quoter;
17use DSNParser;
18use Sandbox;
19use PerconaTest;
20
21my $dp = new DSNParser(opts=>$dsn_opts);
22my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
23my $dbh = $sb->get_dbh_for('master');
24
25if ( !$dbh ) {
26   plan skip_all => 'Cannot connect to sandbox master';
27}
28
29$sb->create_dbs($dbh, ['test']);
30
31my $q  = new Quoter();
32my $tp = new TableParser(Quoter => $q);
33my $c  = new TableChunker(Quoter => $q, TableParser => $tp);
34my $t;
35
36$t = $tp->parse( load_file('t/lib/samples/sakila.film.sql') );
37is_deeply(
38   [ $c->find_chunk_columns(tbl_struct=>$t) ],
39   [ 0,
40     { column => 'film_id', index => 'PRIMARY' },
41     { column => 'title', index => 'idx_title' },
42     { column => 'language_id', index => 'idx_fk_language_id' },
43     { column => 'original_language_id',
44       index => 'idx_fk_original_language_id' },
45   ],
46   'Found chunkable columns on sakila.film',
47);
48
49is_deeply(
50   [ $c->find_chunk_columns(tbl_struct=>$t, exact => 1) ],
51   [ 1, { column => 'film_id', index => 'PRIMARY' } ],
52   'Found exact chunkable columns on sakila.film',
53);
54
55# This test was removed because possible_keys was only used (vaguely)
56# by mk-table-sync/TableSync* but this functionality is now handled
57# in TableSync*::can_sync() with the optional args col and index.
58# In other words: it's someone else's job to get/check the preferred index.
59#is_deeply(
60#   [ $c->find_chunk_columns($t, { possible_keys => [qw(idx_fk_language_id)] }) ],
61#   [ 0,
62#     [
63#        { column => 'language_id', index => 'idx_fk_language_id' },
64#        { column => 'original_language_id',
65#             index => 'idx_fk_original_language_id' },
66#        { column => 'film_id', index => 'PRIMARY' },
67#     ]
68#   ],
69#   'Found preferred chunkable columns on sakila.film',
70#);
71
72$t = $tp->parse( load_file('t/lib/samples/pk_not_first.sql') );
73is_deeply(
74   [ $c->find_chunk_columns(tbl_struct=>$t) ],
75   [ 0,
76     { column => 'film_id', index => 'PRIMARY' },
77     { column => 'title', index => 'idx_title' },
78     { column => 'language_id', index => 'idx_fk_language_id' },
79     { column => 'original_language_id',
80        index => 'idx_fk_original_language_id' },
81   ],
82   'PK column is first',
83);
84
85is(
86   $c->inject_chunks(
87      query     => 'SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ FOO FROM 1/*WHERE*/',
88      database  => 'sakila',
89      table     => 'film',
90      chunks    => [ '1=1', 'a=b' ],
91      chunk_num => 1,
92      where     => ['FOO=BAR'],
93   ),
94   'SELECT /*sakila.film:2/2*/ 1 AS chunk_num, FOO FROM 1 WHERE (a=b) AND ((FOO=BAR))',
95   'Replaces chunk info into query',
96);
97
98is(
99   $c->inject_chunks(
100      query     => 'SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ FOO FROM 1/*WHERE*/',
101      database  => 'sakila',
102      table     => 'film',
103      chunks    => [ '1=1', 'a=b' ],
104      chunk_num => 1,
105      where     => ['FOO=BAR', undef],
106   ),
107   'SELECT /*sakila.film:2/2*/ 1 AS chunk_num, FOO FROM 1 WHERE (a=b) AND ((FOO=BAR))',
108   'Inject WHERE clause with undef item',
109);
110
111is(
112   $c->inject_chunks(
113      query     => 'SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ FOO FROM 1/*WHERE*/',
114      database  => 'sakila',
115      table     => 'film',
116      chunks    => [ '1=1', 'a=b' ],
117      chunk_num => 1,
118      where     => ['FOO=BAR', 'BAZ=BAT'],
119   ),
120   'SELECT /*sakila.film:2/2*/ 1 AS chunk_num, FOO FROM 1 WHERE (a=b) '
121      . 'AND ((FOO=BAR) AND (BAZ=BAT))',
122   'Inject WHERE with defined item',
123);
124
125# #############################################################################
126# Sandbox tests.
127# #############################################################################
128SKIP: {
129   skip 'Sandbox master does not have the sakila database', 21
130      unless @{$dbh->selectcol_arrayref("SHOW DATABASES LIKE 'sakila'")};
131
132   my @chunks;
133
134   @chunks = $c->calculate_chunks(
135      tbl_struct    => $t,
136      chunk_col     => 'film_id',
137      min           => 0,
138      max           => 99,
139      rows_in_range => 100,
140      chunk_size    => 30,
141      dbh           => $dbh,
142      db            => 'sakila',
143      tbl           => 'film',
144   );
145   is_deeply(
146      \@chunks,
147      [
148         "`film_id` < '30'",
149         "`film_id` >= '30' AND `film_id` < '60'",
150         "`film_id` >= '60' AND `film_id` < '90'",
151         "`film_id` >= '90'",
152      ],
153      'Got the right chunks from dividing 100 rows into 30-row chunks',
154   );
155
156   @chunks = $c->calculate_chunks(
157      tbl_struct    => $t,
158      chunk_col     => 'film_id',
159      min           => 0,
160      max           => 99,
161      rows_in_range => 100,
162      chunk_size    => 300,
163      dbh           => $dbh,
164      db            => 'sakila',
165      tbl           => 'film',
166   );
167   is_deeply(
168      \@chunks,
169      [
170         '1=1',
171      ],
172      'Got the right chunks from dividing 100 rows into 300-row chunks',
173   );
174
175   @chunks = $c->calculate_chunks(
176      tbl_struct    => $t,
177      chunk_col     => 'film_id',
178      min           => 0,
179      max           => 0,
180      rows_in_range => 100,
181      chunk_size    => 300,
182      dbh           => $dbh,
183      db            => 'sakila',
184      tbl           => 'film',
185   );
186   is_deeply(
187      \@chunks,
188      [
189         '1=1',
190      ],
191      'No rows, so one chunk',
192   );
193
194   @chunks = $c->calculate_chunks(
195      tbl_struct    => $t,
196      chunk_col     => 'original_language_id',
197      min           => 0,
198      max           => 99,
199      rows_in_range => 100,
200      chunk_size    => 50,
201      dbh           => $dbh,
202      db            => 'sakila',
203      tbl           => 'film',
204   );
205   is_deeply(
206      \@chunks,
207      [
208         "`original_language_id` < '50'",
209         "`original_language_id` >= '50'",
210         "`original_language_id` IS NULL",
211      ],
212      'Nullable column adds IS NULL chunk',
213   );
214
215   $t = $tp->parse( load_file('t/lib/samples/daycol.sql') );
216
217   @chunks = $c->calculate_chunks(
218      tbl_struct    => $t,
219      chunk_col     => 'a',
220      min           => '2001-01-01',
221      max           => '2002-01-01',
222      rows_in_range => 365,
223      chunk_size    => 90,
224      dbh           => $dbh,
225      db            => 'sakila',
226      tbl           => 'checksum_test_5',
227   );
228   is_deeply(
229      \@chunks,
230      [
231         "`a` < '2001-04-01'",
232         "`a` >= '2001-04-01' AND `a` < '2001-06-30'",
233         "`a` >= '2001-06-30' AND `a` < '2001-09-28'",
234         "`a` >= '2001-09-28' AND `a` < '2001-12-27'",
235         "`a` >= '2001-12-27'",
236      ],
237      'Date column chunks OK',
238   );
239
240   $t = $tp->parse( load_file('t/lib/samples/date.sql') );
241   @chunks = $c->calculate_chunks(
242      tbl_struct    => $t,
243      chunk_col     => 'a',
244      min           => '2000-01-01',
245      max           => '2005-11-26',
246      rows_in_range => 3,
247      chunk_size    => 1,
248      dbh           => $dbh,
249      db            => 'sakila',
250      tbl           => 'checksum_test_5',
251   );
252   is_deeply(
253      \@chunks,
254      [
255         "`a` < '2001-12-20'",
256         "`a` >= '2001-12-20' AND `a` < '2003-12-09'",
257         "`a` >= '2003-12-09'",
258      ],
259      'Date column chunks OK',
260   );
261
262   @chunks = $c->calculate_chunks(
263      tbl_struct    => $t,
264      chunk_col     => 'a',
265      min           => '0000-00-00',
266      max           => '2005-11-26',
267      rows_in_range => 3,
268      chunk_size    => 1,
269      dbh           => $dbh,
270      db            => 'sakila',
271      tbl           => 'checksum_test_5',
272   );
273   is_deeply(
274      \@chunks,
275      [
276         "`a` < '0668-08-20'",
277         "`a` >= '0668-08-20' AND `a` < '1337-04-09'",
278         "`a` >= '1337-04-09'",
279      ],
280      'Date column where min date is 0000-00-00',
281   );
282
283   $t = $tp->parse( load_file('t/lib/samples/datetime.sql') );
284   @chunks = $c->calculate_chunks(
285      tbl_struct    => $t,
286      chunk_col     => 'a',
287      min           => '1922-01-14 05:18:23',
288      max           => '2005-11-26 00:59:19',
289      rows_in_range => 3,
290      chunk_size    => 1,
291      dbh           => $dbh,
292      db            => 'sakila',
293      tbl           => 'checksum_test_5',
294   );
295   is_deeply(
296      \@chunks,
297      [
298         "`a` < '1949-12-28 19:52:02'",
299         "`a` >= '1949-12-28 19:52:02' AND `a` < '1977-12-12 10:25:41'",
300         "`a` >= '1977-12-12 10:25:41'",
301      ],
302      'Datetime column chunks OK',
303   );
304
305   @chunks = $c->calculate_chunks(
306      tbl_struct    => $t,
307      chunk_col     => 'a',
308      min           => '0000-00-00 00:00:00',
309      max           => '2005-11-26 00:59:19',
310      rows_in_range => 3,
311      chunk_size    => 1,
312      dbh           => $dbh,
313      db            => 'sakila',
314      tbl           => 'checksum_test_5',
315   );
316   is_deeply(
317      \@chunks,
318      [
319         "`a` < '0668-08-19 16:19:47'",
320         "`a` >= '0668-08-19 16:19:47' AND `a` < '1337-04-08 08:39:34'",
321         "`a` >= '1337-04-08 08:39:34'",
322      ],
323      'Datetime where min is 0000-00-00 00:00:00',
324   );
325
326   $t = $tp->parse( load_file('t/lib/samples/timecol.sql') );
327   @chunks = $c->calculate_chunks(
328      tbl_struct    => $t,
329      chunk_col     => 'a',
330      min           => '00:59:19',
331      max           => '09:03:15',
332      rows_in_range => 3,
333      chunk_size    => 1,
334      dbh           => $dbh,
335      db            => 'sakila',
336      tbl           => 'checksum_test_7',
337   );
338   is_deeply(
339      \@chunks,
340      [
341         "`a` < '03:40:38'",
342         "`a` >= '03:40:38' AND `a` < '06:21:57'",
343         "`a` >= '06:21:57'",
344      ],
345      'Time column chunks OK',
346   );
347
348   $t = $tp->parse( load_file('t/lib/samples/doublecol.sql') );
349   @chunks = $c->calculate_chunks(
350      tbl_struct    => $t,
351      chunk_col     => 'a',
352      min           => '1',
353      max           => '99.999',
354      rows_in_range => 3,
355      chunk_size    => 1,
356      dbh           => $dbh,
357      db            => 'sakila',
358      tbl           => 'checksum_test_8',
359   );
360   is_deeply(
361      \@chunks,
362      [
363         "`a` < '33.99966'",
364         "`a` >= '33.99966' AND `a` < '66.99933'",
365         "`a` >= '66.99933'",
366      ],
367      'Double column chunks OK',
368   );
369
370   @chunks = $c->calculate_chunks(
371      tbl_struct    => $t,
372      chunk_col     => 'a',
373      min           => '1',
374      max           => '2',
375      rows_in_range => 5,
376      chunk_size    => 3,
377      dbh           => $dbh,
378      db            => 'sakila',
379      tbl           => 'checksum_test_5',
380   );
381   is_deeply(
382      \@chunks,
383      [
384         "`a` < '1.6'",
385         "`a` >= '1.6'",
386      ],
387      'Double column chunks OK with smaller-than-int values',
388   );
389
390   eval {
391      @chunks = $c->calculate_chunks(
392         tbl_struct    => $t,
393         chunk_col     => 'a',
394         min           => '1',
395         max           => '2',
396         rows_in_range => 50000000,
397         chunk_size    => 3,
398         dbh           => $dbh,
399         db            => 'sakila',
400         tbl           => 'checksum_test_5',
401      );
402   };
403   is(
404      $EVAL_ERROR,
405      "Chunk size is too small: 1.00000 !> 1\n",
406      'Throws OK when too many chunks',
407   );
408
409   $t = $tp->parse( load_file('t/lib/samples/floatcol.sql') );
410   @chunks = $c->calculate_chunks(
411      tbl_struct    => $t,
412      chunk_col     => 'a',
413      min           => '1',
414      max           => '99.999',
415      rows_in_range => 3,
416      chunk_size    => 1,
417      dbh           => $dbh,
418      db            => 'sakila',
419      tbl           => 'checksum_test_5',
420   );
421   is_deeply(
422      \@chunks,
423      [
424         "`a` < '33.99966'",
425         "`a` >= '33.99966' AND `a` < '66.99933'",
426         "`a` >= '66.99933'",
427      ],
428      'Float column chunks OK',
429   );
430
431   $t = $tp->parse( load_file('t/lib/samples/decimalcol.sql') );
432   @chunks = $c->calculate_chunks(
433      tbl_struct    => $t,
434      chunk_col     => 'a',
435      min           => '1',
436      max           => '99.999',
437      rows_in_range => 3,
438      chunk_size    => 1,
439      dbh           => $dbh,
440      db            => 'sakila',
441      tbl           => 'checksum_test_5',
442   );
443   is_deeply(
444      \@chunks,
445      [
446         "`a` < '33.99966'",
447         "`a` >= '33.99966' AND `a` < '66.99933'",
448         "`a` >= '66.99933'",
449      ],
450      'Decimal column chunks OK',
451   );
452
453   throws_ok(
454      sub { $c->get_range_statistics(
455            dbh        => $dbh,
456            db         => 'sakila',
457            tbl        => 'film',
458            chunk_col  => 'film_id',
459            tbl_struct => {
460               type_for   => { film_id => 'int' },
461               is_numeric => { film_id => 1     },
462            },
463            where      => 'film_id>'
464         )
465      },
466      qr/WHERE \(film_id>\)/,
467      'Shows full SQL on error',
468   );
469
470   throws_ok(
471      sub { $c->size_to_rows(
472            dbh        => $dbh,
473            db         => 'sakila',
474            tbl        => 'film',
475            chunk_size => 'foo'
476         )
477      },
478      qr/Invalid chunk size/,
479      'Rejects chunk size',
480   );
481
482   is_deeply(
483      [ $c->size_to_rows(
484         dbh        => $dbh,
485         db         => 'sakila',
486         tbl        => 'film',
487         chunk_size => '5'
488      ) ],
489      [5, undef],
490      'Numeric size'
491   );
492   my ($size) = $c->size_to_rows(
493      dbh        => $dbh,
494      db         => 'sakila',
495      tbl        => 'film',
496      chunk_size => '5k'
497   );
498   ok($size >= 20 && $size <= 30, 'Convert bytes to rows');
499
500   my $avg;
501   ($size, $avg) = $c->size_to_rows(
502      dbh        => $dbh,
503      db         => 'sakila',
504      tbl        => 'film',
505      chunk_size => '5k'
506   );
507   # This will fail if we try to set a specific range, because Rows and
508   # Avg_row_length can vary slightly-to-greatly for InnoDB tables.
509   like(
510      $avg, qr/^\d+$/,
511      "size_to_rows() returns avg row len in list context ($avg)"
512   );
513
514   ($size, $avg) = $c->size_to_rows(
515      dbh            => $dbh,
516      db             => 'sakila',
517      tbl            => 'film',
518      chunk_size     => 5,
519      avg_row_length => 1,
520   );
521   # diag('size ', $size || 'undef', 'avg ', $avg || 'undef');
522   ok(
523      $size == 5 && ($avg >= 150 && $avg <= 280),
524      'size_to_rows() gets avg row length if asked'
525   );
526
527
528   # #########################################################################
529   # Issue 1084: Don't try to chunk small tables
530   # #########################################################################
531   $t = $tp->parse( $tp->get_create_table($dbh, 'sakila', 'country') );
532   @chunks = $c->calculate_chunks(
533      tbl_struct    => $t,
534      chunk_col     => 'country_id',
535      min           => '1',
536      max           => '109',
537      rows_in_range => 109,
538      chunk_size    => 110,
539      dbh           => $dbh,
540      db            => 'sakila',
541      tbl           => 'country',
542   );
543   is_deeply(
544      \@chunks,
545      ["1=1"],
546      "Doesn't chunk if chunk size > total rows"
547   );
548}
549
550# #############################################################################
551# Issue 47: TableChunker::range_num broken for very large bigint
552# #############################################################################
553$sb->load_file('master', 't/lib/samples/issue_47.sql');
554$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'issue_47') );
555my %params = $c->get_range_statistics(
556   dbh        => $dbh,
557   db         => 'test',
558   tbl        => 'issue_47',
559   chunk_col  => 'userid',
560   tbl_struct => {
561      type_for   => { userid => 'int' },
562      is_numeric => { userid => 1     },
563   },
564);
565my @chunks;
566eval {
567   @chunks = $c->calculate_chunks(
568      dbh        => $dbh,
569      tbl_struct => $t,
570      chunk_col  => 'userid',
571      chunk_size => '4',
572      %params,
573   );
574};
575unlike($EVAL_ERROR, qr/Chunk size is too small/, 'Does not die chunking unsigned bitint (issue 47)');
576
577# #############################################################################
578# Issue 8: Add --force-index parameter to mk-table-checksum and mk-table-sync
579# #############################################################################
580is(
581   $c->inject_chunks(
582      query       => 'SELECT /*CHUNK_NUM*/ FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/',
583      database    => 'test',
584      table       => 'issue_8',
585      chunks      => [ '1=1', 'a=b' ],
586      chunk_num   => 1,
587      where       => [],
588      index_hint  => 'USE INDEX (`idx_a`)',
589   ),
590   'SELECT  1 AS chunk_num, FROM `test`.`issue_8` USE INDEX (`idx_a`) WHERE (a=b)',
591   'Adds USE INDEX (issue 8)'
592);
593
594$sb->load_file('master', 't/lib/samples/issue_8.sql');
595$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'issue_8') );
596my @candidates = $c->find_chunk_columns(tbl_struct=>$t);
597is_deeply(
598   \@candidates,
599   [
600      0,
601      { column => 'id',    index => 'PRIMARY'  },
602      { column => 'foo',   index => 'uidx_foo' },
603   ],
604   'find_chunk_columns() returns col and idx candidates'
605);
606
607# #############################################################################
608# Issue 941: mk-table-checksum chunking should treat zero dates similar to NULL
609# #############################################################################
610use Data::Dumper;
611$Data::Dumper::Indent    = 1;
612$Data::Dumper::Sortkeys  = 1;
613$Data::Dumper::Quotekeys = 0;
614
615# These tables have rows like: 0, 100, 101, 102, etc.  Without the
616# zero-row option, the result is like:
617#   range stats:
618#     min           => '0',
619#     max           => '107',
620#     rows_in_range => '9'
621#   chunks:
622#     '`i` < 24',
623#     '`i` >= 24 AND `i` < 48',
624#     '`i` >= 48 AND `i` < 72',
625#     '`i` >= 72 AND `i` < 96',
626#     '`i` >= 96'
627# Problem is that the last chunk does all the work.  If the zero row
628# is ignored then the chunks are much better and the first chunk will
629# cover the zero row.
630
631$sb->load_file('master', 't/lib/samples/issue_941.sql');
632
633sub test_zero_row {
634   my ( $tbl, $range, $chunks, $zero_chunk ) = @_;
635   $zero_chunk = 1 unless defined $zero_chunk;
636   $t = $tp->parse( $tp->get_create_table($dbh, 'issue_941', $tbl) );
637   %params = $c->get_range_statistics(
638      dbh        => $dbh,
639      db         => 'issue_941',
640      tbl        => $tbl,
641      chunk_col  => $tbl,
642      tbl_struct => $t,
643      zero_chunk => $zero_chunk,
644   );
645   is_deeply(
646      \%params,
647      $range,
648      "$tbl range without zero row"
649   ) or print STDERR "Got ", Dumper(\%params);
650
651   @chunks = $c->calculate_chunks(
652      dbh        => $dbh,
653      db         => 'issue_941',
654      tbl        => $tbl,
655      tbl_struct => $t,
656      chunk_col  => $tbl,
657      chunk_size => '2',
658      zero_chunk => $zero_chunk,
659      %params,
660   );
661   is_deeply(
662      \@chunks,
663      $chunks,
664      "$tbl chunks without zero row"
665   ) or print STDERR "Got ", Dumper(\@chunks);
666
667   return;
668}
669
670# This can zero chunk because the min, 0, is >= 0.
671# The effective min becomes 100.
672test_zero_row(
673   'i',
674   { min=>0, max=>107, rows_in_range=>9 },
675   [
676      "`i` = 0",
677      "`i` > 0 AND `i` < '102'",
678      "`i` >= '102' AND `i` < '104'",
679      "`i` >= '104' AND `i` < '106'",
680      "`i` >= '106'",
681   ],
682);
683
684# This cannot zero chunk because the min is < 0.
685test_zero_row(
686   'i_neg',
687   { min=>-10, max=>-2, rows_in_range=>8 },
688   [
689      "`i_neg` < '-8'",
690      "`i_neg` >= '-8' AND `i_neg` < '-6'",
691      "`i_neg` >= '-6' AND `i_neg` < '-4'",
692      "`i_neg` >= '-4'"
693   ],
694);
695
696# This cannot zero chunk because the min is < 0.
697test_zero_row(
698   'i_neg_pos',
699   { min=>-10, max=>4, rows_in_range=>14 },
700   [
701      "`i_neg_pos` < '-8'",
702      "`i_neg_pos` >= '-8' AND `i_neg_pos` < '-6'",
703      "`i_neg_pos` >= '-6' AND `i_neg_pos` < '-4'",
704      "`i_neg_pos` >= '-4' AND `i_neg_pos` < '-2'",
705      "`i_neg_pos` >= '-2' AND `i_neg_pos` < '0'",
706      "`i_neg_pos` >= '0' AND `i_neg_pos` < '2'",
707      "`i_neg_pos` >= '2'",
708   ],
709);
710
711# There's no zero values in this table, but it can still
712# zero chunk because the min is >= 0.
713test_zero_row(
714   'i_null',
715   { min=>100, max=>107, rows_in_range=>9 },
716   [
717      "`i_null` = 0",
718      "`i_null` > 0 AND `i_null` < '102'",
719      "`i_null` >= '102' AND `i_null` < '104'",
720      "`i_null` >= '104' AND `i_null` < '106'",
721      "`i_null` >= '106'",
722      "`i_null` IS NULL",
723   ],
724);
725
726# Table d has a zero row, 0000-00-00, which is not a valid value
727# for min but can be selected by the zero chunk.
728test_zero_row(
729   'd',
730   {
731      min => '2010-03-01',
732      max => '2010-03-05',
733      rows_in_range => '6'
734   },
735   [
736      "`d` = 0",
737      "`d` > 0 AND `d` < '2010-03-03'",
738      "`d` >= '2010-03-03'",
739   ],
740);
741
742# Same as above: one zero row which we can select with the zero chunk.
743test_zero_row(
744   'dt',
745   {
746      min => '2010-03-01 02:01:00',
747      max => '2010-03-05 00:30:00',
748      rows_in_range => '6',
749   },
750   [
751      "`dt` = 0",
752      "`dt` > 0 AND `dt` < '2010-03-02 09:30:40'",
753      "`dt` >= '2010-03-02 09:30:40' AND `dt` < '2010-03-03 17:00:20'",
754      "`dt` >= '2010-03-03 17:00:20'",
755   ],
756);
757
758# #############################################################################
759# Issue 602: mk-table-checksum issue with invalid dates
760# #############################################################################
761$sb->load_file('master', 't/pt-table-checksum/samples/issue_602.sql');
762$t = $tp->parse( $tp->get_create_table($dbh, 'issue_602', 't') );
763%params = $c->get_range_statistics(
764   dbh        => $dbh,
765   db         => 'issue_602',
766   tbl        => 't',
767   chunk_col  => 'b',
768   tbl_struct => {
769      type_for   => { b => 'datetime' },
770      is_numeric => { b => 0          },
771   },
772);
773
774is_deeply(
775   \%params,
776   {
777      max => '2010-05-09 00:00:00',
778      min => '2010-04-30 00:00:00',
779      rows_in_range => '11',
780   },
781   "Ignores invalid min val, gets next valid min val"
782);
783
784throws_ok(
785   sub {
786      @chunks = $c->calculate_chunks(
787         dbh        => $dbh,
788         db         => 'issue_602',
789         tbl        => 't',
790         tbl_struct => $t,
791         chunk_col  => 'b',
792         chunk_size => '5',
793         %params,
794      )
795   },
796   qr//,
797   "No error with invalid min datetime (issue 602)"
798);
799
800# Like the test above but t2 has nothing but invalid rows.
801$t = $tp->parse( $tp->get_create_table($dbh, 'issue_602', 't2') );
802throws_ok(
803   sub {
804      $c->get_range_statistics(
805         dbh        => $dbh,
806         db         => 'issue_602',
807         tbl        => 't2',
808         chunk_col  => 'b',
809         tbl_struct => {
810            type_for   => { b => 'datetime' },
811            is_numeric => { b => 0          },
812         },
813      );
814   },
815   qr/Error finding a valid minimum value/,
816   "Dies if valid min value cannot be found"
817);
818
819# Try again with more tries: 6 instead of default 5.  Should
820# find a row this time.
821%params = $c->get_range_statistics(
822   dbh        => $dbh,
823   db         => 'issue_602',
824   tbl        => 't2',
825   chunk_col  => 'b',
826   tbl_struct => {
827      type_for   => { b => 'datetime' },
828      is_numeric => { b => 0          },
829   },
830   tries     => 6,
831);
832
833is_deeply(
834   \%params,
835   {
836      max => '2010-01-08 00:00:08',
837      min => '2010-01-07 00:00:07',
838      rows_in_range => 8,
839   },
840   "Gets valid min with enough tries"
841);
842
843
844# #############################################################################
845# Test issue 941 + issue 602
846# #############################################################################
847
848SKIP: {
849    skip "Requires MySQL < 5.7", 19 if ($sandbox_version ge '5.7');
850
851    $dbh->do("insert into issue_602.t values ('12', '0000-00-00 00:00:00')");
852
853    # Now we have:
854    # |   12 | 0000-00-00 00:00:00 |
855    # |   11 | 2010-00-09 00:00:00 |
856    # |   10 | 2010-04-30 00:00:00 |
857    # So min is a zero row.  If we don't want zero row, next min will be an
858    # invalid row, and we don't want that.  So we should get row "10" as min.
859
860    %params = $c->get_range_statistics(
861       dbh        => $dbh,
862       db         => 'issue_602',
863       tbl        => 't',
864       chunk_col  => 'b',
865       tbl_struct => {
866          type_for   => { b => 'datetime' },
867          is_numeric => { b => 0          },
868       },
869    );
870
871    is_deeply(
872       \%params,
873       {
874          min => '2010-04-30 00:00:00',
875          max => '2010-05-09 00:00:00',
876          rows_in_range => 12,
877       },
878       "Gets valid min after zero row"
879    );
880
881    # #############################################################################
882    # Test _validate_temporal_value() because it's magical.
883    # #############################################################################
884    my @invalid_t = (
885       '00:00:60',
886       '00:60:00',
887       '0000-00-00',
888       '2009-00-00',
889       '2009-13-00',
890       '0000-00-00 00:00:00',
891       '1000-00-00 00:00:00',
892       '2009-00-00 00:00:00',
893       '2009-13-00 00:00:00',
894       '2009-05-26 00:00:60',
895       '2009-05-26 00:60:00',
896       '2009-05-26 24:00:00',
897    );
898    foreach my $t ( @invalid_t ) {
899       my $res = TableChunker::_validate_temporal_value($dbh, $t);
900       is(
901          $res,
902          undef,
903          "$t is invalid"
904       );
905    }
906
907    my @valid_t = (
908       '00:00:01',
909       '1000-01-01',
910       '2009-01-01',
911       '1000-01-01 00:00:00',
912       '2009-01-01 00:00:00',
913       '2010-05-26 17:48:30',
914    );
915    foreach my $t ( @valid_t ) {
916       my $res = TableChunker::_validate_temporal_value($dbh, $t);
917       ok(
918          defined $res,
919          "$t is valid"
920       );
921    }
922}
923
924# #############################################################################
925# Test get_first_chunkable_column().
926# #############################################################################
927$t = $tp->parse( load_file('t/lib/samples/sakila.film.sql') );
928
929is_deeply(
930   [ $c->get_first_chunkable_column(tbl_struct=>$t) ],
931   [ 'film_id', 'PRIMARY' ],
932   "get_first_chunkable_column(), default column and index"
933);
934
935is_deeply(
936   [ $c->get_first_chunkable_column(
937      tbl_struct   => $t,
938      chunk_column => 'language_id',
939   ) ],
940   [ 'language_id', 'idx_fk_language_id' ],
941   "get_first_chunkable_column(), preferred column"
942);
943
944is_deeply(
945   [ $c->get_first_chunkable_column(
946      tbl_struct  => $t,
947      chunk_index => 'idx_fk_original_language_id',
948   ) ],
949   [ 'original_language_id', 'idx_fk_original_language_id' ],
950   "get_first_chunkable_column(), preferred index"
951);
952
953is_deeply(
954   [ $c->get_first_chunkable_column(
955      tbl_struct   => $t,
956      chunk_column => 'language_id',
957      chunk_index  => 'idx_fk_language_id',
958   ) ],
959   [ 'language_id', 'idx_fk_language_id' ],
960   "get_first_chunkable_column(), preferred column and index"
961);
962
963is_deeply(
964   [ $c->get_first_chunkable_column(
965      tbl_struct   => $t,
966      chunk_column => 'film_id',
967      chunk_index  => 'idx_fk_language_id',
968   ) ],
969   [ 'film_id', 'PRIMARY' ],
970   "get_first_chunkable_column(), bad preferred column and index"
971);
972
973$sb->load_file('master', "t/lib/samples/t1.sql", 'test');
974$t = $tp->parse( load_file('t/lib/samples/t1.sql') );
975
976is_deeply(
977   [ $c->get_first_chunkable_column(tbl_struct=>$t) ],
978   [undef, undef],
979   "get_first_chunkable_column(), no chunkable columns"
980);
981
982# char chunking ###############################################################
983$sb->load_file('master', "t/lib/samples/char-chunking/ascii.sql", 'test');
984$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'ascii') );
985
986is_deeply(
987   [ $c->find_chunk_columns(tbl_struct=>$t) ],
988   [ 0,
989     { column => 'i', index => 'PRIMARY' },
990     { column => 'c', index => 'c'       },
991   ],
992   "Finds character column as a chunkable column"
993);
994
995is_deeply(
996   [ $c->get_first_chunkable_column(tbl_struct=>$t) ],
997   ['i', 'PRIMARY'],
998   "get_first_chunkable_column(), prefers PK over char col"
999);
1000is_deeply(
1001   [ $c->get_first_chunkable_column(tbl_struct=>$t, chunk_column=>'c') ],
1002   ['c', 'c'],
1003   "get_first_chunkable_column(), char col as preferred chunk col"
1004);
1005is_deeply(
1006   [ $c->get_first_chunkable_column(tbl_struct=>$t, chunk_index=>'c') ],
1007   ['c', 'c'],
1008   "get_first_chunkable_column(), char col as preferred chunk index"
1009);
1010
1011%params = $c->get_range_statistics(
1012   dbh        => $dbh,
1013   db         => 'test',
1014   tbl        => 'ascii',
1015   chunk_col  => 'c',
1016   tbl_struct => $t,
1017);
1018is_deeply(
1019   \%params,
1020   {
1021      min           => '',
1022      max           => 'ZESUS!!!',
1023      rows_in_range => '142',
1024   },
1025   "Range stats on character column"
1026);
1027
1028# #############################################################################
1029# Issue 1082: mk-table-checksum dies on single-row zero-pk table
1030# #############################################################################
1031sub chunk_it {
1032   my ( %args ) = @_;
1033   my %params = $c->get_range_statistics(
1034      dbh        => $dbh,
1035      db         => $args{db},
1036      tbl        => $args{tbl},
1037      chunk_col  => $args{chunk_col},
1038      tbl_struct => $args{tbl_struct},
1039   );
1040   my @chunks = $c->calculate_chunks(
1041      dbh        => $dbh,
1042      db         => $args{db},
1043      tbl        => $args{tbl},
1044      chunk_col  => $args{chunk_col},
1045      tbl_struct => $args{tbl_struct},
1046      chunk_size => $args{chunk_size} || 100,
1047      zero_chunk => $args{zero_chunk},
1048      %params,
1049   );
1050   is_deeply(
1051      \@chunks,
1052      $args{chunks},
1053      $args{msg},
1054   );
1055}
1056
1057$dbh->do("alter table test.t1 add unique index (a)");
1058my (undef,$output) = $dbh->selectrow_array("show create table test.t1");
1059$t = $tp->parse($output);
1060is_deeply(
1061   [ $c->get_first_chunkable_column(tbl_struct=>$t) ],
1062   [qw(a a)],
1063   "test.t1 chunkable col"
1064);
1065
1066$dbh->do('insert into test.t1 values (null)');
1067chunk_it(
1068   dbh        => $dbh,
1069   db         => 'test',
1070   tbl        => 't1',
1071   chunk_col  => 'a',
1072   tbl_struct => $t,
1073   zero_chunk => 1,
1074   chunks     => [qw(1=1)],
1075   msg        => 'Single NULL row'
1076);
1077
1078$dbh->do('insert into test.t1 values (null), (null), (null)');
1079chunk_it(
1080   dbh        => $dbh,
1081   db         => 'test',
1082   tbl        => 't1',
1083   chunk_col  => 'a',
1084   tbl_struct => $t,
1085   zero_chunk => 1,
1086   chunks     => [qw(1=1)],
1087   msg        => 'Several NULL rows'
1088);
1089
1090$dbh->do('truncate table test.t1');
1091$dbh->do('insert into test.t1 values (0)');
1092chunk_it(
1093   dbh        => $dbh,
1094   db         => 'test',
1095   tbl        => 't1',
1096   chunk_col  => 'a',
1097   tbl_struct => $t,
1098   zero_chunk => 1,
1099   chunks     => [qw(1=1)],
1100   msg        => 'Single zero row'
1101);
1102
1103# #############################################################################
1104# Issue 568: char chunking
1105# #############################################################################
1106sub count_rows {
1107   my ( $db_tbl, $col, @chunks ) = @_;
1108   my $total_rows = 0;
1109   foreach my $chunk ( @chunks ) {
1110      my $sql    = "SELECT $col FROM $db_tbl WHERE ($chunk) ORDER BY $col";
1111      my $rows   = $dbh->selectall_arrayref($sql);
1112      my $n_rows = scalar @$rows;
1113      $total_rows += $n_rows;
1114   }
1115   return $total_rows;
1116}
1117
1118SKIP: {
1119   skip 'Sandbox master does not have the sakila database', 1
1120      unless @{$dbh->selectcol_arrayref("SHOW DATABASES LIKE 'sakila'")};
1121
1122   my @chunks;
1123
1124   $t = $tp->parse( $tp->get_create_table($dbh, 'sakila', 'city') );
1125   @chunks = $c->calculate_chunks(
1126      tbl_struct    => $t,
1127      chunk_col     => 'city',
1128      min           => 'A Corua (La Corua)',
1129      max           => 'Ziguinchor',
1130      rows_in_range => 428,
1131      chunk_size    => 20,
1132      dbh           => $dbh,
1133      db            => 'sakila',
1134      tbl           => 'city',
1135   );
1136   is_deeply(
1137      \@chunks,
1138      [
1139         "`city` < 'C'",
1140         "`city` >= 'C' AND `city` < 'D'",
1141         "`city` >= 'D' AND `city` < 'E'",
1142         "`city` >= 'E' AND `city` < 'F'",
1143         "`city` >= 'F' AND `city` < 'G'",
1144         "`city` >= 'G' AND `city` < 'H'",
1145         "`city` >= 'H' AND `city` < 'I'",
1146         "`city` >= 'I' AND `city` < 'J'",
1147         "`city` >= 'J' AND `city` < 'K'",
1148         "`city` >= 'K' AND `city` < 'L'",
1149         "`city` >= 'L' AND `city` < 'M'",
1150         "`city` >= 'M' AND `city` < 'N'",
1151         "`city` >= 'N' AND `city` < 'O'",
1152         "`city` >= 'O' AND `city` < 'P'",
1153         "`city` >= 'P' AND `city` < 'Q'",
1154         "`city` >= 'Q' AND `city` < 'R'",
1155         "`city` >= 'R' AND `city` < 'S'",
1156         "`city` >= 'S' AND `city` < 'T'",
1157         "`city` >= 'T' AND `city` < 'U'",
1158         "`city` >= 'U' AND `city` < 'V'",
1159         "`city` >= 'V' AND `city` < 'W'",
1160         "`city` >= 'W' AND `city` < 'X'",
1161         "`city` >= 'X' AND `city` < 'Y'",
1162         "`city` >= 'Y' AND `city` < 'Z'",
1163         "`city` >= 'Z'",
1164      ],
1165      "Char chunk sakila.city.city"
1166   );
1167
1168   my $n_rows = count_rows("sakila.city", "city", @chunks);
1169   is(
1170      $n_rows,
1171      600,
1172      "sakila.city.city chunks select exactly 600 rows"
1173   );
1174}
1175
1176$sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test');
1177$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'world_city') );
1178%params = $c->get_range_statistics(
1179   dbh        => $dbh,
1180   db         => 'test',
1181   tbl        => 'world_city',
1182   chunk_col  => 'name',
1183   tbl_struct => $t,
1184   chunk_size => '500',
1185);
1186@chunks = $c->calculate_chunks(
1187   dbh           => $dbh,
1188   db            => 'test',
1189   tbl           => 'world_city',
1190   tbl_struct    => $t,
1191   chunk_col     => 'name',
1192   chunk_size    => 500,
1193   %params,
1194);
1195ok(
1196   @chunks >= 9,
1197   "At least 9 char chunks on test.world_city.name"
1198) or print STDERR Dumper(\@chunks);
1199
1200SKIP: {
1201   skip "Behaves differently on 5.5, code is a zombie, don't care",
1202   1, $sandbox_version ge '5.1';
1203   my $n_rows = count_rows("test.world_city", "name", @chunks);
1204   is(
1205      $n_rows,
1206      4079,
1207      "test.world_city.name chunks select exactly 4,079 rows"
1208   );
1209}
1210
1211# #############################################################################
1212# Bug #897758: TableChunker dies from an uninit value
1213# #############################################################################
1214
1215@chunks = $c->calculate_chunks(
1216   dbh           => $dbh,
1217   db            => 'test',
1218   tbl           => 'world_city',
1219   tbl_struct    => $t,
1220   chunk_col     => 'name',
1221   chunk_size    => 500,
1222   %params,
1223   chunk_range   => undef,
1224);
1225
1226ok( @chunks, "calculate_chunks picks a sane default for chunk_range" );
1227
1228# #############################################################################
1229# Issue 1182: mk-table-checksum not respecting chunk size
1230# #############################################################################
1231SKIP: {
1232   skip 'Sandbox master does not have the sakila database', 1
1233      unless @{$dbh->selectcol_arrayref("SHOW DATABASES LIKE 'sakila'")};
1234
1235   my @chunks;
1236   $t = $tp->parse( load_file('t/lib/samples/sakila.film.sql') );
1237
1238   @chunks = $c->calculate_chunks(
1239      tbl_struct    => $t,
1240      chunk_col     => 'film_id',
1241      min           => 0,
1242      max           => 99,
1243      rows_in_range => 100,
1244      chunk_size    => 30,
1245      dbh           => $dbh,
1246      db            => 'sakila',
1247      tbl           => 'film',
1248      chunk_range   => 'openclosed',
1249   );
1250   is_deeply(
1251      \@chunks,
1252      [
1253         "`film_id` < '30'",
1254         "`film_id` >= '30' AND `film_id` < '60'",
1255         "`film_id` >= '60' AND `film_id` < '90'",
1256         "`film_id` >= '90' AND `film_id` <= '99'",
1257      ],
1258      'openclosed chunk range adds AND chunk_col <= max (issue 1182)'
1259   );
1260};
1261
1262# ############################################################################
1263# Bug 821673: pt-table-checksum doesn't included --where in min max queries
1264# ############################################################################
1265$sb->load_file('master', "t/pt-table-checksum/samples/where01.sql");
1266$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'checksum_test') );
1267%params = $c->get_range_statistics(
1268   dbh        => $dbh,
1269   db         => 'test',
1270   tbl        => 'checksum_test',
1271   chunk_col  => 'id',
1272   tbl_struct => $t,
1273   where      => "date = '2011-03-03'",
1274);
1275is(
1276   $params{min},
1277   11,
1278   'MIN int range stats with --where (bug 821673)'
1279);
1280is(
1281   $params{max},
1282   15,
1283   'MAX int range stats with --where (bug 821673)'
1284);
1285
1286# char chunking
1287$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
1288$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'checksum_test') );
1289%params = $c->get_range_statistics(
1290   dbh        => $dbh,
1291   db         => 'test',
1292   tbl        => 'checksum_test',
1293   chunk_col  => 'id',
1294   tbl_struct => $t,
1295   where      => "date = '2011-03-03'",
1296);
1297is(
1298   $params{min},
1299   'Apple',
1300   'MIN char range stats with --where (bug 821673)'
1301);
1302is(
1303   $params{max},
1304   'raspberry',
1305   'MAX char range stats with --where (bug 821673)'
1306);
1307
1308# It's difficult to construct a char chunk test where WHERE will matter.
1309#@chunks = $c->calculate_chunks(
1310#   dbh           => $dbh,
1311#   db            => 'test',
1312#   tbl           => 'checksum_test',
1313#   tbl_struct    => $t,
1314#   chunk_col     => 'id',
1315#   chunk_size    => 5,
1316#   where         => "date = '2011-03-03'",
1317#   %params,
1318#);
1319
1320# #############################################################################
1321# Bug 967451: Char chunking doesn't quote column name
1322# #############################################################################
1323$sb->load_file('master', "t/lib/samples/char-chunking/ascii.sql", 'test');
1324$dbh->do("ALTER TABLE test.ascii CHANGE COLUMN c `key` char(64) NOT NULL");
1325$t = $tp->parse( $tp->get_create_table($dbh, 'test', 'ascii') );
1326
1327%params = $c->get_range_statistics(
1328   dbh        => $dbh,
1329   db         => 'test',
1330   tbl        => 'ascii',
1331   chunk_col  => 'key',
1332   tbl_struct => $t,
1333);
1334is_deeply(
1335   \%params,
1336   {
1337      min           => '',
1338      max           => 'ZESUS!!!',
1339      rows_in_range => '142',
1340   },
1341   "Range stats for `key` col (bug 967451)"
1342);
1343
1344@chunks = $c->calculate_chunks(
1345   dbh        => $dbh,
1346   db         => 'test',
1347   tbl        => 'ascii',
1348   tbl_struct => $t,
1349   chunk_col  => 'key',
1350   chunk_size => '50',
1351   %params,
1352);
1353is_deeply(
1354   \@chunks,
1355   [
1356      "`key` < '5'",
1357      "`key` >= '5' AND `key` < 'I'",
1358      "`key` >= 'I'",
1359   ],
1360   "Caclulate chunks for `key` col (bug 967451)"
1361);
1362
1363# ############################################################################# ">
1364# base_count fails on n = 1000, base = 10
1365# https://bugs.launchpad.net/percona-toolkit/+bug/1028710
1366# #############################################################################
1367my $res = TableChunker->base_count(
1368   count_to => 1000,
1369   base     => 10,
1370   symbols  => ["a".."z"],
1371);
1372
1373is(
1374   $res,
1375   "baaa",
1376   "base_count's floor()s account for floating point arithmetics",
1377);
1378
1379# #############################################################################
1380# Bug 1034717: Divison by zero error when all columns tsart with the same char
1381# https://bugs.launchpad.net/percona-toolkit/+bug/1034717
1382# #############################################################################
1383$sb->load_file('master', "t/lib/samples/bug_1034717.sql", 'test');
1384$t = $tp->parse( $tp->get_create_table($dbh, 'bug_1034717', 'table1') );
1385
1386%params = $c->get_range_statistics(
1387   dbh        => $dbh,
1388   db         => 'bug_1034717',
1389   tbl        => 'table1',
1390   chunk_col  => 'field1',
1391   tbl_struct => $t,
1392);
1393
1394local $EVAL_ERROR;
1395eval {
1396   $c->calculate_chunks(
1397      dbh        => $dbh,
1398      db         => 'bug_1034717',
1399      tbl        => 'table1',
1400      tbl_struct => $t,
1401      chunk_col  => 'field1',
1402      chunk_size => '50',
1403      %params,
1404   );
1405};
1406like(
1407   $EVAL_ERROR,
1408   qr/^\QCannot chunk table `bug_1034717`.`table1` using the character column field1, most likely because all values start with the /,
1409   "Bug 1034717: Catches the base == 1 case and dies"
1410);
1411
1412# #############################################################################
1413# Done.
1414# #############################################################################
1415$sb->wipe_clean($dbh);
1416ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
1417
1418done_testing;
1419