1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 // it used to be the case that we copied the left and right keys of a
40 // range to be prelocked but never freed them, this test checks that they
41 // are freed (as of this time, this happens in ftnode_fetch_extra::destroy())
42 
43 #include "test.h"
44 
45 
46 #include <ft-cachetable-wrappers.h>
47 #include <ft-flusher.h>
48 
49 // Some constants to be used in calculations below
50 static const int nodesize = 1024; // Target max node size
51 static const int eltsize = 64;    // Element size (for most elements)
52 static const int bnsize = 256;    // Target basement node size
53 static const int eltsperbn = 256 / 64;  // bnsize / eltsize
54 static const int keylen = sizeof(long);
55 // vallen is eltsize - keylen and leafentry overhead
56 static const int vallen = 64 - sizeof(long) - (sizeof(((LEAFENTRY)NULL)->type)  // overhead from LE_CLEAN_MEMSIZE
57                                                +sizeof(uint32_t)
58                                                +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
59 #define dummy_msn_3884 ((MSN) { (uint64_t) 3884 * MIN_MSN.msn })
60 
61 static TOKUTXN const null_txn = 0;
62 static const char *fname = TOKU_TEST_FILENAME;
63 
64 static void
le_add_to_bn(bn_data * bn,uint32_t idx,const char * key,int keysize,const char * val,int valsize)65 le_add_to_bn(bn_data* bn, uint32_t idx, const  char *key, int keysize, const char *val, int valsize)
66 {
67     LEAFENTRY r = NULL;
68     uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
69     void *maybe_free = nullptr;
70     bn->get_space_for_insert(
71         idx,
72         key,
73         keysize,
74         size_needed,
75         &r,
76         &maybe_free
77         );
78     if (maybe_free) {
79         toku_free(maybe_free);
80     }
81     resource_assert(r);
82     r->type = LE_CLEAN;
83     r->u.clean.vallen = valsize;
84     memcpy(r->u.clean.val, val, valsize);
85 }
86 
87 
88 static size_t
insert_dummy_value(FTNODE node,int bn,long k,uint32_t idx)89 insert_dummy_value(FTNODE node, int bn, long k, uint32_t idx)
90 {
91     char val[vallen];
92     memset(val, k, sizeof val);
93     le_add_to_bn(BLB_DATA(node, bn), idx,(char *) &k, keylen, val, vallen);
94     return LE_CLEAN_MEMSIZE(vallen) + keylen + sizeof(uint32_t);
95 }
96 
97 // TODO: this stuff should be in ft/ft-ops.cc, not in a test.
98 // it makes it incredibly hard to add things to an ftnode
99 // when tests hard code initializations...
100 static void
setup_ftnode_header(struct ftnode * node)101 setup_ftnode_header(struct ftnode *node)
102 {
103     node->flags = 0x11223344;
104     node->blocknum.b = 20;
105     node->layout_version = FT_LAYOUT_VERSION;
106     node->layout_version_original = FT_LAYOUT_VERSION;
107     node->height = 0;
108     node->set_dirty();
109     node->oldest_referenced_xid_known = TXNID_NONE;
110 }
111 
112 static void
setup_ftnode_partitions(struct ftnode * node,int n_children,const MSN msn,size_t maxbnsize UU ())113 setup_ftnode_partitions(struct ftnode *node, int n_children, const MSN msn, size_t maxbnsize UU())
114 {
115     node->n_children = n_children;
116     node->max_msn_applied_to_node_on_disk = msn;
117     MALLOC_N(node->n_children, node->bp);
118     for (int bn = 0; bn < node->n_children; ++bn) {
119         BP_STATE(node, bn) = PT_AVAIL;
120         set_BLB(node, bn, toku_create_empty_bn());
121         BLB_MAX_MSN_APPLIED(node, bn) = msn;
122     }
123     node->pivotkeys.create_empty();
124 }
125 
126 static void
verify_basement_node_msns(FTNODE node,MSN expected)127 verify_basement_node_msns(FTNODE node, MSN expected)
128 {
129     for(int i = 0; i < node->n_children; ++i) {
130         assert(expected.msn == BLB_MAX_MSN_APPLIED(node, i).msn);
131     }
132 }
133 
134 //
135 // Maximum node size according to the FT: 1024 (expected node size after split)
136 // Maximum basement node size: 256
137 // Actual node size before split: 2048
138 // Actual basement node size before split: 256
139 // Start by creating 8 basements, then split node, expected result of two nodes with 4 basements each.
140 static void
test_split_on_boundary(void)141 test_split_on_boundary(void)
142 {
143     struct ftnode sn;
144 
145     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
146 
147     int r;
148 
149     setup_ftnode_header(&sn);
150     const int nelts = 2 * nodesize / eltsize;
151     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize, dummy_msn_3884, bnsize);
152     for (int bn = 0; bn < sn.n_children; ++bn) {
153         long k;
154         for (int i = 0; i < eltsperbn; ++i) {
155             k = bn * eltsperbn + i;
156             insert_dummy_value(&sn, bn, k, i);
157         }
158         if (bn < sn.n_children - 1) {
159             DBT pivotkey;
160             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
161         }
162     }
163 
164     unlink(fname);
165     CACHETABLE ct;
166     FT_HANDLE ft;
167     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
168     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
169 
170     FTNODE nodea, nodeb;
171     DBT splitk;
172     // if we haven't done it right, we should hit the assert in the top of move_leafentries
173     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
174 
175     verify_basement_node_msns(nodea, dummy_msn_3884);
176     verify_basement_node_msns(nodeb, dummy_msn_3884);
177 
178     toku_unpin_ftnode(ft->ft, nodeb);
179     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
180     toku_cachetable_close(&ct);
181 
182     toku_destroy_dbt(&splitk);
183     toku_destroy_ftnode_internals(&sn);
184 }
185 
186 //
187 // Maximum node size according to the FT: 1024 (expected node size after split)
188 // Maximum basement node size: 256 (except the last)
189 // Actual node size before split: 4095
190 // Actual basement node size before split: 256 (except the last, of size 2K)
191 //
192 // Start by creating 9 basements, the first 8 being of 256 bytes each,
193 // and the last with one row of size 2047 bytes.  Then split node,
194 // expected result is two nodes, one with 8 basement nodes and one
195 // with 1 basement node.
196 static void
test_split_with_everything_on_the_left(void)197 test_split_with_everything_on_the_left(void)
198 {
199     struct ftnode sn;
200 
201     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
202 
203     int r;
204 
205     setup_ftnode_header(&sn);
206     const int nelts = 2 * nodesize / eltsize;
207     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize + 1, dummy_msn_3884, 2 * nodesize);
208     size_t big_val_size = 0;
209     for (int bn = 0; bn < sn.n_children; ++bn) {
210         long k;
211         if (bn < sn.n_children - 1) {
212             for (int i = 0; i < eltsperbn; ++i) {
213                 k = bn * eltsperbn + i;
214                 big_val_size += insert_dummy_value(&sn, bn, k, i);
215             }
216             DBT pivotkey;
217             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
218         } else {
219             k = bn * eltsperbn;
220             // we want this to be as big as the rest of our data and a
221             // little bigger, so the halfway mark will land inside this
222             // value and it will be split to the left
223             big_val_size += 100;
224             char * XMALLOC_N(big_val_size, big_val);
225             memset(big_val, k, big_val_size);
226             le_add_to_bn(BLB_DATA(&sn, bn), 0, (char *) &k, keylen, big_val, big_val_size);
227             toku_free(big_val);
228         }
229     }
230 
231     unlink(fname);
232     CACHETABLE ct;
233     FT_HANDLE ft;
234     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
235     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
236 
237     FTNODE nodea, nodeb;
238     DBT splitk;
239     // if we haven't done it right, we should hit the assert in the top of move_leafentries
240     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
241 
242     toku_unpin_ftnode(ft->ft, nodeb);
243     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
244     toku_cachetable_close(&ct);
245 
246     toku_destroy_dbt(&splitk);
247     toku_destroy_ftnode_internals(&sn);
248 }
249 
250 
251 //
252 // Maximum node size according to the FT: 1024 (expected node size after split)
253 // Maximum basement node size: 256 (except the last)
254 // Actual node size before split: 4095
255 // Actual basement node size before split: 256 (except the last, of size 2K)
256 //
257 // Start by creating 9 basements, the first 8 being of 256 bytes each,
258 // and the last with one row of size 2047 bytes.  Then split node,
259 // expected result is two nodes, one with 8 basement nodes and one
260 // with 1 basement node.
261 static void
test_split_on_boundary_of_last_node(void)262 test_split_on_boundary_of_last_node(void)
263 {
264     struct ftnode sn;
265 
266     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
267 
268     int r;
269 
270     setup_ftnode_header(&sn);
271     const int nelts = 2 * nodesize / eltsize;
272     const size_t maxbnsize = 2 * nodesize;
273     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize + 1, dummy_msn_3884, maxbnsize);
274     size_t big_val_size = 0;
275     for (int bn = 0; bn < sn.n_children; ++bn) {
276         long k;
277         if (bn < sn.n_children - 1) {
278             for (int i = 0; i < eltsperbn; ++i) {
279                 k = bn * eltsperbn + i;
280                 big_val_size += insert_dummy_value(&sn, bn, k, i);
281             }
282             DBT pivotkey;
283             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
284         } else {
285             k = bn * eltsperbn;
286             // we want this to be slightly smaller than all the rest of
287             // the data combined, so the halfway mark will be just to its
288             // left and just this element will end up on the right of the split
289             big_val_size -= 1 + (sizeof(((LEAFENTRY)NULL)->type)  // overhead from LE_CLEAN_MEMSIZE
290                                  +sizeof(uint32_t) // sizeof(keylen)
291                                  +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
292             invariant(big_val_size <= maxbnsize);
293             char * XMALLOC_N(big_val_size, big_val);
294             memset(big_val, k, big_val_size);
295             le_add_to_bn(BLB_DATA(&sn, bn), 0, (char *) &k, keylen, big_val, big_val_size);
296             toku_free(big_val);
297         }
298     }
299 
300     unlink(fname);
301     CACHETABLE ct;
302     FT_HANDLE ft;
303     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
304     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
305 
306     FTNODE nodea, nodeb;
307     DBT splitk;
308     // if we haven't done it right, we should hit the assert in the top of move_leafentries
309     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
310 
311     toku_unpin_ftnode(ft->ft, nodeb);
312     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
313     toku_cachetable_close(&ct);
314 
315     toku_destroy_dbt(&splitk);
316     toku_destroy_ftnode_internals(&sn);
317 }
318 
319 static void
test_split_at_begin(void)320 test_split_at_begin(void)
321 {
322     struct ftnode sn;
323 
324     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
325 
326     int r;
327 
328     setup_ftnode_header(&sn);
329     const int nelts = 2 * nodesize / eltsize;
330     const size_t maxbnsize = 2 * nodesize;
331     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize, dummy_msn_3884, maxbnsize);
332     size_t totalbytes = 0;
333     for (int bn = 0; bn < sn.n_children; ++bn) {
334         long k;
335         for (int i = 0; i < eltsperbn; ++i) {
336             k = bn * eltsperbn + i;
337             if (bn == 0 && i == 0) {
338                 // we'll add the first element later when we know how big
339                 // to make it
340                 continue;
341             }
342             totalbytes += insert_dummy_value(&sn, bn, k, i-1);
343         }
344         if (bn < sn.n_children - 1) {
345             DBT pivotkey;
346             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
347         }
348     }
349     {  // now add the first element
350         int bn = 0; long k = 0;
351         // add a few bytes so the halfway mark is definitely inside this
352         // val, which will make it go to the left and everything else to
353         // the right
354         char val[totalbytes + 3];
355         invariant(totalbytes + 3 <= maxbnsize);
356         memset(val, k, sizeof val);
357         le_add_to_bn(BLB_DATA(&sn, bn), 0, (char *) &k, keylen, val, totalbytes + 3);
358         totalbytes += LE_CLEAN_MEMSIZE(totalbytes + 3) + keylen + sizeof(uint32_t);
359     }
360 
361     unlink(fname);
362     CACHETABLE ct;
363     FT_HANDLE ft;
364     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
365     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
366 
367     FTNODE nodea, nodeb;
368     DBT splitk;
369     // if we haven't done it right, we should hit the assert in the top of move_leafentries
370     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
371 
372     toku_unpin_ftnode(ft->ft, nodeb);
373     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
374     toku_cachetable_close(&ct);
375 
376     toku_destroy_dbt(&splitk);
377     toku_destroy_ftnode_internals(&sn);
378 }
379 
380 static void
test_split_at_end(void)381 test_split_at_end(void)
382 {
383     struct ftnode sn;
384 
385     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
386 
387     int r;
388 
389     setup_ftnode_header(&sn);
390     const int nelts = 2 * nodesize / eltsize;
391     const size_t maxbnsize = 2 * nodesize;
392     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize, dummy_msn_3884, maxbnsize);
393     long totalbytes = 0;
394     int bn, i;
395     for (bn = 0; bn < sn.n_children; ++bn) {
396         long k;
397         for (i = 0; i < eltsperbn; ++i) {
398             k = bn * eltsperbn + i;
399             if (bn == sn.n_children - 1 && i == eltsperbn - 1) {
400                 // add a few bytes so the halfway mark is definitely inside this
401                 // val, which will make it go to the left and everything else to
402                 // the right, which is nothing, so we actually split at the very end
403                 char val[totalbytes + 3];
404                 invariant(totalbytes + 3 <= (long) maxbnsize);
405                 memset(val, k, sizeof val);
406                 le_add_to_bn(BLB_DATA(&sn, bn), i, (char *) &k, keylen, val, totalbytes + 3);
407                 totalbytes += LE_CLEAN_MEMSIZE(totalbytes + 3) + keylen + sizeof(uint32_t);
408             } else {
409                 totalbytes += insert_dummy_value(&sn, bn, k, i);
410             }
411         }
412         if (bn < sn.n_children - 1) {
413             DBT pivotkey;
414             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
415         }
416     }
417 
418     unlink(fname);
419     CACHETABLE ct;
420     FT_HANDLE ft;
421     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
422     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
423 
424     FTNODE nodea, nodeb;
425     DBT splitk;
426     // if we haven't done it right, we should hit the assert in the top of move_leafentries
427     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
428 
429     toku_unpin_ftnode(ft->ft, nodeb);
430     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
431     toku_cachetable_close(&ct);
432 
433     toku_destroy_dbt(&splitk);
434     toku_destroy_ftnode_internals(&sn);
435 }
436 
437 // Maximum node size according to the FT: 1024 (expected node size after split)
438 // Maximum basement node size: 256
439 // Actual node size before split: 2048
440 // Actual basement node size before split: 256
441 // Start by creating 9 basements, then split node.
442 // Expected result of two nodes with 5 basements each.
443 static void
test_split_odd_nodes(void)444 test_split_odd_nodes(void)
445 {
446     struct ftnode sn;
447 
448     int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO);
449     assert(fd >= 0);
450 
451     int r;
452 
453     setup_ftnode_header(&sn);
454     // This will give us 9 children.
455     const int nelts = 2 * (nodesize + 128) / eltsize;
456     setup_ftnode_partitions(&sn, nelts * eltsize / bnsize, dummy_msn_3884, bnsize);
457     for (int bn = 0; bn < sn.n_children; ++bn) {
458         long k;
459         for (int i = 0; i < eltsperbn; ++i) {
460             k = bn * eltsperbn + i;
461             insert_dummy_value(&sn, bn, k, i);
462         }
463         if (bn < sn.n_children - 1) {
464             DBT pivotkey;
465             sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn);
466         }
467     }
468 
469     unlink(fname);
470     CACHETABLE ct;
471     FT_HANDLE ft;
472     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
473     r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
474 
475     FTNODE nodea, nodeb;
476     DBT splitk;
477     // if we haven't done it right, we should hit the assert in the top of move_leafentries
478     ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL);
479 
480     verify_basement_node_msns(nodea, dummy_msn_3884);
481     verify_basement_node_msns(nodeb, dummy_msn_3884);
482 
483     toku_unpin_ftnode(ft->ft, nodeb);
484     r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0);
485     toku_cachetable_close(&ct);
486 
487     toku_destroy_dbt(&splitk);
488     toku_destroy_ftnode_internals(&sn);
489 }
490 
491 int
test_main(int argc,const char * argv[])492 test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
493 
494     test_split_on_boundary();
495     test_split_with_everything_on_the_left();
496     test_split_on_boundary_of_last_node();
497     test_split_at_begin();
498     test_split_at_end();
499     test_split_odd_nodes();
500 
501     return 0;
502 }
503