1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 ////////////////////////////////////////////////////////////////////
40 // ftverify - Command line tool that checks the validity of a given
41 // fractal tree file, one block at a time.
42 ////////////////////////////////////////////////////////////////////
43 
44 #include "portability/toku_assert.h"
45 #include "portability/toku_list.h"
46 #include "portability/toku_portability.h"
47 
48 #include "ft/serialize/block_allocator.h"
49 #include "ft/ft-internal.h"
50 #include "ft/serialize/ft-serialize.h"
51 #include "ft/serialize/ft_layout_version.h"
52 #include "ft/serialize/ft_node-serialize.h"
53 #include "ft/node.h"
54 #include "ft/serialize/rbuf.h"
55 #include "ft/serialize/sub_block.h"
56 #include "util/threadpool.h"
57 
58 #include <fcntl.h>
59 #include <math.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <sys/stat.h>
63 #include <sys/types.h>
64 #include <sysexits.h>
65 #include <unistd.h>
66 
// Processor count cached by main(); used to size the thread pool.
static int num_cores = 0;

// Thread pool created and destroyed by main().
static struct toku_thread_pool *ft_pool = NULL;

// Stream opened by main() for writing on the <logfile> argument.
static FILE *outf;

// Progress is printed by report() every `pct` percent of the total block
// count.  main() overrides the default from the optional third argument.
static double pct = 0.5;
71 
72 // Struct for reporting sub block stats.
73 struct verify_block_extra {
74     BLOCKNUM b;
75     int n_sub_blocks;
76     uint32_t header_length;
77     uint32_t calc_xsum;
78     uint32_t stored_xsum;
79     bool header_valid;
80     bool sub_blocks_valid;
81     struct sub_block_info *sub_block_results;
82 };
83 
84 // Initialization function for the sub block stats.
85 static void
init_verify_block_extra(BLOCKNUM b,struct verify_block_extra * e)86 init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e)
87 {
88     static const struct verify_block_extra default_vbe =
89     {
90         .b = { 0 },
91         .n_sub_blocks = 0,
92         .header_length = 0,
93         .calc_xsum = 0,
94         .stored_xsum = 0,
95         .header_valid = true,
96         .sub_blocks_valid = true,
97         .sub_block_results = NULL
98     };
99     *e = default_vbe;
100     e->b = b;
101 }
102 
103 // Reports percentage of completed blocks.
104 static void
report(int64_t blocks_done,int64_t blocks_failed,int64_t total_blocks)105 report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks)
106 {
107     int64_t blocks_per_report = llrint(pct * total_blocks / 100.0);
108     if (blocks_per_report < 1) {
109         blocks_per_report = 1;
110     }
111     if (blocks_done % blocks_per_report == 0) {
112         double pct_actually_done = (100.0 * blocks_done) / total_blocks;
113         printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n",
114                pct_actually_done, blocks_done, blocks_failed);
115         fflush(stdout);
116     }
117 }
118 
119 // Helper function to deserialize one of the two headers for the ft
120 // we are checking.
121 static void
deserialize_headers(int fd,struct ft ** h1p,struct ft ** h2p)122 deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
123 {
124     struct rbuf rb_0;
125     struct rbuf rb_1;
126     uint64_t checkpoint_count_0;
127     uint64_t checkpoint_count_1;
128     LSN checkpoint_lsn_0;
129     LSN checkpoint_lsn_1;
130     uint32_t version_0, version_1;
131     bool h0_acceptable = false;
132     bool h1_acceptable = false;
133     int r0, r1;
134     int r;
135 
136     {
137         toku_off_t header_0_off = 0;
138         r0 = deserialize_ft_from_fd_into_rbuf(
139             fd,
140             header_0_off,
141             &rb_0,
142             &checkpoint_count_0,
143             &checkpoint_lsn_0,
144             &version_0
145             );
146         if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) {
147             h0_acceptable = true;
148         }
149     }
150     {
151         toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
152         r1 = deserialize_ft_from_fd_into_rbuf(
153             fd,
154             header_1_off,
155             &rb_1,
156             &checkpoint_count_1,
157             &checkpoint_lsn_1,
158             &version_1
159             );
160         if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) {
161             h1_acceptable = true;
162         }
163     }
164 
165     // If either header is too new, the dictionary is unreadable
166     if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
167         fprintf(stderr, "This dictionary was created with a version of PerconaFT that is too new.  Aborting.\n");
168         abort();
169     }
170     if (h0_acceptable) {
171         printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn);
172         r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0);
173 
174 	if (r != 0) {
175 	    printf("---Header Error----\n");
176 	}
177 
178     } else {
179         *h1p = NULL;
180     }
181     if (h1_acceptable) {
182         printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn);
183         r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1);
184 	if (r != 0) {
185 	    printf("---Header Error----\n");
186 	}
187     } else {
188         *h2p = NULL;
189     }
190 
191     if (rb_0.buf) toku_free(rb_0.buf);
192     if (rb_1.buf) toku_free(rb_1.buf);
193 }
194 
// State shared between check_block_table() and the check_block() callback
// while a block table is being walked.
struct check_block_table_extra {
    // Descriptor of the dictionary file the blocks are read from.
    int fd;
    // Blocks visited so far.
    int64_t blocks_done;
    // Visited blocks that had at least one failed check.
    int64_t blocks_failed;
    // Number of blocks expected to be visited.
    int64_t total_blocks;
    // Header whose block table is being walked.
    struct ft *h;
};
201 
202 // Check non-upgraded (legacy) node.
203 // NOTE: These nodes have less checksumming than more
204 // recent nodes.  This effectively means that we are
205 // skipping over these nodes.
206 static int
check_old_node(FTNODE node,struct rbuf * rb,int version)207 check_old_node(FTNODE node, struct rbuf *rb, int version)
208 {
209     int r = 0;
210     read_legacy_node_info(node, rb, version);
211     // For version 14 nodes, advance the buffer to the end
212     // and verify the checksum.
213     if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) {
214         // Advance the buffer to the end.
215         rb->ndone = rb->size - 4;
216         r = check_legacy_end_checksum(rb);
217     }
218 
219     return r;
220 }
221 
222 // Read, decompress, and check the given block.
223 static int
check_block(BLOCKNUM blocknum,int64_t UU (blocksize),int64_t UU (address),void * extra)224 check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra)
225 {
226     int r = 0;
227     int failure = 0;
228     struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra);
229     int fd = cbte->fd;
230     FT ft = cbte->h;
231 
232     struct verify_block_extra be;
233     init_verify_block_extra(blocknum, &be);
234 
235     // Let's read the block off of disk and fill a buffer with that
236     // block.
237     struct rbuf rb = RBUF_INITIALIZER;
238     read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb);
239 
240     // Allocate the node.
241     FTNODE XMALLOC(node);
242 
243     initialize_ftnode(node, blocknum);
244 
245     r = read_and_check_magic(&rb);
246     if (r == DB_BADFORMAT) {
247         printf(" Magic failed.\n");
248         failure++;
249     }
250 
251     r = read_and_check_version(node, &rb);
252     if (r != 0) {
253        	printf(" Version check failed.\n");
254         failure++;
255     }
256 
257     int version = node->layout_version_read_from_disk;
258 
259       ////////////////////////////
260      // UPGRADE FORK GOES HERE //
261     ////////////////////////////
262 
263     // Check nodes before major layout changes in version 15.
264     // All newer versions should follow the same layout, for now.
265     // This predicate would need to be changed if the layout
266     // of the nodes on disk does indeed change in the future.
267     if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES)
268     {
269         struct rbuf nrb;
270         // Use old decompression method for legacy nodes.
271         r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum);
272         if (r != 0) {
273             failure++;
274             goto cleanup;
275         }
276 
277         // Check the end-to-end checksum.
278         r = check_old_node(node, &nrb, version);
279         if (r != 0) {
280             failure++;
281         }
282         goto cleanup;
283     }
284 
285     read_node_info(node, &rb, version);
286 
287     FTNODE_DISK_DATA ndd;
288     allocate_and_read_partition_offsets(node, &rb, &ndd);
289 
290     r = check_node_info_checksum(&rb);
291     if (r == TOKUDB_BAD_CHECKSUM) {
292        	printf(" Node info checksum failed.\n");
293         failure++;
294     }
295 
296     // Get the partition info sub block.
297     struct sub_block sb;
298     sub_block_init(&sb);
299     r = read_compressed_sub_block(&rb, &sb);
300     if (r != 0) {
301        	printf(" Partition info checksum failed.\n");
302         failure++;
303     }
304 
305     just_decompress_sub_block(&sb);
306 
307     // If we want to inspect the data inside the partitions, we need
308     // to call setup_ftnode_partitions(node, bfe, true)
309 
310     // <CER> TODO: Create function for this.
311     // Using the node info, decompress all the keys and pivots to
312     // detect any corruptions.
313     for (int i = 0; i < node->n_children; ++i) {
314         uint32_t curr_offset = BP_START(ndd,i);
315         uint32_t curr_size   = BP_SIZE(ndd,i);
316         struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0};
317         rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size);
318         struct sub_block curr_sb;
319         sub_block_init(&curr_sb);
320 
321         r = read_compressed_sub_block(&rb, &sb);
322         if (r != 0) {
323             printf(" Compressed child partition %d checksum failed.\n", i);
324             failure++;
325         }
326         just_decompress_sub_block(&sb);
327 
328         r = verify_ftnode_sub_block(&sb, nullptr, blocknum);
329         if (r != 0) {
330             printf(" Uncompressed child partition %d checksum failed.\n", i);
331             failure++;
332         }
333 
334 	// <CER> If needed, we can print row and/or pivot info at this
335 	// point.
336     }
337 
338 cleanup:
339     // Cleanup and error incrementing.
340     if (failure) {
341        	cbte->blocks_failed++;
342     }
343 
344     cbte->blocks_done++;
345 
346     if (node) {
347         toku_free(node);
348     }
349 
350     // Print the status of this block to the console.
351     report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks);
352     // We need to ALWAYS return 0 if we want to continue iterating
353     // through the nodes in the file.
354     r = 0;
355     return r;
356 }
357 
358 // This calls toku_blocktable_iterate on the given block table.
359 // Passes our check_block() function to be called as we iterate over
360 // the block table.  This will print any interesting failures and
361 // update us on our progress.
check_block_table(int fd,block_table * bt,struct ft * h)362 static void check_block_table(int fd, block_table *bt, struct ft *h) {
363     int64_t num_blocks = bt->get_blocks_in_use_unlocked();
364     printf("Starting verification of checkpoint containing");
365     printf(" %" PRId64 " blocks.\n", num_blocks);
366     fflush(stdout);
367 
368     struct check_block_table_extra extra = { .fd = fd,
369 					     .blocks_done = 0,
370 					     .blocks_failed = 0,
371 					     .total_blocks = num_blocks,
372 					     .h = h };
373     int r = bt->iterate(block_table::TRANSLATION_CURRENT,
374                     check_block,
375                     &extra,
376                     true,
377                     true);
378     if (r != 0) {
379         // We can print more information here if necessary.
380     }
381 
382     assert(extra.blocks_done == extra.total_blocks);
383     printf("Finished verification. ");
384     printf(" %" PRId64 " blocks checked,", extra.blocks_done);
385     printf(" %" PRId64 " bad block(s) detected\n", extra.blocks_failed);
386     fflush(stdout);
387 }
388 
389 int
main(int argc,char const * const argv[])390 main(int argc, char const * const argv[])
391 {
392     // open the file
393     int r = 0;
394     int dictfd;
395     const char *dictfname, *outfname;
396     if (argc < 3 || argc > 4) {
397         fprintf(stderr, "%s: Invalid arguments.\n", argv[0]);
398         fprintf(stderr, "Usage: %s <dictionary> <logfile> [report%%]\n", argv[0]);
399         r = EX_USAGE;
400         goto exit;
401     }
402 
403     assert(argc == 3 || argc == 4);
404     dictfname = argv[1];
405     outfname = argv[2];
406     if (argc == 4) {
407         set_errno(0);
408         pct = strtod(argv[3], NULL);
409         assert_zero(get_maybe_error_errno());
410         assert(pct > 0.0 && pct <= 100.0);
411     }
412 
413     // Open the file as read-only.
414     dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO);
415     if (dictfd < 0) {
416         perror(dictfname);
417         fflush(stderr);
418         abort();
419     }
420     outf = fopen(outfname, "w");
421     if (!outf) {
422         perror(outfname);
423         fflush(stderr);
424         abort();
425     }
426 
427     // body of toku_ft_serialize_init();
428     num_cores = toku_os_get_number_active_processors();
429     r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r);
430     assert_zero(r);
431 
432     // deserialize the header(s)
433     struct ft *h1, *h2;
434     deserialize_headers(dictfd, &h1, &h2);
435 
436     // walk over the block table and check blocks
437     if (h1) {
438         printf("Checking dictionary from header 1.\n");
439         check_block_table(dictfd, &h1->blocktable, h1);
440     }
441     if (h2) {
442         printf("Checking dictionary from header 2.\n");
443         check_block_table(dictfd, &h2->blocktable, h2);
444     }
445     if (h1 == NULL && h2 == NULL) {
446         printf("Both headers have a corruption and could not be used.\n");
447     }
448 
449     toku_thread_pool_destroy(&ft_pool);
450 exit:
451     return r;
452 }
453