1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 /*
12 * __wt_block_salvage_start --
13 * Start a file salvage.
14 */
15 int
__wt_block_salvage_start(WT_SESSION_IMPL * session,WT_BLOCK * block)16 __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
17 {
18 wt_off_t len;
19 uint32_t allocsize;
20
21 allocsize = block->allocsize;
22
23 /* Reset the description information in the first block. */
24 WT_RET(__wt_desc_write(session, block->fh, allocsize));
25
26 /*
27 * Salvage creates a new checkpoint when it's finished, set up for
28 * rolling an empty file forward.
29 */
30 WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));
31
32 /*
33 * Truncate the file to an allocation-size multiple of blocks (bytes
34 * trailing the last block must be garbage, by definition).
35 */
36 len = allocsize;
37 if (block->size > allocsize)
38 len = (block->size / allocsize) * allocsize;
39 WT_RET(__wt_block_truncate(session, block, len));
40
41 /*
42 * The file's first allocation-sized block is description information,
43 * skip it when reading through the file.
44 */
45 block->slvg_off = allocsize;
46
47 /*
48 * The only checkpoint extent we care about is the allocation list.
49 * Start with the entire file on the allocation list, we'll "free"
50 * any blocks we don't want as we process the file.
51 */
52 WT_RET(__wt_block_insert_ext(
53 session, block, &block->live.alloc, allocsize, len - allocsize));
54
55 /* Salvage performs a checkpoint but doesn't start or resolve it. */
56 WT_ASSERT(session, block->ckpt_state == WT_CKPT_NONE);
57 block->ckpt_state = WT_CKPT_SALVAGE;
58
59 return (0);
60 }
61
62 /*
63 * __wt_block_salvage_end --
64 * End a file salvage.
65 */
66 int
__wt_block_salvage_end(WT_SESSION_IMPL * session,WT_BLOCK * block)67 __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
68 {
69 /* Salvage performs a checkpoint but doesn't start or resolve it. */
70 WT_ASSERT(session, block->ckpt_state == WT_CKPT_SALVAGE);
71 block->ckpt_state = WT_CKPT_NONE;
72
73 /* Discard the checkpoint. */
74 return (__wt_block_checkpoint_unload(session, block, false));
75 }
76
77 /*
78 * __wt_block_offset_invalid --
79 * Return if the block offset is insane.
80 */
81 bool
__wt_block_offset_invalid(WT_BLOCK * block,wt_off_t offset,uint32_t size)82 __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size)
83 {
84 if (size == 0) /* < minimum page size */
85 return (true);
86 if (size % block->allocsize != 0) /* not allocation-size units */
87 return (true);
88 if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */
89 return (true);
90 /* past end-of-file */
91 if (offset + (wt_off_t)size > block->size)
92 return (true);
93 return (false);
94 }
95
96 /*
97 * __wt_block_salvage_next --
98 * Return the address for the next potential block from the file.
99 */
100 int
__wt_block_salvage_next(WT_SESSION_IMPL * session,WT_BLOCK * block,uint8_t * addr,size_t * addr_sizep,bool * eofp)101 __wt_block_salvage_next(WT_SESSION_IMPL *session,
102 WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp)
103 {
104 WT_BLOCK_HEADER *blk;
105 WT_DECL_ITEM(tmp);
106 WT_DECL_RET;
107 WT_FH *fh;
108 wt_off_t max, offset;
109 uint32_t allocsize, checksum, size;
110 uint8_t *endp;
111
112 *eofp = 0;
113
114 fh = block->fh;
115 allocsize = block->allocsize;
116 WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));
117
118 /* Read through the file, looking for pages. */
119 for (max = block->size;;) {
120 offset = block->slvg_off;
121 if (offset >= max) { /* Check eof. */
122 *eofp = 1;
123 goto done;
124 }
125
126 /*
127 * Read the start of a possible page (an allocation-size block),
128 * and get a page length from it. Move to the next allocation
129 * sized boundary, we'll never consider this one again.
130 */
131 WT_ERR(__wt_read(
132 session, fh, offset, (size_t)allocsize, tmp->mem));
133 blk = WT_BLOCK_HEADER_REF(tmp->mem);
134 __wt_block_header_byteswap(blk);
135 size = blk->disk_size;
136 checksum = blk->checksum;
137
138 /*
139 * Check the block size: if it's not insane, read the block.
140 * Reading the block validates any checksum; if reading the
141 * block succeeds, return its address as a possible page,
142 * otherwise, move past it.
143 */
144 if (!__wt_block_offset_invalid(block, offset, size) &&
145 __wt_block_read_off(
146 session, block, tmp, offset, size, checksum) == 0)
147 break;
148
149 /* Free the allocation-size block. */
150 __wt_verbose(session, WT_VERB_SALVAGE,
151 "skipping %" PRIu32 "B at file offset %" PRIuMAX,
152 allocsize, (uintmax_t)offset);
153 WT_ERR(__wt_block_off_free(
154 session, block, offset, (wt_off_t)allocsize));
155 block->slvg_off += allocsize;
156 }
157
158 /* Re-create the address cookie that should reference this block. */
159 endp = addr;
160 WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
161 *addr_sizep = WT_PTRDIFF(endp, addr);
162
163 done:
164 err: __wt_scr_free(session, &tmp);
165 return (ret);
166 }
167
168 /*
169 * __wt_block_salvage_valid --
170 * Let salvage know if a block is valid.
171 */
172 int
__wt_block_salvage_valid(WT_SESSION_IMPL * session,WT_BLOCK * block,uint8_t * addr,size_t addr_size,bool valid)173 __wt_block_salvage_valid(WT_SESSION_IMPL *session,
174 WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid)
175 {
176 wt_off_t offset;
177 uint32_t size, checksum;
178
179 WT_UNUSED(addr_size);
180
181 /*
182 * Crack the cookie.
183 * If the upper layer took the block, move past it; if the upper layer
184 * rejected the block, move past an allocation size chunk and free it.
185 */
186 WT_RET(
187 __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
188 if (valid)
189 block->slvg_off = offset + size;
190 else {
191 WT_RET(__wt_block_off_free(
192 session, block, offset, (wt_off_t)block->allocsize));
193 block->slvg_off = offset + block->allocsize;
194 }
195
196 return (0);
197 }
198