1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 #include "wt_internal.h"
10 
11 /*
12  * __wt_block_salvage_start --
13  *	Start a file salvage.
14  */
15 int
__wt_block_salvage_start(WT_SESSION_IMPL * session,WT_BLOCK * block)16 __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
17 {
18 	wt_off_t len;
19 	uint32_t allocsize;
20 
21 	allocsize = block->allocsize;
22 
23 	/* Reset the description information in the first block. */
24 	WT_RET(__wt_desc_write(session, block->fh, allocsize));
25 
26 	/*
27 	 * Salvage creates a new checkpoint when it's finished, set up for
28 	 * rolling an empty file forward.
29 	 */
30 	WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));
31 
32 	/*
33 	 * Truncate the file to an allocation-size multiple of blocks (bytes
34 	 * trailing the last block must be garbage, by definition).
35 	 */
36 	len = allocsize;
37 	if (block->size > allocsize)
38 		len = (block->size / allocsize) * allocsize;
39 	WT_RET(__wt_block_truncate(session, block, len));
40 
41 	/*
42 	 * The file's first allocation-sized block is description information,
43 	 * skip it when reading through the file.
44 	 */
45 	block->slvg_off = allocsize;
46 
47 	/*
48 	 * The only checkpoint extent we care about is the allocation list.
49 	 * Start with the entire file on the allocation list, we'll "free"
50 	 * any blocks we don't want as we process the file.
51 	 */
52 	WT_RET(__wt_block_insert_ext(
53 	    session, block, &block->live.alloc, allocsize, len - allocsize));
54 
55 	/* Salvage performs a checkpoint but doesn't start or resolve it. */
56 	WT_ASSERT(session, block->ckpt_state == WT_CKPT_NONE);
57 	block->ckpt_state = WT_CKPT_SALVAGE;
58 
59 	return (0);
60 }
61 
62 /*
63  * __wt_block_salvage_end --
64  *	End a file salvage.
65  */
66 int
__wt_block_salvage_end(WT_SESSION_IMPL * session,WT_BLOCK * block)67 __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
68 {
69 	/* Salvage performs a checkpoint but doesn't start or resolve it. */
70 	WT_ASSERT(session, block->ckpt_state == WT_CKPT_SALVAGE);
71 	block->ckpt_state = WT_CKPT_NONE;
72 
73 	/* Discard the checkpoint. */
74 	return (__wt_block_checkpoint_unload(session, block, false));
75 }
76 
77 /*
78  * __wt_block_offset_invalid --
79  *	Return if the block offset is insane.
80  */
81 bool
__wt_block_offset_invalid(WT_BLOCK * block,wt_off_t offset,uint32_t size)82 __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size)
83 {
84 	if (size == 0)				/* < minimum page size */
85 		return (true);
86 	if (size % block->allocsize != 0)	/* not allocation-size units */
87 		return (true);
88 	if (size > WT_BTREE_PAGE_SIZE_MAX)	/* > maximum page size */
89 		return (true);
90 						/* past end-of-file */
91 	if (offset + (wt_off_t)size > block->size)
92 		return (true);
93 	return (false);
94 }
95 
96 /*
97  * __wt_block_salvage_next --
98  *	Return the address for the next potential block from the file.
99  */
100 int
__wt_block_salvage_next(WT_SESSION_IMPL * session,WT_BLOCK * block,uint8_t * addr,size_t * addr_sizep,bool * eofp)101 __wt_block_salvage_next(WT_SESSION_IMPL *session,
102     WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp)
103 {
104 	WT_BLOCK_HEADER *blk;
105 	WT_DECL_ITEM(tmp);
106 	WT_DECL_RET;
107 	WT_FH *fh;
108 	wt_off_t max, offset;
109 	uint32_t allocsize, checksum, size;
110 	uint8_t *endp;
111 
112 	*eofp = 0;
113 
114 	fh = block->fh;
115 	allocsize = block->allocsize;
116 	WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));
117 
118 	/* Read through the file, looking for pages. */
119 	for (max = block->size;;) {
120 		offset = block->slvg_off;
121 		if (offset >= max) {			/* Check eof. */
122 			*eofp = 1;
123 			goto done;
124 		}
125 
126 		/*
127 		 * Read the start of a possible page (an allocation-size block),
128 		 * and get a page length from it.  Move to the next allocation
129 		 * sized boundary, we'll never consider this one again.
130 		 */
131 		WT_ERR(__wt_read(
132 		    session, fh, offset, (size_t)allocsize, tmp->mem));
133 		blk = WT_BLOCK_HEADER_REF(tmp->mem);
134 		__wt_block_header_byteswap(blk);
135 		size = blk->disk_size;
136 		checksum = blk->checksum;
137 
138 		/*
139 		 * Check the block size: if it's not insane, read the block.
140 		 * Reading the block validates any checksum; if reading the
141 		 * block succeeds, return its address as a possible page,
142 		 * otherwise, move past it.
143 		 */
144 		if (!__wt_block_offset_invalid(block, offset, size) &&
145 		    __wt_block_read_off(
146 		    session, block, tmp, offset, size, checksum) == 0)
147 			break;
148 
149 		/* Free the allocation-size block. */
150 		__wt_verbose(session, WT_VERB_SALVAGE,
151 		    "skipping %" PRIu32 "B at file offset %" PRIuMAX,
152 		    allocsize, (uintmax_t)offset);
153 		WT_ERR(__wt_block_off_free(
154 		    session, block, offset, (wt_off_t)allocsize));
155 		block->slvg_off += allocsize;
156 	}
157 
158 	/* Re-create the address cookie that should reference this block. */
159 	endp = addr;
160 	WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
161 	*addr_sizep = WT_PTRDIFF(endp, addr);
162 
163 done:
164 err:	__wt_scr_free(session, &tmp);
165 	return (ret);
166 }
167 
168 /*
169  * __wt_block_salvage_valid --
170  *	Let salvage know if a block is valid.
171  */
172 int
__wt_block_salvage_valid(WT_SESSION_IMPL * session,WT_BLOCK * block,uint8_t * addr,size_t addr_size,bool valid)173 __wt_block_salvage_valid(WT_SESSION_IMPL *session,
174     WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid)
175 {
176 	wt_off_t offset;
177 	uint32_t size, checksum;
178 
179 	WT_UNUSED(addr_size);
180 
181 	/*
182 	 * Crack the cookie.
183 	 * If the upper layer took the block, move past it; if the upper layer
184 	 * rejected the block, move past an allocation size chunk and free it.
185 	 */
186 	WT_RET(
187 	    __wt_block_buffer_to_addr(block, addr, &offset, &size, &checksum));
188 	if (valid)
189 		block->slvg_off = offset + size;
190 	else {
191 		WT_RET(__wt_block_off_free(
192 		    session, block, offset, (wt_off_t)block->allocsize));
193 		block->slvg_off = offset + block->allocsize;
194 	}
195 
196 	return (0);
197 }
198