1 /*-
2  * Copyright (c) 1996, 2020 Oracle and/or its affiliates.  All rights reserved.
3  *
4  * See the file LICENSE for license information.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/log.h"
14 #include "dbinc/mp.h"
15 #include "dbinc/lock.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/btree.h"
18 #include "dbinc/hash.h"
19 
20 static int __db_pg_free_recover_int
21     __P((ENV *, DB_TXNHEAD *, __db_pg_freedata_args *,
22 	DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
23 
24 /*
25  * PUBLIC: int __db_addrem_recover
26  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
27  *
28  * This log message is generated whenever we add or remove a duplicate
29  * to/from a duplicate page.  On recover, we just do the opposite.
30  */
31 int
__db_addrem_recover(env,dbtp,lsnp,op,info)32 __db_addrem_recover(env, dbtp, lsnp, op, info)
33 	ENV *env;
34 	DBT *dbtp;
35 	DB_LSN *lsnp;
36 	db_recops op;
37 	void *info;
38 {
39 	__db_addrem_args *argp;
40 	DB *file_dbp;
41 	DBC *dbc;
42 	DB_MPOOLFILE *mpf;
43 	DB_THREAD_INFO *ip;
44 	DB_TXNHEAD *txnhead;
45 	PAGE *pagep;
46 	int cmp_n, cmp_p, modified, ret;
47 	u_int32_t opcode;
48 
49 	txnhead = info;
50 	ip = txnhead->thread_info;
51 	pagep = NULL;
52 	REC_PRINT(__db_addrem_print);
53 	REC_INTRO(__db_addrem_read, txnhead, 1);
54 
55 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
56 	modified = 0;
57 
58 	opcode = OP_MODE_GET(argp->opcode);
59 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
60 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
61 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
62 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
63 	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_DUP) ||
64 	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_DUP)) {
65 		/* Need to redo an add, or undo a delete. */
66 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
67 		if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
68 		    argp->hdr.size == 0 ? NULL : &argp->hdr,
69 		    argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
70 			goto out;
71 		modified = 1;
72 
73 	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_DUP) ||
74 	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_DUP)) {
75 		/* Need to undo an add, or redo a delete. */
76 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
77 		if ((ret = __db_ditem(dbc,
78 		    pagep, argp->indx, argp->nbytes)) != 0)
79 			goto out;
80 		modified = 1;
81 	}
82 
83 	if (modified) {
84 		if (DB_REDO(op))
85 			LSN(pagep) = *lsnp;
86 		else
87 			LSN(pagep) = argp->pagelsn;
88 	}
89 
90 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
91 		goto out;
92 	pagep = NULL;
93 
94 done:	*lsnp = argp->prev_lsn;
95 	ret = 0;
96 
97 out:	if (pagep != NULL)
98 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
99 	REC_CLOSE;
100 }
101 
102 /*
103  * PUBLIC: int __db_addrem_42_recover
104  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
105  *
106  * This log message is generated whenever we add or remove a duplicate
107  * to/from a duplicate page.  On recover, we just do the opposite.
108  */
109 int
__db_addrem_42_recover(env,dbtp,lsnp,op,info)110 __db_addrem_42_recover(env, dbtp, lsnp, op, info)
111 	ENV *env;
112 	DBT *dbtp;
113 	DB_LSN *lsnp;
114 	db_recops op;
115 	void *info;
116 {
117 	__db_addrem_42_args *argp;
118 	DB *file_dbp;
119 	DBC *dbc;
120 	DB_MPOOLFILE *mpf;
121 	DB_THREAD_INFO *ip;
122 	DB_TXNHEAD *txnhead;
123 	PAGE *pagep;
124 	int cmp_n, cmp_p, modified, ret;
125 
126 	txnhead = info;
127 	ip = txnhead->thread_info;
128 	pagep = NULL;
129 	REC_PRINT(__db_addrem_print);
130 	REC_INTRO(__db_addrem_42_read, txnhead, 1);
131 
132 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
133 	modified = 0;
134 
135 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
136 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
137 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
138 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
139 	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) ||
140 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) {
141 		/* Need to redo an add, or undo a delete. */
142 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
143 		if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
144 		    argp->hdr.size == 0 ? NULL : &argp->hdr,
145 		    argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
146 			goto out;
147 		modified = 1;
148 
149 	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) ||
150 	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) {
151 		/* Need to undo an add, or redo a delete. */
152 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
153 		if ((ret = __db_ditem(dbc,
154 		    pagep, argp->indx, argp->nbytes)) != 0)
155 			goto out;
156 		modified = 1;
157 	}
158 
159 	if (modified) {
160 		if (DB_REDO(op))
161 			LSN(pagep) = *lsnp;
162 		else
163 			LSN(pagep) = argp->pagelsn;
164 	}
165 
166 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
167 		goto out;
168 	pagep = NULL;
169 
170 done:	*lsnp = argp->prev_lsn;
171 	ret = 0;
172 
173 out:	if (pagep != NULL)
174 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
175 	REC_CLOSE;
176 }
177 
178 /*
179  * PUBLIC: int __db_big_recover
180  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
181  */
182 int
__db_big_recover(env,dbtp,lsnp,op,info)183 __db_big_recover(env, dbtp, lsnp, op, info)
184 	ENV *env;
185 	DBT *dbtp;
186 	DB_LSN *lsnp;
187 	db_recops op;
188 	void *info;
189 {
190 	__db_big_args *argp;
191 	DB *file_dbp;
192 	DBC *dbc;
193 	DB_MPOOLFILE *mpf;
194 	DB_THREAD_INFO *ip;
195 	DB_TXNHEAD *txnhead;
196 	PAGE *pagep;
197 	int cmp_n, cmp_p, modified, ret;
198 	u_int32_t opcode;
199 
200 	txnhead = info;
201 	ip = txnhead->thread_info;
202 	pagep = NULL;
203 	REC_PRINT(__db_big_print);
204 	REC_INTRO(__db_big_read, txnhead, 0);
205 
206 	opcode = OP_MODE_GET(argp->opcode);
207 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, ppage);
208 	modified = 0;
209 
210 	/*
211 	 * There are three pages we need to check.  The one on which we are
212 	 * adding data, the previous one whose next_pointer may have
213 	 * been updated, and the next one whose prev_pointer may have
214 	 * been updated.
215 	 */
216 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
217 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
218 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
219 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
220 	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) ||
221 	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_BIG)) {
222 		/* We are either redo-ing an add, or undoing a delete. */
223 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
224 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
225 			argp->next_pgno, 0, P_OVERFLOW);
226 		OV_LEN(pagep) = argp->dbt.size;
227 		OV_REF(pagep) = 1;
228 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
229 		    argp->dbt.size);
230 		PREV_PGNO(pagep) = argp->prev_pgno;
231 		modified = 1;
232 	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_BIG) ||
233 	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_BIG)) {
234 		/*
235 		 * We are either undo-ing an add or redo-ing a delete.
236 		 * The page is about to be reclaimed in either case, so
237 		 * there really isn't anything to do here.
238 		 */
239 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
240 		modified = 1;
241 	} else if (cmp_p == 0 && DB_REDO(op) && opcode == DB_APPEND_BIG) {
242 		/* We are redoing an append. */
243 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
244 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
245 		    OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
246 		OV_LEN(pagep) += argp->dbt.size;
247 		modified = 1;
248 	} else if (cmp_n == 0 && DB_UNDO(op) && opcode == DB_APPEND_BIG) {
249 		/* We are undoing an append. */
250 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
251 		OV_LEN(pagep) -= argp->dbt.size;
252 		memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
253 		    OV_LEN(pagep), 0, argp->dbt.size);
254 		modified = 1;
255 	}
256 	if (modified)
257 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
258 
259 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
260 	pagep = NULL;
261 	if (ret != 0)
262 		goto out;
263 
264 	/*
265 	 * We only delete a whole chain of overflow items, and appends only
266 	 * apply to a single page.  Adding a page is the only case that
267 	 * needs to update the chain.
268 	 */
269 ppage:	if (opcode != DB_ADD_BIG)
270 		goto done;
271 
272 	/* Now check the previous page. */
273 	if (argp->prev_pgno != PGNO_INVALID) {
274 		REC_FGET(mpf, txnhead, argp->prev_pgno, &pagep, npage);
275 		modified = 0;
276 
277 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
278 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
279 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
280 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
281 
282 		if (cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) {
283 			/* Redo add, undo delete. */
284 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
285 			NEXT_PGNO(pagep) = argp->pgno;
286 			modified = 1;
287 		} else if (cmp_n == 0 &&
288 		    DB_UNDO(op) && opcode == DB_ADD_BIG) {
289 			/* Redo delete, undo add. */
290 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
291 			NEXT_PGNO(pagep) = argp->next_pgno;
292 			modified = 1;
293 		}
294 		if (modified)
295 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
296 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
297 		pagep = NULL;
298 		if (ret != 0)
299 			goto out;
300 	}
301 	pagep = NULL;
302 
303 	/* Now check the next page.  Can only be set on a delete. */
304 npage:	if (argp->next_pgno != PGNO_INVALID) {
305 		REC_FGET(mpf, txnhead, argp->next_pgno, &pagep, done);
306 		modified = 0;
307 
308 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
309 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
310 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
311 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
312 		if (cmp_p == 0 && DB_REDO(op)) {
313 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
314 			PREV_PGNO(pagep) = PGNO_INVALID;
315 			modified = 1;
316 		} else if (cmp_n == 0 && DB_UNDO(op)) {
317 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
318 			PREV_PGNO(pagep) = argp->pgno;
319 			modified = 1;
320 		}
321 		if (modified)
322 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
323 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
324 		pagep = NULL;
325 		if (ret != 0)
326 			goto out;
327 	}
328 	pagep = NULL;
329 
330 done:	*lsnp = argp->prev_lsn;
331 	ret = 0;
332 
333 out:	if (pagep != NULL)
334 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
335 	REC_CLOSE;
336 }
337 
338 /*
339  * PUBLIC: int __db_big_42_recover
340  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
341  */
342 int
__db_big_42_recover(env,dbtp,lsnp,op,info)343 __db_big_42_recover(env, dbtp, lsnp, op, info)
344 	ENV *env;
345 	DBT *dbtp;
346 	DB_LSN *lsnp;
347 	db_recops op;
348 	void *info;
349 {
350 	__db_big_42_args *argp;
351 	DB *file_dbp;
352 	DBC *dbc;
353 	DB_MPOOLFILE *mpf;
354 	DB_THREAD_INFO *ip;
355 	DB_TXNHEAD *txnhead;
356 	PAGE *pagep;
357 	int cmp_n, cmp_p, modified, ret;
358 
359 	txnhead = info;
360 	ip = txnhead->thread_info;
361 	pagep = NULL;
362 	REC_PRINT(__db_big_print);
363 	REC_INTRO(__db_big_42_read, txnhead, 0);
364 
365 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, ppage);
366 	modified = 0;
367 
368 	/*
369 	 * There are three pages we need to check.  The one on which we are
370 	 * adding data, the previous one whose next_pointer may have
371 	 * been updated, and the next one whose prev_pointer may have
372 	 * been updated.
373 	 */
374 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
375 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
376 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
377 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
378 	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) ||
379 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) {
380 		/* We are either redo-ing an add, or undoing a delete. */
381 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
382 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
383 			argp->next_pgno, 0, P_OVERFLOW);
384 		OV_LEN(pagep) = argp->dbt.size;
385 		OV_REF(pagep) = 1;
386 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
387 		    argp->dbt.size);
388 		PREV_PGNO(pagep) = argp->prev_pgno;
389 		modified = 1;
390 	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) ||
391 	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) {
392 		/*
393 		 * We are either undo-ing an add or redo-ing a delete.
394 		 * The page is about to be reclaimed in either case, so
395 		 * there really isn't anything to do here.
396 		 */
397 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
398 		modified = 1;
399 	} else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) {
400 		/* We are redoing an append. */
401 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
402 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
403 		    OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
404 		OV_LEN(pagep) += argp->dbt.size;
405 		modified = 1;
406 	} else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) {
407 		/* We are undoing an append. */
408 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
409 		OV_LEN(pagep) -= argp->dbt.size;
410 		memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
411 		    OV_LEN(pagep), 0, argp->dbt.size);
412 		modified = 1;
413 	}
414 	if (modified)
415 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
416 
417 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
418 	pagep = NULL;
419 	if (ret != 0)
420 		goto out;
421 
422 	/*
423 	 * We only delete a whole chain of overflow items, and appends only
424 	 * apply to a single page.  Adding a page is the only case that
425 	 * needs to update the chain.
426 	 */
427 ppage:	if (argp->opcode != DB_ADD_BIG)
428 		goto done;
429 
430 	/* Now check the previous page. */
431 	if (argp->prev_pgno != PGNO_INVALID) {
432 		REC_FGET(mpf, txnhead, argp->prev_pgno, &pagep, npage);
433 		modified = 0;
434 
435 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
436 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
437 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
438 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
439 
440 		if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) {
441 			/* Redo add, undo delete. */
442 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
443 			NEXT_PGNO(pagep) = argp->pgno;
444 			modified = 1;
445 		} else if (cmp_n == 0 &&
446 		    DB_UNDO(op) && argp->opcode == DB_ADD_BIG) {
447 			/* Redo delete, undo add. */
448 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
449 			NEXT_PGNO(pagep) = argp->next_pgno;
450 			modified = 1;
451 		}
452 		if (modified)
453 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
454 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
455 		pagep = NULL;
456 		if (ret != 0)
457 			goto out;
458 	}
459 	pagep = NULL;
460 
461 	/* Now check the next page.  Can only be set on a delete. */
462 npage:	if (argp->next_pgno != PGNO_INVALID) {
463 		REC_FGET(mpf, txnhead, argp->next_pgno, &pagep, done);
464 		modified = 0;
465 
466 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
467 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
468 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
469 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
470 		if (cmp_p == 0 && DB_REDO(op)) {
471 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
472 			PREV_PGNO(pagep) = PGNO_INVALID;
473 			modified = 1;
474 		} else if (cmp_n == 0 && DB_UNDO(op)) {
475 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
476 			PREV_PGNO(pagep) = argp->pgno;
477 			modified = 1;
478 		}
479 		if (modified)
480 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
481 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
482 		pagep = NULL;
483 		if (ret != 0)
484 			goto out;
485 	}
486 	pagep = NULL;
487 
488 done:	*lsnp = argp->prev_lsn;
489 	ret = 0;
490 
491 out:	if (pagep != NULL)
492 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
493 	REC_CLOSE;
494 }
495 /*
496  * __db_ovref_recover --
497  *	Recovery function for __db_ovref().
498  *
499  * PUBLIC: int __db_ovref_recover
500  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
501  */
502 int
__db_ovref_recover(env,dbtp,lsnp,op,info)503 __db_ovref_recover(env, dbtp, lsnp, op, info)
504 	ENV *env;
505 	DBT *dbtp;
506 	DB_LSN *lsnp;
507 	db_recops op;
508 	void *info;
509 {
510 	__db_ovref_args *argp;
511 	DB *file_dbp;
512 	DBC *dbc;
513 	DB_MPOOLFILE *mpf;
514 	DB_THREAD_INFO *ip;
515 	DB_TXNHEAD *txnhead;
516 	PAGE *pagep;
517 	int cmp, ret;
518 
519 	txnhead = info;
520 	ip = txnhead->thread_info;
521 	pagep = NULL;
522 	REC_PRINT(__db_ovref_print);
523 	REC_INTRO(__db_ovref_read, txnhead, 0);
524 
525 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
526 
527 	cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
528 	CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
529 	if (cmp == 0 && DB_REDO(op)) {
530 		/* Need to redo update described. */
531 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
532 		OV_REF(pagep) += argp->adjust;
533 		pagep->lsn = *lsnp;
534 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
535 		/* Need to undo update described. */
536 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
537 		OV_REF(pagep) -= argp->adjust;
538 		pagep->lsn = argp->lsn;
539 	}
540 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
541 	pagep = NULL;
542 	if (ret != 0)
543 		goto out;
544 	pagep = NULL;
545 
546 done:	*lsnp = argp->prev_lsn;
547 	ret = 0;
548 
549 out:	if (pagep != NULL)
550 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
551 	REC_CLOSE;
552 }
553 
554 /*
555  * __db_debug_recover --
556  *	Recovery function for debug.
557  *
558  * PUBLIC: int __db_debug_recover __P((ENV *,
559  * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
560  */
561 int
__db_debug_recover(env,dbtp,lsnp,op,info)562 __db_debug_recover(env, dbtp, lsnp, op, info)
563 	ENV *env;
564 	DBT *dbtp;
565 	DB_LSN *lsnp;
566 	db_recops op;
567 	void *info;
568 {
569 	__db_debug_args *argp;
570 	int ret;
571 
572 	REC_PRINT(__db_debug_print);
573 	REC_NOOP_INTRO(__db_debug_read);
574 
575 	*lsnp = argp->prev_lsn;
576 	ret = 0;
577 
578 	COMPQUIET(op, DB_TXN_ABORT);
579 	COMPQUIET(info, NULL);
580 	REC_NOOP_CLOSE;
581 }
582 
583 /*
584  * __db_noop_recover --
585  *	Recovery function for noop.
586  *
587  * PUBLIC: int __db_noop_recover __P((ENV *,
588  * PUBLIC:      DBT *, DB_LSN *, db_recops, void *));
589  */
590 int
__db_noop_recover(env,dbtp,lsnp,op,info)591 __db_noop_recover(env, dbtp, lsnp, op, info)
592 	ENV *env;
593 	DBT *dbtp;
594 	DB_LSN *lsnp;
595 	db_recops op;
596 	void *info;
597 {
598 	__db_noop_args *argp;
599 	DB *file_dbp;
600 	DBC *dbc;
601 	DB_MPOOLFILE *mpf;
602 	DB_THREAD_INFO *ip;
603 	DB_TXNHEAD *txnhead;
604 	PAGE *pagep;
605 	int cmp_n, cmp_p, ret;
606 
607 	txnhead = info;
608 	ip = txnhead->thread_info;
609 	pagep = NULL;
610 	REC_PRINT(__db_noop_print);
611 	REC_INTRO(__db_noop_read, txnhead, 0);
612 
613 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
614 
615 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
616 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
617 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
618 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
619 	if (cmp_p == 0 && DB_REDO(op)) {
620 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
621 		LSN(pagep) = *lsnp;
622 	} else if (cmp_n == 0 && DB_UNDO(op)) {
623 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
624 		LSN(pagep) = argp->prevlsn;
625 	}
626 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
627 	pagep = NULL;
628 
629 done:	*lsnp = argp->prev_lsn;
630 out:	if (pagep != NULL)
631 		(void)__memp_fput(mpf,
632 		    ip, pagep, file_dbp->priority);
633 	REC_CLOSE;
634 }
635 
636 /*
637  * __db_pg_alloc_recover --
638  *	Recovery function for pg_alloc.
639  *
640  * PUBLIC: int __db_pg_alloc_recover
641  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
642  */
643 int
__db_pg_alloc_recover(env,dbtp,lsnp,op,info)644 __db_pg_alloc_recover(env, dbtp, lsnp, op, info)
645 	ENV *env;
646 	DBT *dbtp;
647 	DB_LSN *lsnp;
648 	db_recops op;
649 	void *info;
650 {
651 	__db_pg_alloc_args *argp;
652 	DB *file_dbp;
653 	DBC *dbc;
654 	DBMETA *meta;
655 	DB_MPOOLFILE *mpf;
656 	DB_THREAD_INFO *ip;
657 	DB_TXN *txn;
658 	DB_TXNHEAD *txnhead;
659 	PAGE *pagep;
660 	db_pgno_t pgno;
661 	int cmp_n, cmp_p, created, level, ret;
662 
663 	txnhead = info;
664 	ip = txnhead->thread_info;
665 	txn = txnhead->txn;
666 	meta = NULL;
667 	pagep = NULL;
668 	created = 0;
669 	REC_PRINT(__db_pg_alloc_print);
670 	REC_INTRO(__db_pg_alloc_read, txnhead, 0);
671 
672 	/*
673 	 * Fix up the metadata page.  If we're redoing the operation, we have
674 	 * to get the metadata page and update its LSN and its free pointer.
675 	 * If we're undoing the operation and the page was ever created, we put
676 	 * it on the freelist.
677 	 */
678 	pgno = PGNO_BASE_MD;
679 	if ((ret = __memp_fget(mpf, &pgno, ip, txn, 0, &meta)) != 0) {
680 		/* The metadata page must always exist on redo. */
681 		if (DB_REDO(op)) {
682 			ret = __db_pgerr(file_dbp, pgno, ret);
683 			goto out;
684 		} else
685 			goto done;
686 	}
687 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
688 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
689 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
690 	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
691 	if (cmp_p == 0 && DB_REDO(op)) {
692 		/* Need to redo update described. */
693 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
694 		LSN(meta) = *lsnp;
695 		meta->free = argp->next;
696 		if (argp->pgno > meta->last_pgno)
697 			meta->last_pgno = argp->pgno;
698 	} else if (cmp_n == 0 && DB_UNDO(op)) {
699 		/* Need to undo update described. */
700 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
701 		LSN(meta) = argp->meta_lsn;
702 		/*
703 		 * If the page has a zero LSN then its newly created and
704 		 * will be truncated rather than go on the free list.
705 		 */
706 		if (!IS_ZERO_LSN(argp->page_lsn))
707 			meta->free = argp->pgno;
708 		meta->last_pgno = argp->last_pgno;
709 	}
710 
711 #ifdef HAVE_FTRUNCATE
712 	/*
713 	 * check to see if we are keeping a sorted freelist, if so put
714 	 * this back in the in memory list.  It must be the first element.
715 	 */
716 	if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
717 		db_pgno_t *list;
718 		u_int32_t nelem;
719 
720 		if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
721 			goto out;
722 		if (list != NULL && (nelem == 0 || *list != argp->pgno)) {
723 			if ((ret =
724 			    __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
725 				goto out;
726 			if (nelem != 0)
727 				memmove(list + 1, list, nelem * sizeof(*list));
728 			*list = argp->pgno;
729 		}
730 	}
731 #endif
732 
733 	/*
734 	 * Fix up the allocated page. If the page does not exist
735 	 * and we can truncate it then don't create it.
736 	 * Otherwise if we're redoing the operation, we have
737 	 * to get the page (creating it if it doesn't exist), and update its
738 	 * LSN.  If we're undoing the operation, we have to reset the page's
739 	 * LSN and put it on the free list.
740 	 */
741 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
742 		/*
743 		 * We have to be able to identify if a page was newly
744 		 * created so we can recover it properly.  We cannot simply
745 		 * look for an empty header, because hash uses a pgin
746 		 * function that will set the header.  Instead, we explicitly
747 		 * try for the page without CREATE and if that fails, then
748 		 * create it.
749 		 */
750 		if (DB_UNDO(op))
751 			goto do_truncate;
752 		if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn,
753 		    DB_MPOOL_CREATE, &pagep)) != 0) {
754 			if (DB_UNDO(op) && ret == ENOSPC)
755 				goto do_truncate;
756 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
757 			goto out;
758 		}
759 		created = 1;
760 	}
761 
762 	/* Fix up the allocated page. */
763 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
764 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
765 
766 	/*
767 	 * If an initial allocation is aborted and then reallocated during
768 	 * an archival restore the log record will have an LSN for the page
769 	 * but the page will be empty.
770 	 */
771 	if (IS_ZERO_LSN(LSN(pagep)))
772 		cmp_p = 0;
773 
774 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
775 	/*
776 	 * Another special case we have to handle is if we ended up with a
777 	 * page of all 0's which can happen if we abort between allocating a
778 	 * page in mpool and initializing it.  In that case, even if we're
779 	 * undoing, we need to re-initialize the page.
780 	 */
781 	if (DB_REDO(op) && cmp_p == 0) {
782 		/* Need to redo update described. */
783 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
784 		switch (argp->ptype) {
785 		case P_LBTREE:
786 		case P_LRECNO:
787 		case P_LDUP:
788 			level = LEAFLEVEL;
789 			break;
790 		default:
791 			level = 0;
792 			break;
793 		}
794 		P_INIT(pagep, file_dbp->pgsize,
795 		    argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
796 
797 		pagep->lsn = *lsnp;
798 	} else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
799 		/*
800 		 * This is where we handle the case of a 0'd page (pagep->pgno
801 		 * is equal to PGNO_INVALID).
802 		 * Undo the allocation, reinitialize the page and
803 		 * link its next pointer to the free list.
804 		 */
805 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
806 		P_INIT(pagep, file_dbp->pgsize,
807 		    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
808 
809 		pagep->lsn = argp->page_lsn;
810 	}
811 
812 do_truncate:
813 	/*
814 	 * If the page was newly created, give it back.
815 	 */
816 	if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
817 	    IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
818 		/* Discard the page. */
819 		if (pagep != NULL) {
820 			if ((ret = __memp_fput(mpf, ip,
821 			    pagep, DB_PRIORITY_VERY_LOW)) != 0)
822 				goto out;
823 			pagep = NULL;
824 		}
825 		/* Give the page back to the OS. */
826 		if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate(
827 		    mpf, txn, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0)
828 			goto out;
829 	}
830 
831 	if (pagep != NULL) {
832 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
833 		pagep = NULL;
834 		if (ret != 0)
835 			goto out;
836 	}
837 
838 	ret = __memp_fput(mpf, ip, meta, file_dbp->priority);
839 	meta = NULL;
840 	if (ret != 0)
841 		goto out;
842 
843 done:	*lsnp = argp->prev_lsn;
844 	ret = 0;
845 
846 out:	if (pagep != NULL)
847 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
848 	if (meta != NULL)
849 		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
850 	REC_CLOSE;
851 }
852 
853 /*
854  * __db_pg_free_recover_int --
855  */
856 static int
__db_pg_free_recover_int(env,txnhead,argp,file_dbp,lsnp,mpf,op,data)857 __db_pg_free_recover_int(env, txnhead, argp, file_dbp, lsnp, mpf, op, data)
858 	ENV *env;
859 	DB_TXNHEAD *txnhead;
860 	__db_pg_freedata_args *argp;
861 	DB *file_dbp;
862 	DB_LSN *lsnp;
863 	DB_MPOOLFILE *mpf;
864 	db_recops op;
865 	int data;
866 {
867 	DBMETA *meta;
868 	DB_LSN copy_lsn;
869 	DB_THREAD_INFO *ip;
870 	DB_TXN *txn;
871 	PAGE *pagep, *prevp;
872 	int cmp_n, cmp_p, is_meta, ret;
873 
874 	ip = txnhead->thread_info;
875 	txn = txnhead->txn;
876 	meta = NULL;
877 	pagep = prevp = NULL;
878 
879 	/*
880 	 * Get the "metapage".  This will either be the metapage
881 	 * or the previous page in the free list if we are doing
882 	 * sorted allocations.  If its a previous page then
883 	 * we will not be truncating.
884 	 */
885 	is_meta = argp->meta_pgno == PGNO_BASE_MD;
886 
887 	REC_FGET(mpf, txnhead, argp->meta_pgno, &meta, check_meta);
888 
889 	if (argp->meta_pgno != PGNO_BASE_MD)
890 		prevp = (PAGE *)meta;
891 
892 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
893 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
894 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
895 	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
896 
897 	/*
898 	 * Fix up the metadata page.  If we're redoing or undoing the operation
899 	 * we get the page and update its LSN, last and free pointer.
900 	 */
901 	if (cmp_p == 0 && DB_REDO(op)) {
902 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
903 		/*
904 		 * If we are at the end of the file truncate, otherwise
905 		 * put on the free list.
906 		 */
907 #ifdef HAVE_FTRUNCATE
908 		if (argp->pgno == argp->last_pgno)
909 			meta->last_pgno = argp->pgno - 1;
910 		else
911 #endif
912 		if (is_meta)
913 			meta->free = argp->pgno;
914 		else
915 			NEXT_PGNO(prevp) = argp->pgno;
916 		LSN(meta) = *lsnp;
917 	} else if (cmp_n == 0 && DB_UNDO(op)) {
918 		/* Need to undo the deallocation. */
919 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
920 		if (is_meta) {
921 			if (meta->last_pgno < argp->pgno)
922 				meta->last_pgno = argp->pgno;
923 			meta->free = argp->next;
924 		} else
925 			NEXT_PGNO(prevp) = argp->next;
926 		LSN(meta) = argp->meta_lsn;
927 	}
928 
929 check_meta:
930 	if (ret != 0 && is_meta) {
931 		/* The metadata page must always exist. */
932 		ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
933 		goto out;
934 	}
935 
936 	/*
937 	 * Get the freed page.  Don't create the page if we are going to
938 	 * free it.  If we're redoing the operation we get the page and
939 	 * explicitly discard its contents, then update its LSN. If we're
940 	 * undoing the operation, we get the page and restore its header.
941 	 */
942 	if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
943 		if ((ret = __memp_fget(mpf, &argp->pgno,
944 		    ip, txn, 0, &pagep)) != 0) {
945 			if (ret != DB_PAGE_NOTFOUND)
946 				goto out;
947 #ifdef HAVE_FTRUNCATE
948 			if (is_meta &&
949 			    DB_REDO(op) && meta->last_pgno <= argp->pgno)
950 				goto trunc;
951 #endif
952 			goto done;
953 		}
954 	} else if ((ret = __memp_fget(mpf, &argp->pgno,
955 	   ip, txn, DB_MPOOL_CREATE, &pagep)) != 0)
956 		goto out;
957 
958 	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
959 	cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
960 	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
961 
962 	/*
963 	 * This page got extended by a later allocation,
964 	 * but its allocation was not in the scope of this
965 	 * recovery pass.
966 	 */
967 	if (IS_ZERO_LSN(LSN(pagep)))
968 		cmp_p = 0;
969 
970 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
971 	/*
972 	 * We need to check that the page could have the current LSN
973 	 * which was copied before it was truncated in addition to
974 	 * the usual of having the previous LSN.
975 	 */
976 	if (DB_REDO(op) &&
977 	    (cmp_p == 0 || cmp_n == 0 ||
978 	    (IS_ZERO_LSN(copy_lsn) &&
979 	    LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
980 		/* Need to redo the deallocation. */
981 		/*
982 		 * The page can be truncated if it was truncated at runtime
983 		 * and the current metapage reflects the truncation.
984 		 */
985 #ifdef HAVE_FTRUNCATE
986 		if (is_meta && meta->last_pgno <= argp->pgno &&
987 		    argp->last_pgno <= argp->pgno) {
988 			if ((ret = __memp_fput(mpf, ip,
989 			    pagep, DB_PRIORITY_VERY_LOW)) != 0)
990 				goto out;
991 			pagep = NULL;
992 trunc:			if ((ret = __memp_ftruncate(mpf, txn, ip,
993 			    argp->pgno, MP_TRUNC_RECOVER)) != 0)
994 				goto out;
995 		} else if (argp->last_pgno == argp->pgno) {
996 			/* The page was truncated at runtime, zero it out. */
997 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
998 			P_INIT(pagep, 0, PGNO_INVALID,
999 			    PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
1000 			ZERO_LSN(pagep->lsn);
1001 		} else
1002 #endif
1003 		if (cmp_p == 0 || IS_ZERO_LSN(LSN(pagep))) {
1004 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1005 			P_INIT(pagep, file_dbp->pgsize,
1006 			    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
1007 			pagep->lsn = *lsnp;
1008 
1009 		}
1010 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1011 		/* Need to reallocate the page. */
1012 		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1013 		memcpy(pagep, argp->header.data, argp->header.size);
1014 		if (data)
1015 			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
1016 			     argp->data.data, argp->data.size);
1017 	}
1018 	if (pagep != NULL &&
1019 	    (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1020 		goto out;
1021 
1022 	pagep = NULL;
1023 #ifdef HAVE_FTRUNCATE
1024 	/*
1025 	 * If we are keeping an in memory free list remove this
1026 	 * element from the list.
1027 	 */
1028 	if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
1029 		db_pgno_t *lp;
1030 		u_int32_t nelem, pos;
1031 
1032 		if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
1033 			goto out;
1034 		if (lp != NULL) {
1035 			pos = 0;
1036 			if (!is_meta) {
1037 				__db_freelist_pos(argp->pgno, lp, nelem, &pos);
1038 
1039 				/*
1040 				 * If we aborted after logging but before
1041 				 * updating the free list don't do anything.
1042 				 */
1043 				if (argp->pgno != lp[pos]) {
1044 					DB_ASSERT(env,
1045 					    argp->meta_pgno == lp[pos]);
1046 					goto done;
1047 				}
1048 				DB_ASSERT(env,
1049 				    argp->meta_pgno == lp[pos - 1]);
1050 			} else if (nelem != 0 && argp->pgno != lp[pos])
1051 				goto done;
1052 
1053 			if (pos < nelem)
1054 				memmove(&lp[pos], &lp[pos + 1],
1055 				    ((nelem - pos) - 1) * sizeof(*lp));
1056 
1057 			/* Shrink the list */
1058 			if ((ret =
1059 			    __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
1060 				goto out;
1061 		}
1062 	}
1063 #endif
1064 done:
1065 	if (meta != NULL &&
1066 	     (ret = __memp_fput(mpf, ip,  meta, file_dbp->priority)) != 0)
1067 		goto out;
1068 	meta = NULL;
1069 	ret = 0;
1070 
1071 out:	if (pagep != NULL)
1072 		(void)__memp_fput(mpf, ip,  pagep, file_dbp->priority);
1073 	if (meta != NULL)
1074 		(void)__memp_fput(mpf, ip,  meta, file_dbp->priority);
1075 
1076 	return (ret);
1077 }
1078 
/*
 * __db_pg_free_recover --
 *	Recovery function for pg_free.
 *
 *	Thin wrapper: the record is decoded by REC_INTRO and all of the real
 *	work is delegated to __db_pg_free_recover_int, called with a final
 *	argument of 0 (the pg_freedata variant passes 1).  On success *lsnp
 *	is replaced with the record's prev_lsn.
 *
 * PUBLIC: int __db_pg_free_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_free_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_free_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DB_TXNHEAD *txnhead;
	int ret;

	/* The opaque info pointer is the transaction list head. */
	txnhead = info;
	REC_PRINT(__db_pg_free_print);
	REC_INTRO(__db_pg_free_read, txnhead, 0);

	/*
	 * The pg_free arguments are passed through a pg_freedata pointer.
	 * NOTE(review): this presumably relies on the two argument structures
	 * sharing a common leading layout -- confirm against the generated
	 * record definitions.
	 */
	if ((ret = __db_pg_free_recover_int(env, txnhead,
	    (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0)) != 0)
		goto out;

	/*
	 * No goto in this function targets "done"; it is presumably a jump
	 * target of the REC_* macros -- confirm before removing.
	 */
done:	*lsnp = argp->prev_lsn;
out:
	/*
	 * There is no explicit return statement; REC_CLOSE is presumably
	 * responsible for cleanup and for returning ret -- confirm.
	 */
	REC_CLOSE;
}
1113 
/*
 * __db_pg_freedata_recover --
 *	Recovery function for pg_freedata.
 *
 *	Thin wrapper: the record is decoded by REC_INTRO and all of the real
 *	work is delegated to __db_pg_free_recover_int, called with a final
 *	argument of 1 (the plain pg_free variant passes 0).  On success *lsnp
 *	is replaced with the record's prev_lsn.
 *
 * PUBLIC: int __db_pg_freedata_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_freedata_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_freedata_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DB_TXNHEAD *txnhead;
	int ret;

	/* The opaque info pointer is the transaction list head. */
	txnhead = info;
	REC_PRINT(__db_pg_freedata_print);
	REC_INTRO(__db_pg_freedata_read, txnhead, 0);

	/* Shared implementation; 1 selects the pg_freedata flavor. */
	if ((ret = __db_pg_free_recover_int(env,
	    txnhead, argp, file_dbp, lsnp, mpf, op, 1)) != 0)
		goto out;

	/*
	 * No goto in this function targets "done"; it is presumably a jump
	 * target of the REC_* macros -- confirm before removing.
	 */
done:	*lsnp = argp->prev_lsn;
out:
	/*
	 * There is no explicit return statement; REC_CLOSE is presumably
	 * responsible for cleanup and for returning ret -- confirm.
	 */
	REC_CLOSE;
}
1148 
1149 /*
1150  * __db_cksum_recover --
1151  *	Recovery function for checksum failure log record.
1152  *
1153  * PUBLIC: int __db_cksum_recover __P((ENV *,
1154  * PUBLIC:      DBT *, DB_LSN *, db_recops, void *));
1155  */
1156 int
__db_cksum_recover(env,dbtp,lsnp,op,info)1157 __db_cksum_recover(env, dbtp, lsnp, op, info)
1158 	ENV *env;
1159 	DBT *dbtp;
1160 	DB_LSN *lsnp;
1161 	db_recops op;
1162 	void *info;
1163 {
1164 	__db_cksum_args *argp;
1165 	int ret;
1166 
1167 	REC_PRINT(__db_cksum_print);
1168 
1169 	if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
1170 		return (ret);
1171 
1172 	/*
1173 	 * We had a checksum failure -- the only option is to run catastrophic
1174 	 * recovery.
1175 	 */
1176 	if (F_ISSET(env, ENV_RECOVER_FATAL))
1177 		ret = 0;
1178 	else {
1179 		__db_errx(env, DB_STR("0642",
1180 		    "Checksum failure requires catastrophic recovery"));
1181 		ret = __env_panic(env, DB_RUNRECOVERY);
1182 	}
1183 
1184 	__os_free(env, argp);
1185 
1186 	COMPQUIET(info, NULL);
1187 	COMPQUIET(lsnp, NULL);
1188 	COMPQUIET(op, DB_TXN_ABORT);
1189 
1190 	return (ret);
1191 }
1192 
/*
 * __db_pg_init_recover --
 *	Recovery function to reinit pages after truncation.
 *
 *	Redo: if the page still carries the LSN saved in the logged header,
 *	reinitialize it as an empty page and stamp it with this record's LSN.
 *	Undo: if the page carries this record's LSN, copy the logged header
 *	(and data, if any) back onto it.
 *	On success *lsnp is replaced with the record's prev_lsn.
 *
 * PUBLIC: int __db_pg_init_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_init_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_init_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_LSN copy_lsn;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	int cmp_n, cmp_p, ret, t_ret, type;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	/* The exit path releases pagep only when it is non-NULL. */
	pagep = NULL;
	REC_PRINT(__db_pg_init_print);
	REC_INTRO(__db_pg_init_read, txnhead, 0);

	mpf = file_dbp->mpf;
	/* First try to get the page without creating it. */
	if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
		if (DB_UNDO(op)) {
			/* Nothing to undo on a page that does not exist. */
			if (ret == DB_PAGE_NOTFOUND)
				goto done;
			else {
				ret = __db_pgerr(file_dbp, argp->pgno, ret);
				goto out;
			}
		}

		/*
		 * This page was truncated and may simply not have
		 * had an item written to it yet.  This should only
		 * happen on hash databases, so confirm that.
		 */
		DB_ASSERT(env, file_dbp->type == DB_HASH);
		if ((ret = __memp_fget(mpf, &argp->pgno,
		    ip, txn, DB_MPOOL_CREATE, &pagep)) != 0) {
			ret = __db_pgerr(file_dbp, argp->pgno, ret);
			goto out;
		}
	}

	/*
	 * The "previous" LSN is the one stored in the logged page header;
	 * it is copied out with __ua_memcpy rather than read in place.
	 */
	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
	/* cmp_n == 0: page has this record's LSN; cmp_p == 0: the prior one. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);

	if (cmp_p == 0 && DB_REDO(op)) {
		/*
		 * Redo: pick the page type for the reinitialized page.  A
		 * page that is currently P_HASH stays P_HASH; otherwise the
		 * type follows the access method (P_LRECNO for DB_RECNO,
		 * P_LBTREE for everything else).
		 */
		if (TYPE(pagep) == P_HASH)
			type = P_HASH;
		else
			type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		/* Sixth P_INIT argument: 0 for hash pages, 1 otherwise. */
		P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
		    PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
		pagep->lsn = *lsnp;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Put the data back on the page. */
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		memcpy(pagep, argp->header.data, argp->header.size);
		/* The data goes at HOFFSET, as set by the restored header. */
		if (argp->data.size > 0)
			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
			     argp->data.data, argp->data.size);
	}

done:	*lsnp = argp->prev_lsn;
out:
	/* Release the page; keep the first error if one is already set. */
	if (pagep != NULL && (t_ret =
	     __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0 && ret == 0)
		ret = t_ret;
	REC_CLOSE;
}
1281 
/*
 * __db_pg_trunc_recover --
 *	Recovery function for pg_trunc.
 *
 *	The record carries a list of free pages (db_pglist_t entries).
 *	Redo: re-run the truncation via __db_pg_truncate, then fix up the
 *	last remaining free-list page (argp->last_free) and the metadata
 *	page.
 *	Undo: rebuild each listed page as a free page, relink the list after
 *	last_free (or from the metadata page) and restore the metadata page.
 *	On DB_TXN_ABORT the pages are also put back into the in-memory free
 *	list.
 *	Without HAVE_FTRUNCATE the function just returns EINVAL.
 *
 * PUBLIC: int __db_pg_trunc_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_trunc_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifdef HAVE_FTRUNCATE
	__db_pg_trunc_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	db_pglist_t *pglist, *lp;
	db_pgno_t last_pgno, *list;
	u_int32_t felem, nelem, pos;
	int ret;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	REC_PRINT(__db_pg_trunc_print);
	REC_INTRO(__db_pg_trunc_read, txnhead, 1);

	/* The logged list is an array of db_pglist_t entries. */
	pglist = (db_pglist_t *) argp->list.data;
	nelem = argp->list.size / sizeof(db_pglist_t);
	if (DB_REDO(op)) {
		/*
		 * First call __db_pg_truncate to find the truncation
		 * point, truncate the file and return the new last_pgno.
		 */
		last_pgno = argp->last_pgno;
		if ((ret = __db_pg_truncate(dbc, txn, pglist,
		    NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0)
			goto out;

		if (argp->last_free != PGNO_INVALID) {
			/*
			 * Update the next pointer of the last page in
			 * the freelist.  If the truncation point is
			 * beyond next_free then this is still in the freelist
			 * otherwise the last_free page is at the end.
			 */
			/* A missing last_free page is tolerated (skipped). */
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, ip, txn, 0, &meta)) == 0) {
				/* Only touch it if it is in the logged state. */
				if (LOG_COMPARE(&LSN(meta),
				     &argp->last_lsn) == 0) {
					REC_DIRTY(mpf,
					    txnhead, dbc->priority, &meta);
					if (pglist->pgno > last_pgno)
						NEXT_PGNO(meta) = PGNO_INVALID;
					else
						NEXT_PGNO(meta) = pglist->pgno;
					LSN(meta) = *lsnp;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
				meta = NULL;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
		}
		/* Now the metadata page itself; it must exist. */
		if ((ret = __memp_fget(mpf, &argp->meta, ip, txn,
		    0, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			/*
			 * With no last_free page the list head lives in the
			 * metadata page: empty it, or point it at the first
			 * listed page.
			 */
			if (argp->last_free == PGNO_INVALID) {
				if (nelem == 0)
					meta->free = PGNO_INVALID;
				else
					meta->free = pglist->pgno;
			}
			/*
			 * If this is part of a multi record truncate
			 * this could be just the last page of this record
			 * don't move the meta->last_pgno forward.
			 */
			if (meta->last_pgno > last_pgno)
				meta->last_pgno = last_pgno;
			LSN(meta) = *lsnp;
		}
	} else {
		/* Put the free list back in its original order. */
		for (lp = pglist; lp < &pglist[nelem]; lp++) {
			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
			    txn, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
			/*
			 * Rebuild the page as a free page if it has a zero
			 * LSN or carries this record's LSN, restoring the
			 * logged next pointer and LSN.
			 */
			if (IS_ZERO_LSN(LSN(pagep)) ||
			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
				REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
				P_INIT(pagep, file_dbp->pgsize, lp->pgno,
				    PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
				LSN(pagep) = lp->lsn;
			}
			if ((ret = __memp_fput(mpf,
			    ip, pagep, file_dbp->priority)) != 0)
				goto out;
		}
		/*
		 * Link the truncated part back into the free list.
		 * Its either after the last_free page or directly
		 * linked to the metadata page.
		 */
		if (argp->last_free != PGNO_INVALID) {
			/*
			 * The page is fetched with DB_MPOOL_EDIT and then
			 * modified without a separate REC_DIRTY call.
			 */
			if ((ret = __memp_fget(mpf, &argp->last_free,
			    ip, txn, DB_MPOOL_EDIT, &meta)) == 0) {
				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
					NEXT_PGNO(meta) = argp->next_free;
					LSN(meta) = argp->last_lsn;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
			meta = NULL;
		}
		if ((ret = __memp_fget(mpf, &argp->meta,
		    ip, txn, DB_MPOOL_EDIT, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			/*
			 * If we had to break up the list last_pgno
			 * may only represent the end of the block.
			 */
			if (meta->last_pgno < argp->last_pgno)
				meta->last_pgno = argp->last_pgno;
			if (argp->last_free == PGNO_INVALID)
				meta->free = argp->next_free;
			LSN(meta) = argp->meta_lsn;
		}
	}

	/* Both branches reach here holding the metadata page in meta. */
	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
		goto out;

	if (op == DB_TXN_ABORT) {
		/*
		 * Put the pages back on the in memory free list.
		 * If this is part of a multi-record truncate then
		 * we need to find this batch, it may not be at the end.
		 * If we aborted while writing one of the log records
		 * then this set may still be in the list.
		 */
		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
			goto out;
		/* A NULL list means there is no in-memory list to update. */
		if (list != NULL) {
			if (felem != 0 && list[felem - 1] > pglist->pgno) {
				/*
				 * The batch belongs somewhere before the end:
				 * look up its position; if the first page is
				 * already listed there is nothing to do.
				 */
				__db_freelist_pos(
				    pglist->pgno, list, felem, &pos);
				DB_ASSERT(env, pos < felem);
				if (pglist->pgno == list[pos])
					goto done;
				pos++;
			} else if (felem != 0 &&
			    list[felem - 1] == pglist->pgno)
				/* Already present as the last entry. */
				goto done;
			else
				/* Empty list or larger than all: append. */
				pos = felem;
			if ((ret = __memp_extend_freelist(
			    mpf, felem + nelem, &list)) != 0)
				goto out;
			/* Open a gap of nelem slots at pos, then fill it. */
			if (pos != felem)
				memmove(&list[nelem + pos], &list[pos],
				    sizeof(*list) * (felem - pos));
			for (lp = pglist; lp < &pglist[nelem]; lp++)
				list[pos++] = lp->pgno;
		}
	}

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
#else
	/*
	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records
	 * to recover.
	 */
	COMPQUIET(env, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op,  DB_TXN_ABORT);
	COMPQUIET(info, NULL);
	return (EINVAL);
#endif
}
1483 /*
1484  * __db_realloc_recover --
1485  *	Recovery function for realloc.
1486  *
1487  * PUBLIC: int __db_realloc_recover
1488  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1489  */
1490 int
__db_realloc_recover(env,dbtp,lsnp,op,info)1491 __db_realloc_recover(env, dbtp, lsnp, op, info)
1492 	ENV *env;
1493 	DBT *dbtp;
1494 	DB_LSN *lsnp;
1495 	db_recops op;
1496 	void *info;
1497 {
1498 	__db_realloc_args *argp;
1499 	DB *file_dbp;
1500 	DBC *dbc;
1501 	DB_MPOOLFILE *mpf;
1502 	DB_THREAD_INFO *ip;
1503 	DB_TXN *txn;
1504 	DB_TXNHEAD *txnhead;
1505 	PAGE *pagep;
1506 	db_pglist_t *pglist, *lp;
1507 #ifdef HAVE_FTRUNCATE
1508 	db_pgno_t *list;
1509 	u_int32_t felem, pos;
1510 #endif
1511 	u_int32_t nelem;
1512 	int cmp_n, cmp_p, ret;
1513 
1514 	txnhead = info;
1515 	ip = txnhead->thread_info;
1516 	txn = txnhead->txn;
1517 
1518 	REC_PRINT(__db_realloc_print);
1519 	REC_INTRO(__db_realloc_read, txnhead, 1);
1520 	mpf = file_dbp->mpf;
1521 
1522 	/*
1523 	 * First, iterate over all the pages and make sure they are all in
1524 	 * their prior or new states (according to the op).
1525 	 */
1526 	pglist = (db_pglist_t *) argp->list.data;
1527 	nelem = argp->list.size / sizeof(db_pglist_t);
1528 	for (lp = pglist; lp < &pglist[nelem]; lp++) {
1529 		if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1530 		    txn, DB_MPOOL_CREATE, &pagep)) != 0)
1531 			goto out;
1532 		if (DB_REDO(op) && LOG_COMPARE(&LSN(pagep), &lp->lsn) == 0) {
1533 			REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1534 			P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1535 			    PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
1536 			LSN(pagep) = *lsnp;
1537 		} else if (DB_UNDO(op) && (IS_ZERO_LSN(LSN(pagep)) ||
1538 		     LOG_COMPARE(&LSN(pagep), lsnp) == 0)) {
1539 			REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1540 			P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1541 			    PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1542 			LSN(pagep) = lp->lsn;
1543 		}
1544 		if ((ret = __memp_fput(mpf,
1545 		    ip, pagep, file_dbp->priority)) != 0)
1546 			goto out;
1547 	}
1548 
1549 	/* Now, fix up the free list. */
1550 	if ((ret = __memp_fget(mpf,
1551 	    &argp->prev_pgno, ip, txn, 0, &pagep)) != 0)
1552 		goto out;
1553 
1554 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1555 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1556 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1557 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1558 
1559 	if (DB_REDO(op) && cmp_p == 0) {
1560 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1561 		if (argp->prev_pgno == PGNO_BASE_MD)
1562 			((DBMETA *)pagep)->free = argp->next_free;
1563 		else
1564 			NEXT_PGNO(pagep) = argp->next_free;
1565 		LSN(pagep) = *lsnp;
1566 	} else if (DB_UNDO(op) && cmp_n == 0) {
1567 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1568 		if (argp->prev_pgno == PGNO_BASE_MD)
1569 			((DBMETA *)pagep)->free = pglist->pgno;
1570 		else
1571 			NEXT_PGNO(pagep) = pglist->pgno;
1572 		LSN(pagep) = argp->page_lsn;
1573 	}
1574 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1575 		goto out;
1576 
1577 #ifdef HAVE_FTRUNCATE
1578 	if (op == DB_TXN_ABORT) {
1579 		/* Put the pages back in the sorted list. */
1580 		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1581 			goto out;
1582 		if (list != NULL) {
1583 			__db_freelist_pos(pglist->pgno, list, felem, &pos);
1584 			if (pglist->pgno == list[pos])
1585 				goto done;
1586 			if ((ret = __memp_extend_freelist(
1587 			    mpf, felem + nelem, &list)) != 0)
1588 				goto out;
1589 			pos++;
1590 			if (pos != felem)
1591 				memmove(&list[pos+nelem],
1592 				    &list[pos], nelem * sizeof(*list));
1593 			for (lp = pglist; lp < &pglist[nelem]; lp++)
1594 				list[pos++] = lp->pgno;
1595 		}
1596 	}
1597 #endif
1598 
1599 done:	*lsnp = argp->prev_lsn;
1600 	ret = 0;
1601 
1602 out:	REC_CLOSE;
1603 }
/*
 * __db_pg_sort_44_recover --
 *	Recovery function for pg_sort.
 * This is deprecated and kept for replication upgrades.
 *
 *	The record carries a list of free pages (db_pglist_t entries).
 *	Redo: sort the list, re-run the truncation via __db_pg_truncate,
 *	terminate the free chain at last_free and update the metadata page.
 *	Undo: rebuild the listed pages as a free chain in logged order,
 *	hook it after last_free (or from the metadata page) and restore the
 *	metadata page.
 *	On DB_TXN_ABORT the pages are appended to the in-memory free list.
 *	Without HAVE_FTRUNCATE the function just returns EINVAL.
 *
 * PUBLIC: int __db_pg_sort_44_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_sort_44_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifdef HAVE_FTRUNCATE
	__db_pg_sort_44_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	db_pglist_t *pglist, *lp;
	db_pgno_t pgno, *list;
	u_int32_t felem, nelem;
	int ret;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	REC_PRINT(__db_pg_sort_44_print);
	REC_INTRO(__db_pg_sort_44_read, txnhead, 1);

	/* The logged list is an array of db_pglist_t entries. */
	pglist = (db_pglist_t *) argp->list.data;
	nelem = argp->list.size / sizeof(db_pglist_t);
	if (DB_REDO(op)) {
		/*
		 * Sort the logged list, then let __db_pg_truncate redo the
		 * truncation; it may update nelem and pgno.
		 */
		pgno = argp->last_pgno;
		__db_freelist_sort(pglist, nelem);
		if ((ret = __db_pg_truncate(dbc, txn,
		    pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0)
			goto out;

		if (argp->last_free != PGNO_INVALID) {
			/* A missing last_free page is tolerated (skipped). */
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, ip, txn, 0, &meta)) == 0) {
				/* Only touch it if it is in the logged state. */
				if (LOG_COMPARE(&LSN(meta),
				     &argp->last_lsn) == 0) {
					REC_DIRTY(mpf,
					    txnhead, dbc->priority, &meta);
					NEXT_PGNO(meta) = PGNO_INVALID;
					LSN(meta) = *lsnp;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
				meta = NULL;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
		}
		/* Now the metadata page itself; it must exist. */
		if ((ret = __memp_fget(mpf, &argp->meta, ip, txn,
		    0, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			/*
			 * With no last_free page the list head lives in the
			 * metadata page: empty it, or point it at the first
			 * listed page.
			 */
			if (argp->last_free == PGNO_INVALID) {
				if (nelem == 0)
					meta->free = PGNO_INVALID;
				else
					meta->free = pglist->pgno;
			}
			meta->last_pgno = pgno;
			LSN(meta) = *lsnp;
		}
	} else {
		/* Put the free list back in its original order. */
		for (lp = pglist; lp < &pglist[nelem]; lp++) {
			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
			    txn, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
			if (IS_ZERO_LSN(LSN(pagep)) ||
			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
				REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
				/*
				 * Chain each page to the next list entry;
				 * the last entry ends the chain.
				 */
				if (lp == &pglist[nelem - 1])
					pgno = PGNO_INVALID;
				else
					pgno = lp[1].pgno;

				P_INIT(pagep, file_dbp->pgsize,
				    lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
				LSN(pagep) = lp->lsn;
			}
			if ((ret = __memp_fput(mpf,
			    ip, pagep, file_dbp->priority)) != 0)
				goto out;
		}
		if (argp->last_free != PGNO_INVALID) {
			/*
			 * The page is fetched with DB_MPOOL_EDIT and then
			 * modified without a separate REC_DIRTY call.
			 */
			if ((ret = __memp_fget(mpf, &argp->last_free,
			    ip, txn, DB_MPOOL_EDIT, &meta)) == 0) {
				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
					NEXT_PGNO(meta) = pglist->pgno;
					LSN(meta) = argp->last_lsn;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
			meta = NULL;
		}
		if ((ret = __memp_fget(mpf, &argp->meta,
		    ip, txn, DB_MPOOL_EDIT, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			meta->last_pgno = argp->last_pgno;
			if (argp->last_free == PGNO_INVALID)
				meta->free = pglist->pgno;
			LSN(meta) = argp->meta_lsn;
		}
	}
	if (op == DB_TXN_ABORT) {
		/*
		 * NOTE(review): meta is still held at this point (it is only
		 * released after this block), so the "goto out" exits below
		 * skip that release unless REC_CLOSE handles it -- confirm.
		 */
		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
			goto out;
		/* A NULL list means there is no in-memory list to update. */
		if (list != NULL) {
			DB_ASSERT(env, felem == 0 ||
			    argp->last_free == list[felem - 1]);
			if ((ret = __memp_extend_freelist(
			    mpf, felem + nelem, &list)) != 0)
				goto out;
			/* Append the listed pages after the current entries. */
			for (lp = pglist; lp < &pglist[nelem]; lp++)
				list[felem++] = lp->pgno;
		}
	}

	/* Both branches reach here holding the metadata page in meta. */
	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
		goto out;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
#else
	/*
	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
	 * to recover.
	 */
	COMPQUIET(env, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op,  DB_TXN_ABORT);
	COMPQUIET(info, NULL);
	return (EINVAL);
#endif
}
1762 
/*
 * __db_relink_recover --
 *	Recovery function for relink.
 *
 *	Fixes the sibling links around argp->pgno.  For each neighbor that
 *	exists (next_pgno, then prev_pgno):
 *	Redo: point the neighbor's link at argp->new_pgno when that is valid,
 *	otherwise at the page on the other side, and stamp it with this
 *	record's LSN.
 *	Undo: point the neighbor's link back at argp->pgno and restore the
 *	neighbor's logged LSN.
 *	A neighbor that cannot be found in the file is skipped.  On success
 *	*lsnp is replaced with the record's prev_lsn.
 *
 * PUBLIC: int __db_relink_recover
 * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_relink_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_relink_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	int cmp_n, cmp_p, ret;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	/* The exit path releases pagep only when it is non-NULL. */
	pagep = NULL;
	REC_PRINT(__db_relink_print);
	REC_INTRO(__db_relink_read, txnhead, 0);

	/*
	 * There are up to three pages we need to check -- the page, and the
	 * previous and next pages, if they existed.  For a page add operation,
	 * the current page is the result of a split and is being recovered
	 * elsewhere, so all we need do is recover the next page.
	 */
	if (argp->next_pgno == PGNO_INVALID)
		goto prev;
	if ((ret = __memp_fget(mpf,
	    &argp->next_pgno, ip, txn, 0, &pagep)) != 0) {
		/* A missing next page is skipped; other errors are fatal. */
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->next_pgno, ret);
			goto out;
		} else
			goto prev;
	}

	/* cmp_n == 0: page has this record's LSN; cmp_p == 0: the prior one. */
	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Redo the remove or replace. */
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		/* No new page: bypass pgno; otherwise link to new_pgno. */
		if (argp->new_pgno == PGNO_INVALID)
			pagep->prev_pgno = argp->prev_pgno;
		else
			pagep->prev_pgno = argp->new_pgno;

		pagep->lsn = *lsnp;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Undo the remove or replace. */
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		pagep->prev_pgno = argp->pgno;

		pagep->lsn = argp->lsn_next;
	}

	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
		goto out;
	/* Released; make sure the exit path does not release it again. */
	pagep = NULL;

	/* Second half: the same fix-up, mirrored, for the previous page. */
prev:	if (argp->prev_pgno == PGNO_INVALID)
		goto done;
	if ((ret = __memp_fget(mpf,
	    &argp->prev_pgno, ip, txn, 0, &pagep)) != 0) {
		/* A missing previous page is skipped; other errors are fatal. */
		if (ret != DB_PAGE_NOTFOUND) {
			ret = __db_pgerr(file_dbp, argp->prev_pgno, ret);
			goto out;
		} else
			goto done;
	}

	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Redo the relink. */
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		/* No new page: bypass pgno; otherwise link to new_pgno. */
		if (argp->new_pgno == PGNO_INVALID)
			pagep->next_pgno = argp->next_pgno;
		else
			pagep->next_pgno = argp->new_pgno;

		pagep->lsn = *lsnp;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Undo the relink. */
		REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
		pagep->next_pgno = argp->pgno;
		pagep->lsn = argp->lsn_prev;
	}

	if ((ret = __memp_fput(mpf,
	     ip, pagep, file_dbp->priority)) != 0)
		goto out;
	/* Released; make sure the exit path does not release it again. */
	pagep = NULL;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

	/* Error exit: drop any page still held, ignoring a release failure. */
out:	if (pagep != NULL)
		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
	REC_CLOSE;
}
1880 
1881 /*
1882  * __db_merge_recover --
1883  *	Recovery function for merge.
1884  *
1885  * PUBLIC: int __db_merge_recover
1886  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1887  */
1888 int
__db_merge_recover(env,dbtp,lsnp,op,info)1889 __db_merge_recover(env, dbtp, lsnp, op, info)
1890 	ENV *env;
1891 	DBT *dbtp;
1892 	DB_LSN *lsnp;
1893 	db_recops op;
1894 	void *info;
1895 {
1896 	__db_merge_args *argp;
1897 	BTREE *bt;
1898 	BKEYDATA *bk;
1899 	DB *file_dbp;
1900 	DBC *dbc;
1901 	DB_LOCK handle_lock;
1902 	DB_LOCKREQ request;
1903 	DB_MPOOLFILE *mpf;
1904 	DB_THREAD_INFO *ip;
1905 	DB_TXN *txn;
1906 	DB_TXNHEAD *txnhead;
1907 	HASH *ht;
1908 	PAGE *pagep;
1909 	db_indx_t indx, *ninp, *pinp;
1910 	u_int32_t size;
1911 	u_int8_t *bp;
1912 	int cmp_n, cmp_p, i, ret, t_ret;
1913 
1914 	txnhead = info;
1915 	ip = txnhead->thread_info;
1916 	txn = txnhead->txn;
1917 	REC_PRINT(__db_merge_print);
1918 	REC_INTRO(__db_merge_read, txnhead, op != DB_TXN_APPLY);
1919 
1920 	/* Allocate our own cursor without DB_RECOVER as we need a locker. */
1921 	if (op == DB_TXN_APPLY && (ret = __db_cursor_int(file_dbp, ip, NULL,
1922 	    DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0)
1923 		goto out;
1924 	F_SET(dbc, DBC_RECOVER);
1925 
1926 	/* XXX Use REG_FGET() here? */
1927 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
1928 		if (ret != DB_PAGE_NOTFOUND) {
1929 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1930 			goto out;
1931 		} else
1932 			goto next;
1933 	}
1934 
1935 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1936 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1937 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1938 	CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
1939 
1940 	if (cmp_p == 0 && DB_REDO(op)) {
1941 		/*
1942 		 * When pg_copy is set, we are copying onto a new page.
1943 		 */
1944 		DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0);
1945 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1946 		if (argp->pg_copy) {
1947 			if (argp->data.size == 0) {
1948 				memcpy(pagep, argp->hdr.data, argp->hdr.size);
1949 				pagep->pgno = argp->pgno;
1950 				goto do_lsn;
1951 			}
1952 			P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1953 			     PREV_PGNO(argp->hdr.data),
1954 			     NEXT_PGNO(argp->hdr.data),
1955 			     LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
1956 		}
1957 		if (TYPE(pagep) == P_OVERFLOW) {
1958 			OV_REF(pagep) = OV_REF(argp->hdr.data);
1959 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1960 			bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
1961 			memcpy(bp, argp->data.data, argp->data.size);
1962 		} else {
1963 			/* Copy the data segment. */
1964 			bp = (u_int8_t *)pagep +
1965 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1966 			memcpy(bp, argp->data.data, argp->data.size);
1967 
1968 			/* Copy index table offset past the current entries. */
1969 			pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1970 			ninp = P_INP(file_dbp, argp->hdr.data);
1971 			for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
1972 				*pinp++ = *ninp++
1973 				      - (file_dbp->pgsize - HOFFSET(pagep));
1974 			HOFFSET(pagep) -= argp->data.size;
1975 			NUM_ENT(pagep) += i;
1976 		}
1977 do_lsn:		pagep->lsn = *lsnp;
1978 		if (op == DB_TXN_APPLY) {
1979 			/*
1980 			 * If applying to an active system we must bump
1981 			 * the revision number so that the db will get
1982 			 * reopened.  We also need to move the handle
1983 			 * locks.  Note that the dbp will not have a
1984 			 * locker in a replication client apply thread.
1985 			 */
1986 			if (file_dbp->type == DB_HASH) {
1987 				if (argp->npgno == file_dbp->meta_pgno)
1988 					file_dbp->mpf->mfp->revision++;
1989 			} else {
1990 				bt = file_dbp->bt_internal;
1991 				if (argp->npgno == bt->bt_meta ||
1992 				    argp->npgno == bt->bt_root)
1993 					file_dbp->mpf->mfp->revision++;
1994 			}
1995 			if (argp->npgno == file_dbp->meta_pgno) {
1996 				F_CLR(file_dbp, DB_AM_RECOVER);
1997 				if ((ret = __fop_lock_handle(file_dbp->env,
1998 				    file_dbp, dbc->locker, DB_LOCK_READ,
1999 				    NULL, 0)) != 0)
2000 					goto err;
2001 				handle_lock = file_dbp->handle_lock;
2002 
2003 				file_dbp->meta_pgno = argp->pgno;
2004 				if ((ret = __fop_lock_handle(file_dbp->env,
2005 				    file_dbp, dbc->locker, DB_LOCK_READ,
2006 				    NULL, 0)) != 0)
2007 					goto err;
2008 
2009 				/* Move the other handles to the new lock. */
2010 				ret = __lock_change(file_dbp->env,
2011 				    &handle_lock, &file_dbp->handle_lock);
2012 
2013 err:				memset(&request, 0, sizeof(request));
2014 				request.op = DB_LOCK_PUT_ALL;
2015 				if ((t_ret = __lock_vec(
2016 				    file_dbp->env, dbc->locker,
2017 				    0, &request, 1, NULL)) != 0 && ret == 0)
2018 					ret = t_ret;
2019 				F_SET(file_dbp, DB_AM_RECOVER);
2020 				if (ret != 0)
2021 					goto out;
2022 			}
2023 		}
2024 
2025 	} else if (cmp_n == 0 && !DB_REDO(op)) {
2026 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2027 		if (TYPE(pagep) == P_OVERFLOW) {
2028 			HOFFSET(pagep) = file_dbp->pgsize;
2029 			goto setlsn;
2030 		}
2031 
2032 		if (argp->pg_copy) {
2033 			/* The page was empty when we started. */
2034 			P_INIT(pagep, file_dbp->pgsize,
2035 			    pagep->pgno, PGNO_INVALID,
2036 			    PGNO_INVALID, 0, TYPE(argp->hdr.data));
2037 			goto setlsn;
2038 		}
2039 
2040 		/*
2041 		 * Since logging is logical at the page level we cannot just
2042 		 * truncate the data space.  Delete the proper number of items
2043 		 * from the logical end of the page.
2044 		 */
2045 		for (i = 0; i < NUM_ENT(argp->hdr.data); i++) {
2046 			indx = NUM_ENT(pagep) - 1;
2047 			if (TYPE(pagep) == P_LBTREE && indx != 0 &&
2048 			     P_INP(file_dbp, pagep)[indx] ==
2049 			     P_INP(file_dbp, pagep)[indx - P_INDX]) {
2050 				NUM_ENT(pagep)--;
2051 				continue;
2052 			}
2053 			switch (TYPE(pagep)) {
2054 			case P_LBTREE:
2055 			case P_LRECNO:
2056 			case P_LDUP:
2057 				bk = GET_BKEYDATA(file_dbp, pagep, indx);
2058 				size = BITEM_SIZE(bk);
2059 				break;
2060 
2061 			case P_IBTREE:
2062 				size = BINTERNAL_SIZE(
2063 				     GET_BINTERNAL(file_dbp, pagep, indx)->len);
2064 				break;
2065 			case P_IRECNO:
2066 				size = RINTERNAL_SIZE;
2067 				break;
2068 			case P_HASH:
2069 				size = LEN_HITEM(file_dbp,
2070 				    pagep, file_dbp->pgsize, indx);
2071 				break;
2072 			default:
2073 				ret = __db_pgfmt(env, PGNO(pagep));
2074 				goto out;
2075 			}
2076 			if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0)
2077 				goto out;
2078 		}
2079 setlsn:		pagep->lsn = argp->lsn;
2080 	}
2081 
2082 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
2083 		goto out;
2084 
2085 next:	if ((ret = __memp_fget(mpf, &argp->npgno, ip, txn, 0, &pagep)) != 0) {
2086 		if (ret != DB_PAGE_NOTFOUND) {
2087 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
2088 			goto out;
2089 		} else
2090 			goto done;
2091 	}
2092 
2093 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2094 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
2095 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
2096 
2097 	if (cmp_p == 0 && DB_REDO(op)) {
2098 		/* Need to truncate the page. */
2099 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2100 		HOFFSET(pagep) = file_dbp->pgsize;
2101 		NUM_ENT(pagep) = 0;
2102 		pagep->lsn = *lsnp;
2103 	} else if (cmp_n == 0 && !DB_REDO(op)) {
2104 		/* Need to put the data back on the page. */
2105 		REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2106 		if (TYPE(pagep) == P_OVERFLOW) {
2107 			OV_REF(pagep) = OV_REF(argp->hdr.data);
2108 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2109 			bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2110 			memcpy(bp, argp->data.data, argp->data.size);
2111 		} else {
2112 			bp = (u_int8_t *)pagep +
2113 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2114 			memcpy(bp, argp->data.data, argp->data.size);
2115 
2116 			if (argp->pg_copy)
2117 				memcpy(pagep, argp->hdr.data, argp->hdr.size);
2118 			else {
2119 				/* Copy index table. */
2120 				pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2121 				ninp = P_INP(file_dbp, argp->hdr.data);
2122 				for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2123 					*pinp++ = *ninp++;
2124 				HOFFSET(pagep) -= argp->data.size;
2125 				NUM_ENT(pagep) += i;
2126 			}
2127 		}
2128 		pagep->lsn = argp->nlsn;
2129 		if (op == DB_TXN_ABORT) {
2130 			/*
2131 			 * If we are undoing a meta/root page move we must
2132 			 * bump the revision number. Put the handle
2133 			 * locks back to their original state if we
2134 			 * moved the metadata page.
2135 			 */
2136 			i = 0;
2137 			if (file_dbp->type == DB_HASH) {
2138 				ht = file_dbp->h_internal;
2139 				if (argp->pgno == ht->meta_pgno) {
2140 					ht->meta_pgno = argp->npgno;
2141 					file_dbp->mpf->mfp->revision++;
2142 					i = 1;
2143 				}
2144 			} else {
2145 				bt = file_dbp->bt_internal;
2146 				if (argp->pgno == bt->bt_meta) {
2147 					file_dbp->mpf->mfp->revision++;
2148 					bt->bt_meta = argp->npgno;
2149 					i = 1;
2150 				} else if (argp->pgno == bt->bt_root) {
2151 					file_dbp->mpf->mfp->revision++;
2152 					bt->bt_root = argp->npgno;
2153 				}
2154 			}
2155 			if (argp->pgno == file_dbp->meta_pgno)
2156 				file_dbp->meta_pgno = argp->npgno;
2157 
2158 			/*
2159 			 * If we detected a metadata page above, move
2160 			 * the handle locks to the new page.
2161 			 */
2162 			if (i == 1) {
2163 				handle_lock = file_dbp->handle_lock;
2164 				if ((ret = __fop_lock_handle(file_dbp->env,
2165 				    file_dbp, file_dbp->locker, DB_LOCK_READ,
2166 				    NULL, 0)) != 0)
2167 					goto out;
2168 
2169 				/* Move the other handles to the new lock. */
2170 				if ((ret = __lock_change(file_dbp->env,
2171 				    &handle_lock, &file_dbp->handle_lock)) != 0)
2172 					goto out;
2173 			}
2174 		}
2175 	}
2176 
2177 	if ((ret = __memp_fput(mpf,
2178 	     ip, pagep, dbc->priority)) != 0)
2179 		goto out;
2180 done:
2181 	*lsnp = argp->prev_lsn;
2182 	ret = 0;
2183 
2184 out:	REC_CLOSE;
2185 }
2186 
2187 /*
2188  * __db_pgno_recover --
2189  *	Recovery function for page number replacement.
2190  *
2191  * PUBLIC: int __db_pgno_recover
2192  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2193  */
2194 int
__db_pgno_recover(env,dbtp,lsnp,op,info)2195 __db_pgno_recover(env, dbtp, lsnp, op, info)
2196 	ENV *env;
2197 	DBT *dbtp;
2198 	DB_LSN *lsnp;
2199 	db_recops op;
2200 	void *info;
2201 {
2202 	BINTERNAL *bi;
2203 	__db_pgno_args *argp;
2204 	DB_THREAD_INFO *ip;
2205 	DB *file_dbp;
2206 	DBC *dbc;
2207 	DB_MPOOLFILE *mpf;
2208 	DB_TXNHEAD *txnhead;
2209 	PAGE *pagep, *npagep;
2210 	db_pgno_t pgno, *pgnop;
2211 	int cmp_n, cmp_p, ret;
2212 
2213 	txnhead = info;
2214 	ip = txnhead->thread_info;
2215 	REC_PRINT(__db_pgno_print);
2216 	REC_INTRO(__db_pgno_read, txnhead, 0);
2217 
2218 	REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
2219 
2220 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2221 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2222 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2223 	CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2224 
2225 	if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) {
2226 		switch (TYPE(pagep)) {
2227 		case P_IBTREE:
2228 			/*
2229 			 * An internal record can have both a overflow
2230 			 * and child pointer.  Fetch the page to see
2231 			 * which it is.
2232 			 */
2233 			bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
2234 			if (B_TYPE(bi->type) == B_OVERFLOW) {
2235 				REC_FGET(mpf, txnhead,
2236 				    argp->npgno, &npagep, out);
2237 
2238 				if (TYPE(npagep) == P_OVERFLOW)
2239 					pgnop =
2240 					     &((BOVERFLOW *)(bi->data))->pgno;
2241 				else
2242 					pgnop = &bi->pgno;
2243 				if ((ret = __memp_fput(mpf, ip,
2244 				    npagep, file_dbp->priority)) != 0)
2245 					goto out;
2246 				break;
2247 			}
2248 			pgnop = &bi->pgno;
2249 			break;
2250 		case P_IRECNO:
2251 			pgnop =
2252 			     &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno;
2253 			break;
2254 		case P_HASH:
2255 			pgnop = &pgno;
2256 			break;
2257 		default:
2258 			pgnop =
2259 			     &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno;
2260 			break;
2261 		}
2262 
2263 		if (DB_REDO(op)) {
2264 			/* Need to redo update described. */
2265 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
2266 			*pgnop = argp->npgno;
2267 			pagep->lsn = *lsnp;
2268 		} else {
2269 			REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
2270 			*pgnop = argp->opgno;
2271 			pagep->lsn = argp->lsn;
2272 		}
2273 		if (TYPE(pagep) == P_HASH)
2274 			memcpy(HOFFDUP_PGNO(P_ENTRY(file_dbp,
2275 			    pagep, argp->indx)), pgnop, sizeof(db_pgno_t));
2276 	}
2277 
2278 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2279 		goto out;
2280 
2281 done:
2282 	*lsnp = argp->prev_lsn;
2283 	ret = 0;
2284 
2285 out:	REC_CLOSE;
2286 }
2287 
2288 /*
2289  * __db_pglist_swap -- swap a list of freelist pages.
2290  * PUBLIC: void __db_pglist_swap __P((u_int32_t, void *));
2291  */
2292 void
__db_pglist_swap(size,list)2293 __db_pglist_swap(size, list)
2294 	u_int32_t size;
2295 	void *list;
2296 {
2297 	db_pglist_t *lp;
2298 	u_int32_t nelem;
2299 
2300 	nelem = size / sizeof(db_pglist_t);
2301 
2302 	lp = (db_pglist_t *)list;
2303 	while (nelem-- > 0) {
2304 		P_32_SWAP(&lp->pgno);
2305 		P_32_SWAP(&lp->lsn.file);
2306 		P_32_SWAP(&lp->lsn.offset);
2307 		lp++;
2308 	}
2309 }
2310 
2311 /*
2312  * __db_pglist_print -- print a list of freelist pages.
2313  * PUBLIC: void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *));
2314  */
2315 void
__db_pglist_print(env,mbp,list)2316 __db_pglist_print(env, mbp, list)
2317 	ENV *env;
2318 	DB_MSGBUF *mbp;
2319 	DBT *list;
2320 {
2321 	db_pglist_t *lp;
2322 	u_int32_t nelem;
2323 
2324 	nelem = list->size / sizeof(db_pglist_t);
2325 	lp = (db_pglist_t *)list->data;
2326 	__db_msgadd(env, mbp, "\t");
2327 	while (nelem-- > 0) {
2328 		__db_msgadd(env, mbp, "%lu [%lu][%lu]", (u_long)lp->pgno,
2329 		    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
2330 		if (nelem % 4 == 0)
2331 			__db_msgadd(env, mbp, "\n\t");
2332 		else
2333 			__db_msgadd(env, mbp, " ");
2334 		lp++;
2335 	}
2336 }
2337