1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/log.h"
14 #include "dbinc/mp.h"
15 #include "dbinc/lock.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/btree.h"
18 #include "dbinc/hash.h"
19 
20 static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *,
21     __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
22 static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *,
23     __db_pg_freedata_42_args *,
24     DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
25 
26 /*
27  * PUBLIC: int __db_addrem_recover
28  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
29  *
30  * This log message is generated whenever we add or remove a duplicate
31  * to/from a duplicate page.  On recover, we just do the opposite.
32  */
33 int
__db_addrem_recover(env,dbtp,lsnp,op,info)34 __db_addrem_recover(env, dbtp, lsnp, op, info)
35 	ENV *env;
36 	DBT *dbtp;
37 	DB_LSN *lsnp;
38 	db_recops op;
39 	void *info;
40 {
41 	__db_addrem_args *argp;
42 	DB_THREAD_INFO *ip;
43 	DB *file_dbp;
44 	DBC *dbc;
45 	DB_MPOOLFILE *mpf;
46 	PAGE *pagep;
47 	int cmp_n, cmp_p, modified, ret;
48 	u_int32_t opcode;
49 
50 	ip = ((DB_TXNHEAD *)info)->thread_info;
51 	pagep = NULL;
52 	REC_PRINT(__db_addrem_print);
53 	REC_INTRO(__db_addrem_read, ip, 1);
54 
55 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
56 	modified = 0;
57 
58 	opcode = OP_MODE_GET(argp->opcode);
59 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
60 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
61 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
62 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
63 	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_DUP) ||
64 	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_DUP)) {
65 		/* Need to redo an add, or undo a delete. */
66 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
67 		if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
68 		    argp->hdr.size == 0 ? NULL : &argp->hdr,
69 		    argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
70 			goto out;
71 		modified = 1;
72 
73 	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_DUP) ||
74 	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_DUP)) {
75 		/* Need to undo an add, or redo a delete. */
76 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
77 		if ((ret = __db_ditem(dbc,
78 		    pagep, argp->indx, argp->nbytes)) != 0)
79 			goto out;
80 		modified = 1;
81 	}
82 
83 	if (modified) {
84 		if (DB_REDO(op))
85 			LSN(pagep) = *lsnp;
86 		else
87 			LSN(pagep) = argp->pagelsn;
88 	}
89 
90 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
91 		goto out;
92 	pagep = NULL;
93 
94 done:	*lsnp = argp->prev_lsn;
95 	ret = 0;
96 
97 out:	if (pagep != NULL)
98 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
99 	REC_CLOSE;
100 }
101 
102 /*
103  * PUBLIC: int __db_addrem_42_recover
104  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
105  *
106  * This log message is generated whenever we add or remove a duplicate
107  * to/from a duplicate page.  On recover, we just do the opposite.
108  */
109 int
__db_addrem_42_recover(env,dbtp,lsnp,op,info)110 __db_addrem_42_recover(env, dbtp, lsnp, op, info)
111 	ENV *env;
112 	DBT *dbtp;
113 	DB_LSN *lsnp;
114 	db_recops op;
115 	void *info;
116 {
117 	__db_addrem_42_args *argp;
118 	DB_THREAD_INFO *ip;
119 	DB *file_dbp;
120 	DBC *dbc;
121 	DB_MPOOLFILE *mpf;
122 	PAGE *pagep;
123 	int cmp_n, cmp_p, modified, ret;
124 
125 	ip = ((DB_TXNHEAD *)info)->thread_info;
126 	pagep = NULL;
127 	REC_PRINT(__db_addrem_print);
128 	REC_INTRO(__db_addrem_42_read, ip, 1);
129 
130 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
131 	modified = 0;
132 
133 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
134 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
135 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
136 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
137 	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) ||
138 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) {
139 		/* Need to redo an add, or undo a delete. */
140 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
141 		if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
142 		    argp->hdr.size == 0 ? NULL : &argp->hdr,
143 		    argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
144 			goto out;
145 		modified = 1;
146 
147 	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) ||
148 	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) {
149 		/* Need to undo an add, or redo a delete. */
150 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
151 		if ((ret = __db_ditem(dbc,
152 		    pagep, argp->indx, argp->nbytes)) != 0)
153 			goto out;
154 		modified = 1;
155 	}
156 
157 	if (modified) {
158 		if (DB_REDO(op))
159 			LSN(pagep) = *lsnp;
160 		else
161 			LSN(pagep) = argp->pagelsn;
162 	}
163 
164 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
165 		goto out;
166 	pagep = NULL;
167 
168 done:	*lsnp = argp->prev_lsn;
169 	ret = 0;
170 
171 out:	if (pagep != NULL)
172 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
173 	REC_CLOSE;
174 }
175 
176 /*
177  * PUBLIC: int __db_big_recover
178  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
179  */
180 int
__db_big_recover(env,dbtp,lsnp,op,info)181 __db_big_recover(env, dbtp, lsnp, op, info)
182 	ENV *env;
183 	DBT *dbtp;
184 	DB_LSN *lsnp;
185 	db_recops op;
186 	void *info;
187 {
188 	__db_big_args *argp;
189 	DB_THREAD_INFO *ip;
190 	DB *file_dbp;
191 	DBC *dbc;
192 	DB_MPOOLFILE *mpf;
193 	PAGE *pagep;
194 	int cmp_n, cmp_p, modified, ret;
195 	u_int32_t opcode;
196 
197 	ip = ((DB_TXNHEAD *)info)->thread_info;
198 	pagep = NULL;
199 	REC_PRINT(__db_big_print);
200 	REC_INTRO(__db_big_read, ip, 0);
201 
202 	opcode = OP_MODE_GET(argp->opcode);
203 	REC_FGET(mpf, ip, argp->pgno, &pagep, ppage);
204 	modified = 0;
205 
206 	/*
207 	 * There are three pages we need to check.  The one on which we are
208 	 * adding data, the previous one whose next_pointer may have
209 	 * been updated, and the next one whose prev_pointer may have
210 	 * been updated.
211 	 */
212 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
213 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
214 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
215 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
216 	if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) ||
217 	    (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_BIG)) {
218 		/* We are either redo-ing an add, or undoing a delete. */
219 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
220 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
221 			argp->next_pgno, 0, P_OVERFLOW);
222 		OV_LEN(pagep) = argp->dbt.size;
223 		OV_REF(pagep) = 1;
224 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
225 		    argp->dbt.size);
226 		PREV_PGNO(pagep) = argp->prev_pgno;
227 		modified = 1;
228 	} else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_BIG) ||
229 	    (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_BIG)) {
230 		/*
231 		 * We are either undo-ing an add or redo-ing a delete.
232 		 * The page is about to be reclaimed in either case, so
233 		 * there really isn't anything to do here.
234 		 */
235 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
236 		modified = 1;
237 	} else if (cmp_p == 0 && DB_REDO(op) && opcode == DB_APPEND_BIG) {
238 		/* We are redoing an append. */
239 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
240 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
241 		    OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
242 		OV_LEN(pagep) += argp->dbt.size;
243 		modified = 1;
244 	} else if (cmp_n == 0 && DB_UNDO(op) && opcode == DB_APPEND_BIG) {
245 		/* We are undoing an append. */
246 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
247 		OV_LEN(pagep) -= argp->dbt.size;
248 		memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
249 		    OV_LEN(pagep), 0, argp->dbt.size);
250 		modified = 1;
251 	}
252 	if (modified)
253 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
254 
255 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
256 	pagep = NULL;
257 	if (ret != 0)
258 		goto out;
259 
260 	/*
261 	 * We only delete a whole chain of overflow items, and appends only
262 	 * apply to a single page.  Adding a page is the only case that
263 	 * needs to update the chain.
264 	 */
265 ppage:	if (opcode != DB_ADD_BIG)
266 		goto done;
267 
268 	/* Now check the previous page. */
269 	if (argp->prev_pgno != PGNO_INVALID) {
270 		REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
271 		modified = 0;
272 
273 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
274 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
275 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
276 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
277 
278 		if (cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) {
279 			/* Redo add, undo delete. */
280 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
281 			NEXT_PGNO(pagep) = argp->pgno;
282 			modified = 1;
283 		} else if (cmp_n == 0 &&
284 		    DB_UNDO(op) && opcode == DB_ADD_BIG) {
285 			/* Redo delete, undo add. */
286 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
287 			NEXT_PGNO(pagep) = argp->next_pgno;
288 			modified = 1;
289 		}
290 		if (modified)
291 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
292 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
293 		pagep = NULL;
294 		if (ret != 0)
295 			goto out;
296 	}
297 	pagep = NULL;
298 
299 	/* Now check the next page.  Can only be set on a delete. */
300 npage:	if (argp->next_pgno != PGNO_INVALID) {
301 		REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
302 		modified = 0;
303 
304 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
305 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
306 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
307 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
308 		if (cmp_p == 0 && DB_REDO(op)) {
309 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
310 			PREV_PGNO(pagep) = PGNO_INVALID;
311 			modified = 1;
312 		} else if (cmp_n == 0 && DB_UNDO(op)) {
313 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
314 			PREV_PGNO(pagep) = argp->pgno;
315 			modified = 1;
316 		}
317 		if (modified)
318 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
319 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
320 		pagep = NULL;
321 		if (ret != 0)
322 			goto out;
323 	}
324 	pagep = NULL;
325 
326 done:	*lsnp = argp->prev_lsn;
327 	ret = 0;
328 
329 out:	if (pagep != NULL)
330 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
331 	REC_CLOSE;
332 }
333 
334 /*
335  * PUBLIC: int __db_big_42_recover
336  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
337  */
338 int
__db_big_42_recover(env,dbtp,lsnp,op,info)339 __db_big_42_recover(env, dbtp, lsnp, op, info)
340 	ENV *env;
341 	DBT *dbtp;
342 	DB_LSN *lsnp;
343 	db_recops op;
344 	void *info;
345 {
346 	__db_big_42_args *argp;
347 	DB_THREAD_INFO *ip;
348 	DB *file_dbp;
349 	DBC *dbc;
350 	DB_MPOOLFILE *mpf;
351 	PAGE *pagep;
352 	int cmp_n, cmp_p, modified, ret;
353 
354 	ip = ((DB_TXNHEAD *)info)->thread_info;
355 	pagep = NULL;
356 	REC_PRINT(__db_big_print);
357 	REC_INTRO(__db_big_42_read, ip, 0);
358 
359 	REC_FGET(mpf, ip, argp->pgno, &pagep, ppage);
360 	modified = 0;
361 
362 	/*
363 	 * There are three pages we need to check.  The one on which we are
364 	 * adding data, the previous one whose next_pointer may have
365 	 * been updated, and the next one whose prev_pointer may have
366 	 * been updated.
367 	 */
368 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
369 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
370 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
371 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
372 	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) ||
373 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) {
374 		/* We are either redo-ing an add, or undoing a delete. */
375 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
376 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
377 			argp->next_pgno, 0, P_OVERFLOW);
378 		OV_LEN(pagep) = argp->dbt.size;
379 		OV_REF(pagep) = 1;
380 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
381 		    argp->dbt.size);
382 		PREV_PGNO(pagep) = argp->prev_pgno;
383 		modified = 1;
384 	} else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) ||
385 	    (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) {
386 		/*
387 		 * We are either undo-ing an add or redo-ing a delete.
388 		 * The page is about to be reclaimed in either case, so
389 		 * there really isn't anything to do here.
390 		 */
391 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
392 		modified = 1;
393 	} else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) {
394 		/* We are redoing an append. */
395 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
396 		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
397 		    OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
398 		OV_LEN(pagep) += argp->dbt.size;
399 		modified = 1;
400 	} else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) {
401 		/* We are undoing an append. */
402 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
403 		OV_LEN(pagep) -= argp->dbt.size;
404 		memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
405 		    OV_LEN(pagep), 0, argp->dbt.size);
406 		modified = 1;
407 	}
408 	if (modified)
409 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
410 
411 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
412 	pagep = NULL;
413 	if (ret != 0)
414 		goto out;
415 
416 	/*
417 	 * We only delete a whole chain of overflow items, and appends only
418 	 * apply to a single page.  Adding a page is the only case that
419 	 * needs to update the chain.
420 	 */
421 ppage:	if (argp->opcode != DB_ADD_BIG)
422 		goto done;
423 
424 	/* Now check the previous page. */
425 	if (argp->prev_pgno != PGNO_INVALID) {
426 		REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
427 		modified = 0;
428 
429 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
430 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
431 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
432 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
433 
434 		if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) {
435 			/* Redo add, undo delete. */
436 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
437 			NEXT_PGNO(pagep) = argp->pgno;
438 			modified = 1;
439 		} else if (cmp_n == 0 &&
440 		    DB_UNDO(op) && argp->opcode == DB_ADD_BIG) {
441 			/* Redo delete, undo add. */
442 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
443 			NEXT_PGNO(pagep) = argp->next_pgno;
444 			modified = 1;
445 		}
446 		if (modified)
447 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
448 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
449 		pagep = NULL;
450 		if (ret != 0)
451 			goto out;
452 	}
453 	pagep = NULL;
454 
455 	/* Now check the next page.  Can only be set on a delete. */
456 npage:	if (argp->next_pgno != PGNO_INVALID) {
457 		REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
458 		modified = 0;
459 
460 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
461 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
462 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
463 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
464 		if (cmp_p == 0 && DB_REDO(op)) {
465 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
466 			PREV_PGNO(pagep) = PGNO_INVALID;
467 			modified = 1;
468 		} else if (cmp_n == 0 && DB_UNDO(op)) {
469 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
470 			PREV_PGNO(pagep) = argp->pgno;
471 			modified = 1;
472 		}
473 		if (modified)
474 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
475 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
476 		pagep = NULL;
477 		if (ret != 0)
478 			goto out;
479 	}
480 	pagep = NULL;
481 
482 done:	*lsnp = argp->prev_lsn;
483 	ret = 0;
484 
485 out:	if (pagep != NULL)
486 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
487 	REC_CLOSE;
488 }
489 /*
490  * __db_ovref_recover --
491  *	Recovery function for __db_ovref().
492  *
493  * PUBLIC: int __db_ovref_recover
494  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
495  */
496 int
__db_ovref_recover(env,dbtp,lsnp,op,info)497 __db_ovref_recover(env, dbtp, lsnp, op, info)
498 	ENV *env;
499 	DBT *dbtp;
500 	DB_LSN *lsnp;
501 	db_recops op;
502 	void *info;
503 {
504 	__db_ovref_args *argp;
505 	DB_THREAD_INFO *ip;
506 	DB *file_dbp;
507 	DBC *dbc;
508 	DB_MPOOLFILE *mpf;
509 	PAGE *pagep;
510 	int cmp, ret;
511 
512 	ip = ((DB_TXNHEAD *)info)->thread_info;
513 	pagep = NULL;
514 	REC_PRINT(__db_ovref_print);
515 	REC_INTRO(__db_ovref_read, ip, 0);
516 
517 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
518 
519 	cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
520 	CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
521 	if (cmp == 0 && DB_REDO(op)) {
522 		/* Need to redo update described. */
523 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
524 		OV_REF(pagep) += argp->adjust;
525 		pagep->lsn = *lsnp;
526 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
527 		/* Need to undo update described. */
528 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
529 		OV_REF(pagep) -= argp->adjust;
530 		pagep->lsn = argp->lsn;
531 	}
532 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
533 	pagep = NULL;
534 	if (ret != 0)
535 		goto out;
536 	pagep = NULL;
537 
538 done:	*lsnp = argp->prev_lsn;
539 	ret = 0;
540 
541 out:	if (pagep != NULL)
542 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
543 	REC_CLOSE;
544 }
545 
546 /*
547  * __db_debug_recover --
548  *	Recovery function for debug.
549  *
550  * PUBLIC: int __db_debug_recover __P((ENV *,
551  * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
552  */
553 int
__db_debug_recover(env,dbtp,lsnp,op,info)554 __db_debug_recover(env, dbtp, lsnp, op, info)
555 	ENV *env;
556 	DBT *dbtp;
557 	DB_LSN *lsnp;
558 	db_recops op;
559 	void *info;
560 {
561 	__db_debug_args *argp;
562 	int ret;
563 
564 	COMPQUIET(op, DB_TXN_ABORT);
565 	COMPQUIET(info, NULL);
566 
567 	REC_PRINT(__db_debug_print);
568 	REC_NOOP_INTRO(__db_debug_read);
569 
570 	*lsnp = argp->prev_lsn;
571 	ret = 0;
572 
573 	REC_NOOP_CLOSE;
574 }
575 
576 /*
577  * __db_noop_recover --
578  *	Recovery function for noop.
579  *
580  * PUBLIC: int __db_noop_recover __P((ENV *,
581  * PUBLIC:      DBT *, DB_LSN *, db_recops, void *));
582  */
583 int
__db_noop_recover(env,dbtp,lsnp,op,info)584 __db_noop_recover(env, dbtp, lsnp, op, info)
585 	ENV *env;
586 	DBT *dbtp;
587 	DB_LSN *lsnp;
588 	db_recops op;
589 	void *info;
590 {
591 	__db_noop_args *argp;
592 	DB_THREAD_INFO *ip;
593 	DB *file_dbp;
594 	DBC *dbc;
595 	DB_MPOOLFILE *mpf;
596 	PAGE *pagep;
597 	int cmp_n, cmp_p, ret;
598 
599 	ip = ((DB_TXNHEAD *)info)->thread_info;
600 	pagep = NULL;
601 	REC_PRINT(__db_noop_print);
602 	REC_INTRO(__db_noop_read, ip, 0);
603 
604 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
605 
606 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
607 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
608 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
609 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
610 	if (cmp_p == 0 && DB_REDO(op)) {
611 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
612 		LSN(pagep) = *lsnp;
613 	} else if (cmp_n == 0 && DB_UNDO(op)) {
614 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
615 		LSN(pagep) = argp->prevlsn;
616 	}
617 	ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
618 	pagep = NULL;
619 
620 done:	*lsnp = argp->prev_lsn;
621 out:	if (pagep != NULL)
622 		(void)__memp_fput(mpf,
623 		    ip, pagep, file_dbp->priority);
624 	REC_CLOSE;
625 }
626 
627 /*
628  * __db_pg_alloc_recover --
629  *	Recovery function for pg_alloc.
630  *
631  * PUBLIC: int __db_pg_alloc_recover
632  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
633  */
634 int
__db_pg_alloc_recover(env,dbtp,lsnp,op,info)635 __db_pg_alloc_recover(env, dbtp, lsnp, op, info)
636 	ENV *env;
637 	DBT *dbtp;
638 	DB_LSN *lsnp;
639 	db_recops op;
640 	void *info;
641 {
642 	__db_pg_alloc_args *argp;
643 	DB_THREAD_INFO *ip;
644 	DB *file_dbp;
645 	DBC *dbc;
646 	DBMETA *meta;
647 	DB_MPOOLFILE *mpf;
648 	PAGE *pagep;
649 	db_pgno_t pgno;
650 	int cmp_n, cmp_p, created, level, ret;
651 
652 	ip = ((DB_TXNHEAD *)info)->thread_info;
653 	meta = NULL;
654 	pagep = NULL;
655 	created = 0;
656 	REC_PRINT(__db_pg_alloc_print);
657 	REC_INTRO(__db_pg_alloc_read, ip, 0);
658 
659 	/*
660 	 * Fix up the metadata page.  If we're redoing the operation, we have
661 	 * to get the metadata page and update its LSN and its free pointer.
662 	 * If we're undoing the operation and the page was ever created, we put
663 	 * it on the freelist.
664 	 */
665 	pgno = PGNO_BASE_MD;
666 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
667 		/* The metadata page must always exist on redo. */
668 		if (DB_REDO(op)) {
669 			ret = __db_pgerr(file_dbp, pgno, ret);
670 			goto out;
671 		} else
672 			goto done;
673 	}
674 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
675 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
676 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
677 	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
678 	if (cmp_p == 0 && DB_REDO(op)) {
679 		/* Need to redo update described. */
680 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
681 		LSN(meta) = *lsnp;
682 		meta->free = argp->next;
683 		if (argp->pgno > meta->last_pgno)
684 			meta->last_pgno = argp->pgno;
685 	} else if (cmp_n == 0 && DB_UNDO(op)) {
686 		/* Need to undo update described. */
687 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
688 		LSN(meta) = argp->meta_lsn;
689 		/*
690 		 * If the page has a zero LSN then its newly created and
691 		 * will be truncated rather than go on the free list.
692 		 */
693 		if (!IS_ZERO_LSN(argp->page_lsn))
694 			meta->free = argp->pgno;
695 		meta->last_pgno = argp->last_pgno;
696 	}
697 
698 #ifdef HAVE_FTRUNCATE
699 	/*
700 	 * check to see if we are keeping a sorted freelist, if so put
701 	 * this back in the in memory list.  It must be the first element.
702 	 */
703 	if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
704 		db_pgno_t *list;
705 		u_int32_t nelem;
706 
707 		if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
708 			goto out;
709 		if (list != NULL && (nelem == 0 || *list != argp->pgno)) {
710 			if ((ret =
711 			    __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
712 				goto out;
713 			if (nelem != 0)
714 				memmove(list + 1, list, nelem * sizeof(*list));
715 			*list = argp->pgno;
716 		}
717 	}
718 #endif
719 
720 	/*
721 	 * Fix up the allocated page. If the page does not exist
722 	 * and we can truncate it then don't create it.
723 	 * Otherwise if we're redoing the operation, we have
724 	 * to get the page (creating it if it doesn't exist), and update its
725 	 * LSN.  If we're undoing the operation, we have to reset the page's
726 	 * LSN and put it on the free list.
727 	 */
728 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
729 		/*
730 		 * We have to be able to identify if a page was newly
731 		 * created so we can recover it properly.  We cannot simply
732 		 * look for an empty header, because hash uses a pgin
733 		 * function that will set the header.  Instead, we explicitly
734 		 * try for the page without CREATE and if that fails, then
735 		 * create it.
736 		 */
737 		if (DB_UNDO(op))
738 			goto do_truncate;
739 		if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
740 		    DB_MPOOL_CREATE, &pagep)) != 0) {
741 			if (DB_UNDO(op) && ret == ENOSPC)
742 				goto do_truncate;
743 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
744 			goto out;
745 		}
746 		created = 1;
747 	}
748 
749 	/* Fix up the allocated page. */
750 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
751 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
752 
753 	/*
754 	 * If an initial allocation is aborted and then reallocated during
755 	 * an archival restore the log record will have an LSN for the page
756 	 * but the page will be empty.
757 	 */
758 	if (IS_ZERO_LSN(LSN(pagep)))
759 		cmp_p = 0;
760 
761 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
762 	/*
763 	 * Another special case we have to handle is if we ended up with a
764 	 * page of all 0's which can happen if we abort between allocating a
765 	 * page in mpool and initializing it.  In that case, even if we're
766 	 * undoing, we need to re-initialize the page.
767 	 */
768 	if (DB_REDO(op) && cmp_p == 0) {
769 		/* Need to redo update described. */
770 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
771 		switch (argp->ptype) {
772 		case P_LBTREE:
773 		case P_LRECNO:
774 		case P_LDUP:
775 			level = LEAFLEVEL;
776 			break;
777 		default:
778 			level = 0;
779 			break;
780 		}
781 		P_INIT(pagep, file_dbp->pgsize,
782 		    argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
783 
784 		pagep->lsn = *lsnp;
785 	} else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
786 		/*
787 		 * This is where we handle the case of a 0'd page (pagep->pgno
788 		 * is equal to PGNO_INVALID).
789 		 * Undo the allocation, reinitialize the page and
790 		 * link its next pointer to the free list.
791 		 */
792 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
793 		P_INIT(pagep, file_dbp->pgsize,
794 		    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
795 
796 		pagep->lsn = argp->page_lsn;
797 	}
798 
799 do_truncate:
800 	/*
801 	 * If the page was newly created, give it back.
802 	 */
803 	if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
804 	    IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
805 		/* Discard the page. */
806 		if (pagep != NULL) {
807 			if ((ret = __memp_fput(mpf, ip,
808 			    pagep, DB_PRIORITY_VERY_LOW)) != 0)
809 				goto out;
810 			pagep = NULL;
811 		}
812 		/* Give the page back to the OS. */
813 		if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate(
814 		    mpf, NULL, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0)
815 			goto out;
816 	}
817 
818 	if (pagep != NULL) {
819 		ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
820 		pagep = NULL;
821 		if (ret != 0)
822 			goto out;
823 	}
824 
825 	ret = __memp_fput(mpf, ip, meta, file_dbp->priority);
826 	meta = NULL;
827 	if (ret != 0)
828 		goto out;
829 
830 done:	*lsnp = argp->prev_lsn;
831 	ret = 0;
832 
833 out:	if (pagep != NULL)
834 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
835 	if (meta != NULL)
836 		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
837 	REC_CLOSE;
838 }
839 
840 /*
841  * __db_pg_free_recover_int --
842  */
843 static int
__db_pg_free_recover_int(env,ip,argp,file_dbp,lsnp,mpf,op,data)844 __db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
845 	ENV *env;
846 	DB_THREAD_INFO *ip;
847 	__db_pg_freedata_args *argp;
848 	DB *file_dbp;
849 	DB_LSN *lsnp;
850 	DB_MPOOLFILE *mpf;
851 	db_recops op;
852 	int data;
853 {
854 	DBMETA *meta;
855 	DB_LSN copy_lsn;
856 	PAGE *pagep, *prevp;
857 	int cmp_n, cmp_p, is_meta, ret;
858 
859 	meta = NULL;
860 	pagep = prevp = NULL;
861 
862 	/*
863 	 * Get the "metapage".  This will either be the metapage
864 	 * or the previous page in the free list if we are doing
865 	 * sorted allocations.  If its a previous page then
866 	 * we will not be truncating.
867 	 */
868 	is_meta = argp->meta_pgno == PGNO_BASE_MD;
869 
870 	REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
871 
872 	if (argp->meta_pgno != PGNO_BASE_MD)
873 		prevp = (PAGE *)meta;
874 
875 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
876 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
877 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
878 	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
879 
880 	/*
881 	 * Fix up the metadata page.  If we're redoing or undoing the operation
882 	 * we get the page and update its LSN, last and free pointer.
883 	 */
884 	if (cmp_p == 0 && DB_REDO(op)) {
885 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
886 		/*
887 		 * If we are at the end of the file truncate, otherwise
888 		 * put on the free list.
889 		 */
890 #ifdef HAVE_FTRUNCATE
891 		if (argp->pgno == argp->last_pgno)
892 			meta->last_pgno = argp->pgno - 1;
893 		else
894 #endif
895 		if (is_meta)
896 			meta->free = argp->pgno;
897 		else
898 			NEXT_PGNO(prevp) = argp->pgno;
899 		LSN(meta) = *lsnp;
900 	} else if (cmp_n == 0 && DB_UNDO(op)) {
901 		/* Need to undo the deallocation. */
902 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
903 		if (is_meta) {
904 			if (meta->last_pgno < argp->pgno)
905 				meta->last_pgno = argp->pgno;
906 			meta->free = argp->next;
907 		} else
908 			NEXT_PGNO(prevp) = argp->next;
909 		LSN(meta) = argp->meta_lsn;
910 	}
911 
912 check_meta:
913 	if (ret != 0 && is_meta) {
914 		/* The metadata page must always exist. */
915 		ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
916 		goto out;
917 	}
918 
919 	/*
920 	 * Get the freed page.  Don't create the page if we are going to
921 	 * free it.  If we're redoing the operation we get the page and
922 	 * explicitly discard its contents, then update its LSN. If we're
923 	 * undoing the operation, we get the page and restore its header.
924 	 */
925 	if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
926 		if ((ret = __memp_fget(mpf, &argp->pgno,
927 		    ip, NULL, 0, &pagep)) != 0) {
928 			if (ret != DB_PAGE_NOTFOUND)
929 				goto out;
930 #ifdef HAVE_FTRUNCATE
931 			if (is_meta &&
932 			    DB_REDO(op) && meta->last_pgno <= argp->pgno)
933 				goto trunc;
934 #endif
935 			goto done;
936 		}
937 	} else if ((ret = __memp_fget(mpf, &argp->pgno,
938 	   ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
939 		goto out;
940 
941 	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
942 	cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
943 	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
944 
945 	/*
946 	 * This page got extended by a later allocation,
947 	 * but its allocation was not in the scope of this
948 	 * recovery pass.
949 	 */
950 	if (IS_ZERO_LSN(LSN(pagep)))
951 		cmp_p = 0;
952 
953 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
954 	/*
955 	 * We need to check that the page could have the current LSN
956 	 * which was copied before it was truncated in addition to
957 	 * the usual of having the previous LSN.
958 	 */
959 	if (DB_REDO(op) &&
960 	    (cmp_p == 0 || cmp_n == 0 ||
961 	    (IS_ZERO_LSN(copy_lsn) &&
962 	    LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
963 		/* Need to redo the deallocation. */
964 		/*
965 		 * The page can be truncated if it was truncated at runtime
966 		 * and the current metapage reflects the truncation.
967 		 */
968 #ifdef HAVE_FTRUNCATE
969 		if (is_meta && meta->last_pgno <= argp->pgno &&
970 		    argp->last_pgno <= argp->pgno) {
971 			if ((ret = __memp_fput(mpf, ip,
972 			    pagep, DB_PRIORITY_VERY_LOW)) != 0)
973 				goto out;
974 			pagep = NULL;
975 trunc:			if ((ret = __memp_ftruncate(mpf, NULL, ip,
976 			    argp->pgno, MP_TRUNC_RECOVER)) != 0)
977 				goto out;
978 		} else if (argp->last_pgno == argp->pgno) {
979 			/* The page was truncated at runtime, zero it out. */
980 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
981 			P_INIT(pagep, 0, PGNO_INVALID,
982 			    PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
983 			ZERO_LSN(pagep->lsn);
984 		} else
985 #endif
986 		if (cmp_p == 0 || IS_ZERO_LSN(LSN(pagep))) {
987 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
988 			P_INIT(pagep, file_dbp->pgsize,
989 			    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
990 			pagep->lsn = *lsnp;
991 
992 		}
993 	} else if (cmp_n == 0 && DB_UNDO(op)) {
994 		/* Need to reallocate the page. */
995 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
996 		memcpy(pagep, argp->header.data, argp->header.size);
997 		if (data)
998 			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
999 			     argp->data.data, argp->data.size);
1000 	}
1001 	if (pagep != NULL &&
1002 	    (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1003 		goto out;
1004 
1005 	pagep = NULL;
1006 #ifdef HAVE_FTRUNCATE
1007 	/*
1008 	 * If we are keeping an in memory free list remove this
1009 	 * element from the list.
1010 	 */
1011 	if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
1012 		db_pgno_t *lp;
1013 		u_int32_t nelem, pos;
1014 
1015 		if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
1016 			goto out;
1017 		if (lp != NULL) {
1018 			pos = 0;
1019 			if (!is_meta) {
1020 				__db_freelist_pos(argp->pgno, lp, nelem, &pos);
1021 
1022 				/*
1023 				 * If we aborted after logging but before
1024 				 * updating the free list don't do anything.
1025 				 */
1026 				if (argp->pgno != lp[pos]) {
1027 					DB_ASSERT(env,
1028 					    argp->meta_pgno == lp[pos]);
1029 					goto done;
1030 				}
1031 				DB_ASSERT(env,
1032 				    argp->meta_pgno == lp[pos - 1]);
1033 			} else if (nelem != 0 && argp->pgno != lp[pos])
1034 				goto done;
1035 
1036 			if (pos < nelem)
1037 				memmove(&lp[pos], &lp[pos + 1],
1038 				    ((nelem - pos) - 1) * sizeof(*lp));
1039 
1040 			/* Shrink the list */
1041 			if ((ret =
1042 			    __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
1043 				goto out;
1044 		}
1045 	}
1046 #endif
1047 done:
1048 	if (meta != NULL &&
1049 	     (ret = __memp_fput(mpf, ip,  meta, file_dbp->priority)) != 0)
1050 		goto out;
1051 	meta = NULL;
1052 	ret = 0;
1053 
1054 out:	if (pagep != NULL)
1055 		(void)__memp_fput(mpf, ip,  pagep, file_dbp->priority);
1056 	if (meta != NULL)
1057 		(void)__memp_fput(mpf, ip,  meta, file_dbp->priority);
1058 
1059 	return (ret);
1060 }
1061 
1062 /*
1063  * __db_pg_free_recover --
1064  *	Recovery function for pg_free.
1065  *
1066  * PUBLIC: int __db_pg_free_recover
1067  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1068  */
1069 int
__db_pg_free_recover(env,dbtp,lsnp,op,info)1070 __db_pg_free_recover(env, dbtp, lsnp, op, info)
1071 	ENV *env;
1072 	DBT *dbtp;
1073 	DB_LSN *lsnp;
1074 	db_recops op;
1075 	void *info;
1076 {
1077 	__db_pg_free_args *argp;
1078 	DB *file_dbp;
1079 	DBC *dbc;
1080 	DB_MPOOLFILE *mpf;
1081 	DB_THREAD_INFO *ip;
1082 	int ret;
1083 
1084 	ip = ((DB_TXNHEAD *)info)->thread_info;
1085 	REC_PRINT(__db_pg_free_print);
1086 	REC_INTRO(__db_pg_free_read, ip, 0);
1087 
1088 	if ((ret = __db_pg_free_recover_int(env, ip,
1089 	     (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0)) != 0)
1090 		goto out;
1091 
1092 done:	*lsnp = argp->prev_lsn;
1093 out:
1094 	REC_CLOSE;
1095 }
1096 
1097 /*
1098  * __db_pg_freedata_recover --
1099  *	Recovery function for pg_freedata.
1100  *
1101  * PUBLIC: int __db_pg_freedata_recover
1102  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1103  */
1104 int
__db_pg_freedata_recover(env,dbtp,lsnp,op,info)1105 __db_pg_freedata_recover(env, dbtp, lsnp, op, info)
1106 	ENV *env;
1107 	DBT *dbtp;
1108 	DB_LSN *lsnp;
1109 	db_recops op;
1110 	void *info;
1111 {
1112 	__db_pg_freedata_args *argp;
1113 	DB *file_dbp;
1114 	DBC *dbc;
1115 	DB_MPOOLFILE *mpf;
1116 	DB_THREAD_INFO *ip;
1117 	int ret;
1118 
1119 	ip = ((DB_TXNHEAD *)info)->thread_info;
1120 	REC_PRINT(__db_pg_freedata_print);
1121 	REC_INTRO(__db_pg_freedata_read, ip, 0);
1122 
1123 	if ((ret = __db_pg_free_recover_int(env,
1124 	    ip, argp, file_dbp, lsnp, mpf, op, 1)) != 0)
1125 		goto out;
1126 
1127 done:	*lsnp = argp->prev_lsn;
1128 out:
1129 	REC_CLOSE;
1130 }
1131 
1132 /*
1133  * __db_cksum_recover --
1134  *	Recovery function for checksum failure log record.
1135  *
1136  * PUBLIC: int __db_cksum_recover __P((ENV *,
1137  * PUBLIC:      DBT *, DB_LSN *, db_recops, void *));
1138  */
1139 int
__db_cksum_recover(env,dbtp,lsnp,op,info)1140 __db_cksum_recover(env, dbtp, lsnp, op, info)
1141 	ENV *env;
1142 	DBT *dbtp;
1143 	DB_LSN *lsnp;
1144 	db_recops op;
1145 	void *info;
1146 {
1147 	__db_cksum_args *argp;
1148 	int ret;
1149 
1150 	COMPQUIET(info, NULL);
1151 	COMPQUIET(lsnp, NULL);
1152 	COMPQUIET(op, DB_TXN_ABORT);
1153 
1154 	REC_PRINT(__db_cksum_print);
1155 
1156 	if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
1157 		return (ret);
1158 
1159 	/*
1160 	 * We had a checksum failure -- the only option is to run catastrophic
1161 	 * recovery.
1162 	 */
1163 	if (F_ISSET(env, ENV_RECOVER_FATAL))
1164 		ret = 0;
1165 	else {
1166 		__db_errx(env, DB_STR("0642",
1167 		    "Checksum failure requires catastrophic recovery"));
1168 		ret = __env_panic(env, DB_RUNRECOVERY);
1169 	}
1170 
1171 	__os_free(env, argp);
1172 	return (ret);
1173 }
1174 
1175 /*
1176  * __db_pg_init_recover --
1177  *	Recovery function to reinit pages after truncation.
1178  *
1179  * PUBLIC: int __db_pg_init_recover
1180  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1181  */
1182 int
__db_pg_init_recover(env,dbtp,lsnp,op,info)1183 __db_pg_init_recover(env, dbtp, lsnp, op, info)
1184 	ENV *env;
1185 	DBT *dbtp;
1186 	DB_LSN *lsnp;
1187 	db_recops op;
1188 	void *info;
1189 {
1190 	__db_pg_init_args *argp;
1191 	DB_THREAD_INFO *ip;
1192 	DB *file_dbp;
1193 	DBC *dbc;
1194 	DB_LSN copy_lsn;
1195 	DB_MPOOLFILE *mpf;
1196 	PAGE *pagep;
1197 	int cmp_n, cmp_p, ret, type;
1198 
1199 	ip = ((DB_TXNHEAD *)info)->thread_info;
1200 	REC_PRINT(__db_pg_init_print);
1201 	REC_INTRO(__db_pg_init_read, ip, 0);
1202 
1203 	mpf = file_dbp->mpf;
1204 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1205 		if (DB_UNDO(op)) {
1206 			if (ret == DB_PAGE_NOTFOUND)
1207 				goto done;
1208 			else {
1209 				ret = __db_pgerr(file_dbp, argp->pgno, ret);
1210 				goto out;
1211 			}
1212 		}
1213 
1214 		/*
1215 		 * This page was truncated and may simply not have
1216 		 * had an item written to it yet.  This should only
1217 		 * happen on hash databases, so confirm that.
1218 		 */
1219 		DB_ASSERT(env, file_dbp->type == DB_HASH);
1220 		if ((ret = __memp_fget(mpf, &argp->pgno,
1221 		    ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
1222 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1223 			goto out;
1224 		}
1225 	}
1226 
1227 	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
1228 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1229 	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
1230 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
1231 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1232 
1233 	if (cmp_p == 0 && DB_REDO(op)) {
1234 		if (TYPE(pagep) == P_HASH)
1235 			type = P_HASH;
1236 		else
1237 			type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
1238 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1239 		P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
1240 		    PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
1241 		pagep->lsn = *lsnp;
1242 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1243 		/* Put the data back on the page. */
1244 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1245 		memcpy(pagep, argp->header.data, argp->header.size);
1246 		if (argp->data.size > 0)
1247 			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
1248 			     argp->data.data, argp->data.size);
1249 	}
1250 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1251 		goto out;
1252 
1253 done:	*lsnp = argp->prev_lsn;
1254 out:
1255 	REC_CLOSE;
1256 }
1257 
1258 /*
1259  * __db_pg_trunc_recover --
1260  *	Recovery function for pg_trunc.
1261  *
1262  * PUBLIC: int __db_pg_trunc_recover
1263  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1264  */
1265 int
__db_pg_trunc_recover(env,dbtp,lsnp,op,info)1266 __db_pg_trunc_recover(env, dbtp, lsnp, op, info)
1267 	ENV *env;
1268 	DBT *dbtp;
1269 	DB_LSN *lsnp;
1270 	db_recops op;
1271 	void *info;
1272 {
1273 #ifdef HAVE_FTRUNCATE
1274 	__db_pg_trunc_args *argp;
1275 	DB_THREAD_INFO *ip;
1276 	DB *file_dbp;
1277 	DBC *dbc;
1278 	DBMETA *meta;
1279 	DB_MPOOLFILE *mpf;
1280 	PAGE *pagep;
1281 	db_pglist_t *pglist, *lp;
1282 	db_pgno_t last_pgno, *list;
1283 	u_int32_t felem, nelem, pos;
1284 	int ret;
1285 
1286 	ip = ((DB_TXNHEAD *)info)->thread_info;
1287 	REC_PRINT(__db_pg_trunc_print);
1288 	REC_INTRO(__db_pg_trunc_read, ip, 1);
1289 
1290 	pglist = (db_pglist_t *) argp->list.data;
1291 	nelem = argp->list.size / sizeof(db_pglist_t);
1292 	if (DB_REDO(op)) {
1293 		/*
1294 		 * First call __db_pg_truncate to find the truncation
1295 		 * point, truncate the file and return the new last_pgno.
1296 		 */
1297 		last_pgno = argp->last_pgno;
1298 		if ((ret = __db_pg_truncate(dbc, NULL, pglist,
1299 		    NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0)
1300 			goto out;
1301 
1302 		if (argp->last_free != PGNO_INVALID) {
1303 			/*
1304 			 * Update the next pointer of the last page in
1305 			 * the freelist.  If the truncation point is
1306 			 * beyond next_free then this is still in the freelist
1307 			 * otherwise the last_free page is at the end.
1308 			 */
1309 			if ((ret = __memp_fget(mpf,
1310 			    &argp->last_free, ip, NULL, 0, &meta)) == 0) {
1311 				if (LOG_COMPARE(&LSN(meta),
1312 				     &argp->last_lsn) == 0) {
1313 					REC_DIRTY(mpf,
1314 					    ip, dbc->priority, &meta);
1315 					if (pglist->pgno > last_pgno)
1316 						NEXT_PGNO(meta) = PGNO_INVALID;
1317 					else
1318 						NEXT_PGNO(meta) = pglist->pgno;
1319 					LSN(meta) = *lsnp;
1320 				}
1321 				if ((ret = __memp_fput(mpf, ip,
1322 				    meta, file_dbp->priority)) != 0)
1323 					goto out;
1324 				meta = NULL;
1325 			} else if (ret != DB_PAGE_NOTFOUND)
1326 				goto out;
1327 		}
1328 		if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
1329 		    0, &meta)) != 0)
1330 			goto out;
1331 		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
1332 			REC_DIRTY(mpf, ip, dbc->priority, &meta);
1333 			if (argp->last_free == PGNO_INVALID) {
1334 				if (nelem == 0)
1335 					meta->free = PGNO_INVALID;
1336 				else
1337 					meta->free = pglist->pgno;
1338 			}
1339 			/*
1340 			 * If this is part of a multi record truncate
1341 			 * this could be just the last page of this record
1342 			 * don't move the meta->last_pgno forward.
1343 			 */
1344 			if (meta->last_pgno > last_pgno)
1345 				meta->last_pgno = last_pgno;
1346 			LSN(meta) = *lsnp;
1347 		}
1348 	} else {
1349 		/* Put the free list back in its original order. */
1350 		for (lp = pglist; lp < &pglist[nelem]; lp++) {
1351 			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1352 			    NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1353 				goto out;
1354 			if (IS_ZERO_LSN(LSN(pagep)) ||
1355 			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
1356 				REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1357 				P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1358 				    PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1359 				LSN(pagep) = lp->lsn;
1360 			}
1361 			if ((ret = __memp_fput(mpf,
1362 			    ip, pagep, file_dbp->priority)) != 0)
1363 				goto out;
1364 		}
1365 		/*
1366 		 * Link the truncated part back into the free list.
1367 		 * Its either after the last_free page or directly
1368 		 * linked to the metadata page.
1369 		 */
1370 		if (argp->last_free != PGNO_INVALID) {
1371 			if ((ret = __memp_fget(mpf, &argp->last_free,
1372 			    ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
1373 				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1374 					NEXT_PGNO(meta) = argp->next_free;
1375 					LSN(meta) = argp->last_lsn;
1376 				}
1377 				if ((ret = __memp_fput(mpf, ip,
1378 				    meta, file_dbp->priority)) != 0)
1379 					goto out;
1380 			} else if (ret != DB_PAGE_NOTFOUND)
1381 				goto out;
1382 			meta = NULL;
1383 		}
1384 		if ((ret = __memp_fget(mpf, &argp->meta,
1385 		    ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
1386 			goto out;
1387 		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1388 			REC_DIRTY(mpf, ip, dbc->priority, &meta);
1389 			/*
1390 			 * If we had to break up the list last_pgno
1391 			 * may only represent the end of the block.
1392 			 */
1393 			if (meta->last_pgno < argp->last_pgno)
1394 				meta->last_pgno = argp->last_pgno;
1395 			if (argp->last_free == PGNO_INVALID)
1396 				meta->free = argp->next_free;
1397 			LSN(meta) = argp->meta_lsn;
1398 		}
1399 	}
1400 
1401 	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1402 		goto out;
1403 
1404 	if (op == DB_TXN_ABORT) {
1405 		/*
1406 		 * Put the pages back on the in memory free list.
1407 		 * If this is part of a multi-record truncate then
1408 		 * we need to find this batch, it may not be at the end.
1409 		 * If we aborted while writing one of the log records
1410 		 * then this set may still be in the list.
1411 		 */
1412 		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1413 			goto out;
1414 		if (list != NULL) {
1415 			if (felem != 0 && list[felem - 1] > pglist->pgno) {
1416 				__db_freelist_pos(
1417 				    pglist->pgno, list, felem, &pos);
1418 				DB_ASSERT(env, pos < felem);
1419 				if (pglist->pgno == list[pos])
1420 					goto done;
1421 				pos++;
1422 			} else if (felem != 0 &&
1423 			    list[felem - 1] == pglist->pgno)
1424 				goto done;
1425 			else
1426 				pos = felem;
1427 			if ((ret = __memp_extend_freelist(
1428 			    mpf, felem + nelem, &list)) != 0)
1429 				goto out;
1430 			if (pos != felem)
1431 				memmove(&list[nelem + pos], &list[pos],
1432 				    sizeof(*list) * (felem - pos));
1433 			for (lp = pglist; lp < &pglist[nelem]; lp++)
1434 				list[pos++] = lp->pgno;
1435 		}
1436 	}
1437 
1438 done:	*lsnp = argp->prev_lsn;
1439 	ret = 0;
1440 
1441 out:	REC_CLOSE;
1442 #else
1443 	/*
1444 	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records
1445 	 * to recover.
1446 	 */
1447 	COMPQUIET(env, NULL);
1448 	COMPQUIET(dbtp, NULL);
1449 	COMPQUIET(lsnp, NULL);
1450 	COMPQUIET(op,  DB_TXN_ABORT);
1451 	COMPQUIET(info, NULL);
1452 	return (EINVAL);
1453 #endif
1454 }
1455 /*
1456  * __db_realloc_recover --
1457  *	Recovery function for realloc.
1458  *
1459  * PUBLIC: int __db_realloc_recover
1460  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1461  */
1462 int
__db_realloc_recover(env,dbtp,lsnp,op,info)1463 __db_realloc_recover(env, dbtp, lsnp, op, info)
1464 	ENV *env;
1465 	DBT *dbtp;
1466 	DB_LSN *lsnp;
1467 	db_recops op;
1468 	void *info;
1469 {
1470 	__db_realloc_args *argp;
1471 	DB *file_dbp;
1472 	DBC *dbc;
1473 	DB_MPOOLFILE *mpf;
1474 	DB_THREAD_INFO *ip;
1475 	PAGE *pagep;
1476 	db_pglist_t *pglist, *lp;
1477 #ifdef HAVE_FTRUNCATE
1478 	db_pgno_t *list;
1479 	u_int32_t felem, pos;
1480 #endif
1481 	u_int32_t nelem;
1482 	int cmp_n, cmp_p, ret;
1483 
1484 	ip = ((DB_TXNHEAD *)info)->thread_info;
1485 
1486 	REC_PRINT(__db_realloc_print);
1487 	REC_INTRO(__db_realloc_read, ip, 1);
1488 	mpf = file_dbp->mpf;
1489 
1490 	/*
1491 	 * First, iterate over all the pages and make sure they are all in
1492 	 * their prior or new states (according to the op).
1493 	 */
1494 	pglist = (db_pglist_t *) argp->list.data;
1495 	nelem = argp->list.size / sizeof(db_pglist_t);
1496 	for (lp = pglist; lp < &pglist[nelem]; lp++) {
1497 		if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1498 		    NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1499 			goto out;
1500 		if (DB_REDO(op) && LOG_COMPARE(&LSN(pagep), &lp->lsn) == 0) {
1501 			REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1502 			P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1503 			    PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
1504 			LSN(pagep) = *lsnp;
1505 		} else if (DB_UNDO(op) && (IS_ZERO_LSN(LSN(pagep)) ||
1506 		     LOG_COMPARE(&LSN(pagep), lsnp) == 0)) {
1507 			REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1508 			P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1509 			    PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1510 			LSN(pagep) = lp->lsn;
1511 		}
1512 		if ((ret = __memp_fput(mpf,
1513 		    ip, pagep, file_dbp->priority)) != 0)
1514 			goto out;
1515 	}
1516 
1517 	/* Now, fix up the free list. */
1518 	if ((ret = __memp_fget(mpf,
1519 	    &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0)
1520 		goto out;
1521 
1522 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1523 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1524 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1525 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1526 
1527 	if (DB_REDO(op) && cmp_p == 0) {
1528 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1529 		if (argp->prev_pgno == PGNO_BASE_MD)
1530 			((DBMETA *)pagep)->free = argp->next_free;
1531 		else
1532 			NEXT_PGNO(pagep) = argp->next_free;
1533 		LSN(pagep) = *lsnp;
1534 	} else if (DB_UNDO(op) && cmp_n == 0) {
1535 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1536 		if (argp->prev_pgno == PGNO_BASE_MD)
1537 			((DBMETA *)pagep)->free = pglist->pgno;
1538 		else
1539 			NEXT_PGNO(pagep) = pglist->pgno;
1540 		LSN(pagep) = argp->page_lsn;
1541 	}
1542 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1543 		goto out;
1544 
1545 #ifdef HAVE_FTRUNCATE
1546 	if (op == DB_TXN_ABORT) {
1547 		/* Put the pages back in the sorted list. */
1548 		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1549 			goto out;
1550 		if (list != NULL) {
1551 			__db_freelist_pos(pglist->pgno, list, felem, &pos);
1552 			if (pglist->pgno == list[pos])
1553 				goto done;
1554 			if ((ret = __memp_extend_freelist(
1555 			    mpf, felem + nelem, &list)) != 0)
1556 				goto out;
1557 			pos++;
1558 			if (pos != felem)
1559 				memmove(&list[pos+nelem],
1560 				    &list[pos], nelem * sizeof(*list));
1561 			for (lp = pglist; lp < &pglist[nelem]; lp++)
1562 				list[pos++] = lp->pgno;
1563 		}
1564 	}
1565 #endif
1566 
1567 done:	*lsnp = argp->prev_lsn;
1568 	ret = 0;
1569 
1570 out:	REC_CLOSE;
1571 }
1572 /*
1573  * __db_pg_sort_44_recover --
1574  *	Recovery function for pg_sort.
1575  * This is deprecated and kept for replication upgrades.
1576  *
1577  * PUBLIC: int __db_pg_sort_44_recover
1578  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1579  */
1580 int
__db_pg_sort_44_recover(env,dbtp,lsnp,op,info)1581 __db_pg_sort_44_recover(env, dbtp, lsnp, op, info)
1582 	ENV *env;
1583 	DBT *dbtp;
1584 	DB_LSN *lsnp;
1585 	db_recops op;
1586 	void *info;
1587 {
1588 #ifdef HAVE_FTRUNCATE
1589 	__db_pg_sort_44_args *argp;
1590 	DB_THREAD_INFO *ip;
1591 	DB *file_dbp;
1592 	DBC *dbc;
1593 	DBMETA *meta;
1594 	DB_MPOOLFILE *mpf;
1595 	PAGE *pagep;
1596 	db_pglist_t *pglist, *lp;
1597 	db_pgno_t pgno, *list;
1598 	u_int32_t felem, nelem;
1599 	int ret;
1600 
1601 	ip = ((DB_TXNHEAD *)info)->thread_info;
1602 	REC_PRINT(__db_pg_sort_44_print);
1603 	REC_INTRO(__db_pg_sort_44_read, ip, 1);
1604 
1605 	pglist = (db_pglist_t *) argp->list.data;
1606 	nelem = argp->list.size / sizeof(db_pglist_t);
1607 	if (DB_REDO(op)) {
1608 		pgno = argp->last_pgno;
1609 		__db_freelist_sort(pglist, nelem);
1610 		if ((ret = __db_pg_truncate(dbc, NULL,
1611 		    pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0)
1612 			goto out;
1613 
1614 		if (argp->last_free != PGNO_INVALID) {
1615 			if ((ret = __memp_fget(mpf,
1616 			    &argp->last_free, ip, NULL, 0, &meta)) == 0) {
1617 				if (LOG_COMPARE(&LSN(meta),
1618 				     &argp->last_lsn) == 0) {
1619 					REC_DIRTY(mpf,
1620 					    ip, dbc->priority, &meta);
1621 					NEXT_PGNO(meta) = PGNO_INVALID;
1622 					LSN(meta) = *lsnp;
1623 				}
1624 				if ((ret = __memp_fput(mpf, ip,
1625 				    meta, file_dbp->priority)) != 0)
1626 					goto out;
1627 				meta = NULL;
1628 			} else if (ret != DB_PAGE_NOTFOUND)
1629 				goto out;
1630 		}
1631 		if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
1632 		    0, &meta)) != 0)
1633 			goto out;
1634 		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
1635 			REC_DIRTY(mpf, ip, dbc->priority, &meta);
1636 			if (argp->last_free == PGNO_INVALID) {
1637 				if (nelem == 0)
1638 					meta->free = PGNO_INVALID;
1639 				else
1640 					meta->free = pglist->pgno;
1641 			}
1642 			meta->last_pgno = pgno;
1643 			LSN(meta) = *lsnp;
1644 		}
1645 	} else {
1646 		/* Put the free list back in its original order. */
1647 		for (lp = pglist; lp < &pglist[nelem]; lp++) {
1648 			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1649 			    NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1650 				goto out;
1651 			if (IS_ZERO_LSN(LSN(pagep)) ||
1652 			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
1653 				REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1654 				if (lp == &pglist[nelem - 1])
1655 					pgno = PGNO_INVALID;
1656 				else
1657 					pgno = lp[1].pgno;
1658 
1659 				P_INIT(pagep, file_dbp->pgsize,
1660 				    lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
1661 				LSN(pagep) = lp->lsn;
1662 			}
1663 			if ((ret = __memp_fput(mpf,
1664 			    ip, pagep, file_dbp->priority)) != 0)
1665 				goto out;
1666 		}
1667 		if (argp->last_free != PGNO_INVALID) {
1668 			if ((ret = __memp_fget(mpf, &argp->last_free,
1669 			    ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
1670 				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1671 					NEXT_PGNO(meta) = pglist->pgno;
1672 					LSN(meta) = argp->last_lsn;
1673 				}
1674 				if ((ret = __memp_fput(mpf, ip,
1675 				    meta, file_dbp->priority)) != 0)
1676 					goto out;
1677 			} else if (ret != DB_PAGE_NOTFOUND)
1678 				goto out;
1679 			meta = NULL;
1680 		}
1681 		if ((ret = __memp_fget(mpf, &argp->meta,
1682 		    ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
1683 			goto out;
1684 		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1685 			REC_DIRTY(mpf, ip, dbc->priority, &meta);
1686 			meta->last_pgno = argp->last_pgno;
1687 			if (argp->last_free == PGNO_INVALID)
1688 				meta->free = pglist->pgno;
1689 			LSN(meta) = argp->meta_lsn;
1690 		}
1691 	}
1692 	if (op == DB_TXN_ABORT) {
1693 		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1694 			goto out;
1695 		if (list != NULL) {
1696 			DB_ASSERT(env, felem == 0 ||
1697 			    argp->last_free == list[felem - 1]);
1698 			if ((ret = __memp_extend_freelist(
1699 			    mpf, felem + nelem, &list)) != 0)
1700 				goto out;
1701 			for (lp = pglist; lp < &pglist[nelem]; lp++)
1702 				list[felem++] = lp->pgno;
1703 		}
1704 	}
1705 
1706 	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1707 		goto out;
1708 
1709 done:	*lsnp = argp->prev_lsn;
1710 	ret = 0;
1711 
1712 out:	REC_CLOSE;
1713 #else
1714 	/*
1715 	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
1716 	 * to recover.
1717 	 */
1718 	COMPQUIET(env, NULL);
1719 	COMPQUIET(dbtp, NULL);
1720 	COMPQUIET(lsnp, NULL);
1721 	COMPQUIET(op,  DB_TXN_ABORT);
1722 	COMPQUIET(info, NULL);
1723 	return (EINVAL);
1724 #endif
1725 }
1726 
1727 /*
1728  * __db_pg_alloc_42_recover --
1729  *	Recovery function for pg_alloc.
1730  *
1731  * PUBLIC: int __db_pg_alloc_42_recover
1732  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1733  */
1734 int
__db_pg_alloc_42_recover(env,dbtp,lsnp,op,info)1735 __db_pg_alloc_42_recover(env, dbtp, lsnp, op, info)
1736 	ENV *env;
1737 	DBT *dbtp;
1738 	DB_LSN *lsnp;
1739 	db_recops op;
1740 	void *info;
1741 {
1742 	__db_pg_alloc_42_args *argp;
1743 	DB_THREAD_INFO *ip;
1744 	DB *file_dbp;
1745 	DBC *dbc;
1746 	DBMETA *meta;
1747 	DB_MPOOLFILE *mpf;
1748 	PAGE *pagep;
1749 	db_pgno_t pgno;
1750 	int cmp_n, cmp_p, created, level, ret;
1751 
1752 	ip = ((DB_TXNHEAD *)info)->thread_info;
1753 	meta = NULL;
1754 	pagep = NULL;
1755 	created = 0;
1756 	REC_PRINT(__db_pg_alloc_42_print);
1757 	REC_INTRO(__db_pg_alloc_42_read, ip, 0);
1758 
1759 	/*
1760 	 * Fix up the metadata page.  If we're redoing the operation, we have
1761 	 * to get the metadata page and update its LSN and its free pointer.
1762 	 * If we're undoing the operation and the page was ever created, we put
1763 	 * it on the freelist.
1764 	 */
1765 	pgno = PGNO_BASE_MD;
1766 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
1767 		/* The metadata page must always exist on redo. */
1768 		if (DB_REDO(op)) {
1769 			ret = __db_pgerr(file_dbp, pgno, ret);
1770 			goto out;
1771 		} else
1772 			goto done;
1773 	}
1774 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1775 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1776 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1777 	if (cmp_p == 0 && DB_REDO(op)) {
1778 		/* Need to redo update described. */
1779 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1780 		LSN(meta) = *lsnp;
1781 		meta->free = argp->next;
1782 		if (argp->pgno > meta->last_pgno)
1783 			meta->last_pgno = argp->pgno;
1784 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1785 		goto no_rollback;
1786 	}
1787 
1788 	/*
1789 	 * Fix up the allocated page. If the page does not exist
1790 	 * and we can truncate it then don't create it.
1791 	 * Otherwise if we're redoing the operation, we have
1792 	 * to get the page (creating it if it doesn't exist), and update its
1793 	 * LSN.  If we're undoing the operation, we have to reset the page's
1794 	 * LSN and put it on the free list, or truncate it.
1795 	 */
1796 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1797 		/*
1798 		 * We have to be able to identify if a page was newly
1799 		 * created so we can recover it properly.  We cannot simply
1800 		 * look for an empty header, because hash uses a pgin
1801 		 * function that will set the header.  Instead, we explicitly
1802 		 * try for the page without CREATE and if that fails, then
1803 		 * create it.
1804 		 */
1805 		if ((ret = __memp_fget(mpf, &argp->pgno,
1806 		    ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
1807 			if (DB_UNDO(op) && ret == ENOSPC)
1808 				goto do_truncate;
1809 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1810 			goto out;
1811 		}
1812 		created = 1;
1813 	}
1814 
1815 	/* Fix up the allocated page. */
1816 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1817 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1818 
1819 	/*
1820 	 * If an initial allocation is aborted and then reallocated during
1821 	 * an archival restore the log record will have an LSN for the page
1822 	 * but the page will be empty.
1823 	 */
1824 	if (IS_ZERO_LSN(LSN(pagep)) ||
1825 	    (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep))))
1826 		cmp_p = 0;
1827 
1828 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1829 	/*
1830 	 * Another special case we have to handle is if we ended up with a
1831 	 * page of all 0's which can happen if we abort between allocating a
1832 	 * page in mpool and initializing it.  In that case, even if we're
1833 	 * undoing, we need to re-initialize the page.
1834 	 */
1835 	if (DB_REDO(op) && cmp_p == 0) {
1836 		/* Need to redo update described. */
1837 		switch (argp->ptype) {
1838 		case P_LBTREE:
1839 		case P_LRECNO:
1840 		case P_LDUP:
1841 			level = LEAFLEVEL;
1842 			break;
1843 		default:
1844 			level = 0;
1845 			break;
1846 		}
1847 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1848 		P_INIT(pagep, file_dbp->pgsize,
1849 		    argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
1850 
1851 		pagep->lsn = *lsnp;
1852 	} else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
1853 		/*
1854 		 * This is where we handle the case of a 0'd page (pagep->pgno
1855 		 * is equal to PGNO_INVALID).
1856 		 * Undo the allocation, reinitialize the page and
1857 		 * link its next pointer to the free list.
1858 		 */
1859 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1860 		P_INIT(pagep, file_dbp->pgsize,
1861 		    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
1862 
1863 		pagep->lsn = argp->page_lsn;
1864 	}
1865 
1866 do_truncate:
1867 	/*
1868 	 * We cannot undo things from 4.2 land, because we nolonger
1869 	 * have limbo processing.
1870 	 */
1871 	if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
1872 	    IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
1873 no_rollback:	__db_errx(env, DB_STR("0643",
1874 "Cannot replicate prepared transactions from master running release 4.2 "));
1875 		ret = __env_panic(env, EINVAL);
1876 	}
1877 
1878 	if (pagep != NULL &&
1879 	    (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1880 		goto out;
1881 	pagep = NULL;
1882 
1883 	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1884 		goto out;
1885 	meta = NULL;
1886 
1887 done:	*lsnp = argp->prev_lsn;
1888 	ret = 0;
1889 
1890 out:	if (pagep != NULL)
1891 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1892 	if (meta != NULL)
1893 		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1894 	REC_CLOSE;
1895 }
1896 
1897 /*
1898  * __db_pg_free_recover_42_int --
1899  */
1900 static int
__db_pg_free_recover_42_int(env,ip,argp,file_dbp,lsnp,mpf,op,data)1901 __db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
1902 	ENV *env;
1903 	DB_THREAD_INFO *ip;
1904 	__db_pg_freedata_42_args *argp;
1905 	DB *file_dbp;
1906 	DB_LSN *lsnp;
1907 	DB_MPOOLFILE *mpf;
1908 	db_recops op;
1909 	int data;
1910 {
1911 	DBMETA *meta;
1912 	DB_LSN copy_lsn;
1913 	PAGE *pagep, *prevp;
1914 	int cmp_n, cmp_p, is_meta, ret;
1915 
1916 	meta = NULL;
1917 	pagep = NULL;
1918 	prevp = NULL;
1919 
1920 	/*
1921 	 * Get the "metapage".  This will either be the metapage
1922 	 * or the previous page in the free list if we are doing
1923 	 * sorted allocations.  If its a previous page then
1924 	 * we will not be truncating.
1925 	 */
1926 	is_meta = argp->meta_pgno == PGNO_BASE_MD;
1927 
1928 	REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
1929 
1930 	if (argp->meta_pgno != PGNO_BASE_MD)
1931 		prevp = (PAGE *)meta;
1932 
1933 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1934 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1935 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1936 
1937 	/*
1938 	 * Fix up the metadata page.  If we're redoing or undoing the operation
1939 	 * we get the page and update its LSN, last and free pointer.
1940 	 */
1941 	if (cmp_p == 0 && DB_REDO(op)) {
1942 		/* Need to redo the deallocation. */
1943 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1944 		if (prevp == NULL)
1945 			meta->free = argp->pgno;
1946 		else
1947 			NEXT_PGNO(prevp) = argp->pgno;
1948 		/*
1949 		 * If this was a compensating transaction and
1950 		 * we are a replica, then we never executed the
1951 		 * original allocation which incremented meta->free.
1952 		 */
1953 		if (prevp == NULL && meta->last_pgno < meta->free)
1954 			meta->last_pgno = meta->free;
1955 		LSN(meta) = *lsnp;
1956 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1957 		/* Need to undo the deallocation. */
1958 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1959 		if (prevp == NULL)
1960 			meta->free = argp->next;
1961 		else
1962 			NEXT_PGNO(prevp) = argp->next;
1963 		LSN(meta) = argp->meta_lsn;
1964 		if (prevp == NULL && meta->last_pgno < argp->pgno)
1965 			meta->last_pgno = argp->pgno;
1966 	}
1967 
1968 check_meta:
1969 	if (ret != 0 && is_meta) {
1970 		/* The metadata page must always exist. */
1971 		ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
1972 		goto out;
1973 	}
1974 
1975 	/*
1976 	 * Get the freed page.  If we support truncate then don't
1977 	 * create the page if we are going to free it.  If we're
1978 	 * redoing the operation we get the page and explicitly discard
1979 	 * its contents, then update its LSN.  If we're undoing the
1980 	 * operation, we get the page and restore its header.
1981 	 * If we don't support truncate, then we must create the page
1982 	 * and roll it back.
1983 	 */
1984 	if ((ret = __memp_fget(mpf, &argp->pgno,
1985 	    ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1986 		goto out;
1987 
1988 	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
1989 	cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
1990 	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
1991 
1992 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
1993 	if (DB_REDO(op) &&
1994 	    (cmp_p == 0 ||
1995 	    (IS_ZERO_LSN(copy_lsn) &&
1996 	    LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
1997 		/* Need to redo the deallocation. */
1998 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1999 		P_INIT(pagep, file_dbp->pgsize,
2000 		    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
2001 		pagep->lsn = *lsnp;
2002 	} else if (cmp_n == 0 && DB_UNDO(op)) {
2003 		/* Need to reallocate the page. */
2004 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2005 		memcpy(pagep, argp->header.data, argp->header.size);
2006 		if (data)
2007 			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
2008 			     argp->data.data, argp->data.size);
2009 	}
2010 	if (pagep != NULL &&
2011 	    (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2012 		goto out;
2013 
2014 	pagep = NULL;
2015 	if (meta != NULL &&
2016 	    (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
2017 		goto out;
2018 	meta = NULL;
2019 
2020 	ret = 0;
2021 
2022 out:	if (pagep != NULL)
2023 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2024 	if (meta != NULL)
2025 		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
2026 
2027 	return (ret);
2028 }
2029 
2030 /*
2031  * __db_pg_free_42_recover --
2032  *	Recovery function for pg_free.
2033  *
2034  * PUBLIC: int __db_pg_free_42_recover
2035  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2036  */
2037 int
__db_pg_free_42_recover(env,dbtp,lsnp,op,info)2038 __db_pg_free_42_recover(env, dbtp, lsnp, op, info)
2039 	ENV *env;
2040 	DBT *dbtp;
2041 	DB_LSN *lsnp;
2042 	db_recops op;
2043 	void *info;
2044 {
2045 	__db_pg_free_42_args *argp;
2046 	DB *file_dbp;
2047 	DBC *dbc;
2048 	DB_MPOOLFILE *mpf;
2049 	DB_THREAD_INFO *ip;
2050 	int ret;
2051 
2052 	ip = ((DB_TXNHEAD *)info)->thread_info;
2053 	REC_PRINT(__db_pg_free_42_print);
2054 	REC_INTRO(__db_pg_free_42_read, ip, 0);
2055 
2056 	ret = __db_pg_free_recover_42_int(env, ip,
2057 	     (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0);
2058 
2059 done:	*lsnp = argp->prev_lsn;
2060 out:
2061 	REC_CLOSE;
2062 }
2063 
2064 /*
2065  * __db_pg_freedata_42_recover --
2066  *	Recovery function for pg_freedata.
2067  *
2068  * PUBLIC: int __db_pg_freedata_42_recover
2069  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2070  */
2071 int
__db_pg_freedata_42_recover(env,dbtp,lsnp,op,info)2072 __db_pg_freedata_42_recover(env, dbtp, lsnp, op, info)
2073 	ENV *env;
2074 	DBT *dbtp;
2075 	DB_LSN *lsnp;
2076 	db_recops op;
2077 	void *info;
2078 {
2079 	__db_pg_freedata_42_args *argp;
2080 	DB *file_dbp;
2081 	DBC *dbc;
2082 	DB_MPOOLFILE *mpf;
2083 	DB_THREAD_INFO *ip;
2084 	int ret;
2085 
2086 	ip = ((DB_TXNHEAD *)info)->thread_info;
2087 	REC_PRINT(__db_pg_freedata_42_print);
2088 	REC_INTRO(__db_pg_freedata_42_read, ip, 0);
2089 
2090 	ret = __db_pg_free_recover_42_int(
2091 	    env, ip, argp, file_dbp, lsnp, mpf, op, 1);
2092 
2093 done:	*lsnp = argp->prev_lsn;
2094 out:
2095 	REC_CLOSE;
2096 }
2097 
2098 /*
2099  * __db_relink_42_recover --
2100  *	Recovery function for relink.
2101  *
2102  * PUBLIC: int __db_relink_42_recover
2103  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2104  */
2105 int
__db_relink_42_recover(env,dbtp,lsnp,op,info)2106 __db_relink_42_recover(env, dbtp, lsnp, op, info)
2107 	ENV *env;
2108 	DBT *dbtp;
2109 	DB_LSN *lsnp;
2110 	db_recops op;
2111 	void *info;
2112 {
2113 	__db_relink_42_args *argp;
2114 	DB_THREAD_INFO *ip;
2115 	DB *file_dbp;
2116 	DBC *dbc;
2117 	DB_MPOOLFILE *mpf;
2118 	PAGE *pagep;
2119 	int cmp_n, cmp_p, modified, ret;
2120 
2121 	ip = ((DB_TXNHEAD *)info)->thread_info;
2122 	pagep = NULL;
2123 	REC_PRINT(__db_relink_42_print);
2124 	REC_INTRO(__db_relink_42_read, ip, 0);
2125 
2126 	/*
2127 	 * There are up to three pages we need to check -- the page, and the
2128 	 * previous and next pages, if they existed.  For a page add operation,
2129 	 * the current page is the result of a split and is being recovered
2130 	 * elsewhere, so all we need do is recover the next page.
2131 	 */
2132 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
2133 		if (DB_REDO(op)) {
2134 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
2135 			goto out;
2136 		}
2137 		goto next2;
2138 	}
2139 	if (argp->opcode == DB_ADD_PAGE_COMPAT)
2140 		goto next1;
2141 
2142 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2143 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
2144 	if (cmp_p == 0 && DB_REDO(op)) {
2145 		/* Redo the relink. */
2146 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2147 		pagep->lsn = *lsnp;
2148 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2149 		/* Undo the relink. */
2150 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2151 		pagep->next_pgno = argp->next;
2152 		pagep->prev_pgno = argp->prev;
2153 		pagep->lsn = argp->lsn;
2154 	}
2155 next1:	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2156 		goto out;
2157 	pagep = NULL;
2158 
2159 next2:	if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
2160 		if (DB_REDO(op)) {
2161 			ret = __db_pgerr(file_dbp, argp->next, ret);
2162 			goto out;
2163 		}
2164 		goto prev;
2165 	}
2166 	modified = 0;
2167 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2168 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
2169 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
2170 	if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) ||
2171 	    (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) {
2172 		/* Redo the remove or undo the add. */
2173 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2174 		pagep->prev_pgno = argp->prev;
2175 		modified = 1;
2176 	} else if ((argp->opcode == DB_REM_PAGE_COMPAT &&
2177 	    cmp_n == 0 && DB_UNDO(op)) ||
2178 	    (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) {
2179 		/* Undo the remove or redo the add. */
2180 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2181 		pagep->prev_pgno = argp->pgno;
2182 		modified = 1;
2183 	}
2184 	if (modified) {
2185 		if (DB_UNDO(op))
2186 			pagep->lsn = argp->lsn_next;
2187 		else
2188 			pagep->lsn = *lsnp;
2189 	}
2190 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2191 		goto out;
2192 	pagep = NULL;
2193 	if (argp->opcode == DB_ADD_PAGE_COMPAT)
2194 		goto done;
2195 
2196 prev:	if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
2197 		if (DB_REDO(op)) {
2198 			ret = __db_pgerr(file_dbp, argp->prev, ret);
2199 			goto out;
2200 		}
2201 		goto done;
2202 	}
2203 	modified = 0;
2204 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2205 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2206 	if (cmp_p == 0 && DB_REDO(op)) {
2207 		/* Redo the relink. */
2208 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2209 		pagep->next_pgno = argp->next;
2210 		modified = 1;
2211 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2212 		/* Undo the relink. */
2213 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2214 		pagep->next_pgno = argp->pgno;
2215 		modified = 1;
2216 	}
2217 	if (modified) {
2218 		if (DB_UNDO(op))
2219 			pagep->lsn = argp->lsn_prev;
2220 		else
2221 			pagep->lsn = *lsnp;
2222 	}
2223 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2224 		goto out;
2225 	pagep = NULL;
2226 
2227 done:	*lsnp = argp->prev_lsn;
2228 	ret = 0;
2229 
2230 out:	if (pagep != NULL)
2231 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2232 	REC_CLOSE;
2233 }
2234 
2235 /*
2236  * __db_relink_recover --
2237  *	Recovery function for relink.
2238  *
2239  * PUBLIC: int __db_relink_recover
2240  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2241  */
2242 int
__db_relink_recover(env,dbtp,lsnp,op,info)2243 __db_relink_recover(env, dbtp, lsnp, op, info)
2244 	ENV *env;
2245 	DBT *dbtp;
2246 	DB_LSN *lsnp;
2247 	db_recops op;
2248 	void *info;
2249 {
2250 	__db_relink_args *argp;
2251 	DB_THREAD_INFO *ip;
2252 	DB *file_dbp;
2253 	DBC *dbc;
2254 	DB_MPOOLFILE *mpf;
2255 	PAGE *pagep;
2256 	int cmp_n, cmp_p, ret;
2257 
2258 	ip = ((DB_TXNHEAD *)info)->thread_info;
2259 	pagep = NULL;
2260 	REC_PRINT(__db_relink_print);
2261 	REC_INTRO(__db_relink_read, ip, 0);
2262 
2263 	/*
2264 	 * There are up to three pages we need to check -- the page, and the
2265 	 * previous and next pages, if they existed.  For a page add operation,
2266 	 * the current page is the result of a split and is being recovered
2267 	 * elsewhere, so all we need do is recover the next page.
2268 	 */
2269 	if (argp->next_pgno == PGNO_INVALID)
2270 		goto prev;
2271 	if ((ret = __memp_fget(mpf,
2272 	    &argp->next_pgno, ip, NULL, 0, &pagep)) != 0) {
2273 		if (ret != DB_PAGE_NOTFOUND) {
2274 			ret = __db_pgerr(file_dbp, argp->next_pgno, ret);
2275 			goto out;
2276 		} else
2277 			goto prev;
2278 	}
2279 
2280 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2281 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
2282 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
2283 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
2284 	if (cmp_p == 0 && DB_REDO(op)) {
2285 		/* Redo the remove or replace. */
2286 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2287 		if (argp->new_pgno == PGNO_INVALID)
2288 			pagep->prev_pgno = argp->prev_pgno;
2289 		else
2290 			pagep->prev_pgno = argp->new_pgno;
2291 
2292 		pagep->lsn = *lsnp;
2293 	} else if (cmp_n == 0 && DB_UNDO(op)) {
2294 		/* Undo the remove or replace. */
2295 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2296 		pagep->prev_pgno = argp->pgno;
2297 
2298 		pagep->lsn = argp->lsn_next;
2299 	}
2300 
2301 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2302 		goto out;
2303 	pagep = NULL;
2304 
2305 prev:	if (argp->prev_pgno == PGNO_INVALID)
2306 		goto done;
2307 	if ((ret = __memp_fget(mpf,
2308 	    &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0) {
2309 		if (ret != DB_PAGE_NOTFOUND) {
2310 			ret = __db_pgerr(file_dbp, argp->prev_pgno, ret);
2311 			goto out;
2312 		} else
2313 			goto done;
2314 	}
2315 
2316 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2317 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2318 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2319 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
2320 	if (cmp_p == 0 && DB_REDO(op)) {
2321 		/* Redo the relink. */
2322 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2323 		if (argp->new_pgno == PGNO_INVALID)
2324 			pagep->next_pgno = argp->next_pgno;
2325 		else
2326 			pagep->next_pgno = argp->new_pgno;
2327 
2328 		pagep->lsn = *lsnp;
2329 	} else if (cmp_n == 0 && DB_UNDO(op)) {
2330 		/* Undo the relink. */
2331 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2332 		pagep->next_pgno = argp->pgno;
2333 		pagep->lsn = argp->lsn_prev;
2334 	}
2335 
2336 	if ((ret = __memp_fput(mpf,
2337 	     ip, pagep, file_dbp->priority)) != 0)
2338 		goto out;
2339 	pagep = NULL;
2340 
2341 done:	*lsnp = argp->prev_lsn;
2342 	ret = 0;
2343 
2344 out:	if (pagep != NULL)
2345 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2346 	REC_CLOSE;
2347 }
2348 
2349 /*
2350  * __db_merge_recover --
2351  *	Recovery function for merge.
2352  *
2353  * PUBLIC: int __db_merge_recover
2354  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2355  */
2356 int
__db_merge_recover(env,dbtp,lsnp,op,info)2357 __db_merge_recover(env, dbtp, lsnp, op, info)
2358 	ENV *env;
2359 	DBT *dbtp;
2360 	DB_LSN *lsnp;
2361 	db_recops op;
2362 	void *info;
2363 {
2364 	__db_merge_args *argp;
2365 	BTREE *bt;
2366 	DB_THREAD_INFO *ip;
2367 	BKEYDATA *bk;
2368 	DB *file_dbp;
2369 	DBC *dbc;
2370 	DB_LOCK handle_lock;
2371 	DB_LOCKREQ request;
2372 	DB_MPOOLFILE *mpf;
2373 	HASH *ht;
2374 	PAGE *pagep;
2375 	db_indx_t indx, *ninp, *pinp;
2376 	u_int32_t size;
2377 	u_int8_t *bp;
2378 	int cmp_n, cmp_p, i, ret, t_ret;
2379 
2380 	ip = ((DB_TXNHEAD *)info)->thread_info;
2381 	REC_PRINT(__db_merge_print);
2382 	REC_INTRO(__db_merge_read, ip, op != DB_TXN_APPLY);
2383 
2384 	/* Allocate our own cursor without DB_RECOVER as we need a locker. */
2385 	if (op == DB_TXN_APPLY && (ret = __db_cursor_int(file_dbp, ip, NULL,
2386 	    DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0)
2387 		goto out;
2388 	F_SET(dbc, DBC_RECOVER);
2389 
2390 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
2391 		if (ret != DB_PAGE_NOTFOUND) {
2392 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
2393 			goto out;
2394 		} else
2395 			goto next;
2396 	}
2397 
2398 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2399 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2400 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2401 	CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2402 
2403 	if (cmp_p == 0 && DB_REDO(op)) {
2404 		/*
2405 		 * When pg_copy is set, we are copying onto a new page.
2406 		 */
2407 		DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0);
2408 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2409 		if (argp->pg_copy) {
2410 			if (argp->data.size == 0) {
2411 				memcpy(pagep, argp->hdr.data, argp->hdr.size);
2412 				pagep->pgno = argp->pgno;
2413 				goto do_lsn;
2414 			}
2415 			P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
2416 			     PREV_PGNO(argp->hdr.data),
2417 			     NEXT_PGNO(argp->hdr.data),
2418 			     LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
2419 		}
2420 		if (TYPE(pagep) == P_OVERFLOW) {
2421 			OV_REF(pagep) = OV_REF(argp->hdr.data);
2422 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2423 			bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2424 			memcpy(bp, argp->data.data, argp->data.size);
2425 		} else {
2426 			/* Copy the data segment. */
2427 			bp = (u_int8_t *)pagep +
2428 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2429 			memcpy(bp, argp->data.data, argp->data.size);
2430 
2431 			/* Copy index table offset past the current entries. */
2432 			pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2433 			ninp = P_INP(file_dbp, argp->hdr.data);
2434 			for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2435 				*pinp++ = *ninp++
2436 				      - (file_dbp->pgsize - HOFFSET(pagep));
2437 			HOFFSET(pagep) -= argp->data.size;
2438 			NUM_ENT(pagep) += i;
2439 		}
2440 do_lsn:		pagep->lsn = *lsnp;
2441 		if (op == DB_TXN_APPLY) {
2442 			/*
2443 			 * If applying to an active system we must bump
2444 			 * the revision number so that the db will get
2445 			 * reopened.  We also need to move the handle
2446 			 * locks.  Note that the dbp will not have a
2447 			 * locker in a replication client apply thread.
2448 			 */
2449 			if (file_dbp->type == DB_HASH) {
2450 				if (argp->npgno == file_dbp->meta_pgno)
2451 					file_dbp->mpf->mfp->revision++;
2452 			} else {
2453 				bt = file_dbp->bt_internal;
2454 				if (argp->npgno == bt->bt_meta ||
2455 				    argp->npgno == bt->bt_root)
2456 					file_dbp->mpf->mfp->revision++;
2457 			}
2458 			if (argp->npgno == file_dbp->meta_pgno) {
2459 				F_CLR(file_dbp, DB_AM_RECOVER);
2460 				if ((ret = __fop_lock_handle(file_dbp->env,
2461 				    file_dbp, dbc->locker, DB_LOCK_READ,
2462 				    NULL, 0)) != 0)
2463 					goto err;
2464 				handle_lock = file_dbp->handle_lock;
2465 
2466 				file_dbp->meta_pgno = argp->pgno;
2467 				if ((ret = __fop_lock_handle(file_dbp->env,
2468 				    file_dbp, dbc->locker, DB_LOCK_READ,
2469 				    NULL, 0)) != 0)
2470 					goto err;
2471 
2472 				/* Move the other handles to the new lock. */
2473 				ret = __lock_change(file_dbp->env,
2474 				    &handle_lock, &file_dbp->handle_lock);
2475 
2476 err:				memset(&request, 0, sizeof(request));
2477 				request.op = DB_LOCK_PUT_ALL;
2478 				if ((t_ret = __lock_vec(
2479 				    file_dbp->env, dbc->locker,
2480 				    0, &request, 1, NULL)) != 0 && ret == 0)
2481 					ret = t_ret;
2482 				F_SET(file_dbp, DB_AM_RECOVER);
2483 				if (ret != 0)
2484 					goto out;
2485 			}
2486 		}
2487 
2488 	} else if (cmp_n == 0 && !DB_REDO(op)) {
2489 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2490 		if (TYPE(pagep) == P_OVERFLOW) {
2491 			HOFFSET(pagep) = file_dbp->pgsize;
2492 			goto setlsn;
2493 		}
2494 
2495 		if (argp->pg_copy) {
2496 			/* The page was empty when we started. */
2497 			P_INIT(pagep, file_dbp->pgsize,
2498 			    pagep->pgno, PGNO_INVALID,
2499 			    PGNO_INVALID, 0, TYPE(argp->hdr.data));
2500 			goto setlsn;
2501 		}
2502 
2503 		/*
2504 		 * Since logging is logical at the page level we cannot just
2505 		 * truncate the data space.  Delete the proper number of items
2506 		 * from the logical end of the page.
2507 		 */
2508 		for (i = 0; i < NUM_ENT(argp->hdr.data); i++) {
2509 			indx = NUM_ENT(pagep) - 1;
2510 			if (TYPE(pagep) == P_LBTREE && indx != 0 &&
2511 			     P_INP(file_dbp, pagep)[indx] ==
2512 			     P_INP(file_dbp, pagep)[indx - P_INDX]) {
2513 				NUM_ENT(pagep)--;
2514 				continue;
2515 			}
2516 			switch (TYPE(pagep)) {
2517 			case P_LBTREE:
2518 			case P_LRECNO:
2519 			case P_LDUP:
2520 				bk = GET_BKEYDATA(file_dbp, pagep, indx);
2521 				size = BITEM_SIZE(bk);
2522 				break;
2523 
2524 			case P_IBTREE:
2525 				size = BINTERNAL_SIZE(
2526 				     GET_BINTERNAL(file_dbp, pagep, indx)->len);
2527 				break;
2528 			case P_IRECNO:
2529 				size = RINTERNAL_SIZE;
2530 				break;
2531 			case P_HASH:
2532 				size = LEN_HITEM(file_dbp,
2533 				    pagep, file_dbp->pgsize, indx);
2534 				break;
2535 			default:
2536 				ret = __db_pgfmt(env, PGNO(pagep));
2537 				goto out;
2538 			}
2539 			if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0)
2540 				goto out;
2541 		}
2542 setlsn:		pagep->lsn = argp->lsn;
2543 	}
2544 
2545 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
2546 		goto out;
2547 
2548 next:	if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
2549 		if (ret != DB_PAGE_NOTFOUND) {
2550 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
2551 			goto out;
2552 		} else
2553 			goto done;
2554 	}
2555 
2556 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2557 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
2558 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
2559 
2560 	if (cmp_p == 0 && DB_REDO(op)) {
2561 		/* Need to truncate the page. */
2562 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2563 		HOFFSET(pagep) = file_dbp->pgsize;
2564 		NUM_ENT(pagep) = 0;
2565 		pagep->lsn = *lsnp;
2566 	} else if (cmp_n == 0 && !DB_REDO(op)) {
2567 		/* Need to put the data back on the page. */
2568 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2569 		if (TYPE(pagep) == P_OVERFLOW) {
2570 			OV_REF(pagep) = OV_REF(argp->hdr.data);
2571 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2572 			bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2573 			memcpy(bp, argp->data.data, argp->data.size);
2574 		} else {
2575 			bp = (u_int8_t *)pagep +
2576 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2577 			memcpy(bp, argp->data.data, argp->data.size);
2578 
2579 			if (argp->pg_copy)
2580 				memcpy(pagep, argp->hdr.data, argp->hdr.size);
2581 			else {
2582 				/* Copy index table. */
2583 				pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2584 				ninp = P_INP(file_dbp, argp->hdr.data);
2585 				for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2586 					*pinp++ = *ninp++;
2587 				HOFFSET(pagep) -= argp->data.size;
2588 				NUM_ENT(pagep) += i;
2589 			}
2590 		}
2591 		pagep->lsn = argp->nlsn;
2592 		if (op == DB_TXN_ABORT) {
2593 			/*
2594 			 * If we are undoing a meta/root page move we must
2595 			 * bump the revision number. Put the handle
2596 			 * locks back to their original state if we
2597 			 * moved the metadata page.
2598 			 */
2599 			i = 0;
2600 			if (file_dbp->type == DB_HASH) {
2601 				ht = file_dbp->h_internal;
2602 				if (argp->pgno == ht->meta_pgno) {
2603 					ht->meta_pgno = argp->npgno;
2604 					file_dbp->mpf->mfp->revision++;
2605 					i = 1;
2606 				}
2607 			} else {
2608 				bt = file_dbp->bt_internal;
2609 				if (argp->pgno == bt->bt_meta) {
2610 					file_dbp->mpf->mfp->revision++;
2611 					bt->bt_meta = argp->npgno;
2612 					i = 1;
2613 				} else if (argp->pgno == bt->bt_root) {
2614 					file_dbp->mpf->mfp->revision++;
2615 					bt->bt_root = argp->npgno;
2616 				}
2617 			}
2618 			if (argp->pgno == file_dbp->meta_pgno)
2619 				file_dbp->meta_pgno = argp->npgno;
2620 
2621 			/*
2622 			 * If we detected a metadata page above, move
2623 			 * the handle locks to the new page.
2624 			 */
2625 			if (i == 1) {
2626 				handle_lock = file_dbp->handle_lock;
2627 				if ((ret = __fop_lock_handle(file_dbp->env,
2628 				    file_dbp, file_dbp->locker, DB_LOCK_READ,
2629 				    NULL, 0)) != 0)
2630 					goto out;
2631 
2632 				/* Move the other handles to the new lock. */
2633 				if ((ret = __lock_change(file_dbp->env,
2634 				    &handle_lock, &file_dbp->handle_lock)) != 0)
2635 					goto out;
2636 			}
2637 		}
2638 	}
2639 
2640 	if ((ret = __memp_fput(mpf,
2641 	     ip, pagep, dbc->priority)) != 0)
2642 		goto out;
2643 done:
2644 	*lsnp = argp->prev_lsn;
2645 	ret = 0;
2646 
2647 out:	REC_CLOSE;
2648 }
2649 
2650 /*
2651  * __db_pgno_recover --
2652  *	Recovery function for page number replacment.
2653  *
2654  * PUBLIC: int __db_pgno_recover
2655  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2656  */
2657 int
__db_pgno_recover(env,dbtp,lsnp,op,info)2658 __db_pgno_recover(env, dbtp, lsnp, op, info)
2659 	ENV *env;
2660 	DBT *dbtp;
2661 	DB_LSN *lsnp;
2662 	db_recops op;
2663 	void *info;
2664 {
2665 	BINTERNAL *bi;
2666 	__db_pgno_args *argp;
2667 	DB_THREAD_INFO *ip;
2668 	DB *file_dbp;
2669 	DBC *dbc;
2670 	DB_MPOOLFILE *mpf;
2671 	PAGE *pagep, *npagep;
2672 	db_pgno_t pgno, *pgnop;
2673 	int cmp_n, cmp_p, ret;
2674 
2675 	ip = ((DB_TXNHEAD *)info)->thread_info;
2676 	REC_PRINT(__db_pgno_print);
2677 	REC_INTRO(__db_pgno_read, ip, 0);
2678 
2679 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
2680 
2681 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2682 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2683 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2684 	CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2685 
2686 	if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) {
2687 		switch (TYPE(pagep)) {
2688 		case P_IBTREE:
2689 			/*
2690 			 * An internal record can have both a overflow
2691 			 * and child pointer.  Fetch the page to see
2692 			 * which it is.
2693 			 */
2694 			bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
2695 			if (B_TYPE(bi->type) == B_OVERFLOW) {
2696 				REC_FGET(mpf, ip, argp->npgno, &npagep, out);
2697 
2698 				if (TYPE(npagep) == P_OVERFLOW)
2699 					pgnop =
2700 					     &((BOVERFLOW *)(bi->data))->pgno;
2701 				else
2702 					pgnop = &bi->pgno;
2703 				if ((ret = __memp_fput(mpf, ip,
2704 				    npagep, file_dbp->priority)) != 0)
2705 					goto out;
2706 				break;
2707 			}
2708 			pgnop = &bi->pgno;
2709 			break;
2710 		case P_IRECNO:
2711 			pgnop =
2712 			     &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno;
2713 			break;
2714 		case P_HASH:
2715 			pgnop = &pgno;
2716 			break;
2717 		default:
2718 			pgnop =
2719 			     &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno;
2720 			break;
2721 		}
2722 
2723 		if (DB_REDO(op)) {
2724 			/* Need to redo update described. */
2725 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2726 			*pgnop = argp->npgno;
2727 			pagep->lsn = *lsnp;
2728 		} else {
2729 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2730 			*pgnop = argp->opgno;
2731 			pagep->lsn = argp->lsn;
2732 		}
2733 		if (TYPE(pagep) == P_HASH)
2734 			memcpy(HOFFDUP_PGNO(P_ENTRY(file_dbp,
2735 			    pagep, argp->indx)), pgnop, sizeof(db_pgno_t));
2736 	}
2737 
2738 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2739 		goto out;
2740 
2741 done:
2742 	*lsnp = argp->prev_lsn;
2743 	ret = 0;
2744 
2745 out:	REC_CLOSE;
2746 }
2747 
2748 /*
2749  * __db_pglist_swap -- swap a list of freelist pages.
2750  * PUBLIC: void __db_pglist_swap __P((u_int32_t, void *));
2751  */
2752 void
__db_pglist_swap(size,list)2753 __db_pglist_swap(size, list)
2754 	u_int32_t size;
2755 	void *list;
2756 {
2757 	db_pglist_t *lp;
2758 	u_int32_t nelem;
2759 
2760 	nelem = size / sizeof(db_pglist_t);
2761 
2762 	lp = (db_pglist_t *)list;
2763 	while (nelem-- > 0) {
2764 		P_32_SWAP(&lp->pgno);
2765 		P_32_SWAP(&lp->lsn.file);
2766 		P_32_SWAP(&lp->lsn.offset);
2767 		lp++;
2768 	}
2769 }
2770 
2771 /*
2772  * __db_pglist_print -- print a list of freelist pages.
2773  * PUBLIC: void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *));
2774  */
2775 void
__db_pglist_print(env,mbp,list)2776 __db_pglist_print(env, mbp, list)
2777 	ENV *env;
2778 	DB_MSGBUF *mbp;
2779 	DBT *list;
2780 {
2781 	db_pglist_t *lp;
2782 	u_int32_t nelem;
2783 
2784 	nelem = list->size / sizeof(db_pglist_t);
2785 	lp = (db_pglist_t *)list->data;
2786 	__db_msgadd(env, mbp, "\t");
2787 	while (nelem-- > 0) {
2788 		__db_msgadd(env, mbp, "%lu [%lu][%lu]", (u_long)lp->pgno,
2789 		    (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
2790 		if (nelem % 4 == 0)
2791 			__db_msgadd(env, mbp, "\n\t");
2792 		else
2793 			__db_msgadd(env, mbp, " ");
2794 		lp++;
2795 	}
2796 }
2797