1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/lock.h"
15 #include "dbinc/mp.h"
16 
/*
 * IS_BTREE_PAGE --
 *	Evaluates to non-zero when TYPE(pagep) is P_IBTREE, P_LBTREE or
 *	P_LDUP.  The argument is expanded up to three times, so it should
 *	not have side effects.
 */
#define	IS_BTREE_PAGE(pagep)						\
	(TYPE(pagep) == P_IBTREE ||					\
	 TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP)
20 
21 /*
22  * __bam_split_recover --
23  *	Recovery function for split.
24  *
25  * PUBLIC: int __bam_split_recover
26  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
27  */
28 int
__bam_split_recover(env,dbtp,lsnp,op,info)29 __bam_split_recover(env, dbtp, lsnp, op, info)
30 	ENV *env;
31 	DBT *dbtp;
32 	DB_LSN *lsnp;
33 	db_recops op;
34 	void *info;
35 {
36 	__bam_split_args *argp;
37 	DB_THREAD_INFO *ip;
38 	DB *file_dbp;
39 	DBC *dbc;
40 	DB_LSN *plsnp;
41 	DB_MPOOLFILE *mpf;
42 	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
43 	db_pgno_t pgno, parent_pgno;
44 	u_int32_t opflags, size;
45 	int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret;
46 
47 	ip = ((DB_TXNHEAD *)info)->thread_info;
48 	REC_PRINT(__bam_split_print);
49 
50 	_lp = lp = np = pp = _rp = rp = NULL;
51 	sp = NULL;
52 
53 	REC_INTRO(__bam_split_read, ip, 0);
54 
55 	opflags = OP_MODE_GET(argp->opflags);
56 	if ((ret = __db_cursor_int(file_dbp, ip, NULL,
57 	    (opflags & SPL_RECNO) ?  DB_RECNO : DB_BTREE,
58 	    PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0)
59 		goto out;
60 	if (opflags & SPL_NRECS)
61 		F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM);
62 
63 	/*
64 	 * There are two kinds of splits that we have to recover from.  The
65 	 * first is a root-page split, where the root page is split from a
66 	 * leaf page into an internal page and two new leaf pages are created.
67 	 * The second is where a page is split into two pages, and a new key
68 	 * is inserted into the parent page.
69 	 *
70 	 * DBTs are not aligned in log records, so we need to copy the page
71 	 * so that we can access fields within it throughout this routine.
72 	 * Although we could hardcode the unaligned copies in this routine,
73 	 * we will be calling into regular btree functions with this page,
74 	 * so it's got to be aligned.  Copying it into allocated memory is
75 	 * the only way to guarantee this.
76 	 */
77 	if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
78 		goto out;
79 	memcpy(sp, argp->pg.data, argp->pg.size);
80 
81 	pgno = PGNO(sp);
82 	parent_pgno = argp->ppgno;
83 	rootsplit = parent_pgno == pgno;
84 
85 	/* Get the pages going down the tree. */
86 	REC_FGET(mpf, ip, parent_pgno, &pp, left);
87 left:	REC_FGET(mpf, ip, argp->left, &lp, right);
88 right:	REC_FGET(mpf, ip, argp->right, &rp, redo);
89 
90 redo:	if (DB_REDO(op)) {
91 		l_update = r_update = p_update = 0;
92 		/*
93 		 * Decide if we need to resplit the page.
94 		 *
95 		 * If this is a root split, then the root has to exist unless
96 		 * we have truncated it due to a future deallocation.
97 		 */
98 		if (pp != NULL) {
99 			if (rootsplit)
100 				plsnp = &LSN(argp->pg.data);
101 			else
102 				plsnp = &argp->plsn;
103 			cmp = LOG_COMPARE(&LSN(pp), plsnp);
104 			CHECK_LSN(env, op, cmp, &LSN(pp), plsnp);
105 			if (cmp == 0)
106 				p_update = 1;
107 		}
108 
109 		if (lp != NULL) {
110 			cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
111 			CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
112 			if (cmp == 0)
113 				l_update = 1;
114 		}
115 
116 		if (rp != NULL) {
117 			cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
118 			CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
119 			if (cmp == 0)
120 				r_update = 1;
121 		}
122 
123 		if (!p_update && !l_update && !r_update)
124 			goto check_next;
125 
126 		/* Allocate and initialize new left/right child pages. */
127 		if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
128 		    (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
129 			goto out;
130 		if (rootsplit) {
131 			P_INIT(_lp, file_dbp->pgsize, argp->left,
132 			    PGNO_INVALID,
133 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
134 			    LEVEL(sp), TYPE(sp));
135 			P_INIT(_rp, file_dbp->pgsize, argp->right,
136 			    ISINTERNAL(sp) ?  PGNO_INVALID : argp->left,
137 			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
138 		} else {
139 			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
140 			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
141 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
142 			    LEVEL(sp), TYPE(sp));
143 			P_INIT(_rp, file_dbp->pgsize, argp->right,
144 			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
145 			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
146 			    LEVEL(sp), TYPE(sp));
147 		}
148 
149 		/* Split the page. */
150 		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
151 		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
152 		    NUM_ENT(sp))) != 0)
153 			goto out;
154 
155 		if (l_update) {
156 			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
157 			memcpy(lp, _lp, file_dbp->pgsize);
158 			lp->lsn = *lsnp;
159 		}
160 
161 		if (r_update) {
162 			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
163 			memcpy(rp, _rp, file_dbp->pgsize);
164 			rp->lsn = *lsnp;
165 		}
166 
167 		/*
168 		 * Drop the latches on the lower level pages before
169 		 * getting an exclusive latch on the higher level page.
170 		 */
171 		if (lp != NULL && (ret = __memp_fput(mpf,
172 		    ip, lp, file_dbp->priority)) && ret == 0)
173 			goto out;
174 		lp = NULL;
175 		if (rp != NULL && (ret = __memp_fput(mpf,
176 		    ip, rp, file_dbp->priority)) && ret == 0)
177 			goto out;
178 		rp = NULL;
179 		/*
180 		 * If the parent page is wrong, update it.
181 		 * For recno the insert into an existing parent
182 		 * was logged separately.
183 		 * If it is a root page update initialize the page and
184 		 * update the record counts if needed.
185 		 * Then insert the record for the right hand child page.
186 		 */
187 		if (p_update) {
188 			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
189 
190 			if (rootsplit) {
191 				P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID,
192 				    PGNO_INVALID, _lp->level + 1,
193 				    (opflags & SPL_RECNO) ?
194 				    P_IRECNO : P_IBTREE);
195 				if (opflags & SPL_NRECS) {
196 					RE_NREC_SET(pp,
197 					    __bam_total(file_dbp, _lp) +
198 					    __bam_total(file_dbp, _rp));
199 				}
200 				if ((ret = __db_pitem_nolog(dbc, pp,
201 				    argp->pindx, argp->pentry.size,
202 				    &argp->pentry, NULL)) != 0)
203 					goto out;
204 
205 			} else if (opflags & SPL_NRECS)
206 				goto recno;
207 			if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1,
208 			    argp->rentry.size, &argp->rentry, NULL)) != 0)
209 				goto out;
210 recno:			pp->lsn = *lsnp;
211 		}
212 
213 check_next:	/*
214 		 * Finally, redo the next-page link if necessary.  This is of
215 		 * interest only if it wasn't a root split -- inserting a new
216 		 * page in the tree requires that any following page have its
217 		 * previous-page pointer updated to our new page.  The next
218 		 * page must exist because we're redoing the operation.
219 		 */
220 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
221 			REC_FGET(mpf, ip, argp->npgno, &np, done);
222 			cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
223 			CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
224 			if (cmp == 0) {
225 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
226 				PREV_PGNO(np) = argp->right;
227 				np->lsn = *lsnp;
228 			}
229 		}
230 	} else {
231 		/*
232 		 * If it's a root split and the left child ever existed, update
233 		 * its LSN.   Otherwise its the split page. If
234 		 * right child ever existed, root split or not, update its LSN.
235 		 * The undo of the page allocation(s) will restore them to the
236 		 * free list.
237 		 */
238 		if (rootsplit && lp != NULL &&
239 		    LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
240 			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
241 			lp->lsn = argp->llsn;
242 		}
243 		if (rp != NULL &&
244 		    LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
245 			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
246 			rp->lsn = argp->rlsn;
247 		}
248 		/*
249 		 * Drop the lower level pages before getting an exclusive
250 		 * latch on  the parent.
251 		 */
252 		if (rp != NULL && (ret = __memp_fput(mpf,
253 		    ip, rp, file_dbp->priority)))
254 			goto out;
255 		rp = NULL;
256 
257 		/*
258 		 * Check the state of the split page.  If its a rootsplit
259 		 * then that's the rootpage otherwise its the left page.
260 		 */
261 		if (rootsplit) {
262 			DB_ASSERT(env, pgno == argp->ppgno);
263 			if (lp != NULL && (ret = __memp_fput(mpf, ip,
264 			     lp, file_dbp->priority)) != 0)
265 				goto out;
266 			lp = pp;
267 			pp = NULL;
268 		}
269 		if (lp != NULL) {
270 			cmp = LOG_COMPARE(lsnp, &LSN(lp));
271 			CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp);
272 			if (cmp == 0) {
273 				REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
274 				memcpy(lp, argp->pg.data, argp->pg.size);
275 				if ((ret = __memp_fput(mpf,
276 				    ip, lp, file_dbp->priority)))
277 					goto out;
278 				lp = NULL;
279 			}
280 		}
281 
282 		/*
283 		 * Next we can update the parent removing the new index.
284 		 * If this has record numbers, then we log this separately.
285 		 */
286 		if (pp != NULL) {
287 			DB_ASSERT(env, !rootsplit);
288 			cmp = LOG_COMPARE(lsnp, &LSN(pp));
289 			CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp);
290 			if (cmp == 0) {
291 				REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
292 				if ((opflags & SPL_NRECS) == 0) {
293 					size  = BINTERNAL_SIZE(
294 					    GET_BINTERNAL(file_dbp,
295 					    pp, argp->pindx + 1)->len);
296 
297 					if ((ret = __db_ditem(dbc, pp,
298 					    argp->pindx + 1, size)) != 0)
299 						goto out;
300 				}
301 				pp->lsn = argp->plsn;
302 			}
303 		}
304 
305 		/*
306 		 * Finally, undo the next-page link if necessary.  This is of
307 		 * interest only if it wasn't a root split -- inserting a new
308 		 * page in the tree requires that any following page have its
309 		 * previous-page pointer updated to our new page.  Since it's
310 		 * possible that the next-page never existed, we ignore it as
311 		 * if there's nothing to undo.
312 		 */
313 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
314 			if ((ret = __memp_fget(mpf, &argp->npgno,
315 			    ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
316 				np = NULL;
317 				goto done;
318 			}
319 			if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
320 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
321 				PREV_PGNO(np) = argp->left;
322 				np->lsn = argp->nlsn;
323 			}
324 		}
325 	}
326 
327 done:	*lsnp = argp->prev_lsn;
328 	ret = 0;
329 
330 out:	/* Free any pages that are left. */
331 	if (lp != NULL && (t_ret = __memp_fput(mpf,
332 	    ip, lp, file_dbp->priority)) != 0 && ret == 0)
333 		ret = t_ret;
334 	if (np != NULL && (t_ret = __memp_fput(mpf,
335 	    ip, np, file_dbp->priority)) != 0 && ret == 0)
336 		ret = t_ret;
337 	if (rp != NULL && (t_ret = __memp_fput(mpf,
338 	     ip, rp, file_dbp->priority)) != 0 && ret == 0)
339 		ret = t_ret;
340 	if (pp != NULL && (t_ret = __memp_fput(mpf,
341 	    ip, pp, file_dbp->priority)) != 0 && ret == 0)
342 		ret = t_ret;
343 
344 	/* Free any allocated space. */
345 	if (_lp != NULL)
346 		__os_free(env, _lp);
347 	if (_rp != NULL)
348 		__os_free(env, _rp);
349 	if (sp != NULL)
350 		__os_free(env, sp);
351 
352 	REC_CLOSE;
353 }
354 /*
355  * __bam_split_48_recover --
356  *	Recovery function for split.
357  *
358  * PUBLIC: int __bam_split_48_recover
359  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
360  */
361 int
__bam_split_48_recover(env,dbtp,lsnp,op,info)362 __bam_split_48_recover(env, dbtp, lsnp, op, info)
363 	ENV *env;
364 	DBT *dbtp;
365 	DB_LSN *lsnp;
366 	db_recops op;
367 	void *info;
368 {
369 	__bam_split_48_args *argp;
370 	DB_THREAD_INFO *ip;
371 	DB *file_dbp;
372 	DBC *dbc;
373 	DB_LSN *plsnp;
374 	DB_MPOOLFILE *mpf;
375 	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
376 	db_pgno_t pgno, parent_pgno;
377 	u_int32_t ptype, size;
378 	int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret;
379 
380 	ip = ((DB_TXNHEAD *)info)->thread_info;
381 	REC_PRINT(__bam_split_print);
382 
383 	_lp = lp = np = pp = _rp = rp = NULL;
384 	sp = NULL;
385 
386 	REC_INTRO(__bam_split_48_read, ip, 0);
387 
388 	if ((ret = __db_cursor_int(file_dbp, ip, NULL,
389 	    (argp->opflags & SPL_RECNO) ?  DB_RECNO : DB_BTREE,
390 	    PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0)
391 		goto out;
392 	if (argp->opflags & SPL_NRECS)
393 		F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM);
394 
395 	/*
396 	 * There are two kinds of splits that we have to recover from.  The
397 	 * first is a root-page split, where the root page is split from a
398 	 * leaf page into an internal page and two new leaf pages are created.
399 	 * The second is where a page is split into two pages, and a new key
400 	 * is inserted into the parent page.
401 	 *
402 	 * DBTs are not aligned in log records, so we need to copy the page
403 	 * so that we can access fields within it throughout this routine.
404 	 * Although we could hardcode the unaligned copies in this routine,
405 	 * we will be calling into regular btree functions with this page,
406 	 * so it's got to be aligned.  Copying it into allocated memory is
407 	 * the only way to guarantee this.
408 	 */
409 	if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
410 		goto out;
411 	memcpy(sp, argp->pg.data, argp->pg.size);
412 
413 	pgno = PGNO(sp);
414 	parent_pgno = argp->ppgno;
415 	rootsplit = parent_pgno == pgno;
416 
417 	/* Get the pages going down the tree. */
418 	REC_FGET(mpf, ip, parent_pgno, &pp, left);
419 left:	REC_FGET(mpf, ip, argp->left, &lp, right);
420 right:	REC_FGET(mpf, ip, argp->right, &rp, redo);
421 
422 redo:	if (DB_REDO(op)) {
423 		l_update = r_update = p_update = 0;
424 		/*
425 		 * Decide if we need to resplit the page.
426 		 *
427 		 * If this is a root split, then the root has to exist unless
428 		 * we have truncated it due to a future deallocation.
429 		 */
430 		if (pp != NULL) {
431 			if (rootsplit)
432 				plsnp = &LSN(argp->pg.data);
433 			else
434 				plsnp = &argp->plsn;
435 			cmp = LOG_COMPARE(&LSN(pp), plsnp);
436 			CHECK_LSN(env, op, cmp, &LSN(pp), plsnp);
437 			if (cmp == 0)
438 				p_update = 1;
439 		}
440 
441 		if (lp != NULL) {
442 			cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
443 			CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
444 			if (cmp == 0)
445 				l_update = 1;
446 		}
447 
448 		if (rp != NULL) {
449 			cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
450 			CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
451 			if (cmp == 0)
452 				r_update = 1;
453 		}
454 
455 		if (!p_update && !l_update && !r_update)
456 			goto check_next;
457 
458 		/* Allocate and initialize new left/right child pages. */
459 		if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
460 		    (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
461 			goto out;
462 		if (rootsplit) {
463 			P_INIT(_lp, file_dbp->pgsize, argp->left,
464 			    PGNO_INVALID,
465 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
466 			    LEVEL(sp), TYPE(sp));
467 			P_INIT(_rp, file_dbp->pgsize, argp->right,
468 			    ISINTERNAL(sp) ?  PGNO_INVALID : argp->left,
469 			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
470 		} else {
471 			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
472 			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
473 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
474 			    LEVEL(sp), TYPE(sp));
475 			P_INIT(_rp, file_dbp->pgsize, argp->right,
476 			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
477 			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
478 			    LEVEL(sp), TYPE(sp));
479 		}
480 
481 		/* Split the page. */
482 		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
483 		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
484 		    NUM_ENT(sp))) != 0)
485 			goto out;
486 
487 		if (l_update) {
488 			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
489 			memcpy(lp, _lp, file_dbp->pgsize);
490 			lp->lsn = *lsnp;
491 		}
492 
493 		if (r_update) {
494 			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
495 			memcpy(rp, _rp, file_dbp->pgsize);
496 			rp->lsn = *lsnp;
497 		}
498 
499 		/*
500 		 * Drop the latches on the lower level pages before
501 		 * getting an exclusive latch on the higher level page.
502 		 */
503 		if (lp != NULL && (ret = __memp_fput(mpf,
504 		    ip, lp, file_dbp->priority)) && ret == 0)
505 			goto out;
506 		lp = NULL;
507 		if (rp != NULL && (ret = __memp_fput(mpf,
508 		    ip, rp, file_dbp->priority)) && ret == 0)
509 			goto out;
510 		rp = NULL;
511 		/*
512 		 * If the parent page is wrong, update it.
513 		 * Initialize the page.  If it is a root page update
514 		 * the record counts if needed and put the first record in.
515 		 * Then insert the record for the right hand child page.
516 		 */
517 		if (p_update) {
518 			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
519 			if (argp->opflags & SPL_RECNO)
520 				ptype = P_IRECNO;
521 			else
522 				ptype = P_IBTREE;
523 
524 			if (rootsplit) {
525 				P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID,
526 				    PGNO_INVALID, _lp->level + 1, ptype);
527 				if (argp->opflags & SPL_NRECS) {
528 					RE_NREC_SET(pp,
529 					    __bam_total(file_dbp, _lp) +
530 					    __bam_total(file_dbp, _rp));
531 				}
532 				if ((ret = __db_pitem_nolog(dbc, pp,
533 				    argp->pindx, argp->pentry.size,
534 				    &argp->pentry, NULL)) != 0)
535 					goto out;
536 
537 			}
538 			if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1,
539 			    argp->rentry.size, &argp->rentry, NULL)) != 0)
540 				goto out;
541 			pp->lsn = *lsnp;
542 		}
543 
544 check_next:	/*
545 		 * Finally, redo the next-page link if necessary.  This is of
546 		 * interest only if it wasn't a root split -- inserting a new
547 		 * page in the tree requires that any following page have its
548 		 * previous-page pointer updated to our new page.  The next
549 		 * page must exist because we're redoing the operation.
550 		 */
551 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
552 			REC_FGET(mpf, ip, argp->npgno, &np, done);
553 			cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
554 			CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
555 			if (cmp == 0) {
556 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
557 				PREV_PGNO(np) = argp->right;
558 				np->lsn = *lsnp;
559 			}
560 		}
561 	} else {
562 		/*
563 		 * If it's a root split and the left child ever existed, update
564 		 * its LSN.   Otherwise its the split page. If
565 		 * right child ever existed, root split or not, update its LSN.
566 		 * The undo of the page allocation(s) will restore them to the
567 		 * free list.
568 		 */
569 		if (rootsplit && lp != NULL &&
570 		    LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
571 			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
572 			lp->lsn = argp->llsn;
573 		}
574 		if (rp != NULL &&
575 		    LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
576 			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
577 			rp->lsn = argp->rlsn;
578 		}
579 		/*
580 		 * Drop the lower level pages before getting an exclusive
581 		 * latch on  the parent.
582 		 */
583 		if (rp != NULL && (ret = __memp_fput(mpf,
584 		    ip, rp, file_dbp->priority)))
585 			goto out;
586 		rp = NULL;
587 
588 		/*
589 		 * Check the state of the split page.  If its a rootsplit
590 		 * then that's the rootpage otherwise its the left page.
591 		 */
592 		if (rootsplit) {
593 			DB_ASSERT(env, pgno == argp->ppgno);
594 			if (lp != NULL && (ret = __memp_fput(mpf, ip,
595 			     lp, file_dbp->priority)) != 0)
596 				goto out;
597 			lp = pp;
598 			pp = NULL;
599 		}
600 		if (lp != NULL) {
601 			cmp = LOG_COMPARE(lsnp, &LSN(lp));
602 			CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp);
603 			if (cmp == 0) {
604 				REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
605 				memcpy(lp, argp->pg.data, argp->pg.size);
606 				if ((ret = __memp_fput(mpf,
607 				    ip, lp, file_dbp->priority)))
608 					goto out;
609 				lp = NULL;
610 			}
611 		}
612 
613 		/*
614 		 * Next we can update the parent removing the new index.
615 		 */
616 		if (pp != NULL) {
617 			DB_ASSERT(env, !rootsplit);
618 			cmp = LOG_COMPARE(lsnp, &LSN(pp));
619 			CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp);
620 			if (cmp == 0) {
621 				REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
622 				if (argp->opflags & SPL_RECNO)
623 					size = RINTERNAL_SIZE;
624 				else
625 					size  = BINTERNAL_SIZE(
626 					    GET_BINTERNAL(file_dbp,
627 					    pp, argp->pindx + 1)->len);
628 
629 				if ((ret = __db_ditem(dbc, pp,
630 				    argp->pindx + 1, size)) != 0)
631 					goto out;
632 				pp->lsn = argp->plsn;
633 			}
634 		}
635 
636 		/*
637 		 * Finally, undo the next-page link if necessary.  This is of
638 		 * interest only if it wasn't a root split -- inserting a new
639 		 * page in the tree requires that any following page have its
640 		 * previous-page pointer updated to our new page.  Since it's
641 		 * possible that the next-page never existed, we ignore it as
642 		 * if there's nothing to undo.
643 		 */
644 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
645 			if ((ret = __memp_fget(mpf, &argp->npgno,
646 			    ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
647 				np = NULL;
648 				goto done;
649 			}
650 			if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
651 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
652 				PREV_PGNO(np) = argp->left;
653 				np->lsn = argp->nlsn;
654 			}
655 		}
656 	}
657 
658 done:	*lsnp = argp->prev_lsn;
659 	ret = 0;
660 
661 out:	/* Free any pages that are left. */
662 	if (lp != NULL && (t_ret = __memp_fput(mpf,
663 	    ip, lp, file_dbp->priority)) != 0 && ret == 0)
664 		ret = t_ret;
665 	if (np != NULL && (t_ret = __memp_fput(mpf,
666 	    ip, np, file_dbp->priority)) != 0 && ret == 0)
667 		ret = t_ret;
668 	if (rp != NULL && (t_ret = __memp_fput(mpf,
669 	     ip, rp, file_dbp->priority)) != 0 && ret == 0)
670 		ret = t_ret;
671 	if (pp != NULL && (t_ret = __memp_fput(mpf,
672 	    ip, pp, file_dbp->priority)) != 0 && ret == 0)
673 		ret = t_ret;
674 
675 	/* Free any allocated space. */
676 	if (_lp != NULL)
677 		__os_free(env, _lp);
678 	if (_rp != NULL)
679 		__os_free(env, _rp);
680 	if (sp != NULL)
681 		__os_free(env, sp);
682 
683 	REC_CLOSE;
684 }
685 /*
686  * __bam_split_recover --
687  *	Recovery function for split.
688  *
689  * PUBLIC: int __bam_split_42_recover
690  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
691  */
692 int
__bam_split_42_recover(env,dbtp,lsnp,op,info)693 __bam_split_42_recover(env, dbtp, lsnp, op, info)
694 	ENV *env;
695 	DBT *dbtp;
696 	DB_LSN *lsnp;
697 	db_recops op;
698 	void *info;
699 {
700 	__bam_split_42_args *argp;
701 	DB_THREAD_INFO *ip;
702 	DB *file_dbp;
703 	DBC *dbc;
704 	DB_MPOOLFILE *mpf;
705 	PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
706 	db_pgno_t pgno, root_pgno;
707 	u_int32_t ptype;
708 	int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret;
709 
710 	ip = ((DB_TXNHEAD *)info)->thread_info;
711 	REC_PRINT(__bam_split_print);
712 
713 	_lp = lp = np = pp = _rp = rp = NULL;
714 	sp = NULL;
715 
716 	REC_INTRO(__bam_split_42_read, ip, 0);
717 
718 	/*
719 	 * There are two kinds of splits that we have to recover from.  The
720 	 * first is a root-page split, where the root page is split from a
721 	 * leaf page into an internal page and two new leaf pages are created.
722 	 * The second is where a page is split into two pages, and a new key
723 	 * is inserted into the parent page.
724 	 *
725 	 * DBTs are not aligned in log records, so we need to copy the page
726 	 * so that we can access fields within it throughout this routine.
727 	 * Although we could hardcode the unaligned copies in this routine,
728 	 * we will be calling into regular btree functions with this page,
729 	 * so it's got to be aligned.  Copying it into allocated memory is
730 	 * the only way to guarantee this.
731 	 */
732 	if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
733 		goto out;
734 	memcpy(sp, argp->pg.data, argp->pg.size);
735 
736 	pgno = PGNO(sp);
737 	root_pgno = argp->root_pgno;
738 	rootsplit = root_pgno != PGNO_INVALID;
739 	REC_FGET(mpf, ip, argp->left, &lp, right);
740 right:	REC_FGET(mpf, ip, argp->right, &rp, redo);
741 
742 redo:	if (DB_REDO(op)) {
743 		l_update = r_update = p_update = 0;
744 		/*
745 		 * Decide if we need to resplit the page.
746 		 *
747 		 * If this is a root split, then the root has to exist unless
748 		 * we have truncated it due to a future deallocation.
749 		 */
750 		if (rootsplit) {
751 			REC_FGET(mpf, ip, root_pgno, &pp, do_left);
752 			cmp = LOG_COMPARE(&LSN(pp), &LSN(argp->pg.data));
753 			CHECK_LSN(env, op,
754 			    cmp, &LSN(pp), &LSN(argp->pg.data));
755 			p_update = cmp  == 0;
756 		}
757 
758 do_left:	if (lp != NULL) {
759 			cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
760 			CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
761 			if (cmp == 0)
762 				l_update = 1;
763 		}
764 
765 		if (rp != NULL) {
766 			cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
767 			CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
768 			if (cmp == 0)
769 				r_update = 1;
770 		}
771 
772 		if (!p_update && !l_update && !r_update)
773 			goto check_next;
774 
775 		/* Allocate and initialize new left/right child pages. */
776 		if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
777 		    (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
778 			goto out;
779 		if (rootsplit) {
780 			P_INIT(_lp, file_dbp->pgsize, argp->left,
781 			    PGNO_INVALID,
782 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
783 			    LEVEL(sp), TYPE(sp));
784 			P_INIT(_rp, file_dbp->pgsize, argp->right,
785 			    ISINTERNAL(sp) ?  PGNO_INVALID : argp->left,
786 			    PGNO_INVALID, LEVEL(sp), TYPE(sp));
787 		} else {
788 			P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
789 			    ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
790 			    ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
791 			    LEVEL(sp), TYPE(sp));
792 			P_INIT(_rp, file_dbp->pgsize, argp->right,
793 			    ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
794 			    ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
795 			    LEVEL(sp), TYPE(sp));
796 		}
797 
798 		/* Split the page. */
799 		if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
800 		    (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
801 		    NUM_ENT(sp))) != 0)
802 			goto out;
803 
804 		if (l_update) {
805 			REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
806 			memcpy(lp, _lp, file_dbp->pgsize);
807 			lp->lsn = *lsnp;
808 			if ((ret = __memp_fput(mpf,
809 			     ip, lp, file_dbp->priority)) != 0)
810 				goto out;
811 			lp = NULL;
812 		}
813 
814 		if (r_update) {
815 			REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
816 			memcpy(rp, _rp, file_dbp->pgsize);
817 			rp->lsn = *lsnp;
818 			if ((ret = __memp_fput(mpf,
819 			    ip, rp, file_dbp->priority)) != 0)
820 				goto out;
821 			rp = NULL;
822 		}
823 
824 		/*
825 		 * If the parent page is wrong, update it.  This is of interest
826 		 * only if it was a root split, since root splits create parent
827 		 * pages.  All other splits modify a parent page, but those are
828 		 * separately logged and recovered.
829 		 */
830 		if (rootsplit && p_update) {
831 			if (IS_BTREE_PAGE(sp)) {
832 				ptype = P_IBTREE;
833 				rc = argp->opflags & SPL_NRECS ? 1 : 0;
834 			} else {
835 				ptype = P_IRECNO;
836 				rc = 1;
837 			}
838 
839 			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
840 			P_INIT(pp, file_dbp->pgsize, root_pgno,
841 			    PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype);
842 			RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) +
843 			    __bam_total(file_dbp, _rp) : 0);
844 
845 			pp->lsn = *lsnp;
846 			if ((ret = __memp_fput(mpf,
847 			     ip, pp, file_dbp->priority)) != 0)
848 				goto out;
849 			pp = NULL;
850 		}
851 
852 check_next:	/*
853 		 * Finally, redo the next-page link if necessary.  This is of
854 		 * interest only if it wasn't a root split -- inserting a new
855 		 * page in the tree requires that any following page have its
856 		 * previous-page pointer updated to our new page.  The next
857 		 * page must exist because we're redoing the operation.
858 		 */
859 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
860 			if ((ret = __memp_fget(mpf, &argp->npgno,
861 			    ip, NULL, 0, &np)) != 0) {
862 				if (ret != DB_PAGE_NOTFOUND) {
863 					ret = __db_pgerr(
864 					    file_dbp, argp->npgno, ret);
865 					goto out;
866 				} else
867 					goto done;
868 			}
869 			cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
870 			CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
871 			if (cmp == 0) {
872 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
873 				PREV_PGNO(np) = argp->right;
874 				np->lsn = *lsnp;
875 				if ((ret = __memp_fput(mpf, ip,
876 				    np, file_dbp->priority)) != 0)
877 					goto out;
878 				np = NULL;
879 			}
880 		}
881 	} else {
882 		/*
883 		 * If the split page is wrong, replace its contents with the
884 		 * logged page contents.  If the page doesn't exist, it means
885 		 * that the create of the page never happened, nor did any of
886 		 * the adds onto the page that caused the split, and there's
887 		 * really no undo-ing to be done.
888 		 */
889 		if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
890 		    DB_MPOOL_EDIT, &pp)) != 0) {
891 			pp = NULL;
892 			goto lrundo;
893 		}
894 		if (LOG_COMPARE(lsnp, &LSN(pp)) == 0) {
895 			REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
896 			memcpy(pp, argp->pg.data, argp->pg.size);
897 			if ((ret = __memp_fput(mpf,
898 			     ip, pp, file_dbp->priority)) != 0)
899 				goto out;
900 			pp = NULL;
901 		}
902 
903 		/*
904 		 * If it's a root split and the left child ever existed, update
905 		 * its LSN.  (If it's not a root split, we've updated the left
906 		 * page already -- it's the same as the split page.) If the
907 		 * right child ever existed, root split or not, update its LSN.
908 		 * The undo of the page allocation(s) will restore them to the
909 		 * free list.
910 		 */
911 lrundo:		if ((rootsplit && lp != NULL) || rp != NULL) {
912 			if (rootsplit && lp != NULL &&
913 			    LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
914 				REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
915 				lp->lsn = argp->llsn;
916 				if ((ret = __memp_fput(mpf, ip,
917 				    lp, file_dbp->priority)) != 0)
918 					goto out;
919 				lp = NULL;
920 			}
921 			if (rp != NULL &&
922 			    LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
923 				REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
924 				rp->lsn = argp->rlsn;
925 				if ((ret = __memp_fput(mpf, ip,
926 				     rp, file_dbp->priority)) != 0)
927 					goto out;
928 				rp = NULL;
929 			}
930 		}
931 
932 		/*
933 		 * Finally, undo the next-page link if necessary.  This is of
934 		 * interest only if it wasn't a root split -- inserting a new
935 		 * page in the tree requires that any following page have its
936 		 * previous-page pointer updated to our new page.  Since it's
937 		 * possible that the next-page never existed, we ignore it as
938 		 * if there's nothing to undo.
939 		 */
940 		if (!rootsplit && argp->npgno != PGNO_INVALID) {
941 			if ((ret = __memp_fget(mpf, &argp->npgno,
942 			    ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
943 				np = NULL;
944 				goto done;
945 			}
946 			if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
947 				REC_DIRTY(mpf, ip, file_dbp->priority, &np);
948 				PREV_PGNO(np) = argp->left;
949 				np->lsn = argp->nlsn;
950 				if (__memp_fput(mpf,
951 				     ip, np, file_dbp->priority))
952 					goto out;
953 				np = NULL;
954 			}
955 		}
956 	}
957 
958 done:	*lsnp = argp->prev_lsn;
959 	ret = 0;
960 
961 out:	/* Free any pages that weren't dirtied. */
962 	if (pp != NULL && (t_ret = __memp_fput(mpf,
963 	    ip, pp, file_dbp->priority)) != 0 && ret == 0)
964 		ret = t_ret;
965 	if (lp != NULL && (t_ret = __memp_fput(mpf,
966 	    ip, lp, file_dbp->priority)) != 0 && ret == 0)
967 		ret = t_ret;
968 	if (np != NULL && (t_ret = __memp_fput(mpf,
969 	    ip, np, file_dbp->priority)) != 0 && ret == 0)
970 		ret = t_ret;
971 	if (rp != NULL && (t_ret = __memp_fput(mpf,
972 	     ip, rp, file_dbp->priority)) != 0 && ret == 0)
973 		ret = t_ret;
974 
975 	/* Free any allocated space. */
976 	if (_lp != NULL)
977 		__os_free(env, _lp);
978 	if (_rp != NULL)
979 		__os_free(env, _rp);
980 	if (sp != NULL)
981 		__os_free(env, sp);
982 
983 	REC_CLOSE;
984 }
985 
986 /*
987  * __bam_rsplit_recover --
988  *	Recovery function for a reverse split.
989  *
990  * PUBLIC: int __bam_rsplit_recover
991  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
992  */
993 int
__bam_rsplit_recover(env,dbtp,lsnp,op,info)994 __bam_rsplit_recover(env, dbtp, lsnp, op, info)
995 	ENV *env;
996 	DBT *dbtp;
997 	DB_LSN *lsnp;
998 	db_recops op;
999 	void *info;
1000 {
1001 	__bam_rsplit_args *argp;
1002 	DB_THREAD_INFO *ip;
1003 	DB *file_dbp;
1004 	DBC *dbc;
1005 	DB_LSN copy_lsn;
1006 	DB_MPOOLFILE *mpf;
1007 	PAGE *pagep;
1008 	db_pgno_t pgno, root_pgno;
1009 	db_recno_t rcnt;
1010 	int cmp_n, cmp_p, ret;
1011 
1012 	ip = ((DB_TXNHEAD *)info)->thread_info;
1013 	pagep = NULL;
1014 	REC_PRINT(__bam_rsplit_print);
1015 	REC_INTRO(__bam_rsplit_read, ip, 1);
1016 
1017 	/* Fix the root page. */
1018 	pgno = root_pgno = argp->root_pgno;
1019 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) {
1020 		if (ret != DB_PAGE_NOTFOUND) {
1021 			ret = __db_pgerr(file_dbp, pgno, ret);
1022 			goto out;
1023 		} else
1024 			goto do_page;
1025 	}
1026 
1027 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1028 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->rootlsn);
1029 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->rootlsn);
1030 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1031 	if (cmp_p == 0 && DB_REDO(op)) {
1032 		/*
1033 		 * Copy the new data to the root page.  If it is not now a
1034 		 * leaf page we need to restore the record number.  We could
1035 		 * try to determine if C_RECNUM was set in the btree, but
1036 		 * that's not really necessary since the field is not used
1037 		 * otherwise.
1038 		 */
1039 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1040 		rcnt = RE_NREC(pagep);
1041 		memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
1042 		if (LEVEL(pagep) > LEAFLEVEL)
1043 			RE_NREC_SET(pagep, rcnt);
1044 		pagep->pgno = root_pgno;
1045 		pagep->lsn = *lsnp;
1046 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1047 		/* Need to undo update described. */
1048 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1049 		P_INIT(pagep, file_dbp->pgsize, root_pgno,
1050 		    argp->nrec, PGNO_INVALID, pagep->level + 1,
1051 		    IS_BTREE_PAGE(pagep) ? P_IBTREE : P_IRECNO);
1052 		if ((ret = __db_pitem(dbc, pagep, 0,
1053 		    argp->rootent.size, &argp->rootent, NULL)) != 0)
1054 			goto out;
1055 		pagep->lsn = argp->rootlsn;
1056 	}
1057 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1058 		goto out;
1059 	pagep = NULL;
1060 
1061 do_page:
1062 	/*
1063 	 * Fix the page copied over the root page.  It's possible that the
1064 	 * page never made it to disk, or was truncated so if the page
1065 	 * doesn't exist, it's okay and there's nothing further to do.
1066 	 */
1067 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1068 		if (ret != DB_PAGE_NOTFOUND) {
1069 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1070 			goto out;
1071 		} else
1072 			goto done;
1073 	}
1074 	(void)__ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
1075 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1076 	cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
1077 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
1078 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1079 	if (cmp_p == 0 && DB_REDO(op)) {
1080 		/* Need to redo update described. */
1081 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1082 		pagep->lsn = *lsnp;
1083 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1084 		/* Need to undo update described. */
1085 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1086 		memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
1087 	}
1088 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1089 		goto out;
1090 	pagep = NULL;
1091 
1092 done:	*lsnp = argp->prev_lsn;
1093 	ret = 0;
1094 
1095 out:	if (pagep != NULL)
1096 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
1097 	REC_CLOSE;
1098 }
1099 
1100 /*
1101  * __bam_adj_recover --
1102  *	Recovery function for adj.
1103  *
1104  * PUBLIC: int __bam_adj_recover
1105  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1106  */
1107 int
__bam_adj_recover(env,dbtp,lsnp,op,info)1108 __bam_adj_recover(env, dbtp, lsnp, op, info)
1109 	ENV *env;
1110 	DBT *dbtp;
1111 	DB_LSN *lsnp;
1112 	db_recops op;
1113 	void *info;
1114 {
1115 	__bam_adj_args *argp;
1116 	DB_THREAD_INFO *ip;
1117 	DB *file_dbp;
1118 	DBC *dbc;
1119 	DB_MPOOLFILE *mpf;
1120 	PAGE *pagep;
1121 	int cmp_n, cmp_p, ret;
1122 
1123 	ip = ((DB_TXNHEAD *)info)->thread_info;
1124 	pagep = NULL;
1125 	REC_PRINT(__bam_adj_print);
1126 	REC_INTRO(__bam_adj_read, ip, 1);
1127 
1128 	/* Get the page; if it never existed and we're undoing, we're done. */
1129 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1130 		if (ret != DB_PAGE_NOTFOUND) {
1131 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1132 			goto out;
1133 		} else
1134 			goto done;
1135 	}
1136 
1137 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1138 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1139 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1140 	if (cmp_p == 0 && DB_REDO(op)) {
1141 		/* Need to redo update described. */
1142 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1143 		if ((ret = __bam_adjindx(dbc,
1144 		    pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
1145 			goto out;
1146 
1147 		LSN(pagep) = *lsnp;
1148 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1149 		/* Need to undo update described. */
1150 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1151 		if ((ret = __bam_adjindx(dbc,
1152 		    pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
1153 			goto out;
1154 
1155 		LSN(pagep) = argp->lsn;
1156 	}
1157 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1158 		goto out;
1159 	pagep = NULL;
1160 
1161 done:	*lsnp = argp->prev_lsn;
1162 	ret = 0;
1163 
1164 out:	if (pagep != NULL)
1165 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
1166 	REC_CLOSE;
1167 }
1168 
1169 /*
1170  * __bam_cadjust_recover --
1171  *	Recovery function for the adjust of a count change in an internal
1172  *	page.
1173  *
1174  * PUBLIC: int __bam_cadjust_recover
1175  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1176  */
1177 int
__bam_cadjust_recover(env,dbtp,lsnp,op,info)1178 __bam_cadjust_recover(env, dbtp, lsnp, op, info)
1179 	ENV *env;
1180 	DBT *dbtp;
1181 	DB_LSN *lsnp;
1182 	db_recops op;
1183 	void *info;
1184 {
1185 	__bam_cadjust_args *argp;
1186 	DB_THREAD_INFO *ip;
1187 	DB *file_dbp;
1188 	DBC *dbc;
1189 	DB_MPOOLFILE *mpf;
1190 	PAGE *pagep;
1191 	int cmp_n, cmp_p, ret;
1192 
1193 	ip = ((DB_TXNHEAD *)info)->thread_info;
1194 	pagep = NULL;
1195 	REC_PRINT(__bam_cadjust_print);
1196 	REC_INTRO(__bam_cadjust_read, ip, 0);
1197 
1198 	/* Get the page; if it never existed and we're undoing, we're done. */
1199 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1200 		if (ret != DB_PAGE_NOTFOUND) {
1201 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1202 			goto out;
1203 		} else
1204 			goto done;
1205 	}
1206 
1207 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1208 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1209 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1210 	if (cmp_p == 0 && DB_REDO(op)) {
1211 		/* Need to redo update described. */
1212 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1213 		if (IS_BTREE_PAGE(pagep)) {
1214 			GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
1215 			    argp->adjust;
1216 			if (argp->opflags & CAD_UPDATEROOT)
1217 				RE_NREC_ADJ(pagep, argp->adjust);
1218 		} else {
1219 			GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
1220 			    argp->adjust;
1221 			if (argp->opflags & CAD_UPDATEROOT)
1222 				RE_NREC_ADJ(pagep, argp->adjust);
1223 		}
1224 
1225 		LSN(pagep) = *lsnp;
1226 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1227 		/* Need to undo update described. */
1228 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1229 		if (IS_BTREE_PAGE(pagep)) {
1230 			GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
1231 			    argp->adjust;
1232 			if (argp->opflags & CAD_UPDATEROOT)
1233 				RE_NREC_ADJ(pagep, -(argp->adjust));
1234 		} else {
1235 			GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
1236 			    argp->adjust;
1237 			if (argp->opflags & CAD_UPDATEROOT)
1238 				RE_NREC_ADJ(pagep, -(argp->adjust));
1239 		}
1240 		LSN(pagep) = argp->lsn;
1241 	}
1242 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1243 		goto out;
1244 	pagep = NULL;
1245 
1246 done:	*lsnp = argp->prev_lsn;
1247 	ret = 0;
1248 
1249 out:	if (pagep != NULL)
1250 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1251 	REC_CLOSE;
1252 }
1253 
1254 /*
1255  * __bam_cdel_recover --
1256  *	Recovery function for the intent-to-delete of a cursor record.
1257  *
1258  * PUBLIC: int __bam_cdel_recover
1259  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1260  */
1261 int
__bam_cdel_recover(env,dbtp,lsnp,op,info)1262 __bam_cdel_recover(env, dbtp, lsnp, op, info)
1263 	ENV *env;
1264 	DBT *dbtp;
1265 	DB_LSN *lsnp;
1266 	db_recops op;
1267 	void *info;
1268 {
1269 	__bam_cdel_args *argp;
1270 	DB_THREAD_INFO *ip;
1271 	DB *file_dbp;
1272 	DBC *dbc;
1273 	DB_MPOOLFILE *mpf;
1274 	PAGE *pagep;
1275 	u_int32_t indx;
1276 	int cmp_n, cmp_p, ret;
1277 
1278 	ip = ((DB_TXNHEAD *)info)->thread_info;
1279 	pagep = NULL;
1280 	REC_PRINT(__bam_cdel_print);
1281 	REC_INTRO(__bam_cdel_read, ip, 0);
1282 
1283 	/* Get the page; if it never existed and we're undoing, we're done. */
1284 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1285 		if (ret != DB_PAGE_NOTFOUND) {
1286 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1287 			goto out;
1288 		} else
1289 			goto done;
1290 	}
1291 
1292 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1293 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1294 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1295 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1296 	if (cmp_p == 0 && DB_REDO(op)) {
1297 		/* Need to redo update described. */
1298 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1299 		indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
1300 		B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type);
1301 
1302 		LSN(pagep) = *lsnp;
1303 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1304 		/* Need to undo update described. */
1305 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1306 		indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
1307 		B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type);
1308 
1309 		if ((ret = __bam_ca_delete(
1310 		    file_dbp, argp->pgno, argp->indx, 0, NULL)) != 0)
1311 			goto out;
1312 
1313 		LSN(pagep) = argp->lsn;
1314 	}
1315 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1316 		goto out;
1317 	pagep = NULL;
1318 
1319 done:	*lsnp = argp->prev_lsn;
1320 	ret = 0;
1321 
1322 out:	if (pagep != NULL)
1323 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1324 	REC_CLOSE;
1325 }
1326 
1327 /*
1328  * __bam_repl_recover --
1329  *	Recovery function for page item replacement.
1330  *
1331  * PUBLIC: int __bam_repl_recover
1332  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1333  */
1334 int
__bam_repl_recover(env,dbtp,lsnp,op,info)1335 __bam_repl_recover(env, dbtp, lsnp, op, info)
1336 	ENV *env;
1337 	DBT *dbtp;
1338 	DB_LSN *lsnp;
1339 	db_recops op;
1340 	void *info;
1341 {
1342 	__bam_repl_args *argp;
1343 	DB_THREAD_INFO *ip;
1344 	BKEYDATA *bk;
1345 	DB *file_dbp;
1346 	DBC *dbc;
1347 	DBT dbt;
1348 	DB_MPOOLFILE *mpf;
1349 	PAGE *pagep;
1350 	int cmp_n, cmp_p, ret;
1351 	u_int32_t len;
1352 	u_int8_t *dp, *p;
1353 
1354 	ip = ((DB_TXNHEAD *)info)->thread_info;
1355 	pagep = NULL;
1356 	REC_PRINT(__bam_repl_print);
1357 	REC_INTRO(__bam_repl_read, ip, 1);
1358 
1359 	/* Get the page; if it never existed and we're undoing, we're done. */
1360 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1361 		if (ret != DB_PAGE_NOTFOUND) {
1362 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1363 			goto out;
1364 		} else
1365 			goto done;
1366 	}
1367 
1368 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1369 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1370 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1371 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1372 	if (cmp_p == 0 && DB_REDO(op)) {
1373 		/*
1374 		 * Need to redo update described.
1375 		 *
1376 		 * Re-build the replacement item.
1377 		 */
1378 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1379 		bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
1380 		dp = bk->data;
1381 		len = bk->len;
1382 		memset(&dbt, 0, sizeof(dbt));
1383 		dbt.size = argp->prefix + argp->suffix + argp->repl.size;
1384 		if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
1385 			goto out;
1386 		p = dbt.data;
1387 		memcpy(p, dp, argp->prefix);
1388 		p += argp->prefix;
1389 		memcpy(p, argp->repl.data, argp->repl.size);
1390 		p += argp->repl.size;
1391 		memcpy(p, dp + (len - argp->suffix), argp->suffix);
1392 
1393 		ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0);
1394 		__os_free(env, dbt.data);
1395 		if (ret != 0)
1396 			goto out;
1397 
1398 		LSN(pagep) = *lsnp;
1399 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1400 		/*
1401 		 * Need to undo update described.
1402 		 *
1403 		 * Re-build the original item.
1404 		 */
1405 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1406 		bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
1407 		dp = bk->data;
1408 		len = bk->len;
1409 		memset(&dbt, 0, sizeof(dbt));
1410 		dbt.size = argp->prefix + argp->suffix + argp->orig.size;
1411 		if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
1412 			goto out;
1413 		p = dbt.data;
1414 		memcpy(p, dp, argp->prefix);
1415 		p += argp->prefix;
1416 		memcpy(p, argp->orig.data, argp->orig.size);
1417 		p += argp->orig.size;
1418 		memcpy(p, dp + (len - argp->suffix), argp->suffix);
1419 
1420 		ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0);
1421 		__os_free(env, dbt.data);
1422 		if (ret != 0)
1423 			goto out;
1424 
1425 		/* Reset the deleted flag, if necessary. */
1426 		if (argp->isdeleted)
1427 			B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type);
1428 
1429 		LSN(pagep) = argp->lsn;
1430 	}
1431 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1432 		goto out;
1433 	pagep = NULL;
1434 
1435 done:	*lsnp = argp->prev_lsn;
1436 	ret = 0;
1437 
1438 out:	if (pagep != NULL)
1439 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
1440 	REC_CLOSE;
1441 }
1442 
1443 /*
1444  * __bam_irep_recover --
1445  *	Recovery function for internal page item replacement.
1446  *
1447  * PUBLIC: int __bam_irep_recover
1448  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1449  */
1450 int
__bam_irep_recover(env,dbtp,lsnp,op,info)1451 __bam_irep_recover(env, dbtp, lsnp, op, info)
1452 	ENV *env;
1453 	DBT *dbtp;
1454 	DB_LSN *lsnp;
1455 	db_recops op;
1456 	void *info;
1457 {
1458 	__bam_irep_args *argp;
1459 	BINTERNAL *bn;
1460 	DB_THREAD_INFO *ip;
1461 	DB *file_dbp;
1462 	DBC *dbc;
1463 	DB_MPOOLFILE *mpf;
1464 	PAGE *pagep;
1465 	int cmp_n, cmp_p, ret;
1466 
1467 	ip = ((DB_TXNHEAD *)info)->thread_info;
1468 	pagep = NULL;
1469 	REC_PRINT(__bam_irep_print);
1470 	REC_INTRO(__bam_irep_read, ip, 1);
1471 
1472 	/* Get the page; if it never existed and we're undoing, we're done. */
1473 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1474 		if (ret != DB_PAGE_NOTFOUND) {
1475 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1476 			goto out;
1477 		} else
1478 			goto done;
1479 	}
1480 
1481 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1482 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1483 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1484 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1485 	if (cmp_p == 0 && DB_REDO(op)) {
1486 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1487 		bn = (BINTERNAL *)argp->hdr.data;
1488 		if ((ret = __bam_ritem_nolog(dbc,
1489 		    pagep, argp->indx, &argp->hdr, &argp->data, bn->type)) != 0)
1490 			goto out;
1491 		LSN(pagep) = *lsnp;
1492 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1493 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1494 		bn = (BINTERNAL *)argp->old.data;
1495 		if ((ret = __bam_ritem_nolog(dbc,
1496 		    pagep, argp->indx, &argp->old, NULL, bn->type)) != 0)
1497 			goto out;
1498 		LSN(pagep) = argp->lsn;
1499 	}
1500 
1501 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1502 		goto out;
1503 	pagep = NULL;
1504 
1505 done:	*lsnp = argp->prev_lsn;
1506 	ret = 0;
1507 
1508 out:	if (pagep != NULL)
1509 		(void)__memp_fput(mpf, ip, pagep, dbc->priority);
1510 	REC_CLOSE;
1511 }
1512 
1513 /*
1514  * __bam_root_recover --
1515  *	Recovery function for setting the root page on the meta-data page.
1516  *
1517  * PUBLIC: int __bam_root_recover
1518  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1519  */
1520 int
__bam_root_recover(env,dbtp,lsnp,op,info)1521 __bam_root_recover(env, dbtp, lsnp, op, info)
1522 	ENV *env;
1523 	DBT *dbtp;
1524 	DB_LSN *lsnp;
1525 	db_recops op;
1526 	void *info;
1527 {
1528 	__bam_root_args *argp;
1529 	DB_THREAD_INFO *ip;
1530 	BTMETA *meta;
1531 	DB *file_dbp;
1532 	DBC *dbc;
1533 	DB_MPOOLFILE *mpf;
1534 	int cmp_n, cmp_p, ret;
1535 
1536 	ip = ((DB_TXNHEAD *)info)->thread_info;
1537 	meta = NULL;
1538 	REC_PRINT(__bam_root_print);
1539 	REC_INTRO(__bam_root_read, ip, 0);
1540 
1541 	if ((ret = __memp_fget(mpf, &argp->meta_pgno, ip, NULL,
1542 	    0, &meta)) != 0) {
1543 		if (ret != DB_PAGE_NOTFOUND) {
1544 			ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
1545 			goto out;
1546 		} else
1547 			goto done;
1548 	}
1549 
1550 	cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1551 	cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1552 	CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1553 	CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
1554 	if (cmp_p == 0 && DB_REDO(op)) {
1555 		/* Need to redo update described. */
1556 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1557 		meta->root = argp->root_pgno;
1558 		meta->dbmeta.lsn = *lsnp;
1559 		((BTREE *)file_dbp->bt_internal)->bt_root = meta->root;
1560 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1561 		/* Nothing to undo except lsn. */
1562 		REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1563 		meta->dbmeta.lsn = argp->meta_lsn;
1564 	}
1565 	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1566 		goto out;
1567 	meta = NULL;
1568 
1569 done:	*lsnp = argp->prev_lsn;
1570 	ret = 0;
1571 
1572 out:	if (meta != NULL)
1573 		(void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1574 	REC_CLOSE;
1575 }
1576 
1577 /*
1578  * __bam_curadj_recover --
1579  *	Transaction abort function to undo cursor adjustments.
1580  *	This should only be triggered by subtransaction aborts.
1581  *
1582  * PUBLIC: int __bam_curadj_recover
1583  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1584  */
1585 int
__bam_curadj_recover(env,dbtp,lsnp,op,info)1586 __bam_curadj_recover(env, dbtp, lsnp, op, info)
1587 	ENV *env;
1588 	DBT *dbtp;
1589 	DB_LSN *lsnp;
1590 	db_recops op;
1591 	void *info;
1592 {
1593 	__bam_curadj_args *argp;
1594 	DB_THREAD_INFO *ip;
1595 	DB *file_dbp;
1596 	DBC *dbc;
1597 	DB_MPOOLFILE *mpf;
1598 	int ret;
1599 
1600 	COMPQUIET(mpf, NULL);
1601 
1602 	ip = ((DB_TXNHEAD *)info)->thread_info;
1603 	REC_PRINT(__bam_curadj_print);
1604 	REC_INTRO(__bam_curadj_read, ip, 1);
1605 
1606 	ret = 0;
1607 	if (op != DB_TXN_ABORT)
1608 		goto done;
1609 
1610 	switch (argp->mode) {
1611 	case DB_CA_DI:
1612 		if ((ret = __bam_ca_di(dbc, argp->from_pgno,
1613 		    argp->from_indx, -(int)argp->first_indx)) != 0)
1614 			goto out;
1615 		break;
1616 	case DB_CA_DUP:
1617 		if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx,
1618 		    argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
1619 			goto out;
1620 		break;
1621 
1622 	case DB_CA_RSPLIT:
1623 		if ((ret =
1624 		    __bam_ca_rsplit(dbc, argp->to_pgno, argp->from_pgno)) != 0)
1625 			goto out;
1626 		break;
1627 
1628 	case DB_CA_SPLIT:
1629 		if ((ret = __bam_ca_undosplit(file_dbp, argp->from_pgno,
1630 		    argp->to_pgno, argp->left_pgno, argp->from_indx)) != 0)
1631 			goto out;
1632 		break;
1633 	}
1634 
1635 done:	*lsnp = argp->prev_lsn;
1636 out:	REC_CLOSE;
1637 }
1638 
1639 /*
1640  * __bam_rcuradj_recover --
1641  *	Transaction abort function to undo cursor adjustments in rrecno.
1642  *	This should only be triggered by subtransaction aborts.
1643  *
1644  * PUBLIC: int __bam_rcuradj_recover
1645  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1646  */
1647 int
__bam_rcuradj_recover(env,dbtp,lsnp,op,info)1648 __bam_rcuradj_recover(env, dbtp, lsnp, op, info)
1649 	ENV *env;
1650 	DBT *dbtp;
1651 	DB_LSN *lsnp;
1652 	db_recops op;
1653 	void *info;
1654 {
1655 	__bam_rcuradj_args *argp;
1656 	DB_THREAD_INFO *ip;
1657 	BTREE_CURSOR *cp;
1658 	DB *file_dbp;
1659 	DBC *dbc, *rdbc;
1660 	DB_MPOOLFILE *mpf;
1661 	int ret, t_ret;
1662 
1663 	COMPQUIET(mpf, NULL);
1664 
1665 	ip = ((DB_TXNHEAD *)info)->thread_info;
1666 	rdbc = NULL;
1667 	REC_PRINT(__bam_rcuradj_print);
1668 	REC_INTRO(__bam_rcuradj_read, ip, 1);
1669 
1670 	ret = t_ret = 0;
1671 
1672 	if (op != DB_TXN_ABORT)
1673 		goto done;
1674 
1675 	/*
1676 	 * We don't know whether we're in an offpage dup set, and
1677 	 * thus don't know whether the dbc REC_INTRO has handed us is
1678 	 * of a reasonable type.  It's certainly unset, so if this is
1679 	 * an offpage dup set, we don't have an OPD cursor.  The
1680 	 * simplest solution is just to allocate a whole new cursor
1681 	 * for our use;  we're only really using it to hold pass some
1682 	 * state into __ram_ca, and this way we don't need to make
1683 	 * this function know anything about how offpage dups work.
1684 	 */
1685 	if ((ret = __db_cursor_int(file_dbp, NULL,
1686 		NULL, DB_RECNO, argp->root, DB_RECOVER, NULL, &rdbc)) != 0)
1687 		goto out;
1688 
1689 	cp = (BTREE_CURSOR *)rdbc->internal;
1690 	F_SET(cp, C_RENUMBER);
1691 	cp->recno = argp->recno;
1692 
1693 	switch (argp->mode) {
1694 	case CA_DELETE:
1695 		/*
1696 		 * The way to undo a delete is with an insert.  Since
1697 		 * we're undoing it, the delete flag must be set.
1698 		 */
1699 		F_SET(cp, C_DELETED);
1700 		F_SET(cp, C_RENUMBER);	/* Just in case. */
1701 		cp->order = argp->order;
1702 		if ((ret = __ram_ca(rdbc, CA_ICURRENT, NULL)) != 0)
1703 			goto out;
1704 		break;
1705 	case CA_IAFTER:
1706 	case CA_IBEFORE:
1707 	case CA_ICURRENT:
1708 		/*
1709 		 * The way to undo an insert is with a delete.  The delete
1710 		 * flag is unset to start with.
1711 		 */
1712 		F_CLR(cp, C_DELETED);
1713 		cp->order = INVALID_ORDER;
1714 		if ((ret = __ram_ca(rdbc, CA_DELETE, NULL)) != 0)
1715 			goto out;
1716 		break;
1717 	}
1718 
1719 done:	*lsnp = argp->prev_lsn;
1720 out:	if (rdbc != NULL && (t_ret = __dbc_close(rdbc)) != 0 && ret == 0)
1721 		ret = t_ret;
1722 	REC_CLOSE;
1723 }
1724 
1725 /*
1726  * __bam_merge_44_recover --
1727  *	Recovery function for merge.
1728  *
1729  * PUBLIC: int __bam_merge_44_recover
1730  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1731  */
1732 int
__bam_merge_44_recover(env,dbtp,lsnp,op,info)1733 __bam_merge_44_recover(env, dbtp, lsnp, op, info)
1734 	ENV *env;
1735 	DBT *dbtp;
1736 	DB_LSN *lsnp;
1737 	db_recops op;
1738 	void *info;
1739 {
1740 	__bam_merge_44_args *argp;
1741 	DB_THREAD_INFO *ip;
1742 	BKEYDATA *bk;
1743 	DB *file_dbp;
1744 	DBC *dbc;
1745 	DB_MPOOLFILE *mpf;
1746 	PAGE *pagep;
1747 	db_indx_t indx, *ninp, *pinp;
1748 	u_int32_t size;
1749 	u_int8_t *bp;
1750 	int cmp_n, cmp_p, i, ret;
1751 
1752 	ip = ((DB_TXNHEAD *)info)->thread_info;
1753 	REC_PRINT(__bam_merge_44_print);
1754 	REC_INTRO(__bam_merge_44_read, ip, 1);
1755 
1756 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1757 		if (ret != DB_PAGE_NOTFOUND) {
1758 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1759 			goto out;
1760 		} else
1761 			goto next;
1762 	}
1763 
1764 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1765 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1766 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1767 
1768 	if (cmp_p == 0 && DB_REDO(op)) {
1769 		/*
1770 		 * If the header is provided the page is empty, copy the
1771 		 * needed data.
1772 		 */
1773 		DB_ASSERT(env, argp->hdr.size == 0 || NUM_ENT(pagep) == 0);
1774 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1775 		if (argp->hdr.size != 0) {
1776 			P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1777 			     PREV_PGNO(argp->hdr.data),
1778 			     NEXT_PGNO(argp->hdr.data),
1779 			     LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
1780 		}
1781 		if (TYPE(pagep) == P_OVERFLOW) {
1782 			OV_REF(pagep) = OV_REF(argp->hdr.data);
1783 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1784 			bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1785 			memcpy(bp, argp->data.data, argp->data.size);
1786 		} else {
1787 			/* Copy the data segment. */
1788 			bp = (u_int8_t *)pagep +
1789 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1790 			memcpy(bp, argp->data.data, argp->data.size);
1791 
1792 			/* Copy index table offset past the current entries. */
1793 			pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1794 			ninp = argp->ind.data;
1795 			for (i = 0;
1796 			     i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1797 				*pinp++ = *ninp++
1798 				      - (file_dbp->pgsize - HOFFSET(pagep));
1799 			HOFFSET(pagep) -= argp->data.size;
1800 			NUM_ENT(pagep) += i;
1801 		}
1802 		pagep->lsn = *lsnp;
1803 	} else if (cmp_n == 0 && !DB_REDO(op)) {
1804 		/*
1805 		 * Since logging is logical at the page level
1806 		 * we cannot just truncate the data space.  Delete
1807 		 * the proper number of items from the logical end
1808 		 * of the page.
1809 		 */
1810 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1811 		for (i = 0; i < (int)(argp->ind.size / sizeof(*ninp)); i++) {
1812 			indx = NUM_ENT(pagep) - 1;
1813 			if (P_INP(file_dbp, pagep)[indx] ==
1814 			     P_INP(file_dbp, pagep)[indx - P_INDX]) {
1815 				NUM_ENT(pagep)--;
1816 				continue;
1817 			}
1818 			switch (TYPE(pagep)) {
1819 			case P_LBTREE:
1820 			case P_LRECNO:
1821 			case P_LDUP:
1822 				bk = GET_BKEYDATA(file_dbp, pagep, indx);
1823 				size = BITEM_SIZE(bk);
1824 				break;
1825 
1826 			case P_IBTREE:
1827 				size = BINTERNAL_SIZE(
1828 				     GET_BINTERNAL(file_dbp, pagep, indx)->len);
1829 				break;
1830 			case P_IRECNO:
1831 				size = RINTERNAL_SIZE;
1832 				break;
1833 
1834 			default:
1835 				ret = __db_pgfmt(env, PGNO(pagep));
1836 				goto out;
1837 			}
1838 			if ((ret =
1839 			     __db_ditem(dbc, pagep, indx, size)) != 0)
1840 				goto out;
1841 		}
1842 		if (argp->ind.size == 0)
1843 			HOFFSET(pagep) = file_dbp->pgsize;
1844 		pagep->lsn = argp->lsn;
1845 	}
1846 
1847 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1848 		goto out;
1849 
1850 next:	if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
1851 		if (ret != DB_PAGE_NOTFOUND) {
1852 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1853 			goto out;
1854 		} else
1855 			goto done;
1856 	}
1857 
1858 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1859 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
1860 	CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
1861 
1862 	if (cmp_p == 0 && DB_REDO(op)) {
1863 		/* Need to truncate the page. */
1864 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1865 		HOFFSET(pagep) = file_dbp->pgsize;
1866 		NUM_ENT(pagep) = 0;
1867 		pagep->lsn = *lsnp;
1868 	} else if (cmp_n == 0 && !DB_REDO(op)) {
1869 		/* Need to put the data back on the page. */
1870 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1871 		if (TYPE(pagep) == P_OVERFLOW) {
1872 			OV_REF(pagep) = OV_REF(argp->hdr.data);
1873 			OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1874 			bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1875 			memcpy(bp, argp->data.data, argp->data.size);
1876 		} else {
1877 			bp = (u_int8_t *)pagep +
1878 			     (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1879 			memcpy(bp, argp->data.data, argp->data.size);
1880 
1881 			/* Copy index table. */
1882 			pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1883 			ninp = argp->ind.data;
1884 			for (i = 0;
1885 			    i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1886 				*pinp++ = *ninp++;
1887 			HOFFSET(pagep) -= argp->data.size;
1888 			NUM_ENT(pagep) = i;
1889 		}
1890 		pagep->lsn = argp->nlsn;
1891 	}
1892 
1893 	if ((ret = __memp_fput(mpf,
1894 	     ip, pagep, dbc->priority)) != 0)
1895 		goto out;
1896 done:
1897 	*lsnp = argp->prev_lsn;
1898 	ret = 0;
1899 
1900 out:	REC_CLOSE;
1901 }
1902 
1903 /*
1904  * __bam_relink_43_recover --
1905  *	Recovery function for relink.
1906  *
1907  * PUBLIC: int __bam_relink_43_recover
1908  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1909  */
1910 int
__bam_relink_43_recover(env,dbtp,lsnp,op,info)1911 __bam_relink_43_recover(env, dbtp, lsnp, op, info)
1912 	ENV *env;
1913 	DBT *dbtp;
1914 	DB_LSN *lsnp;
1915 	db_recops op;
1916 	void *info;
1917 {
1918 	__bam_relink_43_args *argp;
1919 	DB_THREAD_INFO *ip;
1920 	DB *file_dbp;
1921 	DBC *dbc;
1922 	DB_MPOOLFILE *mpf;
1923 	PAGE *pagep;
1924 	int cmp_n, cmp_p, modified, ret;
1925 
1926 	ip = ((DB_TXNHEAD *)info)->thread_info;
1927 	pagep = NULL;
1928 	REC_PRINT(__bam_relink_43_print);
1929 	REC_INTRO(__bam_relink_43_read, ip, 0);
1930 
1931 	/*
1932 	 * There are up to three pages we need to check -- the page, and the
1933 	 * previous and next pages, if they existed.  For a page add operation,
1934 	 * the current page is the result of a split and is being recovered
1935 	 * elsewhere, so all we need do is recover the next page.
1936 	 */
1937 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1938 		if (ret != DB_PAGE_NOTFOUND) {
1939 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
1940 			goto out;
1941 		} else
1942 			goto next2;
1943 	}
1944 
1945 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1946 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1947 	if (cmp_p == 0 && DB_REDO(op)) {
1948 		/* Redo the relink. */
1949 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1950 		pagep->lsn = *lsnp;
1951 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
1952 		/* Undo the relink. */
1953 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1954 		pagep->next_pgno = argp->next;
1955 		pagep->prev_pgno = argp->prev;
1956 		pagep->lsn = argp->lsn;
1957 	}
1958 	if ((ret = __memp_fput(mpf,
1959 	     ip, pagep, file_dbp->priority)) != 0)
1960 		goto out;
1961 	pagep = NULL;
1962 
1963 next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
1964 		if (ret != DB_PAGE_NOTFOUND) {
1965 			ret = __db_pgerr(file_dbp, argp->next, ret);
1966 			goto out;
1967 		} else
1968 			goto prev;
1969 	}
1970 
1971 	modified = 0;
1972 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1973 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
1974 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
1975 	if (cmp_p == 0 && DB_REDO(op)) {
1976 		/* Redo the remove or undo the add. */
1977 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1978 		pagep->prev_pgno = argp->prev;
1979 		modified = 1;
1980 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1981 		/* Undo the remove or redo the add. */
1982 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1983 		pagep->prev_pgno = argp->pgno;
1984 		modified = 1;
1985 	}
1986 	if (modified) {
1987 		if (DB_UNDO(op))
1988 			pagep->lsn = argp->lsn_next;
1989 		else
1990 			pagep->lsn = *lsnp;
1991 	}
1992 	if ((ret = __memp_fput(mpf,
1993 	     ip, pagep, file_dbp->priority)) != 0)
1994 		goto out;
1995 	pagep = NULL;
1996 
1997 prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
1998 		if (ret != DB_PAGE_NOTFOUND) {
1999 			ret = __db_pgerr(file_dbp, argp->prev, ret);
2000 			goto out;
2001 		} else
2002 			goto done;
2003 	}
2004 
2005 	modified = 0;
2006 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2007 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2008 	if (cmp_p == 0 && DB_REDO(op)) {
2009 		/* Redo the relink. */
2010 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2011 		pagep->next_pgno = argp->next;
2012 		modified = 1;
2013 	} else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2014 		/* Undo the relink. */
2015 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2016 		pagep->next_pgno = argp->pgno;
2017 		modified = 1;
2018 	}
2019 	if (modified) {
2020 		if (DB_UNDO(op))
2021 			pagep->lsn = argp->lsn_prev;
2022 		else
2023 			pagep->lsn = *lsnp;
2024 	}
2025 	if ((ret = __memp_fput(mpf,
2026 	     ip, pagep, file_dbp->priority)) != 0)
2027 		goto out;
2028 	pagep = NULL;
2029 
2030 done:	*lsnp = argp->prev_lsn;
2031 	ret = 0;
2032 
2033 out:	if (pagep != NULL)
2034 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2035 	REC_CLOSE;
2036 }
2037