1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 2013 Oracle and/or its affiliates. All rights reserved.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/lock.h"
15 #include "dbinc/mp.h"
16
/*
 * IS_BTREE_PAGE --
 *	Non-zero when TYPE(pagep) is P_IBTREE, P_LBTREE or P_LDUP.  The
 *	argument may be expanded up to three times, so pass an expression
 *	without side effects.
 */
#define	IS_BTREE_PAGE(pagep)						\
	(TYPE(pagep) == P_IBTREE ||					\
	TYPE(pagep) == P_LBTREE ||					\
	TYPE(pagep) == P_LDUP)
20
21 /*
22 * __bam_split_recover --
23 * Recovery function for split.
24 *
25 * PUBLIC: int __bam_split_recover
26 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
27 */
28 int
__bam_split_recover(env,dbtp,lsnp,op,info)29 __bam_split_recover(env, dbtp, lsnp, op, info)
30 ENV *env;
31 DBT *dbtp;
32 DB_LSN *lsnp;
33 db_recops op;
34 void *info;
35 {
36 __bam_split_args *argp;
37 DB_THREAD_INFO *ip;
38 DB *file_dbp;
39 DBC *dbc;
40 DB_LSN *plsnp;
41 DB_MPOOLFILE *mpf;
42 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
43 db_pgno_t pgno, parent_pgno;
44 u_int32_t opflags, size;
45 int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret;
46
47 ip = ((DB_TXNHEAD *)info)->thread_info;
48 REC_PRINT(__bam_split_print);
49
50 _lp = lp = np = pp = _rp = rp = NULL;
51 sp = NULL;
52
53 REC_INTRO(__bam_split_read, ip, 0);
54
55 opflags = OP_MODE_GET(argp->opflags);
56 if ((ret = __db_cursor_int(file_dbp, ip, NULL,
57 (opflags & SPL_RECNO) ? DB_RECNO : DB_BTREE,
58 PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0)
59 goto out;
60 if (opflags & SPL_NRECS)
61 F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM);
62
63 /*
64 * There are two kinds of splits that we have to recover from. The
65 * first is a root-page split, where the root page is split from a
66 * leaf page into an internal page and two new leaf pages are created.
67 * The second is where a page is split into two pages, and a new key
68 * is inserted into the parent page.
69 *
70 * DBTs are not aligned in log records, so we need to copy the page
71 * so that we can access fields within it throughout this routine.
72 * Although we could hardcode the unaligned copies in this routine,
73 * we will be calling into regular btree functions with this page,
74 * so it's got to be aligned. Copying it into allocated memory is
75 * the only way to guarantee this.
76 */
77 if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
78 goto out;
79 memcpy(sp, argp->pg.data, argp->pg.size);
80
81 pgno = PGNO(sp);
82 parent_pgno = argp->ppgno;
83 rootsplit = parent_pgno == pgno;
84
85 /* Get the pages going down the tree. */
86 REC_FGET(mpf, ip, parent_pgno, &pp, left);
87 left: REC_FGET(mpf, ip, argp->left, &lp, right);
88 right: REC_FGET(mpf, ip, argp->right, &rp, redo);
89
90 redo: if (DB_REDO(op)) {
91 l_update = r_update = p_update = 0;
92 /*
93 * Decide if we need to resplit the page.
94 *
95 * If this is a root split, then the root has to exist unless
96 * we have truncated it due to a future deallocation.
97 */
98 if (pp != NULL) {
99 if (rootsplit)
100 plsnp = &LSN(argp->pg.data);
101 else
102 plsnp = &argp->plsn;
103 cmp = LOG_COMPARE(&LSN(pp), plsnp);
104 CHECK_LSN(env, op, cmp, &LSN(pp), plsnp);
105 if (cmp == 0)
106 p_update = 1;
107 }
108
109 if (lp != NULL) {
110 cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
111 CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
112 if (cmp == 0)
113 l_update = 1;
114 }
115
116 if (rp != NULL) {
117 cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
118 CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
119 if (cmp == 0)
120 r_update = 1;
121 }
122
123 if (!p_update && !l_update && !r_update)
124 goto check_next;
125
126 /* Allocate and initialize new left/right child pages. */
127 if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
128 (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
129 goto out;
130 if (rootsplit) {
131 P_INIT(_lp, file_dbp->pgsize, argp->left,
132 PGNO_INVALID,
133 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
134 LEVEL(sp), TYPE(sp));
135 P_INIT(_rp, file_dbp->pgsize, argp->right,
136 ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
137 PGNO_INVALID, LEVEL(sp), TYPE(sp));
138 } else {
139 P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
140 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
141 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
142 LEVEL(sp), TYPE(sp));
143 P_INIT(_rp, file_dbp->pgsize, argp->right,
144 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
145 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
146 LEVEL(sp), TYPE(sp));
147 }
148
149 /* Split the page. */
150 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
151 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
152 NUM_ENT(sp))) != 0)
153 goto out;
154
155 if (l_update) {
156 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
157 memcpy(lp, _lp, file_dbp->pgsize);
158 lp->lsn = *lsnp;
159 }
160
161 if (r_update) {
162 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
163 memcpy(rp, _rp, file_dbp->pgsize);
164 rp->lsn = *lsnp;
165 }
166
167 /*
168 * Drop the latches on the lower level pages before
169 * getting an exclusive latch on the higher level page.
170 */
171 if (lp != NULL && (ret = __memp_fput(mpf,
172 ip, lp, file_dbp->priority)) && ret == 0)
173 goto out;
174 lp = NULL;
175 if (rp != NULL && (ret = __memp_fput(mpf,
176 ip, rp, file_dbp->priority)) && ret == 0)
177 goto out;
178 rp = NULL;
179 /*
180 * If the parent page is wrong, update it.
181 * For recno the insert into an existing parent
182 * was logged separately.
183 * If it is a root page update initialize the page and
184 * update the record counts if needed.
185 * Then insert the record for the right hand child page.
186 */
187 if (p_update) {
188 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
189
190 if (rootsplit) {
191 P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID,
192 PGNO_INVALID, _lp->level + 1,
193 (opflags & SPL_RECNO) ?
194 P_IRECNO : P_IBTREE);
195 if (opflags & SPL_NRECS) {
196 RE_NREC_SET(pp,
197 __bam_total(file_dbp, _lp) +
198 __bam_total(file_dbp, _rp));
199 }
200 if ((ret = __db_pitem_nolog(dbc, pp,
201 argp->pindx, argp->pentry.size,
202 &argp->pentry, NULL)) != 0)
203 goto out;
204
205 } else if (opflags & SPL_NRECS)
206 goto recno;
207 if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1,
208 argp->rentry.size, &argp->rentry, NULL)) != 0)
209 goto out;
210 recno: pp->lsn = *lsnp;
211 }
212
213 check_next: /*
214 * Finally, redo the next-page link if necessary. This is of
215 * interest only if it wasn't a root split -- inserting a new
216 * page in the tree requires that any following page have its
217 * previous-page pointer updated to our new page. The next
218 * page must exist because we're redoing the operation.
219 */
220 if (!rootsplit && argp->npgno != PGNO_INVALID) {
221 REC_FGET(mpf, ip, argp->npgno, &np, done);
222 cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
223 CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
224 if (cmp == 0) {
225 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
226 PREV_PGNO(np) = argp->right;
227 np->lsn = *lsnp;
228 }
229 }
230 } else {
231 /*
232 * If it's a root split and the left child ever existed, update
233 * its LSN. Otherwise its the split page. If
234 * right child ever existed, root split or not, update its LSN.
235 * The undo of the page allocation(s) will restore them to the
236 * free list.
237 */
238 if (rootsplit && lp != NULL &&
239 LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
240 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
241 lp->lsn = argp->llsn;
242 }
243 if (rp != NULL &&
244 LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
245 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
246 rp->lsn = argp->rlsn;
247 }
248 /*
249 * Drop the lower level pages before getting an exclusive
250 * latch on the parent.
251 */
252 if (rp != NULL && (ret = __memp_fput(mpf,
253 ip, rp, file_dbp->priority)))
254 goto out;
255 rp = NULL;
256
257 /*
258 * Check the state of the split page. If its a rootsplit
259 * then that's the rootpage otherwise its the left page.
260 */
261 if (rootsplit) {
262 DB_ASSERT(env, pgno == argp->ppgno);
263 if (lp != NULL && (ret = __memp_fput(mpf, ip,
264 lp, file_dbp->priority)) != 0)
265 goto out;
266 lp = pp;
267 pp = NULL;
268 }
269 if (lp != NULL) {
270 cmp = LOG_COMPARE(lsnp, &LSN(lp));
271 CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp);
272 if (cmp == 0) {
273 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
274 memcpy(lp, argp->pg.data, argp->pg.size);
275 if ((ret = __memp_fput(mpf,
276 ip, lp, file_dbp->priority)))
277 goto out;
278 lp = NULL;
279 }
280 }
281
282 /*
283 * Next we can update the parent removing the new index.
284 * If this has record numbers, then we log this separately.
285 */
286 if (pp != NULL) {
287 DB_ASSERT(env, !rootsplit);
288 cmp = LOG_COMPARE(lsnp, &LSN(pp));
289 CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp);
290 if (cmp == 0) {
291 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
292 if ((opflags & SPL_NRECS) == 0) {
293 size = BINTERNAL_SIZE(
294 GET_BINTERNAL(file_dbp,
295 pp, argp->pindx + 1)->len);
296
297 if ((ret = __db_ditem(dbc, pp,
298 argp->pindx + 1, size)) != 0)
299 goto out;
300 }
301 pp->lsn = argp->plsn;
302 }
303 }
304
305 /*
306 * Finally, undo the next-page link if necessary. This is of
307 * interest only if it wasn't a root split -- inserting a new
308 * page in the tree requires that any following page have its
309 * previous-page pointer updated to our new page. Since it's
310 * possible that the next-page never existed, we ignore it as
311 * if there's nothing to undo.
312 */
313 if (!rootsplit && argp->npgno != PGNO_INVALID) {
314 if ((ret = __memp_fget(mpf, &argp->npgno,
315 ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
316 np = NULL;
317 goto done;
318 }
319 if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
320 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
321 PREV_PGNO(np) = argp->left;
322 np->lsn = argp->nlsn;
323 }
324 }
325 }
326
327 done: *lsnp = argp->prev_lsn;
328 ret = 0;
329
330 out: /* Free any pages that are left. */
331 if (lp != NULL && (t_ret = __memp_fput(mpf,
332 ip, lp, file_dbp->priority)) != 0 && ret == 0)
333 ret = t_ret;
334 if (np != NULL && (t_ret = __memp_fput(mpf,
335 ip, np, file_dbp->priority)) != 0 && ret == 0)
336 ret = t_ret;
337 if (rp != NULL && (t_ret = __memp_fput(mpf,
338 ip, rp, file_dbp->priority)) != 0 && ret == 0)
339 ret = t_ret;
340 if (pp != NULL && (t_ret = __memp_fput(mpf,
341 ip, pp, file_dbp->priority)) != 0 && ret == 0)
342 ret = t_ret;
343
344 /* Free any allocated space. */
345 if (_lp != NULL)
346 __os_free(env, _lp);
347 if (_rp != NULL)
348 __os_free(env, _rp);
349 if (sp != NULL)
350 __os_free(env, sp);
351
352 REC_CLOSE;
353 }
354 /*
355 * __bam_split_48_recover --
356 * Recovery function for split.
357 *
358 * PUBLIC: int __bam_split_48_recover
359 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
360 */
361 int
__bam_split_48_recover(env,dbtp,lsnp,op,info)362 __bam_split_48_recover(env, dbtp, lsnp, op, info)
363 ENV *env;
364 DBT *dbtp;
365 DB_LSN *lsnp;
366 db_recops op;
367 void *info;
368 {
369 __bam_split_48_args *argp;
370 DB_THREAD_INFO *ip;
371 DB *file_dbp;
372 DBC *dbc;
373 DB_LSN *plsnp;
374 DB_MPOOLFILE *mpf;
375 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
376 db_pgno_t pgno, parent_pgno;
377 u_int32_t ptype, size;
378 int cmp, l_update, p_update, r_update, ret, rootsplit, t_ret;
379
380 ip = ((DB_TXNHEAD *)info)->thread_info;
381 REC_PRINT(__bam_split_print);
382
383 _lp = lp = np = pp = _rp = rp = NULL;
384 sp = NULL;
385
386 REC_INTRO(__bam_split_48_read, ip, 0);
387
388 if ((ret = __db_cursor_int(file_dbp, ip, NULL,
389 (argp->opflags & SPL_RECNO) ? DB_RECNO : DB_BTREE,
390 PGNO_INVALID, DB_RECOVER, NULL, &dbc)) != 0)
391 goto out;
392 if (argp->opflags & SPL_NRECS)
393 F_SET((BTREE_CURSOR *)dbc->internal, C_RECNUM);
394
395 /*
396 * There are two kinds of splits that we have to recover from. The
397 * first is a root-page split, where the root page is split from a
398 * leaf page into an internal page and two new leaf pages are created.
399 * The second is where a page is split into two pages, and a new key
400 * is inserted into the parent page.
401 *
402 * DBTs are not aligned in log records, so we need to copy the page
403 * so that we can access fields within it throughout this routine.
404 * Although we could hardcode the unaligned copies in this routine,
405 * we will be calling into regular btree functions with this page,
406 * so it's got to be aligned. Copying it into allocated memory is
407 * the only way to guarantee this.
408 */
409 if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
410 goto out;
411 memcpy(sp, argp->pg.data, argp->pg.size);
412
413 pgno = PGNO(sp);
414 parent_pgno = argp->ppgno;
415 rootsplit = parent_pgno == pgno;
416
417 /* Get the pages going down the tree. */
418 REC_FGET(mpf, ip, parent_pgno, &pp, left);
419 left: REC_FGET(mpf, ip, argp->left, &lp, right);
420 right: REC_FGET(mpf, ip, argp->right, &rp, redo);
421
422 redo: if (DB_REDO(op)) {
423 l_update = r_update = p_update = 0;
424 /*
425 * Decide if we need to resplit the page.
426 *
427 * If this is a root split, then the root has to exist unless
428 * we have truncated it due to a future deallocation.
429 */
430 if (pp != NULL) {
431 if (rootsplit)
432 plsnp = &LSN(argp->pg.data);
433 else
434 plsnp = &argp->plsn;
435 cmp = LOG_COMPARE(&LSN(pp), plsnp);
436 CHECK_LSN(env, op, cmp, &LSN(pp), plsnp);
437 if (cmp == 0)
438 p_update = 1;
439 }
440
441 if (lp != NULL) {
442 cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
443 CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
444 if (cmp == 0)
445 l_update = 1;
446 }
447
448 if (rp != NULL) {
449 cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
450 CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
451 if (cmp == 0)
452 r_update = 1;
453 }
454
455 if (!p_update && !l_update && !r_update)
456 goto check_next;
457
458 /* Allocate and initialize new left/right child pages. */
459 if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
460 (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
461 goto out;
462 if (rootsplit) {
463 P_INIT(_lp, file_dbp->pgsize, argp->left,
464 PGNO_INVALID,
465 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
466 LEVEL(sp), TYPE(sp));
467 P_INIT(_rp, file_dbp->pgsize, argp->right,
468 ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
469 PGNO_INVALID, LEVEL(sp), TYPE(sp));
470 } else {
471 P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
472 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
473 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
474 LEVEL(sp), TYPE(sp));
475 P_INIT(_rp, file_dbp->pgsize, argp->right,
476 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
477 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
478 LEVEL(sp), TYPE(sp));
479 }
480
481 /* Split the page. */
482 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
483 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
484 NUM_ENT(sp))) != 0)
485 goto out;
486
487 if (l_update) {
488 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
489 memcpy(lp, _lp, file_dbp->pgsize);
490 lp->lsn = *lsnp;
491 }
492
493 if (r_update) {
494 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
495 memcpy(rp, _rp, file_dbp->pgsize);
496 rp->lsn = *lsnp;
497 }
498
499 /*
500 * Drop the latches on the lower level pages before
501 * getting an exclusive latch on the higher level page.
502 */
503 if (lp != NULL && (ret = __memp_fput(mpf,
504 ip, lp, file_dbp->priority)) && ret == 0)
505 goto out;
506 lp = NULL;
507 if (rp != NULL && (ret = __memp_fput(mpf,
508 ip, rp, file_dbp->priority)) && ret == 0)
509 goto out;
510 rp = NULL;
511 /*
512 * If the parent page is wrong, update it.
513 * Initialize the page. If it is a root page update
514 * the record counts if needed and put the first record in.
515 * Then insert the record for the right hand child page.
516 */
517 if (p_update) {
518 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
519 if (argp->opflags & SPL_RECNO)
520 ptype = P_IRECNO;
521 else
522 ptype = P_IBTREE;
523
524 if (rootsplit) {
525 P_INIT(pp, file_dbp->pgsize, pgno, PGNO_INVALID,
526 PGNO_INVALID, _lp->level + 1, ptype);
527 if (argp->opflags & SPL_NRECS) {
528 RE_NREC_SET(pp,
529 __bam_total(file_dbp, _lp) +
530 __bam_total(file_dbp, _rp));
531 }
532 if ((ret = __db_pitem_nolog(dbc, pp,
533 argp->pindx, argp->pentry.size,
534 &argp->pentry, NULL)) != 0)
535 goto out;
536
537 }
538 if ((ret = __db_pitem_nolog(dbc, pp, argp->pindx + 1,
539 argp->rentry.size, &argp->rentry, NULL)) != 0)
540 goto out;
541 pp->lsn = *lsnp;
542 }
543
544 check_next: /*
545 * Finally, redo the next-page link if necessary. This is of
546 * interest only if it wasn't a root split -- inserting a new
547 * page in the tree requires that any following page have its
548 * previous-page pointer updated to our new page. The next
549 * page must exist because we're redoing the operation.
550 */
551 if (!rootsplit && argp->npgno != PGNO_INVALID) {
552 REC_FGET(mpf, ip, argp->npgno, &np, done);
553 cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
554 CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
555 if (cmp == 0) {
556 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
557 PREV_PGNO(np) = argp->right;
558 np->lsn = *lsnp;
559 }
560 }
561 } else {
562 /*
563 * If it's a root split and the left child ever existed, update
564 * its LSN. Otherwise its the split page. If
565 * right child ever existed, root split or not, update its LSN.
566 * The undo of the page allocation(s) will restore them to the
567 * free list.
568 */
569 if (rootsplit && lp != NULL &&
570 LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
571 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
572 lp->lsn = argp->llsn;
573 }
574 if (rp != NULL &&
575 LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
576 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
577 rp->lsn = argp->rlsn;
578 }
579 /*
580 * Drop the lower level pages before getting an exclusive
581 * latch on the parent.
582 */
583 if (rp != NULL && (ret = __memp_fput(mpf,
584 ip, rp, file_dbp->priority)))
585 goto out;
586 rp = NULL;
587
588 /*
589 * Check the state of the split page. If its a rootsplit
590 * then that's the rootpage otherwise its the left page.
591 */
592 if (rootsplit) {
593 DB_ASSERT(env, pgno == argp->ppgno);
594 if (lp != NULL && (ret = __memp_fput(mpf, ip,
595 lp, file_dbp->priority)) != 0)
596 goto out;
597 lp = pp;
598 pp = NULL;
599 }
600 if (lp != NULL) {
601 cmp = LOG_COMPARE(lsnp, &LSN(lp));
602 CHECK_ABORT(env, op, cmp, &LSN(lp), lsnp);
603 if (cmp == 0) {
604 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
605 memcpy(lp, argp->pg.data, argp->pg.size);
606 if ((ret = __memp_fput(mpf,
607 ip, lp, file_dbp->priority)))
608 goto out;
609 lp = NULL;
610 }
611 }
612
613 /*
614 * Next we can update the parent removing the new index.
615 */
616 if (pp != NULL) {
617 DB_ASSERT(env, !rootsplit);
618 cmp = LOG_COMPARE(lsnp, &LSN(pp));
619 CHECK_ABORT(env, op, cmp, &LSN(pp), lsnp);
620 if (cmp == 0) {
621 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
622 if (argp->opflags & SPL_RECNO)
623 size = RINTERNAL_SIZE;
624 else
625 size = BINTERNAL_SIZE(
626 GET_BINTERNAL(file_dbp,
627 pp, argp->pindx + 1)->len);
628
629 if ((ret = __db_ditem(dbc, pp,
630 argp->pindx + 1, size)) != 0)
631 goto out;
632 pp->lsn = argp->plsn;
633 }
634 }
635
636 /*
637 * Finally, undo the next-page link if necessary. This is of
638 * interest only if it wasn't a root split -- inserting a new
639 * page in the tree requires that any following page have its
640 * previous-page pointer updated to our new page. Since it's
641 * possible that the next-page never existed, we ignore it as
642 * if there's nothing to undo.
643 */
644 if (!rootsplit && argp->npgno != PGNO_INVALID) {
645 if ((ret = __memp_fget(mpf, &argp->npgno,
646 ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
647 np = NULL;
648 goto done;
649 }
650 if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
651 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
652 PREV_PGNO(np) = argp->left;
653 np->lsn = argp->nlsn;
654 }
655 }
656 }
657
658 done: *lsnp = argp->prev_lsn;
659 ret = 0;
660
661 out: /* Free any pages that are left. */
662 if (lp != NULL && (t_ret = __memp_fput(mpf,
663 ip, lp, file_dbp->priority)) != 0 && ret == 0)
664 ret = t_ret;
665 if (np != NULL && (t_ret = __memp_fput(mpf,
666 ip, np, file_dbp->priority)) != 0 && ret == 0)
667 ret = t_ret;
668 if (rp != NULL && (t_ret = __memp_fput(mpf,
669 ip, rp, file_dbp->priority)) != 0 && ret == 0)
670 ret = t_ret;
671 if (pp != NULL && (t_ret = __memp_fput(mpf,
672 ip, pp, file_dbp->priority)) != 0 && ret == 0)
673 ret = t_ret;
674
675 /* Free any allocated space. */
676 if (_lp != NULL)
677 __os_free(env, _lp);
678 if (_rp != NULL)
679 __os_free(env, _rp);
680 if (sp != NULL)
681 __os_free(env, sp);
682
683 REC_CLOSE;
684 }
685 /*
686 * __bam_split_recover --
687 * Recovery function for split.
688 *
689 * PUBLIC: int __bam_split_42_recover
690 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
691 */
692 int
__bam_split_42_recover(env,dbtp,lsnp,op,info)693 __bam_split_42_recover(env, dbtp, lsnp, op, info)
694 ENV *env;
695 DBT *dbtp;
696 DB_LSN *lsnp;
697 db_recops op;
698 void *info;
699 {
700 __bam_split_42_args *argp;
701 DB_THREAD_INFO *ip;
702 DB *file_dbp;
703 DBC *dbc;
704 DB_MPOOLFILE *mpf;
705 PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
706 db_pgno_t pgno, root_pgno;
707 u_int32_t ptype;
708 int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret;
709
710 ip = ((DB_TXNHEAD *)info)->thread_info;
711 REC_PRINT(__bam_split_print);
712
713 _lp = lp = np = pp = _rp = rp = NULL;
714 sp = NULL;
715
716 REC_INTRO(__bam_split_42_read, ip, 0);
717
718 /*
719 * There are two kinds of splits that we have to recover from. The
720 * first is a root-page split, where the root page is split from a
721 * leaf page into an internal page and two new leaf pages are created.
722 * The second is where a page is split into two pages, and a new key
723 * is inserted into the parent page.
724 *
725 * DBTs are not aligned in log records, so we need to copy the page
726 * so that we can access fields within it throughout this routine.
727 * Although we could hardcode the unaligned copies in this routine,
728 * we will be calling into regular btree functions with this page,
729 * so it's got to be aligned. Copying it into allocated memory is
730 * the only way to guarantee this.
731 */
732 if ((ret = __os_malloc(env, argp->pg.size, &sp)) != 0)
733 goto out;
734 memcpy(sp, argp->pg.data, argp->pg.size);
735
736 pgno = PGNO(sp);
737 root_pgno = argp->root_pgno;
738 rootsplit = root_pgno != PGNO_INVALID;
739 REC_FGET(mpf, ip, argp->left, &lp, right);
740 right: REC_FGET(mpf, ip, argp->right, &rp, redo);
741
742 redo: if (DB_REDO(op)) {
743 l_update = r_update = p_update = 0;
744 /*
745 * Decide if we need to resplit the page.
746 *
747 * If this is a root split, then the root has to exist unless
748 * we have truncated it due to a future deallocation.
749 */
750 if (rootsplit) {
751 REC_FGET(mpf, ip, root_pgno, &pp, do_left);
752 cmp = LOG_COMPARE(&LSN(pp), &LSN(argp->pg.data));
753 CHECK_LSN(env, op,
754 cmp, &LSN(pp), &LSN(argp->pg.data));
755 p_update = cmp == 0;
756 }
757
758 do_left: if (lp != NULL) {
759 cmp = LOG_COMPARE(&LSN(lp), &argp->llsn);
760 CHECK_LSN(env, op, cmp, &LSN(lp), &argp->llsn);
761 if (cmp == 0)
762 l_update = 1;
763 }
764
765 if (rp != NULL) {
766 cmp = LOG_COMPARE(&LSN(rp), &argp->rlsn);
767 CHECK_LSN(env, op, cmp, &LSN(rp), &argp->rlsn);
768 if (cmp == 0)
769 r_update = 1;
770 }
771
772 if (!p_update && !l_update && !r_update)
773 goto check_next;
774
775 /* Allocate and initialize new left/right child pages. */
776 if ((ret = __os_malloc(env, file_dbp->pgsize, &_lp)) != 0 ||
777 (ret = __os_malloc(env, file_dbp->pgsize, &_rp)) != 0)
778 goto out;
779 if (rootsplit) {
780 P_INIT(_lp, file_dbp->pgsize, argp->left,
781 PGNO_INVALID,
782 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
783 LEVEL(sp), TYPE(sp));
784 P_INIT(_rp, file_dbp->pgsize, argp->right,
785 ISINTERNAL(sp) ? PGNO_INVALID : argp->left,
786 PGNO_INVALID, LEVEL(sp), TYPE(sp));
787 } else {
788 P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
789 ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
790 ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
791 LEVEL(sp), TYPE(sp));
792 P_INIT(_rp, file_dbp->pgsize, argp->right,
793 ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
794 ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
795 LEVEL(sp), TYPE(sp));
796 }
797
798 /* Split the page. */
799 if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
800 (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
801 NUM_ENT(sp))) != 0)
802 goto out;
803
804 if (l_update) {
805 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
806 memcpy(lp, _lp, file_dbp->pgsize);
807 lp->lsn = *lsnp;
808 if ((ret = __memp_fput(mpf,
809 ip, lp, file_dbp->priority)) != 0)
810 goto out;
811 lp = NULL;
812 }
813
814 if (r_update) {
815 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
816 memcpy(rp, _rp, file_dbp->pgsize);
817 rp->lsn = *lsnp;
818 if ((ret = __memp_fput(mpf,
819 ip, rp, file_dbp->priority)) != 0)
820 goto out;
821 rp = NULL;
822 }
823
824 /*
825 * If the parent page is wrong, update it. This is of interest
826 * only if it was a root split, since root splits create parent
827 * pages. All other splits modify a parent page, but those are
828 * separately logged and recovered.
829 */
830 if (rootsplit && p_update) {
831 if (IS_BTREE_PAGE(sp)) {
832 ptype = P_IBTREE;
833 rc = argp->opflags & SPL_NRECS ? 1 : 0;
834 } else {
835 ptype = P_IRECNO;
836 rc = 1;
837 }
838
839 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
840 P_INIT(pp, file_dbp->pgsize, root_pgno,
841 PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype);
842 RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) +
843 __bam_total(file_dbp, _rp) : 0);
844
845 pp->lsn = *lsnp;
846 if ((ret = __memp_fput(mpf,
847 ip, pp, file_dbp->priority)) != 0)
848 goto out;
849 pp = NULL;
850 }
851
852 check_next: /*
853 * Finally, redo the next-page link if necessary. This is of
854 * interest only if it wasn't a root split -- inserting a new
855 * page in the tree requires that any following page have its
856 * previous-page pointer updated to our new page. The next
857 * page must exist because we're redoing the operation.
858 */
859 if (!rootsplit && argp->npgno != PGNO_INVALID) {
860 if ((ret = __memp_fget(mpf, &argp->npgno,
861 ip, NULL, 0, &np)) != 0) {
862 if (ret != DB_PAGE_NOTFOUND) {
863 ret = __db_pgerr(
864 file_dbp, argp->npgno, ret);
865 goto out;
866 } else
867 goto done;
868 }
869 cmp = LOG_COMPARE(&LSN(np), &argp->nlsn);
870 CHECK_LSN(env, op, cmp, &LSN(np), &argp->nlsn);
871 if (cmp == 0) {
872 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
873 PREV_PGNO(np) = argp->right;
874 np->lsn = *lsnp;
875 if ((ret = __memp_fput(mpf, ip,
876 np, file_dbp->priority)) != 0)
877 goto out;
878 np = NULL;
879 }
880 }
881 } else {
882 /*
883 * If the split page is wrong, replace its contents with the
884 * logged page contents. If the page doesn't exist, it means
885 * that the create of the page never happened, nor did any of
886 * the adds onto the page that caused the split, and there's
887 * really no undo-ing to be done.
888 */
889 if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
890 DB_MPOOL_EDIT, &pp)) != 0) {
891 pp = NULL;
892 goto lrundo;
893 }
894 if (LOG_COMPARE(lsnp, &LSN(pp)) == 0) {
895 REC_DIRTY(mpf, ip, file_dbp->priority, &pp);
896 memcpy(pp, argp->pg.data, argp->pg.size);
897 if ((ret = __memp_fput(mpf,
898 ip, pp, file_dbp->priority)) != 0)
899 goto out;
900 pp = NULL;
901 }
902
903 /*
904 * If it's a root split and the left child ever existed, update
905 * its LSN. (If it's not a root split, we've updated the left
906 * page already -- it's the same as the split page.) If the
907 * right child ever existed, root split or not, update its LSN.
908 * The undo of the page allocation(s) will restore them to the
909 * free list.
910 */
911 lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
912 if (rootsplit && lp != NULL &&
913 LOG_COMPARE(lsnp, &LSN(lp)) == 0) {
914 REC_DIRTY(mpf, ip, file_dbp->priority, &lp);
915 lp->lsn = argp->llsn;
916 if ((ret = __memp_fput(mpf, ip,
917 lp, file_dbp->priority)) != 0)
918 goto out;
919 lp = NULL;
920 }
921 if (rp != NULL &&
922 LOG_COMPARE(lsnp, &LSN(rp)) == 0) {
923 REC_DIRTY(mpf, ip, file_dbp->priority, &rp);
924 rp->lsn = argp->rlsn;
925 if ((ret = __memp_fput(mpf, ip,
926 rp, file_dbp->priority)) != 0)
927 goto out;
928 rp = NULL;
929 }
930 }
931
932 /*
933 * Finally, undo the next-page link if necessary. This is of
934 * interest only if it wasn't a root split -- inserting a new
935 * page in the tree requires that any following page have its
936 * previous-page pointer updated to our new page. Since it's
937 * possible that the next-page never existed, we ignore it as
938 * if there's nothing to undo.
939 */
940 if (!rootsplit && argp->npgno != PGNO_INVALID) {
941 if ((ret = __memp_fget(mpf, &argp->npgno,
942 ip, NULL, DB_MPOOL_EDIT, &np)) != 0) {
943 np = NULL;
944 goto done;
945 }
946 if (LOG_COMPARE(lsnp, &LSN(np)) == 0) {
947 REC_DIRTY(mpf, ip, file_dbp->priority, &np);
948 PREV_PGNO(np) = argp->left;
949 np->lsn = argp->nlsn;
950 if (__memp_fput(mpf,
951 ip, np, file_dbp->priority))
952 goto out;
953 np = NULL;
954 }
955 }
956 }
957
958 done: *lsnp = argp->prev_lsn;
959 ret = 0;
960
961 out: /* Free any pages that weren't dirtied. */
962 if (pp != NULL && (t_ret = __memp_fput(mpf,
963 ip, pp, file_dbp->priority)) != 0 && ret == 0)
964 ret = t_ret;
965 if (lp != NULL && (t_ret = __memp_fput(mpf,
966 ip, lp, file_dbp->priority)) != 0 && ret == 0)
967 ret = t_ret;
968 if (np != NULL && (t_ret = __memp_fput(mpf,
969 ip, np, file_dbp->priority)) != 0 && ret == 0)
970 ret = t_ret;
971 if (rp != NULL && (t_ret = __memp_fput(mpf,
972 ip, rp, file_dbp->priority)) != 0 && ret == 0)
973 ret = t_ret;
974
975 /* Free any allocated space. */
976 if (_lp != NULL)
977 __os_free(env, _lp);
978 if (_rp != NULL)
979 __os_free(env, _rp);
980 if (sp != NULL)
981 __os_free(env, sp);
982
983 REC_CLOSE;
984 }
985
986 /*
987 * __bam_rsplit_recover --
988 * Recovery function for a reverse split.
989 *
990 * PUBLIC: int __bam_rsplit_recover
991 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
992 */
993 int
__bam_rsplit_recover(env,dbtp,lsnp,op,info)994 __bam_rsplit_recover(env, dbtp, lsnp, op, info)
995 ENV *env;
996 DBT *dbtp;
997 DB_LSN *lsnp;
998 db_recops op;
999 void *info;
1000 {
1001 __bam_rsplit_args *argp;
1002 DB_THREAD_INFO *ip;
1003 DB *file_dbp;
1004 DBC *dbc;
1005 DB_LSN copy_lsn;
1006 DB_MPOOLFILE *mpf;
1007 PAGE *pagep;
1008 db_pgno_t pgno, root_pgno;
1009 db_recno_t rcnt;
1010 int cmp_n, cmp_p, ret;
1011
1012 ip = ((DB_TXNHEAD *)info)->thread_info;
1013 pagep = NULL;
1014 REC_PRINT(__bam_rsplit_print);
1015 REC_INTRO(__bam_rsplit_read, ip, 1);
1016
1017 /* Fix the root page. */
1018 pgno = root_pgno = argp->root_pgno;
1019 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) != 0) {
1020 if (ret != DB_PAGE_NOTFOUND) {
1021 ret = __db_pgerr(file_dbp, pgno, ret);
1022 goto out;
1023 } else
1024 goto do_page;
1025 }
1026
1027 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1028 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->rootlsn);
1029 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->rootlsn);
1030 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1031 if (cmp_p == 0 && DB_REDO(op)) {
1032 /*
1033 * Copy the new data to the root page. If it is not now a
1034 * leaf page we need to restore the record number. We could
1035 * try to determine if C_RECNUM was set in the btree, but
1036 * that's not really necessary since the field is not used
1037 * otherwise.
1038 */
1039 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1040 rcnt = RE_NREC(pagep);
1041 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
1042 if (LEVEL(pagep) > LEAFLEVEL)
1043 RE_NREC_SET(pagep, rcnt);
1044 pagep->pgno = root_pgno;
1045 pagep->lsn = *lsnp;
1046 } else if (cmp_n == 0 && DB_UNDO(op)) {
1047 /* Need to undo update described. */
1048 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1049 P_INIT(pagep, file_dbp->pgsize, root_pgno,
1050 argp->nrec, PGNO_INVALID, pagep->level + 1,
1051 IS_BTREE_PAGE(pagep) ? P_IBTREE : P_IRECNO);
1052 if ((ret = __db_pitem(dbc, pagep, 0,
1053 argp->rootent.size, &argp->rootent, NULL)) != 0)
1054 goto out;
1055 pagep->lsn = argp->rootlsn;
1056 }
1057 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1058 goto out;
1059 pagep = NULL;
1060
1061 do_page:
1062 /*
1063 * Fix the page copied over the root page. It's possible that the
1064 * page never made it to disk, or was truncated so if the page
1065 * doesn't exist, it's okay and there's nothing further to do.
1066 */
1067 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1068 if (ret != DB_PAGE_NOTFOUND) {
1069 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1070 goto out;
1071 } else
1072 goto done;
1073 }
1074 (void)__ua_memcpy(©_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
1075 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1076 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
1077 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
1078 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1079 if (cmp_p == 0 && DB_REDO(op)) {
1080 /* Need to redo update described. */
1081 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1082 pagep->lsn = *lsnp;
1083 } else if (cmp_n == 0 && DB_UNDO(op)) {
1084 /* Need to undo update described. */
1085 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1086 memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
1087 }
1088 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1089 goto out;
1090 pagep = NULL;
1091
1092 done: *lsnp = argp->prev_lsn;
1093 ret = 0;
1094
1095 out: if (pagep != NULL)
1096 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
1097 REC_CLOSE;
1098 }
1099
1100 /*
1101 * __bam_adj_recover --
1102 * Recovery function for adj.
1103 *
1104 * PUBLIC: int __bam_adj_recover
1105 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1106 */
1107 int
__bam_adj_recover(env,dbtp,lsnp,op,info)1108 __bam_adj_recover(env, dbtp, lsnp, op, info)
1109 ENV *env;
1110 DBT *dbtp;
1111 DB_LSN *lsnp;
1112 db_recops op;
1113 void *info;
1114 {
1115 __bam_adj_args *argp;
1116 DB_THREAD_INFO *ip;
1117 DB *file_dbp;
1118 DBC *dbc;
1119 DB_MPOOLFILE *mpf;
1120 PAGE *pagep;
1121 int cmp_n, cmp_p, ret;
1122
1123 ip = ((DB_TXNHEAD *)info)->thread_info;
1124 pagep = NULL;
1125 REC_PRINT(__bam_adj_print);
1126 REC_INTRO(__bam_adj_read, ip, 1);
1127
1128 /* Get the page; if it never existed and we're undoing, we're done. */
1129 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1130 if (ret != DB_PAGE_NOTFOUND) {
1131 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1132 goto out;
1133 } else
1134 goto done;
1135 }
1136
1137 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1138 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1139 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1140 if (cmp_p == 0 && DB_REDO(op)) {
1141 /* Need to redo update described. */
1142 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1143 if ((ret = __bam_adjindx(dbc,
1144 pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
1145 goto out;
1146
1147 LSN(pagep) = *lsnp;
1148 } else if (cmp_n == 0 && DB_UNDO(op)) {
1149 /* Need to undo update described. */
1150 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1151 if ((ret = __bam_adjindx(dbc,
1152 pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
1153 goto out;
1154
1155 LSN(pagep) = argp->lsn;
1156 }
1157 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1158 goto out;
1159 pagep = NULL;
1160
1161 done: *lsnp = argp->prev_lsn;
1162 ret = 0;
1163
1164 out: if (pagep != NULL)
1165 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
1166 REC_CLOSE;
1167 }
1168
1169 /*
1170 * __bam_cadjust_recover --
1171 * Recovery function for the adjust of a count change in an internal
1172 * page.
1173 *
1174 * PUBLIC: int __bam_cadjust_recover
1175 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1176 */
1177 int
__bam_cadjust_recover(env,dbtp,lsnp,op,info)1178 __bam_cadjust_recover(env, dbtp, lsnp, op, info)
1179 ENV *env;
1180 DBT *dbtp;
1181 DB_LSN *lsnp;
1182 db_recops op;
1183 void *info;
1184 {
1185 __bam_cadjust_args *argp;
1186 DB_THREAD_INFO *ip;
1187 DB *file_dbp;
1188 DBC *dbc;
1189 DB_MPOOLFILE *mpf;
1190 PAGE *pagep;
1191 int cmp_n, cmp_p, ret;
1192
1193 ip = ((DB_TXNHEAD *)info)->thread_info;
1194 pagep = NULL;
1195 REC_PRINT(__bam_cadjust_print);
1196 REC_INTRO(__bam_cadjust_read, ip, 0);
1197
1198 /* Get the page; if it never existed and we're undoing, we're done. */
1199 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1200 if (ret != DB_PAGE_NOTFOUND) {
1201 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1202 goto out;
1203 } else
1204 goto done;
1205 }
1206
1207 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1208 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1209 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1210 if (cmp_p == 0 && DB_REDO(op)) {
1211 /* Need to redo update described. */
1212 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1213 if (IS_BTREE_PAGE(pagep)) {
1214 GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
1215 argp->adjust;
1216 if (argp->opflags & CAD_UPDATEROOT)
1217 RE_NREC_ADJ(pagep, argp->adjust);
1218 } else {
1219 GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
1220 argp->adjust;
1221 if (argp->opflags & CAD_UPDATEROOT)
1222 RE_NREC_ADJ(pagep, argp->adjust);
1223 }
1224
1225 LSN(pagep) = *lsnp;
1226 } else if (cmp_n == 0 && DB_UNDO(op)) {
1227 /* Need to undo update described. */
1228 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1229 if (IS_BTREE_PAGE(pagep)) {
1230 GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
1231 argp->adjust;
1232 if (argp->opflags & CAD_UPDATEROOT)
1233 RE_NREC_ADJ(pagep, -(argp->adjust));
1234 } else {
1235 GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
1236 argp->adjust;
1237 if (argp->opflags & CAD_UPDATEROOT)
1238 RE_NREC_ADJ(pagep, -(argp->adjust));
1239 }
1240 LSN(pagep) = argp->lsn;
1241 }
1242 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1243 goto out;
1244 pagep = NULL;
1245
1246 done: *lsnp = argp->prev_lsn;
1247 ret = 0;
1248
1249 out: if (pagep != NULL)
1250 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1251 REC_CLOSE;
1252 }
1253
1254 /*
1255 * __bam_cdel_recover --
1256 * Recovery function for the intent-to-delete of a cursor record.
1257 *
1258 * PUBLIC: int __bam_cdel_recover
1259 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1260 */
1261 int
__bam_cdel_recover(env,dbtp,lsnp,op,info)1262 __bam_cdel_recover(env, dbtp, lsnp, op, info)
1263 ENV *env;
1264 DBT *dbtp;
1265 DB_LSN *lsnp;
1266 db_recops op;
1267 void *info;
1268 {
1269 __bam_cdel_args *argp;
1270 DB_THREAD_INFO *ip;
1271 DB *file_dbp;
1272 DBC *dbc;
1273 DB_MPOOLFILE *mpf;
1274 PAGE *pagep;
1275 u_int32_t indx;
1276 int cmp_n, cmp_p, ret;
1277
1278 ip = ((DB_TXNHEAD *)info)->thread_info;
1279 pagep = NULL;
1280 REC_PRINT(__bam_cdel_print);
1281 REC_INTRO(__bam_cdel_read, ip, 0);
1282
1283 /* Get the page; if it never existed and we're undoing, we're done. */
1284 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1285 if (ret != DB_PAGE_NOTFOUND) {
1286 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1287 goto out;
1288 } else
1289 goto done;
1290 }
1291
1292 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1293 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1294 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1295 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1296 if (cmp_p == 0 && DB_REDO(op)) {
1297 /* Need to redo update described. */
1298 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1299 indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
1300 B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type);
1301
1302 LSN(pagep) = *lsnp;
1303 } else if (cmp_n == 0 && DB_UNDO(op)) {
1304 /* Need to undo update described. */
1305 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1306 indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
1307 B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type);
1308
1309 if ((ret = __bam_ca_delete(
1310 file_dbp, argp->pgno, argp->indx, 0, NULL)) != 0)
1311 goto out;
1312
1313 LSN(pagep) = argp->lsn;
1314 }
1315 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1316 goto out;
1317 pagep = NULL;
1318
1319 done: *lsnp = argp->prev_lsn;
1320 ret = 0;
1321
1322 out: if (pagep != NULL)
1323 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1324 REC_CLOSE;
1325 }
1326
1327 /*
1328 * __bam_repl_recover --
1329 * Recovery function for page item replacement.
1330 *
1331 * PUBLIC: int __bam_repl_recover
1332 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1333 */
1334 int
__bam_repl_recover(env,dbtp,lsnp,op,info)1335 __bam_repl_recover(env, dbtp, lsnp, op, info)
1336 ENV *env;
1337 DBT *dbtp;
1338 DB_LSN *lsnp;
1339 db_recops op;
1340 void *info;
1341 {
1342 __bam_repl_args *argp;
1343 DB_THREAD_INFO *ip;
1344 BKEYDATA *bk;
1345 DB *file_dbp;
1346 DBC *dbc;
1347 DBT dbt;
1348 DB_MPOOLFILE *mpf;
1349 PAGE *pagep;
1350 int cmp_n, cmp_p, ret;
1351 u_int32_t len;
1352 u_int8_t *dp, *p;
1353
1354 ip = ((DB_TXNHEAD *)info)->thread_info;
1355 pagep = NULL;
1356 REC_PRINT(__bam_repl_print);
1357 REC_INTRO(__bam_repl_read, ip, 1);
1358
1359 /* Get the page; if it never existed and we're undoing, we're done. */
1360 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1361 if (ret != DB_PAGE_NOTFOUND) {
1362 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1363 goto out;
1364 } else
1365 goto done;
1366 }
1367
1368 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1369 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1370 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1371 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1372 if (cmp_p == 0 && DB_REDO(op)) {
1373 /*
1374 * Need to redo update described.
1375 *
1376 * Re-build the replacement item.
1377 */
1378 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1379 bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
1380 dp = bk->data;
1381 len = bk->len;
1382 memset(&dbt, 0, sizeof(dbt));
1383 dbt.size = argp->prefix + argp->suffix + argp->repl.size;
1384 if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
1385 goto out;
1386 p = dbt.data;
1387 memcpy(p, dp, argp->prefix);
1388 p += argp->prefix;
1389 memcpy(p, argp->repl.data, argp->repl.size);
1390 p += argp->repl.size;
1391 memcpy(p, dp + (len - argp->suffix), argp->suffix);
1392
1393 ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0);
1394 __os_free(env, dbt.data);
1395 if (ret != 0)
1396 goto out;
1397
1398 LSN(pagep) = *lsnp;
1399 } else if (cmp_n == 0 && DB_UNDO(op)) {
1400 /*
1401 * Need to undo update described.
1402 *
1403 * Re-build the original item.
1404 */
1405 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1406 bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
1407 dp = bk->data;
1408 len = bk->len;
1409 memset(&dbt, 0, sizeof(dbt));
1410 dbt.size = argp->prefix + argp->suffix + argp->orig.size;
1411 if ((ret = __os_malloc(env, dbt.size, &dbt.data)) != 0)
1412 goto out;
1413 p = dbt.data;
1414 memcpy(p, dp, argp->prefix);
1415 p += argp->prefix;
1416 memcpy(p, argp->orig.data, argp->orig.size);
1417 p += argp->orig.size;
1418 memcpy(p, dp + (len - argp->suffix), argp->suffix);
1419
1420 ret = __bam_ritem(dbc, pagep, argp->indx, &dbt, 0);
1421 __os_free(env, dbt.data);
1422 if (ret != 0)
1423 goto out;
1424
1425 /* Reset the deleted flag, if necessary. */
1426 if (argp->isdeleted)
1427 B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type);
1428
1429 LSN(pagep) = argp->lsn;
1430 }
1431 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1432 goto out;
1433 pagep = NULL;
1434
1435 done: *lsnp = argp->prev_lsn;
1436 ret = 0;
1437
1438 out: if (pagep != NULL)
1439 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
1440 REC_CLOSE;
1441 }
1442
1443 /*
1444 * __bam_irep_recover --
1445 * Recovery function for internal page item replacement.
1446 *
1447 * PUBLIC: int __bam_irep_recover
1448 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1449 */
1450 int
__bam_irep_recover(env,dbtp,lsnp,op,info)1451 __bam_irep_recover(env, dbtp, lsnp, op, info)
1452 ENV *env;
1453 DBT *dbtp;
1454 DB_LSN *lsnp;
1455 db_recops op;
1456 void *info;
1457 {
1458 __bam_irep_args *argp;
1459 BINTERNAL *bn;
1460 DB_THREAD_INFO *ip;
1461 DB *file_dbp;
1462 DBC *dbc;
1463 DB_MPOOLFILE *mpf;
1464 PAGE *pagep;
1465 int cmp_n, cmp_p, ret;
1466
1467 ip = ((DB_TXNHEAD *)info)->thread_info;
1468 pagep = NULL;
1469 REC_PRINT(__bam_irep_print);
1470 REC_INTRO(__bam_irep_read, ip, 1);
1471
1472 /* Get the page; if it never existed and we're undoing, we're done. */
1473 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1474 if (ret != DB_PAGE_NOTFOUND) {
1475 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1476 goto out;
1477 } else
1478 goto done;
1479 }
1480
1481 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1482 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1483 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1484 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1485 if (cmp_p == 0 && DB_REDO(op)) {
1486 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1487 bn = (BINTERNAL *)argp->hdr.data;
1488 if ((ret = __bam_ritem_nolog(dbc,
1489 pagep, argp->indx, &argp->hdr, &argp->data, bn->type)) != 0)
1490 goto out;
1491 LSN(pagep) = *lsnp;
1492 } else if (cmp_n == 0 && DB_UNDO(op)) {
1493 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1494 bn = (BINTERNAL *)argp->old.data;
1495 if ((ret = __bam_ritem_nolog(dbc,
1496 pagep, argp->indx, &argp->old, NULL, bn->type)) != 0)
1497 goto out;
1498 LSN(pagep) = argp->lsn;
1499 }
1500
1501 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1502 goto out;
1503 pagep = NULL;
1504
1505 done: *lsnp = argp->prev_lsn;
1506 ret = 0;
1507
1508 out: if (pagep != NULL)
1509 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
1510 REC_CLOSE;
1511 }
1512
1513 /*
1514 * __bam_root_recover --
1515 * Recovery function for setting the root page on the meta-data page.
1516 *
1517 * PUBLIC: int __bam_root_recover
1518 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1519 */
1520 int
__bam_root_recover(env,dbtp,lsnp,op,info)1521 __bam_root_recover(env, dbtp, lsnp, op, info)
1522 ENV *env;
1523 DBT *dbtp;
1524 DB_LSN *lsnp;
1525 db_recops op;
1526 void *info;
1527 {
1528 __bam_root_args *argp;
1529 DB_THREAD_INFO *ip;
1530 BTMETA *meta;
1531 DB *file_dbp;
1532 DBC *dbc;
1533 DB_MPOOLFILE *mpf;
1534 int cmp_n, cmp_p, ret;
1535
1536 ip = ((DB_TXNHEAD *)info)->thread_info;
1537 meta = NULL;
1538 REC_PRINT(__bam_root_print);
1539 REC_INTRO(__bam_root_read, ip, 0);
1540
1541 if ((ret = __memp_fget(mpf, &argp->meta_pgno, ip, NULL,
1542 0, &meta)) != 0) {
1543 if (ret != DB_PAGE_NOTFOUND) {
1544 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
1545 goto out;
1546 } else
1547 goto done;
1548 }
1549
1550 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1551 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1552 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1553 CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
1554 if (cmp_p == 0 && DB_REDO(op)) {
1555 /* Need to redo update described. */
1556 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1557 meta->root = argp->root_pgno;
1558 meta->dbmeta.lsn = *lsnp;
1559 ((BTREE *)file_dbp->bt_internal)->bt_root = meta->root;
1560 } else if (cmp_n == 0 && DB_UNDO(op)) {
1561 /* Nothing to undo except lsn. */
1562 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1563 meta->dbmeta.lsn = argp->meta_lsn;
1564 }
1565 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1566 goto out;
1567 meta = NULL;
1568
1569 done: *lsnp = argp->prev_lsn;
1570 ret = 0;
1571
1572 out: if (meta != NULL)
1573 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1574 REC_CLOSE;
1575 }
1576
1577 /*
1578 * __bam_curadj_recover --
1579 * Transaction abort function to undo cursor adjustments.
1580 * This should only be triggered by subtransaction aborts.
1581 *
1582 * PUBLIC: int __bam_curadj_recover
1583 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1584 */
1585 int
__bam_curadj_recover(env,dbtp,lsnp,op,info)1586 __bam_curadj_recover(env, dbtp, lsnp, op, info)
1587 ENV *env;
1588 DBT *dbtp;
1589 DB_LSN *lsnp;
1590 db_recops op;
1591 void *info;
1592 {
1593 __bam_curadj_args *argp;
1594 DB_THREAD_INFO *ip;
1595 DB *file_dbp;
1596 DBC *dbc;
1597 DB_MPOOLFILE *mpf;
1598 int ret;
1599
1600 COMPQUIET(mpf, NULL);
1601
1602 ip = ((DB_TXNHEAD *)info)->thread_info;
1603 REC_PRINT(__bam_curadj_print);
1604 REC_INTRO(__bam_curadj_read, ip, 1);
1605
1606 ret = 0;
1607 if (op != DB_TXN_ABORT)
1608 goto done;
1609
1610 switch (argp->mode) {
1611 case DB_CA_DI:
1612 if ((ret = __bam_ca_di(dbc, argp->from_pgno,
1613 argp->from_indx, -(int)argp->first_indx)) != 0)
1614 goto out;
1615 break;
1616 case DB_CA_DUP:
1617 if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx,
1618 argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
1619 goto out;
1620 break;
1621
1622 case DB_CA_RSPLIT:
1623 if ((ret =
1624 __bam_ca_rsplit(dbc, argp->to_pgno, argp->from_pgno)) != 0)
1625 goto out;
1626 break;
1627
1628 case DB_CA_SPLIT:
1629 if ((ret = __bam_ca_undosplit(file_dbp, argp->from_pgno,
1630 argp->to_pgno, argp->left_pgno, argp->from_indx)) != 0)
1631 goto out;
1632 break;
1633 }
1634
1635 done: *lsnp = argp->prev_lsn;
1636 out: REC_CLOSE;
1637 }
1638
1639 /*
1640 * __bam_rcuradj_recover --
1641 * Transaction abort function to undo cursor adjustments in rrecno.
1642 * This should only be triggered by subtransaction aborts.
1643 *
1644 * PUBLIC: int __bam_rcuradj_recover
1645 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1646 */
1647 int
__bam_rcuradj_recover(env,dbtp,lsnp,op,info)1648 __bam_rcuradj_recover(env, dbtp, lsnp, op, info)
1649 ENV *env;
1650 DBT *dbtp;
1651 DB_LSN *lsnp;
1652 db_recops op;
1653 void *info;
1654 {
1655 __bam_rcuradj_args *argp;
1656 DB_THREAD_INFO *ip;
1657 BTREE_CURSOR *cp;
1658 DB *file_dbp;
1659 DBC *dbc, *rdbc;
1660 DB_MPOOLFILE *mpf;
1661 int ret, t_ret;
1662
1663 COMPQUIET(mpf, NULL);
1664
1665 ip = ((DB_TXNHEAD *)info)->thread_info;
1666 rdbc = NULL;
1667 REC_PRINT(__bam_rcuradj_print);
1668 REC_INTRO(__bam_rcuradj_read, ip, 1);
1669
1670 ret = t_ret = 0;
1671
1672 if (op != DB_TXN_ABORT)
1673 goto done;
1674
1675 /*
1676 * We don't know whether we're in an offpage dup set, and
1677 * thus don't know whether the dbc REC_INTRO has handed us is
1678 * of a reasonable type. It's certainly unset, so if this is
1679 * an offpage dup set, we don't have an OPD cursor. The
1680 * simplest solution is just to allocate a whole new cursor
1681 * for our use; we're only really using it to hold pass some
1682 * state into __ram_ca, and this way we don't need to make
1683 * this function know anything about how offpage dups work.
1684 */
1685 if ((ret = __db_cursor_int(file_dbp, NULL,
1686 NULL, DB_RECNO, argp->root, DB_RECOVER, NULL, &rdbc)) != 0)
1687 goto out;
1688
1689 cp = (BTREE_CURSOR *)rdbc->internal;
1690 F_SET(cp, C_RENUMBER);
1691 cp->recno = argp->recno;
1692
1693 switch (argp->mode) {
1694 case CA_DELETE:
1695 /*
1696 * The way to undo a delete is with an insert. Since
1697 * we're undoing it, the delete flag must be set.
1698 */
1699 F_SET(cp, C_DELETED);
1700 F_SET(cp, C_RENUMBER); /* Just in case. */
1701 cp->order = argp->order;
1702 if ((ret = __ram_ca(rdbc, CA_ICURRENT, NULL)) != 0)
1703 goto out;
1704 break;
1705 case CA_IAFTER:
1706 case CA_IBEFORE:
1707 case CA_ICURRENT:
1708 /*
1709 * The way to undo an insert is with a delete. The delete
1710 * flag is unset to start with.
1711 */
1712 F_CLR(cp, C_DELETED);
1713 cp->order = INVALID_ORDER;
1714 if ((ret = __ram_ca(rdbc, CA_DELETE, NULL)) != 0)
1715 goto out;
1716 break;
1717 }
1718
1719 done: *lsnp = argp->prev_lsn;
1720 out: if (rdbc != NULL && (t_ret = __dbc_close(rdbc)) != 0 && ret == 0)
1721 ret = t_ret;
1722 REC_CLOSE;
1723 }
1724
1725 /*
1726 * __bam_merge_44_recover --
1727 * Recovery function for merge.
1728 *
1729 * PUBLIC: int __bam_merge_44_recover
1730 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1731 */
1732 int
__bam_merge_44_recover(env,dbtp,lsnp,op,info)1733 __bam_merge_44_recover(env, dbtp, lsnp, op, info)
1734 ENV *env;
1735 DBT *dbtp;
1736 DB_LSN *lsnp;
1737 db_recops op;
1738 void *info;
1739 {
1740 __bam_merge_44_args *argp;
1741 DB_THREAD_INFO *ip;
1742 BKEYDATA *bk;
1743 DB *file_dbp;
1744 DBC *dbc;
1745 DB_MPOOLFILE *mpf;
1746 PAGE *pagep;
1747 db_indx_t indx, *ninp, *pinp;
1748 u_int32_t size;
1749 u_int8_t *bp;
1750 int cmp_n, cmp_p, i, ret;
1751
1752 ip = ((DB_TXNHEAD *)info)->thread_info;
1753 REC_PRINT(__bam_merge_44_print);
1754 REC_INTRO(__bam_merge_44_read, ip, 1);
1755
1756 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1757 if (ret != DB_PAGE_NOTFOUND) {
1758 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1759 goto out;
1760 } else
1761 goto next;
1762 }
1763
1764 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1765 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1766 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1767
1768 if (cmp_p == 0 && DB_REDO(op)) {
1769 /*
1770 * If the header is provided the page is empty, copy the
1771 * needed data.
1772 */
1773 DB_ASSERT(env, argp->hdr.size == 0 || NUM_ENT(pagep) == 0);
1774 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1775 if (argp->hdr.size != 0) {
1776 P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1777 PREV_PGNO(argp->hdr.data),
1778 NEXT_PGNO(argp->hdr.data),
1779 LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
1780 }
1781 if (TYPE(pagep) == P_OVERFLOW) {
1782 OV_REF(pagep) = OV_REF(argp->hdr.data);
1783 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1784 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1785 memcpy(bp, argp->data.data, argp->data.size);
1786 } else {
1787 /* Copy the data segment. */
1788 bp = (u_int8_t *)pagep +
1789 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1790 memcpy(bp, argp->data.data, argp->data.size);
1791
1792 /* Copy index table offset past the current entries. */
1793 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1794 ninp = argp->ind.data;
1795 for (i = 0;
1796 i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1797 *pinp++ = *ninp++
1798 - (file_dbp->pgsize - HOFFSET(pagep));
1799 HOFFSET(pagep) -= argp->data.size;
1800 NUM_ENT(pagep) += i;
1801 }
1802 pagep->lsn = *lsnp;
1803 } else if (cmp_n == 0 && !DB_REDO(op)) {
1804 /*
1805 * Since logging is logical at the page level
1806 * we cannot just truncate the data space. Delete
1807 * the proper number of items from the logical end
1808 * of the page.
1809 */
1810 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1811 for (i = 0; i < (int)(argp->ind.size / sizeof(*ninp)); i++) {
1812 indx = NUM_ENT(pagep) - 1;
1813 if (P_INP(file_dbp, pagep)[indx] ==
1814 P_INP(file_dbp, pagep)[indx - P_INDX]) {
1815 NUM_ENT(pagep)--;
1816 continue;
1817 }
1818 switch (TYPE(pagep)) {
1819 case P_LBTREE:
1820 case P_LRECNO:
1821 case P_LDUP:
1822 bk = GET_BKEYDATA(file_dbp, pagep, indx);
1823 size = BITEM_SIZE(bk);
1824 break;
1825
1826 case P_IBTREE:
1827 size = BINTERNAL_SIZE(
1828 GET_BINTERNAL(file_dbp, pagep, indx)->len);
1829 break;
1830 case P_IRECNO:
1831 size = RINTERNAL_SIZE;
1832 break;
1833
1834 default:
1835 ret = __db_pgfmt(env, PGNO(pagep));
1836 goto out;
1837 }
1838 if ((ret =
1839 __db_ditem(dbc, pagep, indx, size)) != 0)
1840 goto out;
1841 }
1842 if (argp->ind.size == 0)
1843 HOFFSET(pagep) = file_dbp->pgsize;
1844 pagep->lsn = argp->lsn;
1845 }
1846
1847 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1848 goto out;
1849
1850 next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
1851 if (ret != DB_PAGE_NOTFOUND) {
1852 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1853 goto out;
1854 } else
1855 goto done;
1856 }
1857
1858 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1859 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
1860 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
1861
1862 if (cmp_p == 0 && DB_REDO(op)) {
1863 /* Need to truncate the page. */
1864 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1865 HOFFSET(pagep) = file_dbp->pgsize;
1866 NUM_ENT(pagep) = 0;
1867 pagep->lsn = *lsnp;
1868 } else if (cmp_n == 0 && !DB_REDO(op)) {
1869 /* Need to put the data back on the page. */
1870 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1871 if (TYPE(pagep) == P_OVERFLOW) {
1872 OV_REF(pagep) = OV_REF(argp->hdr.data);
1873 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1874 bp = (u_int8_t *) pagep + P_OVERHEAD(file_dbp);
1875 memcpy(bp, argp->data.data, argp->data.size);
1876 } else {
1877 bp = (u_int8_t *)pagep +
1878 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1879 memcpy(bp, argp->data.data, argp->data.size);
1880
1881 /* Copy index table. */
1882 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1883 ninp = argp->ind.data;
1884 for (i = 0;
1885 i < (int)(argp->ind.size / sizeof(*ninp)); i++)
1886 *pinp++ = *ninp++;
1887 HOFFSET(pagep) -= argp->data.size;
1888 NUM_ENT(pagep) = i;
1889 }
1890 pagep->lsn = argp->nlsn;
1891 }
1892
1893 if ((ret = __memp_fput(mpf,
1894 ip, pagep, dbc->priority)) != 0)
1895 goto out;
1896 done:
1897 *lsnp = argp->prev_lsn;
1898 ret = 0;
1899
1900 out: REC_CLOSE;
1901 }
1902
1903 /*
1904 * __bam_relink_43_recover --
1905 * Recovery function for relink.
1906 *
1907 * PUBLIC: int __bam_relink_43_recover
1908 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1909 */
1910 int
__bam_relink_43_recover(env,dbtp,lsnp,op,info)1911 __bam_relink_43_recover(env, dbtp, lsnp, op, info)
1912 ENV *env;
1913 DBT *dbtp;
1914 DB_LSN *lsnp;
1915 db_recops op;
1916 void *info;
1917 {
1918 __bam_relink_43_args *argp;
1919 DB_THREAD_INFO *ip;
1920 DB *file_dbp;
1921 DBC *dbc;
1922 DB_MPOOLFILE *mpf;
1923 PAGE *pagep;
1924 int cmp_n, cmp_p, modified, ret;
1925
1926 ip = ((DB_TXNHEAD *)info)->thread_info;
1927 pagep = NULL;
1928 REC_PRINT(__bam_relink_43_print);
1929 REC_INTRO(__bam_relink_43_read, ip, 0);
1930
1931 /*
1932 * There are up to three pages we need to check -- the page, and the
1933 * previous and next pages, if they existed. For a page add operation,
1934 * the current page is the result of a split and is being recovered
1935 * elsewhere, so all we need do is recover the next page.
1936 */
1937 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1938 if (ret != DB_PAGE_NOTFOUND) {
1939 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1940 goto out;
1941 } else
1942 goto next2;
1943 }
1944
1945 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1946 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
1947 if (cmp_p == 0 && DB_REDO(op)) {
1948 /* Redo the relink. */
1949 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1950 pagep->lsn = *lsnp;
1951 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
1952 /* Undo the relink. */
1953 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1954 pagep->next_pgno = argp->next;
1955 pagep->prev_pgno = argp->prev;
1956 pagep->lsn = argp->lsn;
1957 }
1958 if ((ret = __memp_fput(mpf,
1959 ip, pagep, file_dbp->priority)) != 0)
1960 goto out;
1961 pagep = NULL;
1962
1963 next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
1964 if (ret != DB_PAGE_NOTFOUND) {
1965 ret = __db_pgerr(file_dbp, argp->next, ret);
1966 goto out;
1967 } else
1968 goto prev;
1969 }
1970
1971 modified = 0;
1972 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1973 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
1974 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
1975 if (cmp_p == 0 && DB_REDO(op)) {
1976 /* Redo the remove or undo the add. */
1977 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1978 pagep->prev_pgno = argp->prev;
1979 modified = 1;
1980 } else if (cmp_n == 0 && DB_UNDO(op)) {
1981 /* Undo the remove or redo the add. */
1982 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1983 pagep->prev_pgno = argp->pgno;
1984 modified = 1;
1985 }
1986 if (modified) {
1987 if (DB_UNDO(op))
1988 pagep->lsn = argp->lsn_next;
1989 else
1990 pagep->lsn = *lsnp;
1991 }
1992 if ((ret = __memp_fput(mpf,
1993 ip, pagep, file_dbp->priority)) != 0)
1994 goto out;
1995 pagep = NULL;
1996
1997 prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
1998 if (ret != DB_PAGE_NOTFOUND) {
1999 ret = __db_pgerr(file_dbp, argp->prev, ret);
2000 goto out;
2001 } else
2002 goto done;
2003 }
2004
2005 modified = 0;
2006 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2007 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2008 if (cmp_p == 0 && DB_REDO(op)) {
2009 /* Redo the relink. */
2010 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2011 pagep->next_pgno = argp->next;
2012 modified = 1;
2013 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2014 /* Undo the relink. */
2015 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2016 pagep->next_pgno = argp->pgno;
2017 modified = 1;
2018 }
2019 if (modified) {
2020 if (DB_UNDO(op))
2021 pagep->lsn = argp->lsn_prev;
2022 else
2023 pagep->lsn = *lsnp;
2024 }
2025 if ((ret = __memp_fput(mpf,
2026 ip, pagep, file_dbp->priority)) != 0)
2027 goto out;
2028 pagep = NULL;
2029
2030 done: *lsnp = argp->prev_lsn;
2031 ret = 0;
2032
2033 out: if (pagep != NULL)
2034 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2035 REC_CLOSE;
2036 }
2037