1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 2013 Oracle and/or its affiliates. All rights reserved.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/log.h"
14 #include "dbinc/mp.h"
15 #include "dbinc/lock.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/btree.h"
18 #include "dbinc/hash.h"
19
20 static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *,
21 __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
22 static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *,
23 __db_pg_freedata_42_args *,
24 DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
25
26 /*
27 * PUBLIC: int __db_addrem_recover
28 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
29 *
30 * This log message is generated whenever we add or remove a duplicate
31 * to/from a duplicate page. On recover, we just do the opposite.
32 */
33 int
__db_addrem_recover(env,dbtp,lsnp,op,info)34 __db_addrem_recover(env, dbtp, lsnp, op, info)
35 ENV *env;
36 DBT *dbtp;
37 DB_LSN *lsnp;
38 db_recops op;
39 void *info;
40 {
41 __db_addrem_args *argp;
42 DB_THREAD_INFO *ip;
43 DB *file_dbp;
44 DBC *dbc;
45 DB_MPOOLFILE *mpf;
46 PAGE *pagep;
47 int cmp_n, cmp_p, modified, ret;
48 u_int32_t opcode;
49
50 ip = ((DB_TXNHEAD *)info)->thread_info;
51 pagep = NULL;
52 REC_PRINT(__db_addrem_print);
53 REC_INTRO(__db_addrem_read, ip, 1);
54
55 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
56 modified = 0;
57
58 opcode = OP_MODE_GET(argp->opcode);
59 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
60 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
61 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
62 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
63 if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_DUP) ||
64 (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_DUP)) {
65 /* Need to redo an add, or undo a delete. */
66 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
67 if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
68 argp->hdr.size == 0 ? NULL : &argp->hdr,
69 argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
70 goto out;
71 modified = 1;
72
73 } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_DUP) ||
74 (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_DUP)) {
75 /* Need to undo an add, or redo a delete. */
76 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
77 if ((ret = __db_ditem(dbc,
78 pagep, argp->indx, argp->nbytes)) != 0)
79 goto out;
80 modified = 1;
81 }
82
83 if (modified) {
84 if (DB_REDO(op))
85 LSN(pagep) = *lsnp;
86 else
87 LSN(pagep) = argp->pagelsn;
88 }
89
90 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
91 goto out;
92 pagep = NULL;
93
94 done: *lsnp = argp->prev_lsn;
95 ret = 0;
96
97 out: if (pagep != NULL)
98 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
99 REC_CLOSE;
100 }
101
102 /*
103 * PUBLIC: int __db_addrem_42_recover
104 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
105 *
106 * This log message is generated whenever we add or remove a duplicate
107 * to/from a duplicate page. On recover, we just do the opposite.
108 */
109 int
__db_addrem_42_recover(env,dbtp,lsnp,op,info)110 __db_addrem_42_recover(env, dbtp, lsnp, op, info)
111 ENV *env;
112 DBT *dbtp;
113 DB_LSN *lsnp;
114 db_recops op;
115 void *info;
116 {
117 __db_addrem_42_args *argp;
118 DB_THREAD_INFO *ip;
119 DB *file_dbp;
120 DBC *dbc;
121 DB_MPOOLFILE *mpf;
122 PAGE *pagep;
123 int cmp_n, cmp_p, modified, ret;
124
125 ip = ((DB_TXNHEAD *)info)->thread_info;
126 pagep = NULL;
127 REC_PRINT(__db_addrem_print);
128 REC_INTRO(__db_addrem_42_read, ip, 1);
129
130 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
131 modified = 0;
132
133 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
134 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
135 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
136 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
137 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) ||
138 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) {
139 /* Need to redo an add, or undo a delete. */
140 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
141 if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
142 argp->hdr.size == 0 ? NULL : &argp->hdr,
143 argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
144 goto out;
145 modified = 1;
146
147 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) ||
148 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) {
149 /* Need to undo an add, or redo a delete. */
150 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
151 if ((ret = __db_ditem(dbc,
152 pagep, argp->indx, argp->nbytes)) != 0)
153 goto out;
154 modified = 1;
155 }
156
157 if (modified) {
158 if (DB_REDO(op))
159 LSN(pagep) = *lsnp;
160 else
161 LSN(pagep) = argp->pagelsn;
162 }
163
164 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
165 goto out;
166 pagep = NULL;
167
168 done: *lsnp = argp->prev_lsn;
169 ret = 0;
170
171 out: if (pagep != NULL)
172 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
173 REC_CLOSE;
174 }
175
176 /*
177 * PUBLIC: int __db_big_recover
178 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
179 */
180 int
__db_big_recover(env,dbtp,lsnp,op,info)181 __db_big_recover(env, dbtp, lsnp, op, info)
182 ENV *env;
183 DBT *dbtp;
184 DB_LSN *lsnp;
185 db_recops op;
186 void *info;
187 {
188 __db_big_args *argp;
189 DB_THREAD_INFO *ip;
190 DB *file_dbp;
191 DBC *dbc;
192 DB_MPOOLFILE *mpf;
193 PAGE *pagep;
194 int cmp_n, cmp_p, modified, ret;
195 u_int32_t opcode;
196
197 ip = ((DB_TXNHEAD *)info)->thread_info;
198 pagep = NULL;
199 REC_PRINT(__db_big_print);
200 REC_INTRO(__db_big_read, ip, 0);
201
202 opcode = OP_MODE_GET(argp->opcode);
203 REC_FGET(mpf, ip, argp->pgno, &pagep, ppage);
204 modified = 0;
205
206 /*
207 * There are three pages we need to check. The one on which we are
208 * adding data, the previous one whose next_pointer may have
209 * been updated, and the next one whose prev_pointer may have
210 * been updated.
211 */
212 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
213 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
214 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
215 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
216 if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) ||
217 (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_BIG)) {
218 /* We are either redo-ing an add, or undoing a delete. */
219 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
220 P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
221 argp->next_pgno, 0, P_OVERFLOW);
222 OV_LEN(pagep) = argp->dbt.size;
223 OV_REF(pagep) = 1;
224 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
225 argp->dbt.size);
226 PREV_PGNO(pagep) = argp->prev_pgno;
227 modified = 1;
228 } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_BIG) ||
229 (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_BIG)) {
230 /*
231 * We are either undo-ing an add or redo-ing a delete.
232 * The page is about to be reclaimed in either case, so
233 * there really isn't anything to do here.
234 */
235 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
236 modified = 1;
237 } else if (cmp_p == 0 && DB_REDO(op) && opcode == DB_APPEND_BIG) {
238 /* We are redoing an append. */
239 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
240 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
241 OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
242 OV_LEN(pagep) += argp->dbt.size;
243 modified = 1;
244 } else if (cmp_n == 0 && DB_UNDO(op) && opcode == DB_APPEND_BIG) {
245 /* We are undoing an append. */
246 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
247 OV_LEN(pagep) -= argp->dbt.size;
248 memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
249 OV_LEN(pagep), 0, argp->dbt.size);
250 modified = 1;
251 }
252 if (modified)
253 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
254
255 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
256 pagep = NULL;
257 if (ret != 0)
258 goto out;
259
260 /*
261 * We only delete a whole chain of overflow items, and appends only
262 * apply to a single page. Adding a page is the only case that
263 * needs to update the chain.
264 */
265 ppage: if (opcode != DB_ADD_BIG)
266 goto done;
267
268 /* Now check the previous page. */
269 if (argp->prev_pgno != PGNO_INVALID) {
270 REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
271 modified = 0;
272
273 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
274 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
275 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
276 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
277
278 if (cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) {
279 /* Redo add, undo delete. */
280 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
281 NEXT_PGNO(pagep) = argp->pgno;
282 modified = 1;
283 } else if (cmp_n == 0 &&
284 DB_UNDO(op) && opcode == DB_ADD_BIG) {
285 /* Redo delete, undo add. */
286 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
287 NEXT_PGNO(pagep) = argp->next_pgno;
288 modified = 1;
289 }
290 if (modified)
291 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
292 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
293 pagep = NULL;
294 if (ret != 0)
295 goto out;
296 }
297 pagep = NULL;
298
299 /* Now check the next page. Can only be set on a delete. */
300 npage: if (argp->next_pgno != PGNO_INVALID) {
301 REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
302 modified = 0;
303
304 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
305 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
306 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
307 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
308 if (cmp_p == 0 && DB_REDO(op)) {
309 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
310 PREV_PGNO(pagep) = PGNO_INVALID;
311 modified = 1;
312 } else if (cmp_n == 0 && DB_UNDO(op)) {
313 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
314 PREV_PGNO(pagep) = argp->pgno;
315 modified = 1;
316 }
317 if (modified)
318 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
319 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
320 pagep = NULL;
321 if (ret != 0)
322 goto out;
323 }
324 pagep = NULL;
325
326 done: *lsnp = argp->prev_lsn;
327 ret = 0;
328
329 out: if (pagep != NULL)
330 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
331 REC_CLOSE;
332 }
333
334 /*
335 * PUBLIC: int __db_big_42_recover
336 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
337 */
338 int
__db_big_42_recover(env,dbtp,lsnp,op,info)339 __db_big_42_recover(env, dbtp, lsnp, op, info)
340 ENV *env;
341 DBT *dbtp;
342 DB_LSN *lsnp;
343 db_recops op;
344 void *info;
345 {
346 __db_big_42_args *argp;
347 DB_THREAD_INFO *ip;
348 DB *file_dbp;
349 DBC *dbc;
350 DB_MPOOLFILE *mpf;
351 PAGE *pagep;
352 int cmp_n, cmp_p, modified, ret;
353
354 ip = ((DB_TXNHEAD *)info)->thread_info;
355 pagep = NULL;
356 REC_PRINT(__db_big_print);
357 REC_INTRO(__db_big_42_read, ip, 0);
358
359 REC_FGET(mpf, ip, argp->pgno, &pagep, ppage);
360 modified = 0;
361
362 /*
363 * There are three pages we need to check. The one on which we are
364 * adding data, the previous one whose next_pointer may have
365 * been updated, and the next one whose prev_pointer may have
366 * been updated.
367 */
368 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
369 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
370 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
371 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
372 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) ||
373 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) {
374 /* We are either redo-ing an add, or undoing a delete. */
375 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
376 P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
377 argp->next_pgno, 0, P_OVERFLOW);
378 OV_LEN(pagep) = argp->dbt.size;
379 OV_REF(pagep) = 1;
380 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
381 argp->dbt.size);
382 PREV_PGNO(pagep) = argp->prev_pgno;
383 modified = 1;
384 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) ||
385 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) {
386 /*
387 * We are either undo-ing an add or redo-ing a delete.
388 * The page is about to be reclaimed in either case, so
389 * there really isn't anything to do here.
390 */
391 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
392 modified = 1;
393 } else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) {
394 /* We are redoing an append. */
395 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
396 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
397 OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
398 OV_LEN(pagep) += argp->dbt.size;
399 modified = 1;
400 } else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) {
401 /* We are undoing an append. */
402 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
403 OV_LEN(pagep) -= argp->dbt.size;
404 memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
405 OV_LEN(pagep), 0, argp->dbt.size);
406 modified = 1;
407 }
408 if (modified)
409 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
410
411 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
412 pagep = NULL;
413 if (ret != 0)
414 goto out;
415
416 /*
417 * We only delete a whole chain of overflow items, and appends only
418 * apply to a single page. Adding a page is the only case that
419 * needs to update the chain.
420 */
421 ppage: if (argp->opcode != DB_ADD_BIG)
422 goto done;
423
424 /* Now check the previous page. */
425 if (argp->prev_pgno != PGNO_INVALID) {
426 REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
427 modified = 0;
428
429 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
430 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
431 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
432 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
433
434 if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) {
435 /* Redo add, undo delete. */
436 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
437 NEXT_PGNO(pagep) = argp->pgno;
438 modified = 1;
439 } else if (cmp_n == 0 &&
440 DB_UNDO(op) && argp->opcode == DB_ADD_BIG) {
441 /* Redo delete, undo add. */
442 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
443 NEXT_PGNO(pagep) = argp->next_pgno;
444 modified = 1;
445 }
446 if (modified)
447 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
448 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
449 pagep = NULL;
450 if (ret != 0)
451 goto out;
452 }
453 pagep = NULL;
454
455 /* Now check the next page. Can only be set on a delete. */
456 npage: if (argp->next_pgno != PGNO_INVALID) {
457 REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
458 modified = 0;
459
460 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
461 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
462 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
463 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
464 if (cmp_p == 0 && DB_REDO(op)) {
465 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
466 PREV_PGNO(pagep) = PGNO_INVALID;
467 modified = 1;
468 } else if (cmp_n == 0 && DB_UNDO(op)) {
469 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
470 PREV_PGNO(pagep) = argp->pgno;
471 modified = 1;
472 }
473 if (modified)
474 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
475 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
476 pagep = NULL;
477 if (ret != 0)
478 goto out;
479 }
480 pagep = NULL;
481
482 done: *lsnp = argp->prev_lsn;
483 ret = 0;
484
485 out: if (pagep != NULL)
486 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
487 REC_CLOSE;
488 }
489 /*
490 * __db_ovref_recover --
491 * Recovery function for __db_ovref().
492 *
493 * PUBLIC: int __db_ovref_recover
494 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
495 */
496 int
__db_ovref_recover(env,dbtp,lsnp,op,info)497 __db_ovref_recover(env, dbtp, lsnp, op, info)
498 ENV *env;
499 DBT *dbtp;
500 DB_LSN *lsnp;
501 db_recops op;
502 void *info;
503 {
504 __db_ovref_args *argp;
505 DB_THREAD_INFO *ip;
506 DB *file_dbp;
507 DBC *dbc;
508 DB_MPOOLFILE *mpf;
509 PAGE *pagep;
510 int cmp, ret;
511
512 ip = ((DB_TXNHEAD *)info)->thread_info;
513 pagep = NULL;
514 REC_PRINT(__db_ovref_print);
515 REC_INTRO(__db_ovref_read, ip, 0);
516
517 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
518
519 cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
520 CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
521 if (cmp == 0 && DB_REDO(op)) {
522 /* Need to redo update described. */
523 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
524 OV_REF(pagep) += argp->adjust;
525 pagep->lsn = *lsnp;
526 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
527 /* Need to undo update described. */
528 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
529 OV_REF(pagep) -= argp->adjust;
530 pagep->lsn = argp->lsn;
531 }
532 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
533 pagep = NULL;
534 if (ret != 0)
535 goto out;
536 pagep = NULL;
537
538 done: *lsnp = argp->prev_lsn;
539 ret = 0;
540
541 out: if (pagep != NULL)
542 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
543 REC_CLOSE;
544 }
545
546 /*
547 * __db_debug_recover --
548 * Recovery function for debug.
549 *
550 * PUBLIC: int __db_debug_recover __P((ENV *,
551 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
552 */
553 int
__db_debug_recover(env,dbtp,lsnp,op,info)554 __db_debug_recover(env, dbtp, lsnp, op, info)
555 ENV *env;
556 DBT *dbtp;
557 DB_LSN *lsnp;
558 db_recops op;
559 void *info;
560 {
561 __db_debug_args *argp;
562 int ret;
563
564 COMPQUIET(op, DB_TXN_ABORT);
565 COMPQUIET(info, NULL);
566
567 REC_PRINT(__db_debug_print);
568 REC_NOOP_INTRO(__db_debug_read);
569
570 *lsnp = argp->prev_lsn;
571 ret = 0;
572
573 REC_NOOP_CLOSE;
574 }
575
576 /*
577 * __db_noop_recover --
578 * Recovery function for noop.
579 *
580 * PUBLIC: int __db_noop_recover __P((ENV *,
581 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
582 */
583 int
__db_noop_recover(env,dbtp,lsnp,op,info)584 __db_noop_recover(env, dbtp, lsnp, op, info)
585 ENV *env;
586 DBT *dbtp;
587 DB_LSN *lsnp;
588 db_recops op;
589 void *info;
590 {
591 __db_noop_args *argp;
592 DB_THREAD_INFO *ip;
593 DB *file_dbp;
594 DBC *dbc;
595 DB_MPOOLFILE *mpf;
596 PAGE *pagep;
597 int cmp_n, cmp_p, ret;
598
599 ip = ((DB_TXNHEAD *)info)->thread_info;
600 pagep = NULL;
601 REC_PRINT(__db_noop_print);
602 REC_INTRO(__db_noop_read, ip, 0);
603
604 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
605
606 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
607 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
608 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
609 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
610 if (cmp_p == 0 && DB_REDO(op)) {
611 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
612 LSN(pagep) = *lsnp;
613 } else if (cmp_n == 0 && DB_UNDO(op)) {
614 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
615 LSN(pagep) = argp->prevlsn;
616 }
617 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
618 pagep = NULL;
619
620 done: *lsnp = argp->prev_lsn;
621 out: if (pagep != NULL)
622 (void)__memp_fput(mpf,
623 ip, pagep, file_dbp->priority);
624 REC_CLOSE;
625 }
626
627 /*
628 * __db_pg_alloc_recover --
629 * Recovery function for pg_alloc.
630 *
631 * PUBLIC: int __db_pg_alloc_recover
632 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
633 */
634 int
__db_pg_alloc_recover(env,dbtp,lsnp,op,info)635 __db_pg_alloc_recover(env, dbtp, lsnp, op, info)
636 ENV *env;
637 DBT *dbtp;
638 DB_LSN *lsnp;
639 db_recops op;
640 void *info;
641 {
642 __db_pg_alloc_args *argp;
643 DB_THREAD_INFO *ip;
644 DB *file_dbp;
645 DBC *dbc;
646 DBMETA *meta;
647 DB_MPOOLFILE *mpf;
648 PAGE *pagep;
649 db_pgno_t pgno;
650 int cmp_n, cmp_p, created, level, ret;
651
652 ip = ((DB_TXNHEAD *)info)->thread_info;
653 meta = NULL;
654 pagep = NULL;
655 created = 0;
656 REC_PRINT(__db_pg_alloc_print);
657 REC_INTRO(__db_pg_alloc_read, ip, 0);
658
659 /*
660 * Fix up the metadata page. If we're redoing the operation, we have
661 * to get the metadata page and update its LSN and its free pointer.
662 * If we're undoing the operation and the page was ever created, we put
663 * it on the freelist.
664 */
665 pgno = PGNO_BASE_MD;
666 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
667 /* The metadata page must always exist on redo. */
668 if (DB_REDO(op)) {
669 ret = __db_pgerr(file_dbp, pgno, ret);
670 goto out;
671 } else
672 goto done;
673 }
674 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
675 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
676 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
677 CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
678 if (cmp_p == 0 && DB_REDO(op)) {
679 /* Need to redo update described. */
680 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
681 LSN(meta) = *lsnp;
682 meta->free = argp->next;
683 if (argp->pgno > meta->last_pgno)
684 meta->last_pgno = argp->pgno;
685 } else if (cmp_n == 0 && DB_UNDO(op)) {
686 /* Need to undo update described. */
687 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
688 LSN(meta) = argp->meta_lsn;
689 /*
690 * If the page has a zero LSN then its newly created and
691 * will be truncated rather than go on the free list.
692 */
693 if (!IS_ZERO_LSN(argp->page_lsn))
694 meta->free = argp->pgno;
695 meta->last_pgno = argp->last_pgno;
696 }
697
698 #ifdef HAVE_FTRUNCATE
699 /*
700 * check to see if we are keeping a sorted freelist, if so put
701 * this back in the in memory list. It must be the first element.
702 */
703 if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
704 db_pgno_t *list;
705 u_int32_t nelem;
706
707 if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
708 goto out;
709 if (list != NULL && (nelem == 0 || *list != argp->pgno)) {
710 if ((ret =
711 __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
712 goto out;
713 if (nelem != 0)
714 memmove(list + 1, list, nelem * sizeof(*list));
715 *list = argp->pgno;
716 }
717 }
718 #endif
719
720 /*
721 * Fix up the allocated page. If the page does not exist
722 * and we can truncate it then don't create it.
723 * Otherwise if we're redoing the operation, we have
724 * to get the page (creating it if it doesn't exist), and update its
725 * LSN. If we're undoing the operation, we have to reset the page's
726 * LSN and put it on the free list.
727 */
728 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
729 /*
730 * We have to be able to identify if a page was newly
731 * created so we can recover it properly. We cannot simply
732 * look for an empty header, because hash uses a pgin
733 * function that will set the header. Instead, we explicitly
734 * try for the page without CREATE and if that fails, then
735 * create it.
736 */
737 if (DB_UNDO(op))
738 goto do_truncate;
739 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
740 DB_MPOOL_CREATE, &pagep)) != 0) {
741 if (DB_UNDO(op) && ret == ENOSPC)
742 goto do_truncate;
743 ret = __db_pgerr(file_dbp, argp->pgno, ret);
744 goto out;
745 }
746 created = 1;
747 }
748
749 /* Fix up the allocated page. */
750 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
751 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
752
753 /*
754 * If an initial allocation is aborted and then reallocated during
755 * an archival restore the log record will have an LSN for the page
756 * but the page will be empty.
757 */
758 if (IS_ZERO_LSN(LSN(pagep)))
759 cmp_p = 0;
760
761 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
762 /*
763 * Another special case we have to handle is if we ended up with a
764 * page of all 0's which can happen if we abort between allocating a
765 * page in mpool and initializing it. In that case, even if we're
766 * undoing, we need to re-initialize the page.
767 */
768 if (DB_REDO(op) && cmp_p == 0) {
769 /* Need to redo update described. */
770 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
771 switch (argp->ptype) {
772 case P_LBTREE:
773 case P_LRECNO:
774 case P_LDUP:
775 level = LEAFLEVEL;
776 break;
777 default:
778 level = 0;
779 break;
780 }
781 P_INIT(pagep, file_dbp->pgsize,
782 argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
783
784 pagep->lsn = *lsnp;
785 } else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
786 /*
787 * This is where we handle the case of a 0'd page (pagep->pgno
788 * is equal to PGNO_INVALID).
789 * Undo the allocation, reinitialize the page and
790 * link its next pointer to the free list.
791 */
792 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
793 P_INIT(pagep, file_dbp->pgsize,
794 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
795
796 pagep->lsn = argp->page_lsn;
797 }
798
799 do_truncate:
800 /*
801 * If the page was newly created, give it back.
802 */
803 if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
804 IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
805 /* Discard the page. */
806 if (pagep != NULL) {
807 if ((ret = __memp_fput(mpf, ip,
808 pagep, DB_PRIORITY_VERY_LOW)) != 0)
809 goto out;
810 pagep = NULL;
811 }
812 /* Give the page back to the OS. */
813 if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate(
814 mpf, NULL, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0)
815 goto out;
816 }
817
818 if (pagep != NULL) {
819 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
820 pagep = NULL;
821 if (ret != 0)
822 goto out;
823 }
824
825 ret = __memp_fput(mpf, ip, meta, file_dbp->priority);
826 meta = NULL;
827 if (ret != 0)
828 goto out;
829
830 done: *lsnp = argp->prev_lsn;
831 ret = 0;
832
833 out: if (pagep != NULL)
834 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
835 if (meta != NULL)
836 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
837 REC_CLOSE;
838 }
839
840 /*
841 * __db_pg_free_recover_int --
842 */
843 static int
__db_pg_free_recover_int(env,ip,argp,file_dbp,lsnp,mpf,op,data)844 __db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
845 ENV *env;
846 DB_THREAD_INFO *ip;
847 __db_pg_freedata_args *argp;
848 DB *file_dbp;
849 DB_LSN *lsnp;
850 DB_MPOOLFILE *mpf;
851 db_recops op;
852 int data;
853 {
854 DBMETA *meta;
855 DB_LSN copy_lsn;
856 PAGE *pagep, *prevp;
857 int cmp_n, cmp_p, is_meta, ret;
858
859 meta = NULL;
860 pagep = prevp = NULL;
861
862 /*
863 * Get the "metapage". This will either be the metapage
864 * or the previous page in the free list if we are doing
865 * sorted allocations. If its a previous page then
866 * we will not be truncating.
867 */
868 is_meta = argp->meta_pgno == PGNO_BASE_MD;
869
870 REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
871
872 if (argp->meta_pgno != PGNO_BASE_MD)
873 prevp = (PAGE *)meta;
874
875 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
876 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
877 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
878 CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
879
880 /*
881 * Fix up the metadata page. If we're redoing or undoing the operation
882 * we get the page and update its LSN, last and free pointer.
883 */
884 if (cmp_p == 0 && DB_REDO(op)) {
885 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
886 /*
887 * If we are at the end of the file truncate, otherwise
888 * put on the free list.
889 */
890 #ifdef HAVE_FTRUNCATE
891 if (argp->pgno == argp->last_pgno)
892 meta->last_pgno = argp->pgno - 1;
893 else
894 #endif
895 if (is_meta)
896 meta->free = argp->pgno;
897 else
898 NEXT_PGNO(prevp) = argp->pgno;
899 LSN(meta) = *lsnp;
900 } else if (cmp_n == 0 && DB_UNDO(op)) {
901 /* Need to undo the deallocation. */
902 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
903 if (is_meta) {
904 if (meta->last_pgno < argp->pgno)
905 meta->last_pgno = argp->pgno;
906 meta->free = argp->next;
907 } else
908 NEXT_PGNO(prevp) = argp->next;
909 LSN(meta) = argp->meta_lsn;
910 }
911
912 check_meta:
913 if (ret != 0 && is_meta) {
914 /* The metadata page must always exist. */
915 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
916 goto out;
917 }
918
919 /*
920 * Get the freed page. Don't create the page if we are going to
921 * free it. If we're redoing the operation we get the page and
922 * explicitly discard its contents, then update its LSN. If we're
923 * undoing the operation, we get the page and restore its header.
924 */
925 if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
926 if ((ret = __memp_fget(mpf, &argp->pgno,
927 ip, NULL, 0, &pagep)) != 0) {
928 if (ret != DB_PAGE_NOTFOUND)
929 goto out;
930 #ifdef HAVE_FTRUNCATE
931 if (is_meta &&
932 DB_REDO(op) && meta->last_pgno <= argp->pgno)
933 goto trunc;
934 #endif
935 goto done;
936 }
937 } else if ((ret = __memp_fget(mpf, &argp->pgno,
938 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
939 goto out;
940
941 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
942 cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
943 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
944
945 /*
946 * This page got extended by a later allocation,
947 * but its allocation was not in the scope of this
948 * recovery pass.
949 */
950 if (IS_ZERO_LSN(LSN(pagep)))
951 cmp_p = 0;
952
953 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
954 /*
955 * We need to check that the page could have the current LSN
956 * which was copied before it was truncated in addition to
957 * the usual of having the previous LSN.
958 */
959 if (DB_REDO(op) &&
960 (cmp_p == 0 || cmp_n == 0 ||
961 (IS_ZERO_LSN(copy_lsn) &&
962 LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
963 /* Need to redo the deallocation. */
964 /*
965 * The page can be truncated if it was truncated at runtime
966 * and the current metapage reflects the truncation.
967 */
968 #ifdef HAVE_FTRUNCATE
969 if (is_meta && meta->last_pgno <= argp->pgno &&
970 argp->last_pgno <= argp->pgno) {
971 if ((ret = __memp_fput(mpf, ip,
972 pagep, DB_PRIORITY_VERY_LOW)) != 0)
973 goto out;
974 pagep = NULL;
975 trunc: if ((ret = __memp_ftruncate(mpf, NULL, ip,
976 argp->pgno, MP_TRUNC_RECOVER)) != 0)
977 goto out;
978 } else if (argp->last_pgno == argp->pgno) {
979 /* The page was truncated at runtime, zero it out. */
980 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
981 P_INIT(pagep, 0, PGNO_INVALID,
982 PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
983 ZERO_LSN(pagep->lsn);
984 } else
985 #endif
986 if (cmp_p == 0 || IS_ZERO_LSN(LSN(pagep))) {
987 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
988 P_INIT(pagep, file_dbp->pgsize,
989 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
990 pagep->lsn = *lsnp;
991
992 }
993 } else if (cmp_n == 0 && DB_UNDO(op)) {
994 /* Need to reallocate the page. */
995 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
996 memcpy(pagep, argp->header.data, argp->header.size);
997 if (data)
998 memcpy((u_int8_t*)pagep + HOFFSET(pagep),
999 argp->data.data, argp->data.size);
1000 }
1001 if (pagep != NULL &&
1002 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1003 goto out;
1004
1005 pagep = NULL;
1006 #ifdef HAVE_FTRUNCATE
1007 /*
1008 * If we are keeping an in memory free list remove this
1009 * element from the list.
1010 */
1011 if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
1012 db_pgno_t *lp;
1013 u_int32_t nelem, pos;
1014
1015 if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
1016 goto out;
1017 if (lp != NULL) {
1018 pos = 0;
1019 if (!is_meta) {
1020 __db_freelist_pos(argp->pgno, lp, nelem, &pos);
1021
1022 /*
1023 * If we aborted after logging but before
1024 * updating the free list don't do anything.
1025 */
1026 if (argp->pgno != lp[pos]) {
1027 DB_ASSERT(env,
1028 argp->meta_pgno == lp[pos]);
1029 goto done;
1030 }
1031 DB_ASSERT(env,
1032 argp->meta_pgno == lp[pos - 1]);
1033 } else if (nelem != 0 && argp->pgno != lp[pos])
1034 goto done;
1035
1036 if (pos < nelem)
1037 memmove(&lp[pos], &lp[pos + 1],
1038 ((nelem - pos) - 1) * sizeof(*lp));
1039
1040 /* Shrink the list */
1041 if ((ret =
1042 __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
1043 goto out;
1044 }
1045 }
1046 #endif
1047 done:
1048 if (meta != NULL &&
1049 (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1050 goto out;
1051 meta = NULL;
1052 ret = 0;
1053
1054 out: if (pagep != NULL)
1055 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1056 if (meta != NULL)
1057 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1058
1059 return (ret);
1060 }
1061
1062 /*
1063 * __db_pg_free_recover --
1064 * Recovery function for pg_free.
1065 *
1066 * PUBLIC: int __db_pg_free_recover
1067 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1068 */
1069 int
__db_pg_free_recover(env,dbtp,lsnp,op,info)1070 __db_pg_free_recover(env, dbtp, lsnp, op, info)
1071 ENV *env;
1072 DBT *dbtp;
1073 DB_LSN *lsnp;
1074 db_recops op;
1075 void *info;
1076 {
1077 __db_pg_free_args *argp;
1078 DB *file_dbp;
1079 DBC *dbc;
1080 DB_MPOOLFILE *mpf;
1081 DB_THREAD_INFO *ip;
1082 int ret;
1083
1084 ip = ((DB_TXNHEAD *)info)->thread_info;
1085 REC_PRINT(__db_pg_free_print);
1086 REC_INTRO(__db_pg_free_read, ip, 0);
1087
1088 if ((ret = __db_pg_free_recover_int(env, ip,
1089 (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0)) != 0)
1090 goto out;
1091
1092 done: *lsnp = argp->prev_lsn;
1093 out:
1094 REC_CLOSE;
1095 }
1096
1097 /*
1098 * __db_pg_freedata_recover --
1099 * Recovery function for pg_freedata.
1100 *
1101 * PUBLIC: int __db_pg_freedata_recover
1102 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1103 */
1104 int
__db_pg_freedata_recover(env,dbtp,lsnp,op,info)1105 __db_pg_freedata_recover(env, dbtp, lsnp, op, info)
1106 ENV *env;
1107 DBT *dbtp;
1108 DB_LSN *lsnp;
1109 db_recops op;
1110 void *info;
1111 {
1112 __db_pg_freedata_args *argp;
1113 DB *file_dbp;
1114 DBC *dbc;
1115 DB_MPOOLFILE *mpf;
1116 DB_THREAD_INFO *ip;
1117 int ret;
1118
1119 ip = ((DB_TXNHEAD *)info)->thread_info;
1120 REC_PRINT(__db_pg_freedata_print);
1121 REC_INTRO(__db_pg_freedata_read, ip, 0);
1122
1123 if ((ret = __db_pg_free_recover_int(env,
1124 ip, argp, file_dbp, lsnp, mpf, op, 1)) != 0)
1125 goto out;
1126
1127 done: *lsnp = argp->prev_lsn;
1128 out:
1129 REC_CLOSE;
1130 }
1131
1132 /*
1133 * __db_cksum_recover --
1134 * Recovery function for checksum failure log record.
1135 *
1136 * PUBLIC: int __db_cksum_recover __P((ENV *,
1137 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
1138 */
1139 int
__db_cksum_recover(env,dbtp,lsnp,op,info)1140 __db_cksum_recover(env, dbtp, lsnp, op, info)
1141 ENV *env;
1142 DBT *dbtp;
1143 DB_LSN *lsnp;
1144 db_recops op;
1145 void *info;
1146 {
1147 __db_cksum_args *argp;
1148 int ret;
1149
1150 COMPQUIET(info, NULL);
1151 COMPQUIET(lsnp, NULL);
1152 COMPQUIET(op, DB_TXN_ABORT);
1153
1154 REC_PRINT(__db_cksum_print);
1155
1156 if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
1157 return (ret);
1158
1159 /*
1160 * We had a checksum failure -- the only option is to run catastrophic
1161 * recovery.
1162 */
1163 if (F_ISSET(env, ENV_RECOVER_FATAL))
1164 ret = 0;
1165 else {
1166 __db_errx(env, DB_STR("0642",
1167 "Checksum failure requires catastrophic recovery"));
1168 ret = __env_panic(env, DB_RUNRECOVERY);
1169 }
1170
1171 __os_free(env, argp);
1172 return (ret);
1173 }
1174
1175 /*
1176 * __db_pg_init_recover --
1177 * Recovery function to reinit pages after truncation.
1178 *
1179 * PUBLIC: int __db_pg_init_recover
1180 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1181 */
1182 int
__db_pg_init_recover(env,dbtp,lsnp,op,info)1183 __db_pg_init_recover(env, dbtp, lsnp, op, info)
1184 ENV *env;
1185 DBT *dbtp;
1186 DB_LSN *lsnp;
1187 db_recops op;
1188 void *info;
1189 {
1190 __db_pg_init_args *argp;
1191 DB_THREAD_INFO *ip;
1192 DB *file_dbp;
1193 DBC *dbc;
1194 DB_LSN copy_lsn;
1195 DB_MPOOLFILE *mpf;
1196 PAGE *pagep;
1197 int cmp_n, cmp_p, ret, type;
1198
1199 ip = ((DB_TXNHEAD *)info)->thread_info;
1200 REC_PRINT(__db_pg_init_print);
1201 REC_INTRO(__db_pg_init_read, ip, 0);
1202
1203 mpf = file_dbp->mpf;
1204 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1205 if (DB_UNDO(op)) {
1206 if (ret == DB_PAGE_NOTFOUND)
1207 goto done;
1208 else {
1209 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1210 goto out;
1211 }
1212 }
1213
1214 /*
1215 * This page was truncated and may simply not have
1216 * had an item written to it yet. This should only
1217 * happen on hash databases, so confirm that.
1218 */
1219 DB_ASSERT(env, file_dbp->type == DB_HASH);
1220 if ((ret = __memp_fget(mpf, &argp->pgno,
1221 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
1222 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1223 goto out;
1224 }
1225 }
1226
1227 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
1228 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1229 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
1230 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
1231 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1232
1233 if (cmp_p == 0 && DB_REDO(op)) {
1234 if (TYPE(pagep) == P_HASH)
1235 type = P_HASH;
1236 else
1237 type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
1238 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1239 P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
1240 PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
1241 pagep->lsn = *lsnp;
1242 } else if (cmp_n == 0 && DB_UNDO(op)) {
1243 /* Put the data back on the page. */
1244 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1245 memcpy(pagep, argp->header.data, argp->header.size);
1246 if (argp->data.size > 0)
1247 memcpy((u_int8_t*)pagep + HOFFSET(pagep),
1248 argp->data.data, argp->data.size);
1249 }
1250 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1251 goto out;
1252
1253 done: *lsnp = argp->prev_lsn;
1254 out:
1255 REC_CLOSE;
1256 }
1257
1258 /*
1259 * __db_pg_trunc_recover --
1260 * Recovery function for pg_trunc.
1261 *
1262 * PUBLIC: int __db_pg_trunc_recover
1263 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1264 */
1265 int
__db_pg_trunc_recover(env,dbtp,lsnp,op,info)1266 __db_pg_trunc_recover(env, dbtp, lsnp, op, info)
1267 ENV *env;
1268 DBT *dbtp;
1269 DB_LSN *lsnp;
1270 db_recops op;
1271 void *info;
1272 {
1273 #ifdef HAVE_FTRUNCATE
1274 __db_pg_trunc_args *argp;
1275 DB_THREAD_INFO *ip;
1276 DB *file_dbp;
1277 DBC *dbc;
1278 DBMETA *meta;
1279 DB_MPOOLFILE *mpf;
1280 PAGE *pagep;
1281 db_pglist_t *pglist, *lp;
1282 db_pgno_t last_pgno, *list;
1283 u_int32_t felem, nelem, pos;
1284 int ret;
1285
1286 ip = ((DB_TXNHEAD *)info)->thread_info;
1287 REC_PRINT(__db_pg_trunc_print);
1288 REC_INTRO(__db_pg_trunc_read, ip, 1);
1289
1290 pglist = (db_pglist_t *) argp->list.data;
1291 nelem = argp->list.size / sizeof(db_pglist_t);
1292 if (DB_REDO(op)) {
1293 /*
1294 * First call __db_pg_truncate to find the truncation
1295 * point, truncate the file and return the new last_pgno.
1296 */
1297 last_pgno = argp->last_pgno;
1298 if ((ret = __db_pg_truncate(dbc, NULL, pglist,
1299 NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0)
1300 goto out;
1301
1302 if (argp->last_free != PGNO_INVALID) {
1303 /*
1304 * Update the next pointer of the last page in
1305 * the freelist. If the truncation point is
1306 * beyond next_free then this is still in the freelist
1307 * otherwise the last_free page is at the end.
1308 */
1309 if ((ret = __memp_fget(mpf,
1310 &argp->last_free, ip, NULL, 0, &meta)) == 0) {
1311 if (LOG_COMPARE(&LSN(meta),
1312 &argp->last_lsn) == 0) {
1313 REC_DIRTY(mpf,
1314 ip, dbc->priority, &meta);
1315 if (pglist->pgno > last_pgno)
1316 NEXT_PGNO(meta) = PGNO_INVALID;
1317 else
1318 NEXT_PGNO(meta) = pglist->pgno;
1319 LSN(meta) = *lsnp;
1320 }
1321 if ((ret = __memp_fput(mpf, ip,
1322 meta, file_dbp->priority)) != 0)
1323 goto out;
1324 meta = NULL;
1325 } else if (ret != DB_PAGE_NOTFOUND)
1326 goto out;
1327 }
1328 if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
1329 0, &meta)) != 0)
1330 goto out;
1331 if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
1332 REC_DIRTY(mpf, ip, dbc->priority, &meta);
1333 if (argp->last_free == PGNO_INVALID) {
1334 if (nelem == 0)
1335 meta->free = PGNO_INVALID;
1336 else
1337 meta->free = pglist->pgno;
1338 }
1339 /*
1340 * If this is part of a multi record truncate
1341 * this could be just the last page of this record
1342 * don't move the meta->last_pgno forward.
1343 */
1344 if (meta->last_pgno > last_pgno)
1345 meta->last_pgno = last_pgno;
1346 LSN(meta) = *lsnp;
1347 }
1348 } else {
1349 /* Put the free list back in its original order. */
1350 for (lp = pglist; lp < &pglist[nelem]; lp++) {
1351 if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1352 NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1353 goto out;
1354 if (IS_ZERO_LSN(LSN(pagep)) ||
1355 LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
1356 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1357 P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1358 PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1359 LSN(pagep) = lp->lsn;
1360 }
1361 if ((ret = __memp_fput(mpf,
1362 ip, pagep, file_dbp->priority)) != 0)
1363 goto out;
1364 }
1365 /*
1366 * Link the truncated part back into the free list.
1367 * Its either after the last_free page or directly
1368 * linked to the metadata page.
1369 */
1370 if (argp->last_free != PGNO_INVALID) {
1371 if ((ret = __memp_fget(mpf, &argp->last_free,
1372 ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
1373 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1374 NEXT_PGNO(meta) = argp->next_free;
1375 LSN(meta) = argp->last_lsn;
1376 }
1377 if ((ret = __memp_fput(mpf, ip,
1378 meta, file_dbp->priority)) != 0)
1379 goto out;
1380 } else if (ret != DB_PAGE_NOTFOUND)
1381 goto out;
1382 meta = NULL;
1383 }
1384 if ((ret = __memp_fget(mpf, &argp->meta,
1385 ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
1386 goto out;
1387 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1388 REC_DIRTY(mpf, ip, dbc->priority, &meta);
1389 /*
1390 * If we had to break up the list last_pgno
1391 * may only represent the end of the block.
1392 */
1393 if (meta->last_pgno < argp->last_pgno)
1394 meta->last_pgno = argp->last_pgno;
1395 if (argp->last_free == PGNO_INVALID)
1396 meta->free = argp->next_free;
1397 LSN(meta) = argp->meta_lsn;
1398 }
1399 }
1400
1401 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1402 goto out;
1403
1404 if (op == DB_TXN_ABORT) {
1405 /*
1406 * Put the pages back on the in memory free list.
1407 * If this is part of a multi-record truncate then
1408 * we need to find this batch, it may not be at the end.
1409 * If we aborted while writing one of the log records
1410 * then this set may still be in the list.
1411 */
1412 if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1413 goto out;
1414 if (list != NULL) {
1415 if (felem != 0 && list[felem - 1] > pglist->pgno) {
1416 __db_freelist_pos(
1417 pglist->pgno, list, felem, &pos);
1418 DB_ASSERT(env, pos < felem);
1419 if (pglist->pgno == list[pos])
1420 goto done;
1421 pos++;
1422 } else if (felem != 0 &&
1423 list[felem - 1] == pglist->pgno)
1424 goto done;
1425 else
1426 pos = felem;
1427 if ((ret = __memp_extend_freelist(
1428 mpf, felem + nelem, &list)) != 0)
1429 goto out;
1430 if (pos != felem)
1431 memmove(&list[nelem + pos], &list[pos],
1432 sizeof(*list) * (felem - pos));
1433 for (lp = pglist; lp < &pglist[nelem]; lp++)
1434 list[pos++] = lp->pgno;
1435 }
1436 }
1437
1438 done: *lsnp = argp->prev_lsn;
1439 ret = 0;
1440
1441 out: REC_CLOSE;
1442 #else
1443 /*
1444 * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records
1445 * to recover.
1446 */
1447 COMPQUIET(env, NULL);
1448 COMPQUIET(dbtp, NULL);
1449 COMPQUIET(lsnp, NULL);
1450 COMPQUIET(op, DB_TXN_ABORT);
1451 COMPQUIET(info, NULL);
1452 return (EINVAL);
1453 #endif
1454 }
1455 /*
1456 * __db_realloc_recover --
1457 * Recovery function for realloc.
1458 *
1459 * PUBLIC: int __db_realloc_recover
1460 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1461 */
1462 int
__db_realloc_recover(env,dbtp,lsnp,op,info)1463 __db_realloc_recover(env, dbtp, lsnp, op, info)
1464 ENV *env;
1465 DBT *dbtp;
1466 DB_LSN *lsnp;
1467 db_recops op;
1468 void *info;
1469 {
1470 __db_realloc_args *argp;
1471 DB *file_dbp;
1472 DBC *dbc;
1473 DB_MPOOLFILE *mpf;
1474 DB_THREAD_INFO *ip;
1475 PAGE *pagep;
1476 db_pglist_t *pglist, *lp;
1477 #ifdef HAVE_FTRUNCATE
1478 db_pgno_t *list;
1479 u_int32_t felem, pos;
1480 #endif
1481 u_int32_t nelem;
1482 int cmp_n, cmp_p, ret;
1483
1484 ip = ((DB_TXNHEAD *)info)->thread_info;
1485
1486 REC_PRINT(__db_realloc_print);
1487 REC_INTRO(__db_realloc_read, ip, 1);
1488 mpf = file_dbp->mpf;
1489
1490 /*
1491 * First, iterate over all the pages and make sure they are all in
1492 * their prior or new states (according to the op).
1493 */
1494 pglist = (db_pglist_t *) argp->list.data;
1495 nelem = argp->list.size / sizeof(db_pglist_t);
1496 for (lp = pglist; lp < &pglist[nelem]; lp++) {
1497 if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1498 NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1499 goto out;
1500 if (DB_REDO(op) && LOG_COMPARE(&LSN(pagep), &lp->lsn) == 0) {
1501 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1502 P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1503 PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
1504 LSN(pagep) = *lsnp;
1505 } else if (DB_UNDO(op) && (IS_ZERO_LSN(LSN(pagep)) ||
1506 LOG_COMPARE(&LSN(pagep), lsnp) == 0)) {
1507 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1508 P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1509 PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1510 LSN(pagep) = lp->lsn;
1511 }
1512 if ((ret = __memp_fput(mpf,
1513 ip, pagep, file_dbp->priority)) != 0)
1514 goto out;
1515 }
1516
1517 /* Now, fix up the free list. */
1518 if ((ret = __memp_fget(mpf,
1519 &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0)
1520 goto out;
1521
1522 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1523 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1524 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1525 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1526
1527 if (DB_REDO(op) && cmp_p == 0) {
1528 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1529 if (argp->prev_pgno == PGNO_BASE_MD)
1530 ((DBMETA *)pagep)->free = argp->next_free;
1531 else
1532 NEXT_PGNO(pagep) = argp->next_free;
1533 LSN(pagep) = *lsnp;
1534 } else if (DB_UNDO(op) && cmp_n == 0) {
1535 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1536 if (argp->prev_pgno == PGNO_BASE_MD)
1537 ((DBMETA *)pagep)->free = pglist->pgno;
1538 else
1539 NEXT_PGNO(pagep) = pglist->pgno;
1540 LSN(pagep) = argp->page_lsn;
1541 }
1542 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1543 goto out;
1544
1545 #ifdef HAVE_FTRUNCATE
1546 if (op == DB_TXN_ABORT) {
1547 /* Put the pages back in the sorted list. */
1548 if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1549 goto out;
1550 if (list != NULL) {
1551 __db_freelist_pos(pglist->pgno, list, felem, &pos);
1552 if (pglist->pgno == list[pos])
1553 goto done;
1554 if ((ret = __memp_extend_freelist(
1555 mpf, felem + nelem, &list)) != 0)
1556 goto out;
1557 pos++;
1558 if (pos != felem)
1559 memmove(&list[pos+nelem],
1560 &list[pos], nelem * sizeof(*list));
1561 for (lp = pglist; lp < &pglist[nelem]; lp++)
1562 list[pos++] = lp->pgno;
1563 }
1564 }
1565 #endif
1566
1567 done: *lsnp = argp->prev_lsn;
1568 ret = 0;
1569
1570 out: REC_CLOSE;
1571 }
1572 /*
1573 * __db_pg_sort_44_recover --
1574 * Recovery function for pg_sort.
1575 * This is deprecated and kept for replication upgrades.
1576 *
1577 * PUBLIC: int __db_pg_sort_44_recover
1578 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1579 */
1580 int
__db_pg_sort_44_recover(env,dbtp,lsnp,op,info)1581 __db_pg_sort_44_recover(env, dbtp, lsnp, op, info)
1582 ENV *env;
1583 DBT *dbtp;
1584 DB_LSN *lsnp;
1585 db_recops op;
1586 void *info;
1587 {
1588 #ifdef HAVE_FTRUNCATE
1589 __db_pg_sort_44_args *argp;
1590 DB_THREAD_INFO *ip;
1591 DB *file_dbp;
1592 DBC *dbc;
1593 DBMETA *meta;
1594 DB_MPOOLFILE *mpf;
1595 PAGE *pagep;
1596 db_pglist_t *pglist, *lp;
1597 db_pgno_t pgno, *list;
1598 u_int32_t felem, nelem;
1599 int ret;
1600
1601 ip = ((DB_TXNHEAD *)info)->thread_info;
1602 REC_PRINT(__db_pg_sort_44_print);
1603 REC_INTRO(__db_pg_sort_44_read, ip, 1);
1604
1605 pglist = (db_pglist_t *) argp->list.data;
1606 nelem = argp->list.size / sizeof(db_pglist_t);
1607 if (DB_REDO(op)) {
1608 pgno = argp->last_pgno;
1609 __db_freelist_sort(pglist, nelem);
1610 if ((ret = __db_pg_truncate(dbc, NULL,
1611 pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0)
1612 goto out;
1613
1614 if (argp->last_free != PGNO_INVALID) {
1615 if ((ret = __memp_fget(mpf,
1616 &argp->last_free, ip, NULL, 0, &meta)) == 0) {
1617 if (LOG_COMPARE(&LSN(meta),
1618 &argp->last_lsn) == 0) {
1619 REC_DIRTY(mpf,
1620 ip, dbc->priority, &meta);
1621 NEXT_PGNO(meta) = PGNO_INVALID;
1622 LSN(meta) = *lsnp;
1623 }
1624 if ((ret = __memp_fput(mpf, ip,
1625 meta, file_dbp->priority)) != 0)
1626 goto out;
1627 meta = NULL;
1628 } else if (ret != DB_PAGE_NOTFOUND)
1629 goto out;
1630 }
1631 if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
1632 0, &meta)) != 0)
1633 goto out;
1634 if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
1635 REC_DIRTY(mpf, ip, dbc->priority, &meta);
1636 if (argp->last_free == PGNO_INVALID) {
1637 if (nelem == 0)
1638 meta->free = PGNO_INVALID;
1639 else
1640 meta->free = pglist->pgno;
1641 }
1642 meta->last_pgno = pgno;
1643 LSN(meta) = *lsnp;
1644 }
1645 } else {
1646 /* Put the free list back in its original order. */
1647 for (lp = pglist; lp < &pglist[nelem]; lp++) {
1648 if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1649 NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1650 goto out;
1651 if (IS_ZERO_LSN(LSN(pagep)) ||
1652 LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
1653 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1654 if (lp == &pglist[nelem - 1])
1655 pgno = PGNO_INVALID;
1656 else
1657 pgno = lp[1].pgno;
1658
1659 P_INIT(pagep, file_dbp->pgsize,
1660 lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
1661 LSN(pagep) = lp->lsn;
1662 }
1663 if ((ret = __memp_fput(mpf,
1664 ip, pagep, file_dbp->priority)) != 0)
1665 goto out;
1666 }
1667 if (argp->last_free != PGNO_INVALID) {
1668 if ((ret = __memp_fget(mpf, &argp->last_free,
1669 ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
1670 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1671 NEXT_PGNO(meta) = pglist->pgno;
1672 LSN(meta) = argp->last_lsn;
1673 }
1674 if ((ret = __memp_fput(mpf, ip,
1675 meta, file_dbp->priority)) != 0)
1676 goto out;
1677 } else if (ret != DB_PAGE_NOTFOUND)
1678 goto out;
1679 meta = NULL;
1680 }
1681 if ((ret = __memp_fget(mpf, &argp->meta,
1682 ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
1683 goto out;
1684 if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
1685 REC_DIRTY(mpf, ip, dbc->priority, &meta);
1686 meta->last_pgno = argp->last_pgno;
1687 if (argp->last_free == PGNO_INVALID)
1688 meta->free = pglist->pgno;
1689 LSN(meta) = argp->meta_lsn;
1690 }
1691 }
1692 if (op == DB_TXN_ABORT) {
1693 if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1694 goto out;
1695 if (list != NULL) {
1696 DB_ASSERT(env, felem == 0 ||
1697 argp->last_free == list[felem - 1]);
1698 if ((ret = __memp_extend_freelist(
1699 mpf, felem + nelem, &list)) != 0)
1700 goto out;
1701 for (lp = pglist; lp < &pglist[nelem]; lp++)
1702 list[felem++] = lp->pgno;
1703 }
1704 }
1705
1706 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1707 goto out;
1708
1709 done: *lsnp = argp->prev_lsn;
1710 ret = 0;
1711
1712 out: REC_CLOSE;
1713 #else
1714 /*
1715 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
1716 * to recover.
1717 */
1718 COMPQUIET(env, NULL);
1719 COMPQUIET(dbtp, NULL);
1720 COMPQUIET(lsnp, NULL);
1721 COMPQUIET(op, DB_TXN_ABORT);
1722 COMPQUIET(info, NULL);
1723 return (EINVAL);
1724 #endif
1725 }
1726
1727 /*
1728 * __db_pg_alloc_42_recover --
1729 * Recovery function for pg_alloc.
1730 *
1731 * PUBLIC: int __db_pg_alloc_42_recover
1732 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1733 */
1734 int
__db_pg_alloc_42_recover(env,dbtp,lsnp,op,info)1735 __db_pg_alloc_42_recover(env, dbtp, lsnp, op, info)
1736 ENV *env;
1737 DBT *dbtp;
1738 DB_LSN *lsnp;
1739 db_recops op;
1740 void *info;
1741 {
1742 __db_pg_alloc_42_args *argp;
1743 DB_THREAD_INFO *ip;
1744 DB *file_dbp;
1745 DBC *dbc;
1746 DBMETA *meta;
1747 DB_MPOOLFILE *mpf;
1748 PAGE *pagep;
1749 db_pgno_t pgno;
1750 int cmp_n, cmp_p, created, level, ret;
1751
1752 ip = ((DB_TXNHEAD *)info)->thread_info;
1753 meta = NULL;
1754 pagep = NULL;
1755 created = 0;
1756 REC_PRINT(__db_pg_alloc_42_print);
1757 REC_INTRO(__db_pg_alloc_42_read, ip, 0);
1758
1759 /*
1760 * Fix up the metadata page. If we're redoing the operation, we have
1761 * to get the metadata page and update its LSN and its free pointer.
1762 * If we're undoing the operation and the page was ever created, we put
1763 * it on the freelist.
1764 */
1765 pgno = PGNO_BASE_MD;
1766 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
1767 /* The metadata page must always exist on redo. */
1768 if (DB_REDO(op)) {
1769 ret = __db_pgerr(file_dbp, pgno, ret);
1770 goto out;
1771 } else
1772 goto done;
1773 }
1774 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1775 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1776 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1777 if (cmp_p == 0 && DB_REDO(op)) {
1778 /* Need to redo update described. */
1779 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1780 LSN(meta) = *lsnp;
1781 meta->free = argp->next;
1782 if (argp->pgno > meta->last_pgno)
1783 meta->last_pgno = argp->pgno;
1784 } else if (cmp_n == 0 && DB_UNDO(op)) {
1785 goto no_rollback;
1786 }
1787
1788 /*
1789 * Fix up the allocated page. If the page does not exist
1790 * and we can truncate it then don't create it.
1791 * Otherwise if we're redoing the operation, we have
1792 * to get the page (creating it if it doesn't exist), and update its
1793 * LSN. If we're undoing the operation, we have to reset the page's
1794 * LSN and put it on the free list, or truncate it.
1795 */
1796 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
1797 /*
1798 * We have to be able to identify if a page was newly
1799 * created so we can recover it properly. We cannot simply
1800 * look for an empty header, because hash uses a pgin
1801 * function that will set the header. Instead, we explicitly
1802 * try for the page without CREATE and if that fails, then
1803 * create it.
1804 */
1805 if ((ret = __memp_fget(mpf, &argp->pgno,
1806 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
1807 if (DB_UNDO(op) && ret == ENOSPC)
1808 goto do_truncate;
1809 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1810 goto out;
1811 }
1812 created = 1;
1813 }
1814
1815 /* Fix up the allocated page. */
1816 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1817 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1818
1819 /*
1820 * If an initial allocation is aborted and then reallocated during
1821 * an archival restore the log record will have an LSN for the page
1822 * but the page will be empty.
1823 */
1824 if (IS_ZERO_LSN(LSN(pagep)) ||
1825 (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep))))
1826 cmp_p = 0;
1827
1828 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1829 /*
1830 * Another special case we have to handle is if we ended up with a
1831 * page of all 0's which can happen if we abort between allocating a
1832 * page in mpool and initializing it. In that case, even if we're
1833 * undoing, we need to re-initialize the page.
1834 */
1835 if (DB_REDO(op) && cmp_p == 0) {
1836 /* Need to redo update described. */
1837 switch (argp->ptype) {
1838 case P_LBTREE:
1839 case P_LRECNO:
1840 case P_LDUP:
1841 level = LEAFLEVEL;
1842 break;
1843 default:
1844 level = 0;
1845 break;
1846 }
1847 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1848 P_INIT(pagep, file_dbp->pgsize,
1849 argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
1850
1851 pagep->lsn = *lsnp;
1852 } else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
1853 /*
1854 * This is where we handle the case of a 0'd page (pagep->pgno
1855 * is equal to PGNO_INVALID).
1856 * Undo the allocation, reinitialize the page and
1857 * link its next pointer to the free list.
1858 */
1859 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1860 P_INIT(pagep, file_dbp->pgsize,
1861 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
1862
1863 pagep->lsn = argp->page_lsn;
1864 }
1865
1866 do_truncate:
1867 /*
1868 * We cannot undo things from 4.2 land, because we nolonger
1869 * have limbo processing.
1870 */
1871 if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
1872 IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
1873 no_rollback: __db_errx(env, DB_STR("0643",
1874 "Cannot replicate prepared transactions from master running release 4.2 "));
1875 ret = __env_panic(env, EINVAL);
1876 }
1877
1878 if (pagep != NULL &&
1879 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1880 goto out;
1881 pagep = NULL;
1882
1883 if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1884 goto out;
1885 meta = NULL;
1886
1887 done: *lsnp = argp->prev_lsn;
1888 ret = 0;
1889
1890 out: if (pagep != NULL)
1891 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1892 if (meta != NULL)
1893 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1894 REC_CLOSE;
1895 }
1896
1897 /*
1898 * __db_pg_free_recover_42_int --
1899 */
1900 static int
__db_pg_free_recover_42_int(env,ip,argp,file_dbp,lsnp,mpf,op,data)1901 __db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
1902 ENV *env;
1903 DB_THREAD_INFO *ip;
1904 __db_pg_freedata_42_args *argp;
1905 DB *file_dbp;
1906 DB_LSN *lsnp;
1907 DB_MPOOLFILE *mpf;
1908 db_recops op;
1909 int data;
1910 {
1911 DBMETA *meta;
1912 DB_LSN copy_lsn;
1913 PAGE *pagep, *prevp;
1914 int cmp_n, cmp_p, is_meta, ret;
1915
1916 meta = NULL;
1917 pagep = NULL;
1918 prevp = NULL;
1919
1920 /*
1921 * Get the "metapage". This will either be the metapage
1922 * or the previous page in the free list if we are doing
1923 * sorted allocations. If its a previous page then
1924 * we will not be truncating.
1925 */
1926 is_meta = argp->meta_pgno == PGNO_BASE_MD;
1927
1928 REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
1929
1930 if (argp->meta_pgno != PGNO_BASE_MD)
1931 prevp = (PAGE *)meta;
1932
1933 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
1934 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
1935 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
1936
1937 /*
1938 * Fix up the metadata page. If we're redoing or undoing the operation
1939 * we get the page and update its LSN, last and free pointer.
1940 */
1941 if (cmp_p == 0 && DB_REDO(op)) {
1942 /* Need to redo the deallocation. */
1943 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1944 if (prevp == NULL)
1945 meta->free = argp->pgno;
1946 else
1947 NEXT_PGNO(prevp) = argp->pgno;
1948 /*
1949 * If this was a compensating transaction and
1950 * we are a replica, then we never executed the
1951 * original allocation which incremented meta->free.
1952 */
1953 if (prevp == NULL && meta->last_pgno < meta->free)
1954 meta->last_pgno = meta->free;
1955 LSN(meta) = *lsnp;
1956 } else if (cmp_n == 0 && DB_UNDO(op)) {
1957 /* Need to undo the deallocation. */
1958 REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
1959 if (prevp == NULL)
1960 meta->free = argp->next;
1961 else
1962 NEXT_PGNO(prevp) = argp->next;
1963 LSN(meta) = argp->meta_lsn;
1964 if (prevp == NULL && meta->last_pgno < argp->pgno)
1965 meta->last_pgno = argp->pgno;
1966 }
1967
1968 check_meta:
1969 if (ret != 0 && is_meta) {
1970 /* The metadata page must always exist. */
1971 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
1972 goto out;
1973 }
1974
1975 /*
1976 * Get the freed page. If we support truncate then don't
1977 * create the page if we are going to free it. If we're
1978 * redoing the operation we get the page and explicitly discard
1979 * its contents, then update its LSN. If we're undoing the
1980 * operation, we get the page and restore its header.
1981 * If we don't support truncate, then we must create the page
1982 * and roll it back.
1983 */
1984 if ((ret = __memp_fget(mpf, &argp->pgno,
1985 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1986 goto out;
1987
1988 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
1989 cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
1990 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
1991
1992 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
1993 if (DB_REDO(op) &&
1994 (cmp_p == 0 ||
1995 (IS_ZERO_LSN(copy_lsn) &&
1996 LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
1997 /* Need to redo the deallocation. */
1998 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
1999 P_INIT(pagep, file_dbp->pgsize,
2000 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
2001 pagep->lsn = *lsnp;
2002 } else if (cmp_n == 0 && DB_UNDO(op)) {
2003 /* Need to reallocate the page. */
2004 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2005 memcpy(pagep, argp->header.data, argp->header.size);
2006 if (data)
2007 memcpy((u_int8_t*)pagep + HOFFSET(pagep),
2008 argp->data.data, argp->data.size);
2009 }
2010 if (pagep != NULL &&
2011 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2012 goto out;
2013
2014 pagep = NULL;
2015 if (meta != NULL &&
2016 (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
2017 goto out;
2018 meta = NULL;
2019
2020 ret = 0;
2021
2022 out: if (pagep != NULL)
2023 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2024 if (meta != NULL)
2025 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
2026
2027 return (ret);
2028 }
2029
2030 /*
2031 * __db_pg_free_42_recover --
2032 * Recovery function for pg_free.
2033 *
2034 * PUBLIC: int __db_pg_free_42_recover
2035 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2036 */
2037 int
__db_pg_free_42_recover(env,dbtp,lsnp,op,info)2038 __db_pg_free_42_recover(env, dbtp, lsnp, op, info)
2039 ENV *env;
2040 DBT *dbtp;
2041 DB_LSN *lsnp;
2042 db_recops op;
2043 void *info;
2044 {
2045 __db_pg_free_42_args *argp;
2046 DB *file_dbp;
2047 DBC *dbc;
2048 DB_MPOOLFILE *mpf;
2049 DB_THREAD_INFO *ip;
2050 int ret;
2051
2052 ip = ((DB_TXNHEAD *)info)->thread_info;
2053 REC_PRINT(__db_pg_free_42_print);
2054 REC_INTRO(__db_pg_free_42_read, ip, 0);
2055
2056 ret = __db_pg_free_recover_42_int(env, ip,
2057 (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0);
2058
2059 done: *lsnp = argp->prev_lsn;
2060 out:
2061 REC_CLOSE;
2062 }
2063
2064 /*
2065 * __db_pg_freedata_42_recover --
2066 * Recovery function for pg_freedata.
2067 *
2068 * PUBLIC: int __db_pg_freedata_42_recover
2069 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2070 */
2071 int
__db_pg_freedata_42_recover(env,dbtp,lsnp,op,info)2072 __db_pg_freedata_42_recover(env, dbtp, lsnp, op, info)
2073 ENV *env;
2074 DBT *dbtp;
2075 DB_LSN *lsnp;
2076 db_recops op;
2077 void *info;
2078 {
2079 __db_pg_freedata_42_args *argp;
2080 DB *file_dbp;
2081 DBC *dbc;
2082 DB_MPOOLFILE *mpf;
2083 DB_THREAD_INFO *ip;
2084 int ret;
2085
2086 ip = ((DB_TXNHEAD *)info)->thread_info;
2087 REC_PRINT(__db_pg_freedata_42_print);
2088 REC_INTRO(__db_pg_freedata_42_read, ip, 0);
2089
2090 ret = __db_pg_free_recover_42_int(
2091 env, ip, argp, file_dbp, lsnp, mpf, op, 1);
2092
2093 done: *lsnp = argp->prev_lsn;
2094 out:
2095 REC_CLOSE;
2096 }
2097
2098 /*
2099 * __db_relink_42_recover --
2100 * Recovery function for relink.
2101 *
2102 * PUBLIC: int __db_relink_42_recover
2103 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2104 */
2105 int
__db_relink_42_recover(env,dbtp,lsnp,op,info)2106 __db_relink_42_recover(env, dbtp, lsnp, op, info)
2107 ENV *env;
2108 DBT *dbtp;
2109 DB_LSN *lsnp;
2110 db_recops op;
2111 void *info;
2112 {
2113 __db_relink_42_args *argp;
2114 DB_THREAD_INFO *ip;
2115 DB *file_dbp;
2116 DBC *dbc;
2117 DB_MPOOLFILE *mpf;
2118 PAGE *pagep;
2119 int cmp_n, cmp_p, modified, ret;
2120
2121 ip = ((DB_TXNHEAD *)info)->thread_info;
2122 pagep = NULL;
2123 REC_PRINT(__db_relink_42_print);
2124 REC_INTRO(__db_relink_42_read, ip, 0);
2125
2126 /*
2127 * There are up to three pages we need to check -- the page, and the
2128 * previous and next pages, if they existed. For a page add operation,
2129 * the current page is the result of a split and is being recovered
2130 * elsewhere, so all we need do is recover the next page.
2131 */
2132 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
2133 if (DB_REDO(op)) {
2134 ret = __db_pgerr(file_dbp, argp->pgno, ret);
2135 goto out;
2136 }
2137 goto next2;
2138 }
2139 if (argp->opcode == DB_ADD_PAGE_COMPAT)
2140 goto next1;
2141
2142 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2143 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
2144 if (cmp_p == 0 && DB_REDO(op)) {
2145 /* Redo the relink. */
2146 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2147 pagep->lsn = *lsnp;
2148 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2149 /* Undo the relink. */
2150 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2151 pagep->next_pgno = argp->next;
2152 pagep->prev_pgno = argp->prev;
2153 pagep->lsn = argp->lsn;
2154 }
2155 next1: if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2156 goto out;
2157 pagep = NULL;
2158
2159 next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
2160 if (DB_REDO(op)) {
2161 ret = __db_pgerr(file_dbp, argp->next, ret);
2162 goto out;
2163 }
2164 goto prev;
2165 }
2166 modified = 0;
2167 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2168 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
2169 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
2170 if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) ||
2171 (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) {
2172 /* Redo the remove or undo the add. */
2173 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2174 pagep->prev_pgno = argp->prev;
2175 modified = 1;
2176 } else if ((argp->opcode == DB_REM_PAGE_COMPAT &&
2177 cmp_n == 0 && DB_UNDO(op)) ||
2178 (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) {
2179 /* Undo the remove or redo the add. */
2180 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2181 pagep->prev_pgno = argp->pgno;
2182 modified = 1;
2183 }
2184 if (modified) {
2185 if (DB_UNDO(op))
2186 pagep->lsn = argp->lsn_next;
2187 else
2188 pagep->lsn = *lsnp;
2189 }
2190 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2191 goto out;
2192 pagep = NULL;
2193 if (argp->opcode == DB_ADD_PAGE_COMPAT)
2194 goto done;
2195
2196 prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
2197 if (DB_REDO(op)) {
2198 ret = __db_pgerr(file_dbp, argp->prev, ret);
2199 goto out;
2200 }
2201 goto done;
2202 }
2203 modified = 0;
2204 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2205 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2206 if (cmp_p == 0 && DB_REDO(op)) {
2207 /* Redo the relink. */
2208 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2209 pagep->next_pgno = argp->next;
2210 modified = 1;
2211 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
2212 /* Undo the relink. */
2213 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2214 pagep->next_pgno = argp->pgno;
2215 modified = 1;
2216 }
2217 if (modified) {
2218 if (DB_UNDO(op))
2219 pagep->lsn = argp->lsn_prev;
2220 else
2221 pagep->lsn = *lsnp;
2222 }
2223 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2224 goto out;
2225 pagep = NULL;
2226
2227 done: *lsnp = argp->prev_lsn;
2228 ret = 0;
2229
2230 out: if (pagep != NULL)
2231 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2232 REC_CLOSE;
2233 }
2234
2235 /*
2236 * __db_relink_recover --
2237 * Recovery function for relink.
2238 *
2239 * PUBLIC: int __db_relink_recover
2240 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2241 */
2242 int
__db_relink_recover(env,dbtp,lsnp,op,info)2243 __db_relink_recover(env, dbtp, lsnp, op, info)
2244 ENV *env;
2245 DBT *dbtp;
2246 DB_LSN *lsnp;
2247 db_recops op;
2248 void *info;
2249 {
2250 __db_relink_args *argp;
2251 DB_THREAD_INFO *ip;
2252 DB *file_dbp;
2253 DBC *dbc;
2254 DB_MPOOLFILE *mpf;
2255 PAGE *pagep;
2256 int cmp_n, cmp_p, ret;
2257
2258 ip = ((DB_TXNHEAD *)info)->thread_info;
2259 pagep = NULL;
2260 REC_PRINT(__db_relink_print);
2261 REC_INTRO(__db_relink_read, ip, 0);
2262
2263 /*
2264 * There are up to three pages we need to check -- the page, and the
2265 * previous and next pages, if they existed. For a page add operation,
2266 * the current page is the result of a split and is being recovered
2267 * elsewhere, so all we need do is recover the next page.
2268 */
2269 if (argp->next_pgno == PGNO_INVALID)
2270 goto prev;
2271 if ((ret = __memp_fget(mpf,
2272 &argp->next_pgno, ip, NULL, 0, &pagep)) != 0) {
2273 if (ret != DB_PAGE_NOTFOUND) {
2274 ret = __db_pgerr(file_dbp, argp->next_pgno, ret);
2275 goto out;
2276 } else
2277 goto prev;
2278 }
2279
2280 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2281 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
2282 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
2283 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
2284 if (cmp_p == 0 && DB_REDO(op)) {
2285 /* Redo the remove or replace. */
2286 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2287 if (argp->new_pgno == PGNO_INVALID)
2288 pagep->prev_pgno = argp->prev_pgno;
2289 else
2290 pagep->prev_pgno = argp->new_pgno;
2291
2292 pagep->lsn = *lsnp;
2293 } else if (cmp_n == 0 && DB_UNDO(op)) {
2294 /* Undo the remove or replace. */
2295 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2296 pagep->prev_pgno = argp->pgno;
2297
2298 pagep->lsn = argp->lsn_next;
2299 }
2300
2301 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2302 goto out;
2303 pagep = NULL;
2304
2305 prev: if (argp->prev_pgno == PGNO_INVALID)
2306 goto done;
2307 if ((ret = __memp_fget(mpf,
2308 &argp->prev_pgno, ip, NULL, 0, &pagep)) != 0) {
2309 if (ret != DB_PAGE_NOTFOUND) {
2310 ret = __db_pgerr(file_dbp, argp->prev_pgno, ret);
2311 goto out;
2312 } else
2313 goto done;
2314 }
2315
2316 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2317 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
2318 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
2319 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
2320 if (cmp_p == 0 && DB_REDO(op)) {
2321 /* Redo the relink. */
2322 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2323 if (argp->new_pgno == PGNO_INVALID)
2324 pagep->next_pgno = argp->next_pgno;
2325 else
2326 pagep->next_pgno = argp->new_pgno;
2327
2328 pagep->lsn = *lsnp;
2329 } else if (cmp_n == 0 && DB_UNDO(op)) {
2330 /* Undo the relink. */
2331 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2332 pagep->next_pgno = argp->pgno;
2333 pagep->lsn = argp->lsn_prev;
2334 }
2335
2336 if ((ret = __memp_fput(mpf,
2337 ip, pagep, file_dbp->priority)) != 0)
2338 goto out;
2339 pagep = NULL;
2340
2341 done: *lsnp = argp->prev_lsn;
2342 ret = 0;
2343
2344 out: if (pagep != NULL)
2345 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
2346 REC_CLOSE;
2347 }
2348
2349 /*
2350 * __db_merge_recover --
2351 * Recovery function for merge.
2352 *
2353 * PUBLIC: int __db_merge_recover
2354 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2355 */
2356 int
__db_merge_recover(env,dbtp,lsnp,op,info)2357 __db_merge_recover(env, dbtp, lsnp, op, info)
2358 ENV *env;
2359 DBT *dbtp;
2360 DB_LSN *lsnp;
2361 db_recops op;
2362 void *info;
2363 {
2364 __db_merge_args *argp;
2365 BTREE *bt;
2366 DB_THREAD_INFO *ip;
2367 BKEYDATA *bk;
2368 DB *file_dbp;
2369 DBC *dbc;
2370 DB_LOCK handle_lock;
2371 DB_LOCKREQ request;
2372 DB_MPOOLFILE *mpf;
2373 HASH *ht;
2374 PAGE *pagep;
2375 db_indx_t indx, *ninp, *pinp;
2376 u_int32_t size;
2377 u_int8_t *bp;
2378 int cmp_n, cmp_p, i, ret, t_ret;
2379
2380 ip = ((DB_TXNHEAD *)info)->thread_info;
2381 REC_PRINT(__db_merge_print);
2382 REC_INTRO(__db_merge_read, ip, op != DB_TXN_APPLY);
2383
2384 /* Allocate our own cursor without DB_RECOVER as we need a locker. */
2385 if (op == DB_TXN_APPLY && (ret = __db_cursor_int(file_dbp, ip, NULL,
2386 DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0)
2387 goto out;
2388 F_SET(dbc, DBC_RECOVER);
2389
2390 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
2391 if (ret != DB_PAGE_NOTFOUND) {
2392 ret = __db_pgerr(file_dbp, argp->pgno, ret);
2393 goto out;
2394 } else
2395 goto next;
2396 }
2397
2398 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2399 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2400 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2401 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2402
2403 if (cmp_p == 0 && DB_REDO(op)) {
2404 /*
2405 * When pg_copy is set, we are copying onto a new page.
2406 */
2407 DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0);
2408 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2409 if (argp->pg_copy) {
2410 if (argp->data.size == 0) {
2411 memcpy(pagep, argp->hdr.data, argp->hdr.size);
2412 pagep->pgno = argp->pgno;
2413 goto do_lsn;
2414 }
2415 P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
2416 PREV_PGNO(argp->hdr.data),
2417 NEXT_PGNO(argp->hdr.data),
2418 LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
2419 }
2420 if (TYPE(pagep) == P_OVERFLOW) {
2421 OV_REF(pagep) = OV_REF(argp->hdr.data);
2422 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2423 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2424 memcpy(bp, argp->data.data, argp->data.size);
2425 } else {
2426 /* Copy the data segment. */
2427 bp = (u_int8_t *)pagep +
2428 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2429 memcpy(bp, argp->data.data, argp->data.size);
2430
2431 /* Copy index table offset past the current entries. */
2432 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2433 ninp = P_INP(file_dbp, argp->hdr.data);
2434 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2435 *pinp++ = *ninp++
2436 - (file_dbp->pgsize - HOFFSET(pagep));
2437 HOFFSET(pagep) -= argp->data.size;
2438 NUM_ENT(pagep) += i;
2439 }
2440 do_lsn: pagep->lsn = *lsnp;
2441 if (op == DB_TXN_APPLY) {
2442 /*
2443 * If applying to an active system we must bump
2444 * the revision number so that the db will get
2445 * reopened. We also need to move the handle
2446 * locks. Note that the dbp will not have a
2447 * locker in a replication client apply thread.
2448 */
2449 if (file_dbp->type == DB_HASH) {
2450 if (argp->npgno == file_dbp->meta_pgno)
2451 file_dbp->mpf->mfp->revision++;
2452 } else {
2453 bt = file_dbp->bt_internal;
2454 if (argp->npgno == bt->bt_meta ||
2455 argp->npgno == bt->bt_root)
2456 file_dbp->mpf->mfp->revision++;
2457 }
2458 if (argp->npgno == file_dbp->meta_pgno) {
2459 F_CLR(file_dbp, DB_AM_RECOVER);
2460 if ((ret = __fop_lock_handle(file_dbp->env,
2461 file_dbp, dbc->locker, DB_LOCK_READ,
2462 NULL, 0)) != 0)
2463 goto err;
2464 handle_lock = file_dbp->handle_lock;
2465
2466 file_dbp->meta_pgno = argp->pgno;
2467 if ((ret = __fop_lock_handle(file_dbp->env,
2468 file_dbp, dbc->locker, DB_LOCK_READ,
2469 NULL, 0)) != 0)
2470 goto err;
2471
2472 /* Move the other handles to the new lock. */
2473 ret = __lock_change(file_dbp->env,
2474 &handle_lock, &file_dbp->handle_lock);
2475
2476 err: memset(&request, 0, sizeof(request));
2477 request.op = DB_LOCK_PUT_ALL;
2478 if ((t_ret = __lock_vec(
2479 file_dbp->env, dbc->locker,
2480 0, &request, 1, NULL)) != 0 && ret == 0)
2481 ret = t_ret;
2482 F_SET(file_dbp, DB_AM_RECOVER);
2483 if (ret != 0)
2484 goto out;
2485 }
2486 }
2487
2488 } else if (cmp_n == 0 && !DB_REDO(op)) {
2489 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2490 if (TYPE(pagep) == P_OVERFLOW) {
2491 HOFFSET(pagep) = file_dbp->pgsize;
2492 goto setlsn;
2493 }
2494
2495 if (argp->pg_copy) {
2496 /* The page was empty when we started. */
2497 P_INIT(pagep, file_dbp->pgsize,
2498 pagep->pgno, PGNO_INVALID,
2499 PGNO_INVALID, 0, TYPE(argp->hdr.data));
2500 goto setlsn;
2501 }
2502
2503 /*
2504 * Since logging is logical at the page level we cannot just
2505 * truncate the data space. Delete the proper number of items
2506 * from the logical end of the page.
2507 */
2508 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) {
2509 indx = NUM_ENT(pagep) - 1;
2510 if (TYPE(pagep) == P_LBTREE && indx != 0 &&
2511 P_INP(file_dbp, pagep)[indx] ==
2512 P_INP(file_dbp, pagep)[indx - P_INDX]) {
2513 NUM_ENT(pagep)--;
2514 continue;
2515 }
2516 switch (TYPE(pagep)) {
2517 case P_LBTREE:
2518 case P_LRECNO:
2519 case P_LDUP:
2520 bk = GET_BKEYDATA(file_dbp, pagep, indx);
2521 size = BITEM_SIZE(bk);
2522 break;
2523
2524 case P_IBTREE:
2525 size = BINTERNAL_SIZE(
2526 GET_BINTERNAL(file_dbp, pagep, indx)->len);
2527 break;
2528 case P_IRECNO:
2529 size = RINTERNAL_SIZE;
2530 break;
2531 case P_HASH:
2532 size = LEN_HITEM(file_dbp,
2533 pagep, file_dbp->pgsize, indx);
2534 break;
2535 default:
2536 ret = __db_pgfmt(env, PGNO(pagep));
2537 goto out;
2538 }
2539 if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0)
2540 goto out;
2541 }
2542 setlsn: pagep->lsn = argp->lsn;
2543 }
2544
2545 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
2546 goto out;
2547
2548 next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, NULL, 0, &pagep)) != 0) {
2549 if (ret != DB_PAGE_NOTFOUND) {
2550 ret = __db_pgerr(file_dbp, argp->pgno, ret);
2551 goto out;
2552 } else
2553 goto done;
2554 }
2555
2556 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2557 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
2558 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
2559
2560 if (cmp_p == 0 && DB_REDO(op)) {
2561 /* Need to truncate the page. */
2562 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2563 HOFFSET(pagep) = file_dbp->pgsize;
2564 NUM_ENT(pagep) = 0;
2565 pagep->lsn = *lsnp;
2566 } else if (cmp_n == 0 && !DB_REDO(op)) {
2567 /* Need to put the data back on the page. */
2568 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
2569 if (TYPE(pagep) == P_OVERFLOW) {
2570 OV_REF(pagep) = OV_REF(argp->hdr.data);
2571 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2572 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2573 memcpy(bp, argp->data.data, argp->data.size);
2574 } else {
2575 bp = (u_int8_t *)pagep +
2576 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2577 memcpy(bp, argp->data.data, argp->data.size);
2578
2579 if (argp->pg_copy)
2580 memcpy(pagep, argp->hdr.data, argp->hdr.size);
2581 else {
2582 /* Copy index table. */
2583 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2584 ninp = P_INP(file_dbp, argp->hdr.data);
2585 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2586 *pinp++ = *ninp++;
2587 HOFFSET(pagep) -= argp->data.size;
2588 NUM_ENT(pagep) += i;
2589 }
2590 }
2591 pagep->lsn = argp->nlsn;
2592 if (op == DB_TXN_ABORT) {
2593 /*
2594 * If we are undoing a meta/root page move we must
2595 * bump the revision number. Put the handle
2596 * locks back to their original state if we
2597 * moved the metadata page.
2598 */
2599 i = 0;
2600 if (file_dbp->type == DB_HASH) {
2601 ht = file_dbp->h_internal;
2602 if (argp->pgno == ht->meta_pgno) {
2603 ht->meta_pgno = argp->npgno;
2604 file_dbp->mpf->mfp->revision++;
2605 i = 1;
2606 }
2607 } else {
2608 bt = file_dbp->bt_internal;
2609 if (argp->pgno == bt->bt_meta) {
2610 file_dbp->mpf->mfp->revision++;
2611 bt->bt_meta = argp->npgno;
2612 i = 1;
2613 } else if (argp->pgno == bt->bt_root) {
2614 file_dbp->mpf->mfp->revision++;
2615 bt->bt_root = argp->npgno;
2616 }
2617 }
2618 if (argp->pgno == file_dbp->meta_pgno)
2619 file_dbp->meta_pgno = argp->npgno;
2620
2621 /*
2622 * If we detected a metadata page above, move
2623 * the handle locks to the new page.
2624 */
2625 if (i == 1) {
2626 handle_lock = file_dbp->handle_lock;
2627 if ((ret = __fop_lock_handle(file_dbp->env,
2628 file_dbp, file_dbp->locker, DB_LOCK_READ,
2629 NULL, 0)) != 0)
2630 goto out;
2631
2632 /* Move the other handles to the new lock. */
2633 if ((ret = __lock_change(file_dbp->env,
2634 &handle_lock, &file_dbp->handle_lock)) != 0)
2635 goto out;
2636 }
2637 }
2638 }
2639
2640 if ((ret = __memp_fput(mpf,
2641 ip, pagep, dbc->priority)) != 0)
2642 goto out;
2643 done:
2644 *lsnp = argp->prev_lsn;
2645 ret = 0;
2646
2647 out: REC_CLOSE;
2648 }
2649
2650 /*
2651 * __db_pgno_recover --
2652 * Recovery function for page number replacment.
2653 *
2654 * PUBLIC: int __db_pgno_recover
2655 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2656 */
2657 int
__db_pgno_recover(env,dbtp,lsnp,op,info)2658 __db_pgno_recover(env, dbtp, lsnp, op, info)
2659 ENV *env;
2660 DBT *dbtp;
2661 DB_LSN *lsnp;
2662 db_recops op;
2663 void *info;
2664 {
2665 BINTERNAL *bi;
2666 __db_pgno_args *argp;
2667 DB_THREAD_INFO *ip;
2668 DB *file_dbp;
2669 DBC *dbc;
2670 DB_MPOOLFILE *mpf;
2671 PAGE *pagep, *npagep;
2672 db_pgno_t pgno, *pgnop;
2673 int cmp_n, cmp_p, ret;
2674
2675 ip = ((DB_TXNHEAD *)info)->thread_info;
2676 REC_PRINT(__db_pgno_print);
2677 REC_INTRO(__db_pgno_read, ip, 0);
2678
2679 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
2680
2681 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2682 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2683 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2684 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2685
2686 if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) {
2687 switch (TYPE(pagep)) {
2688 case P_IBTREE:
2689 /*
2690 * An internal record can have both a overflow
2691 * and child pointer. Fetch the page to see
2692 * which it is.
2693 */
2694 bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
2695 if (B_TYPE(bi->type) == B_OVERFLOW) {
2696 REC_FGET(mpf, ip, argp->npgno, &npagep, out);
2697
2698 if (TYPE(npagep) == P_OVERFLOW)
2699 pgnop =
2700 &((BOVERFLOW *)(bi->data))->pgno;
2701 else
2702 pgnop = &bi->pgno;
2703 if ((ret = __memp_fput(mpf, ip,
2704 npagep, file_dbp->priority)) != 0)
2705 goto out;
2706 break;
2707 }
2708 pgnop = &bi->pgno;
2709 break;
2710 case P_IRECNO:
2711 pgnop =
2712 &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno;
2713 break;
2714 case P_HASH:
2715 pgnop = &pgno;
2716 break;
2717 default:
2718 pgnop =
2719 &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno;
2720 break;
2721 }
2722
2723 if (DB_REDO(op)) {
2724 /* Need to redo update described. */
2725 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2726 *pgnop = argp->npgno;
2727 pagep->lsn = *lsnp;
2728 } else {
2729 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
2730 *pgnop = argp->opgno;
2731 pagep->lsn = argp->lsn;
2732 }
2733 if (TYPE(pagep) == P_HASH)
2734 memcpy(HOFFDUP_PGNO(P_ENTRY(file_dbp,
2735 pagep, argp->indx)), pgnop, sizeof(db_pgno_t));
2736 }
2737
2738 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2739 goto out;
2740
2741 done:
2742 *lsnp = argp->prev_lsn;
2743 ret = 0;
2744
2745 out: REC_CLOSE;
2746 }
2747
2748 /*
2749 * __db_pglist_swap -- swap a list of freelist pages.
2750 * PUBLIC: void __db_pglist_swap __P((u_int32_t, void *));
2751 */
2752 void
__db_pglist_swap(size,list)2753 __db_pglist_swap(size, list)
2754 u_int32_t size;
2755 void *list;
2756 {
2757 db_pglist_t *lp;
2758 u_int32_t nelem;
2759
2760 nelem = size / sizeof(db_pglist_t);
2761
2762 lp = (db_pglist_t *)list;
2763 while (nelem-- > 0) {
2764 P_32_SWAP(&lp->pgno);
2765 P_32_SWAP(&lp->lsn.file);
2766 P_32_SWAP(&lp->lsn.offset);
2767 lp++;
2768 }
2769 }
2770
2771 /*
2772 * __db_pglist_print -- print a list of freelist pages.
2773 * PUBLIC: void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *));
2774 */
2775 void
__db_pglist_print(env,mbp,list)2776 __db_pglist_print(env, mbp, list)
2777 ENV *env;
2778 DB_MSGBUF *mbp;
2779 DBT *list;
2780 {
2781 db_pglist_t *lp;
2782 u_int32_t nelem;
2783
2784 nelem = list->size / sizeof(db_pglist_t);
2785 lp = (db_pglist_t *)list->data;
2786 __db_msgadd(env, mbp, "\t");
2787 while (nelem-- > 0) {
2788 __db_msgadd(env, mbp, "%lu [%lu][%lu]", (u_long)lp->pgno,
2789 (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
2790 if (nelem % 4 == 0)
2791 __db_msgadd(env, mbp, "\n\t");
2792 else
2793 __db_msgadd(env, mbp, " ");
2794 lp++;
2795 }
2796 }
2797