1 /*-
2 * Copyright (c) 1996, 2020 Oracle and/or its affiliates. All rights reserved.
3 *
4 * See the file LICENSE for license information.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/log.h"
14 #include "dbinc/mp.h"
15 #include "dbinc/lock.h"
16 #include "dbinc/fop.h"
17 #include "dbinc/btree.h"
18 #include "dbinc/hash.h"
19
20 static int __db_pg_free_recover_int
21 __P((ENV *, DB_TXNHEAD *, __db_pg_freedata_args *,
22 DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
23
24 /*
25 * PUBLIC: int __db_addrem_recover
26 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
27 *
28 * This log message is generated whenever we add or remove a duplicate
29 * to/from a duplicate page. On recover, we just do the opposite.
30 */
31 int
__db_addrem_recover(env,dbtp,lsnp,op,info)32 __db_addrem_recover(env, dbtp, lsnp, op, info)
33 ENV *env;
34 DBT *dbtp;
35 DB_LSN *lsnp;
36 db_recops op;
37 void *info;
38 {
39 __db_addrem_args *argp;
40 DB *file_dbp;
41 DBC *dbc;
42 DB_MPOOLFILE *mpf;
43 DB_THREAD_INFO *ip;
44 DB_TXNHEAD *txnhead;
45 PAGE *pagep;
46 int cmp_n, cmp_p, modified, ret;
47 u_int32_t opcode;
48
49 txnhead = info;
50 ip = txnhead->thread_info;
51 pagep = NULL;
52 REC_PRINT(__db_addrem_print);
53 REC_INTRO(__db_addrem_read, txnhead, 1);
54
55 REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
56 modified = 0;
57
58 opcode = OP_MODE_GET(argp->opcode);
59 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
60 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
61 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
62 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
63 if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_DUP) ||
64 (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_DUP)) {
65 /* Need to redo an add, or undo a delete. */
66 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
67 if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
68 argp->hdr.size == 0 ? NULL : &argp->hdr,
69 argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
70 goto out;
71 modified = 1;
72
73 } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_DUP) ||
74 (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_DUP)) {
75 /* Need to undo an add, or redo a delete. */
76 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
77 if ((ret = __db_ditem(dbc,
78 pagep, argp->indx, argp->nbytes)) != 0)
79 goto out;
80 modified = 1;
81 }
82
83 if (modified) {
84 if (DB_REDO(op))
85 LSN(pagep) = *lsnp;
86 else
87 LSN(pagep) = argp->pagelsn;
88 }
89
90 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
91 goto out;
92 pagep = NULL;
93
94 done: *lsnp = argp->prev_lsn;
95 ret = 0;
96
97 out: if (pagep != NULL)
98 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
99 REC_CLOSE;
100 }
101
102 /*
103 * PUBLIC: int __db_addrem_42_recover
104 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
105 *
106 * This log message is generated whenever we add or remove a duplicate
107 * to/from a duplicate page. On recover, we just do the opposite.
108 */
109 int
__db_addrem_42_recover(env,dbtp,lsnp,op,info)110 __db_addrem_42_recover(env, dbtp, lsnp, op, info)
111 ENV *env;
112 DBT *dbtp;
113 DB_LSN *lsnp;
114 db_recops op;
115 void *info;
116 {
117 __db_addrem_42_args *argp;
118 DB *file_dbp;
119 DBC *dbc;
120 DB_MPOOLFILE *mpf;
121 DB_THREAD_INFO *ip;
122 DB_TXNHEAD *txnhead;
123 PAGE *pagep;
124 int cmp_n, cmp_p, modified, ret;
125
126 txnhead = info;
127 ip = txnhead->thread_info;
128 pagep = NULL;
129 REC_PRINT(__db_addrem_print);
130 REC_INTRO(__db_addrem_42_read, txnhead, 1);
131
132 REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
133 modified = 0;
134
135 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
136 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
137 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
138 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
139 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) ||
140 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) {
141 /* Need to redo an add, or undo a delete. */
142 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
143 if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
144 argp->hdr.size == 0 ? NULL : &argp->hdr,
145 argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
146 goto out;
147 modified = 1;
148
149 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) ||
150 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) {
151 /* Need to undo an add, or redo a delete. */
152 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
153 if ((ret = __db_ditem(dbc,
154 pagep, argp->indx, argp->nbytes)) != 0)
155 goto out;
156 modified = 1;
157 }
158
159 if (modified) {
160 if (DB_REDO(op))
161 LSN(pagep) = *lsnp;
162 else
163 LSN(pagep) = argp->pagelsn;
164 }
165
166 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
167 goto out;
168 pagep = NULL;
169
170 done: *lsnp = argp->prev_lsn;
171 ret = 0;
172
173 out: if (pagep != NULL)
174 (void)__memp_fput(mpf, ip, pagep, dbc->priority);
175 REC_CLOSE;
176 }
177
178 /*
179 * PUBLIC: int __db_big_recover
180 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
181 */
182 int
__db_big_recover(env,dbtp,lsnp,op,info)183 __db_big_recover(env, dbtp, lsnp, op, info)
184 ENV *env;
185 DBT *dbtp;
186 DB_LSN *lsnp;
187 db_recops op;
188 void *info;
189 {
190 __db_big_args *argp;
191 DB *file_dbp;
192 DBC *dbc;
193 DB_MPOOLFILE *mpf;
194 DB_THREAD_INFO *ip;
195 DB_TXNHEAD *txnhead;
196 PAGE *pagep;
197 int cmp_n, cmp_p, modified, ret;
198 u_int32_t opcode;
199
200 txnhead = info;
201 ip = txnhead->thread_info;
202 pagep = NULL;
203 REC_PRINT(__db_big_print);
204 REC_INTRO(__db_big_read, txnhead, 0);
205
206 opcode = OP_MODE_GET(argp->opcode);
207 REC_FGET(mpf, txnhead, argp->pgno, &pagep, ppage);
208 modified = 0;
209
210 /*
211 * There are three pages we need to check. The one on which we are
212 * adding data, the previous one whose next_pointer may have
213 * been updated, and the next one whose prev_pointer may have
214 * been updated.
215 */
216 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
217 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
218 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
219 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
220 if ((cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) ||
221 (cmp_n == 0 && DB_UNDO(op) && opcode == DB_REM_BIG)) {
222 /* We are either redo-ing an add, or undoing a delete. */
223 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
224 P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
225 argp->next_pgno, 0, P_OVERFLOW);
226 OV_LEN(pagep) = argp->dbt.size;
227 OV_REF(pagep) = 1;
228 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
229 argp->dbt.size);
230 PREV_PGNO(pagep) = argp->prev_pgno;
231 modified = 1;
232 } else if ((cmp_n == 0 && DB_UNDO(op) && opcode == DB_ADD_BIG) ||
233 (cmp_p == 0 && DB_REDO(op) && opcode == DB_REM_BIG)) {
234 /*
235 * We are either undo-ing an add or redo-ing a delete.
236 * The page is about to be reclaimed in either case, so
237 * there really isn't anything to do here.
238 */
239 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
240 modified = 1;
241 } else if (cmp_p == 0 && DB_REDO(op) && opcode == DB_APPEND_BIG) {
242 /* We are redoing an append. */
243 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
244 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
245 OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
246 OV_LEN(pagep) += argp->dbt.size;
247 modified = 1;
248 } else if (cmp_n == 0 && DB_UNDO(op) && opcode == DB_APPEND_BIG) {
249 /* We are undoing an append. */
250 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
251 OV_LEN(pagep) -= argp->dbt.size;
252 memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
253 OV_LEN(pagep), 0, argp->dbt.size);
254 modified = 1;
255 }
256 if (modified)
257 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
258
259 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
260 pagep = NULL;
261 if (ret != 0)
262 goto out;
263
264 /*
265 * We only delete a whole chain of overflow items, and appends only
266 * apply to a single page. Adding a page is the only case that
267 * needs to update the chain.
268 */
269 ppage: if (opcode != DB_ADD_BIG)
270 goto done;
271
272 /* Now check the previous page. */
273 if (argp->prev_pgno != PGNO_INVALID) {
274 REC_FGET(mpf, txnhead, argp->prev_pgno, &pagep, npage);
275 modified = 0;
276
277 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
278 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
279 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
280 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
281
282 if (cmp_p == 0 && DB_REDO(op) && opcode == DB_ADD_BIG) {
283 /* Redo add, undo delete. */
284 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
285 NEXT_PGNO(pagep) = argp->pgno;
286 modified = 1;
287 } else if (cmp_n == 0 &&
288 DB_UNDO(op) && opcode == DB_ADD_BIG) {
289 /* Redo delete, undo add. */
290 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
291 NEXT_PGNO(pagep) = argp->next_pgno;
292 modified = 1;
293 }
294 if (modified)
295 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
296 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
297 pagep = NULL;
298 if (ret != 0)
299 goto out;
300 }
301 pagep = NULL;
302
303 /* Now check the next page. Can only be set on a delete. */
304 npage: if (argp->next_pgno != PGNO_INVALID) {
305 REC_FGET(mpf, txnhead, argp->next_pgno, &pagep, done);
306 modified = 0;
307
308 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
309 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
310 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
311 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
312 if (cmp_p == 0 && DB_REDO(op)) {
313 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
314 PREV_PGNO(pagep) = PGNO_INVALID;
315 modified = 1;
316 } else if (cmp_n == 0 && DB_UNDO(op)) {
317 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
318 PREV_PGNO(pagep) = argp->pgno;
319 modified = 1;
320 }
321 if (modified)
322 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
323 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
324 pagep = NULL;
325 if (ret != 0)
326 goto out;
327 }
328 pagep = NULL;
329
330 done: *lsnp = argp->prev_lsn;
331 ret = 0;
332
333 out: if (pagep != NULL)
334 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
335 REC_CLOSE;
336 }
337
338 /*
339 * PUBLIC: int __db_big_42_recover
340 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
341 */
342 int
__db_big_42_recover(env,dbtp,lsnp,op,info)343 __db_big_42_recover(env, dbtp, lsnp, op, info)
344 ENV *env;
345 DBT *dbtp;
346 DB_LSN *lsnp;
347 db_recops op;
348 void *info;
349 {
350 __db_big_42_args *argp;
351 DB *file_dbp;
352 DBC *dbc;
353 DB_MPOOLFILE *mpf;
354 DB_THREAD_INFO *ip;
355 DB_TXNHEAD *txnhead;
356 PAGE *pagep;
357 int cmp_n, cmp_p, modified, ret;
358
359 txnhead = info;
360 ip = txnhead->thread_info;
361 pagep = NULL;
362 REC_PRINT(__db_big_print);
363 REC_INTRO(__db_big_42_read, txnhead, 0);
364
365 REC_FGET(mpf, txnhead, argp->pgno, &pagep, ppage);
366 modified = 0;
367
368 /*
369 * There are three pages we need to check. The one on which we are
370 * adding data, the previous one whose next_pointer may have
371 * been updated, and the next one whose prev_pointer may have
372 * been updated.
373 */
374 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
375 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
376 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
377 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
378 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) ||
379 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) {
380 /* We are either redo-ing an add, or undoing a delete. */
381 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
382 P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
383 argp->next_pgno, 0, P_OVERFLOW);
384 OV_LEN(pagep) = argp->dbt.size;
385 OV_REF(pagep) = 1;
386 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
387 argp->dbt.size);
388 PREV_PGNO(pagep) = argp->prev_pgno;
389 modified = 1;
390 } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) ||
391 (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) {
392 /*
393 * We are either undo-ing an add or redo-ing a delete.
394 * The page is about to be reclaimed in either case, so
395 * there really isn't anything to do here.
396 */
397 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
398 modified = 1;
399 } else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) {
400 /* We are redoing an append. */
401 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
402 memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
403 OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
404 OV_LEN(pagep) += argp->dbt.size;
405 modified = 1;
406 } else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) {
407 /* We are undoing an append. */
408 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
409 OV_LEN(pagep) -= argp->dbt.size;
410 memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
411 OV_LEN(pagep), 0, argp->dbt.size);
412 modified = 1;
413 }
414 if (modified)
415 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
416
417 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
418 pagep = NULL;
419 if (ret != 0)
420 goto out;
421
422 /*
423 * We only delete a whole chain of overflow items, and appends only
424 * apply to a single page. Adding a page is the only case that
425 * needs to update the chain.
426 */
427 ppage: if (argp->opcode != DB_ADD_BIG)
428 goto done;
429
430 /* Now check the previous page. */
431 if (argp->prev_pgno != PGNO_INVALID) {
432 REC_FGET(mpf, txnhead, argp->prev_pgno, &pagep, npage);
433 modified = 0;
434
435 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
436 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
437 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
438 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
439
440 if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) {
441 /* Redo add, undo delete. */
442 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
443 NEXT_PGNO(pagep) = argp->pgno;
444 modified = 1;
445 } else if (cmp_n == 0 &&
446 DB_UNDO(op) && argp->opcode == DB_ADD_BIG) {
447 /* Redo delete, undo add. */
448 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
449 NEXT_PGNO(pagep) = argp->next_pgno;
450 modified = 1;
451 }
452 if (modified)
453 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
454 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
455 pagep = NULL;
456 if (ret != 0)
457 goto out;
458 }
459 pagep = NULL;
460
461 /* Now check the next page. Can only be set on a delete. */
462 npage: if (argp->next_pgno != PGNO_INVALID) {
463 REC_FGET(mpf, txnhead, argp->next_pgno, &pagep, done);
464 modified = 0;
465
466 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
467 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
468 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
469 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
470 if (cmp_p == 0 && DB_REDO(op)) {
471 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
472 PREV_PGNO(pagep) = PGNO_INVALID;
473 modified = 1;
474 } else if (cmp_n == 0 && DB_UNDO(op)) {
475 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
476 PREV_PGNO(pagep) = argp->pgno;
477 modified = 1;
478 }
479 if (modified)
480 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
481 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
482 pagep = NULL;
483 if (ret != 0)
484 goto out;
485 }
486 pagep = NULL;
487
488 done: *lsnp = argp->prev_lsn;
489 ret = 0;
490
491 out: if (pagep != NULL)
492 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
493 REC_CLOSE;
494 }
495 /*
496 * __db_ovref_recover --
497 * Recovery function for __db_ovref().
498 *
499 * PUBLIC: int __db_ovref_recover
500 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
501 */
502 int
__db_ovref_recover(env,dbtp,lsnp,op,info)503 __db_ovref_recover(env, dbtp, lsnp, op, info)
504 ENV *env;
505 DBT *dbtp;
506 DB_LSN *lsnp;
507 db_recops op;
508 void *info;
509 {
510 __db_ovref_args *argp;
511 DB *file_dbp;
512 DBC *dbc;
513 DB_MPOOLFILE *mpf;
514 DB_THREAD_INFO *ip;
515 DB_TXNHEAD *txnhead;
516 PAGE *pagep;
517 int cmp, ret;
518
519 txnhead = info;
520 ip = txnhead->thread_info;
521 pagep = NULL;
522 REC_PRINT(__db_ovref_print);
523 REC_INTRO(__db_ovref_read, txnhead, 0);
524
525 REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
526
527 cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
528 CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
529 if (cmp == 0 && DB_REDO(op)) {
530 /* Need to redo update described. */
531 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
532 OV_REF(pagep) += argp->adjust;
533 pagep->lsn = *lsnp;
534 } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
535 /* Need to undo update described. */
536 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
537 OV_REF(pagep) -= argp->adjust;
538 pagep->lsn = argp->lsn;
539 }
540 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
541 pagep = NULL;
542 if (ret != 0)
543 goto out;
544 pagep = NULL;
545
546 done: *lsnp = argp->prev_lsn;
547 ret = 0;
548
549 out: if (pagep != NULL)
550 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
551 REC_CLOSE;
552 }
553
554 /*
555 * __db_debug_recover --
556 * Recovery function for debug.
557 *
558 * PUBLIC: int __db_debug_recover __P((ENV *,
559 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
560 */
561 int
__db_debug_recover(env,dbtp,lsnp,op,info)562 __db_debug_recover(env, dbtp, lsnp, op, info)
563 ENV *env;
564 DBT *dbtp;
565 DB_LSN *lsnp;
566 db_recops op;
567 void *info;
568 {
569 __db_debug_args *argp;
570 int ret;
571
572 REC_PRINT(__db_debug_print);
573 REC_NOOP_INTRO(__db_debug_read);
574
575 *lsnp = argp->prev_lsn;
576 ret = 0;
577
578 COMPQUIET(op, DB_TXN_ABORT);
579 COMPQUIET(info, NULL);
580 REC_NOOP_CLOSE;
581 }
582
583 /*
584 * __db_noop_recover --
585 * Recovery function for noop.
586 *
587 * PUBLIC: int __db_noop_recover __P((ENV *,
588 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
589 */
590 int
__db_noop_recover(env,dbtp,lsnp,op,info)591 __db_noop_recover(env, dbtp, lsnp, op, info)
592 ENV *env;
593 DBT *dbtp;
594 DB_LSN *lsnp;
595 db_recops op;
596 void *info;
597 {
598 __db_noop_args *argp;
599 DB *file_dbp;
600 DBC *dbc;
601 DB_MPOOLFILE *mpf;
602 DB_THREAD_INFO *ip;
603 DB_TXNHEAD *txnhead;
604 PAGE *pagep;
605 int cmp_n, cmp_p, ret;
606
607 txnhead = info;
608 ip = txnhead->thread_info;
609 pagep = NULL;
610 REC_PRINT(__db_noop_print);
611 REC_INTRO(__db_noop_read, txnhead, 0);
612
613 REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
614
615 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
616 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
617 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
618 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
619 if (cmp_p == 0 && DB_REDO(op)) {
620 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
621 LSN(pagep) = *lsnp;
622 } else if (cmp_n == 0 && DB_UNDO(op)) {
623 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
624 LSN(pagep) = argp->prevlsn;
625 }
626 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
627 pagep = NULL;
628
629 done: *lsnp = argp->prev_lsn;
630 out: if (pagep != NULL)
631 (void)__memp_fput(mpf,
632 ip, pagep, file_dbp->priority);
633 REC_CLOSE;
634 }
635
636 /*
637 * __db_pg_alloc_recover --
638 * Recovery function for pg_alloc.
639 *
640 * PUBLIC: int __db_pg_alloc_recover
641 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
642 */
643 int
__db_pg_alloc_recover(env,dbtp,lsnp,op,info)644 __db_pg_alloc_recover(env, dbtp, lsnp, op, info)
645 ENV *env;
646 DBT *dbtp;
647 DB_LSN *lsnp;
648 db_recops op;
649 void *info;
650 {
651 __db_pg_alloc_args *argp;
652 DB *file_dbp;
653 DBC *dbc;
654 DBMETA *meta;
655 DB_MPOOLFILE *mpf;
656 DB_THREAD_INFO *ip;
657 DB_TXN *txn;
658 DB_TXNHEAD *txnhead;
659 PAGE *pagep;
660 db_pgno_t pgno;
661 int cmp_n, cmp_p, created, level, ret;
662
663 txnhead = info;
664 ip = txnhead->thread_info;
665 txn = txnhead->txn;
666 meta = NULL;
667 pagep = NULL;
668 created = 0;
669 REC_PRINT(__db_pg_alloc_print);
670 REC_INTRO(__db_pg_alloc_read, txnhead, 0);
671
672 /*
673 * Fix up the metadata page. If we're redoing the operation, we have
674 * to get the metadata page and update its LSN and its free pointer.
675 * If we're undoing the operation and the page was ever created, we put
676 * it on the freelist.
677 */
678 pgno = PGNO_BASE_MD;
679 if ((ret = __memp_fget(mpf, &pgno, ip, txn, 0, &meta)) != 0) {
680 /* The metadata page must always exist on redo. */
681 if (DB_REDO(op)) {
682 ret = __db_pgerr(file_dbp, pgno, ret);
683 goto out;
684 } else
685 goto done;
686 }
687 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
688 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
689 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
690 CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
691 if (cmp_p == 0 && DB_REDO(op)) {
692 /* Need to redo update described. */
693 REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
694 LSN(meta) = *lsnp;
695 meta->free = argp->next;
696 if (argp->pgno > meta->last_pgno)
697 meta->last_pgno = argp->pgno;
698 } else if (cmp_n == 0 && DB_UNDO(op)) {
699 /* Need to undo update described. */
700 REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
701 LSN(meta) = argp->meta_lsn;
702 /*
703 * If the page has a zero LSN then its newly created and
704 * will be truncated rather than go on the free list.
705 */
706 if (!IS_ZERO_LSN(argp->page_lsn))
707 meta->free = argp->pgno;
708 meta->last_pgno = argp->last_pgno;
709 }
710
711 #ifdef HAVE_FTRUNCATE
712 /*
713 * check to see if we are keeping a sorted freelist, if so put
714 * this back in the in memory list. It must be the first element.
715 */
716 if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
717 db_pgno_t *list;
718 u_int32_t nelem;
719
720 if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
721 goto out;
722 if (list != NULL && (nelem == 0 || *list != argp->pgno)) {
723 if ((ret =
724 __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
725 goto out;
726 if (nelem != 0)
727 memmove(list + 1, list, nelem * sizeof(*list));
728 *list = argp->pgno;
729 }
730 }
731 #endif
732
733 /*
734 * Fix up the allocated page. If the page does not exist
735 * and we can truncate it then don't create it.
736 * Otherwise if we're redoing the operation, we have
737 * to get the page (creating it if it doesn't exist), and update its
738 * LSN. If we're undoing the operation, we have to reset the page's
739 * LSN and put it on the free list.
740 */
741 if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
742 /*
743 * We have to be able to identify if a page was newly
744 * created so we can recover it properly. We cannot simply
745 * look for an empty header, because hash uses a pgin
746 * function that will set the header. Instead, we explicitly
747 * try for the page without CREATE and if that fails, then
748 * create it.
749 */
750 if (DB_UNDO(op))
751 goto do_truncate;
752 if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn,
753 DB_MPOOL_CREATE, &pagep)) != 0) {
754 if (DB_UNDO(op) && ret == ENOSPC)
755 goto do_truncate;
756 ret = __db_pgerr(file_dbp, argp->pgno, ret);
757 goto out;
758 }
759 created = 1;
760 }
761
762 /* Fix up the allocated page. */
763 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
764 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
765
766 /*
767 * If an initial allocation is aborted and then reallocated during
768 * an archival restore the log record will have an LSN for the page
769 * but the page will be empty.
770 */
771 if (IS_ZERO_LSN(LSN(pagep)))
772 cmp_p = 0;
773
774 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
775 /*
776 * Another special case we have to handle is if we ended up with a
777 * page of all 0's which can happen if we abort between allocating a
778 * page in mpool and initializing it. In that case, even if we're
779 * undoing, we need to re-initialize the page.
780 */
781 if (DB_REDO(op) && cmp_p == 0) {
782 /* Need to redo update described. */
783 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
784 switch (argp->ptype) {
785 case P_LBTREE:
786 case P_LRECNO:
787 case P_LDUP:
788 level = LEAFLEVEL;
789 break;
790 default:
791 level = 0;
792 break;
793 }
794 P_INIT(pagep, file_dbp->pgsize,
795 argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
796
797 pagep->lsn = *lsnp;
798 } else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
799 /*
800 * This is where we handle the case of a 0'd page (pagep->pgno
801 * is equal to PGNO_INVALID).
802 * Undo the allocation, reinitialize the page and
803 * link its next pointer to the free list.
804 */
805 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
806 P_INIT(pagep, file_dbp->pgsize,
807 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
808
809 pagep->lsn = argp->page_lsn;
810 }
811
812 do_truncate:
813 /*
814 * If the page was newly created, give it back.
815 */
816 if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
817 IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
818 /* Discard the page. */
819 if (pagep != NULL) {
820 if ((ret = __memp_fput(mpf, ip,
821 pagep, DB_PRIORITY_VERY_LOW)) != 0)
822 goto out;
823 pagep = NULL;
824 }
825 /* Give the page back to the OS. */
826 if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate(
827 mpf, txn, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0)
828 goto out;
829 }
830
831 if (pagep != NULL) {
832 ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
833 pagep = NULL;
834 if (ret != 0)
835 goto out;
836 }
837
838 ret = __memp_fput(mpf, ip, meta, file_dbp->priority);
839 meta = NULL;
840 if (ret != 0)
841 goto out;
842
843 done: *lsnp = argp->prev_lsn;
844 ret = 0;
845
846 out: if (pagep != NULL)
847 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
848 if (meta != NULL)
849 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
850 REC_CLOSE;
851 }
852
853 /*
854 * __db_pg_free_recover_int --
855 */
856 static int
__db_pg_free_recover_int(env,txnhead,argp,file_dbp,lsnp,mpf,op,data)857 __db_pg_free_recover_int(env, txnhead, argp, file_dbp, lsnp, mpf, op, data)
858 ENV *env;
859 DB_TXNHEAD *txnhead;
860 __db_pg_freedata_args *argp;
861 DB *file_dbp;
862 DB_LSN *lsnp;
863 DB_MPOOLFILE *mpf;
864 db_recops op;
865 int data;
866 {
867 DBMETA *meta;
868 DB_LSN copy_lsn;
869 DB_THREAD_INFO *ip;
870 DB_TXN *txn;
871 PAGE *pagep, *prevp;
872 int cmp_n, cmp_p, is_meta, ret;
873
874 ip = txnhead->thread_info;
875 txn = txnhead->txn;
876 meta = NULL;
877 pagep = prevp = NULL;
878
879 /*
880 * Get the "metapage". This will either be the metapage
881 * or the previous page in the free list if we are doing
882 * sorted allocations. If its a previous page then
883 * we will not be truncating.
884 */
885 is_meta = argp->meta_pgno == PGNO_BASE_MD;
886
887 REC_FGET(mpf, txnhead, argp->meta_pgno, &meta, check_meta);
888
889 if (argp->meta_pgno != PGNO_BASE_MD)
890 prevp = (PAGE *)meta;
891
892 cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
893 cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
894 CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
895 CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
896
897 /*
898 * Fix up the metadata page. If we're redoing or undoing the operation
899 * we get the page and update its LSN, last and free pointer.
900 */
901 if (cmp_p == 0 && DB_REDO(op)) {
902 REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
903 /*
904 * If we are at the end of the file truncate, otherwise
905 * put on the free list.
906 */
907 #ifdef HAVE_FTRUNCATE
908 if (argp->pgno == argp->last_pgno)
909 meta->last_pgno = argp->pgno - 1;
910 else
911 #endif
912 if (is_meta)
913 meta->free = argp->pgno;
914 else
915 NEXT_PGNO(prevp) = argp->pgno;
916 LSN(meta) = *lsnp;
917 } else if (cmp_n == 0 && DB_UNDO(op)) {
918 /* Need to undo the deallocation. */
919 REC_DIRTY(mpf, txnhead, file_dbp->priority, &meta);
920 if (is_meta) {
921 if (meta->last_pgno < argp->pgno)
922 meta->last_pgno = argp->pgno;
923 meta->free = argp->next;
924 } else
925 NEXT_PGNO(prevp) = argp->next;
926 LSN(meta) = argp->meta_lsn;
927 }
928
929 check_meta:
930 if (ret != 0 && is_meta) {
931 /* The metadata page must always exist. */
932 ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
933 goto out;
934 }
935
936 /*
937 * Get the freed page. Don't create the page if we are going to
938 * free it. If we're redoing the operation we get the page and
939 * explicitly discard its contents, then update its LSN. If we're
940 * undoing the operation, we get the page and restore its header.
941 */
942 if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
943 if ((ret = __memp_fget(mpf, &argp->pgno,
944 ip, txn, 0, &pagep)) != 0) {
945 if (ret != DB_PAGE_NOTFOUND)
946 goto out;
947 #ifdef HAVE_FTRUNCATE
948 if (is_meta &&
949 DB_REDO(op) && meta->last_pgno <= argp->pgno)
950 goto trunc;
951 #endif
952 goto done;
953 }
954 } else if ((ret = __memp_fget(mpf, &argp->pgno,
955 ip, txn, DB_MPOOL_CREATE, &pagep)) != 0)
956 goto out;
957
958 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
959 cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
960 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
961
962 /*
963 * This page got extended by a later allocation,
964 * but its allocation was not in the scope of this
965 * recovery pass.
966 */
967 if (IS_ZERO_LSN(LSN(pagep)))
968 cmp_p = 0;
969
970 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
971 /*
972 * We need to check that the page could have the current LSN
973 * which was copied before it was truncated in addition to
974 * the usual of having the previous LSN.
975 */
976 if (DB_REDO(op) &&
977 (cmp_p == 0 || cmp_n == 0 ||
978 (IS_ZERO_LSN(copy_lsn) &&
979 LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
980 /* Need to redo the deallocation. */
981 /*
982 * The page can be truncated if it was truncated at runtime
983 * and the current metapage reflects the truncation.
984 */
985 #ifdef HAVE_FTRUNCATE
986 if (is_meta && meta->last_pgno <= argp->pgno &&
987 argp->last_pgno <= argp->pgno) {
988 if ((ret = __memp_fput(mpf, ip,
989 pagep, DB_PRIORITY_VERY_LOW)) != 0)
990 goto out;
991 pagep = NULL;
992 trunc: if ((ret = __memp_ftruncate(mpf, txn, ip,
993 argp->pgno, MP_TRUNC_RECOVER)) != 0)
994 goto out;
995 } else if (argp->last_pgno == argp->pgno) {
996 /* The page was truncated at runtime, zero it out. */
997 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
998 P_INIT(pagep, 0, PGNO_INVALID,
999 PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
1000 ZERO_LSN(pagep->lsn);
1001 } else
1002 #endif
1003 if (cmp_p == 0 || IS_ZERO_LSN(LSN(pagep))) {
1004 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1005 P_INIT(pagep, file_dbp->pgsize,
1006 argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
1007 pagep->lsn = *lsnp;
1008
1009 }
1010 } else if (cmp_n == 0 && DB_UNDO(op)) {
1011 /* Need to reallocate the page. */
1012 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1013 memcpy(pagep, argp->header.data, argp->header.size);
1014 if (data)
1015 memcpy((u_int8_t*)pagep + HOFFSET(pagep),
1016 argp->data.data, argp->data.size);
1017 }
1018 if (pagep != NULL &&
1019 (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1020 goto out;
1021
1022 pagep = NULL;
1023 #ifdef HAVE_FTRUNCATE
1024 /*
1025 * If we are keeping an in memory free list remove this
1026 * element from the list.
1027 */
1028 if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
1029 db_pgno_t *lp;
1030 u_int32_t nelem, pos;
1031
1032 if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
1033 goto out;
1034 if (lp != NULL) {
1035 pos = 0;
1036 if (!is_meta) {
1037 __db_freelist_pos(argp->pgno, lp, nelem, &pos);
1038
1039 /*
1040 * If we aborted after logging but before
1041 * updating the free list don't do anything.
1042 */
1043 if (argp->pgno != lp[pos]) {
1044 DB_ASSERT(env,
1045 argp->meta_pgno == lp[pos]);
1046 goto done;
1047 }
1048 DB_ASSERT(env,
1049 argp->meta_pgno == lp[pos - 1]);
1050 } else if (nelem != 0 && argp->pgno != lp[pos])
1051 goto done;
1052
1053 if (pos < nelem)
1054 memmove(&lp[pos], &lp[pos + 1],
1055 ((nelem - pos) - 1) * sizeof(*lp));
1056
1057 /* Shrink the list */
1058 if ((ret =
1059 __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
1060 goto out;
1061 }
1062 }
1063 #endif
1064 done:
1065 if (meta != NULL &&
1066 (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
1067 goto out;
1068 meta = NULL;
1069 ret = 0;
1070
1071 out: if (pagep != NULL)
1072 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1073 if (meta != NULL)
1074 (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
1075
1076 return (ret);
1077 }
1078
1079 /*
1080 * __db_pg_free_recover --
1081 * Recovery function for pg_free.
1082 *
1083 * PUBLIC: int __db_pg_free_recover
1084 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1085 */
1086 int
__db_pg_free_recover(env,dbtp,lsnp,op,info)1087 __db_pg_free_recover(env, dbtp, lsnp, op, info)
1088 ENV *env;
1089 DBT *dbtp;
1090 DB_LSN *lsnp;
1091 db_recops op;
1092 void *info;
1093 {
1094 __db_pg_free_args *argp;
1095 DB *file_dbp;
1096 DBC *dbc;
1097 DB_MPOOLFILE *mpf;
1098 DB_TXNHEAD *txnhead;
1099 int ret;
1100
1101 txnhead = info;
1102 REC_PRINT(__db_pg_free_print);
1103 REC_INTRO(__db_pg_free_read, txnhead, 0);
1104
1105 if ((ret = __db_pg_free_recover_int(env, txnhead,
1106 (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0)) != 0)
1107 goto out;
1108
1109 done: *lsnp = argp->prev_lsn;
1110 out:
1111 REC_CLOSE;
1112 }
1113
1114 /*
1115 * __db_pg_freedata_recover --
1116 * Recovery function for pg_freedata.
1117 *
1118 * PUBLIC: int __db_pg_freedata_recover
1119 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1120 */
1121 int
__db_pg_freedata_recover(env,dbtp,lsnp,op,info)1122 __db_pg_freedata_recover(env, dbtp, lsnp, op, info)
1123 ENV *env;
1124 DBT *dbtp;
1125 DB_LSN *lsnp;
1126 db_recops op;
1127 void *info;
1128 {
1129 __db_pg_freedata_args *argp;
1130 DB *file_dbp;
1131 DBC *dbc;
1132 DB_MPOOLFILE *mpf;
1133 DB_TXNHEAD *txnhead;
1134 int ret;
1135
1136 txnhead = info;
1137 REC_PRINT(__db_pg_freedata_print);
1138 REC_INTRO(__db_pg_freedata_read, txnhead, 0);
1139
1140 if ((ret = __db_pg_free_recover_int(env,
1141 txnhead, argp, file_dbp, lsnp, mpf, op, 1)) != 0)
1142 goto out;
1143
1144 done: *lsnp = argp->prev_lsn;
1145 out:
1146 REC_CLOSE;
1147 }
1148
1149 /*
1150 * __db_cksum_recover --
1151 * Recovery function for checksum failure log record.
1152 *
1153 * PUBLIC: int __db_cksum_recover __P((ENV *,
1154 * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
1155 */
1156 int
__db_cksum_recover(env,dbtp,lsnp,op,info)1157 __db_cksum_recover(env, dbtp, lsnp, op, info)
1158 ENV *env;
1159 DBT *dbtp;
1160 DB_LSN *lsnp;
1161 db_recops op;
1162 void *info;
1163 {
1164 __db_cksum_args *argp;
1165 int ret;
1166
1167 REC_PRINT(__db_cksum_print);
1168
1169 if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
1170 return (ret);
1171
1172 /*
1173 * We had a checksum failure -- the only option is to run catastrophic
1174 * recovery.
1175 */
1176 if (F_ISSET(env, ENV_RECOVER_FATAL))
1177 ret = 0;
1178 else {
1179 __db_errx(env, DB_STR("0642",
1180 "Checksum failure requires catastrophic recovery"));
1181 ret = __env_panic(env, DB_RUNRECOVERY);
1182 }
1183
1184 __os_free(env, argp);
1185
1186 COMPQUIET(info, NULL);
1187 COMPQUIET(lsnp, NULL);
1188 COMPQUIET(op, DB_TXN_ABORT);
1189
1190 return (ret);
1191 }
1192
1193 /*
1194 * __db_pg_init_recover --
1195 * Recovery function to reinit pages after truncation.
1196 *
1197 * PUBLIC: int __db_pg_init_recover
1198 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1199 */
1200 int
__db_pg_init_recover(env,dbtp,lsnp,op,info)1201 __db_pg_init_recover(env, dbtp, lsnp, op, info)
1202 ENV *env;
1203 DBT *dbtp;
1204 DB_LSN *lsnp;
1205 db_recops op;
1206 void *info;
1207 {
1208 __db_pg_init_args *argp;
1209 DB *file_dbp;
1210 DBC *dbc;
1211 DB_LSN copy_lsn;
1212 DB_MPOOLFILE *mpf;
1213 DB_THREAD_INFO *ip;
1214 DB_TXN *txn;
1215 DB_TXNHEAD *txnhead;
1216 PAGE *pagep;
1217 int cmp_n, cmp_p, ret, t_ret, type;
1218
1219 txnhead = info;
1220 ip = txnhead->thread_info;
1221 txn = txnhead->txn;
1222 pagep = NULL;
1223 REC_PRINT(__db_pg_init_print);
1224 REC_INTRO(__db_pg_init_read, txnhead, 0);
1225
1226 mpf = file_dbp->mpf;
1227 if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
1228 if (DB_UNDO(op)) {
1229 if (ret == DB_PAGE_NOTFOUND)
1230 goto done;
1231 else {
1232 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1233 goto out;
1234 }
1235 }
1236
1237 /*
1238 * This page was truncated and may simply not have
1239 * had an item written to it yet. This should only
1240 * happen on hash databases, so confirm that.
1241 */
1242 DB_ASSERT(env, file_dbp->type == DB_HASH);
1243 if ((ret = __memp_fget(mpf, &argp->pgno,
1244 ip, txn, DB_MPOOL_CREATE, &pagep)) != 0) {
1245 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1246 goto out;
1247 }
1248 }
1249
1250 (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
1251 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1252 cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn);
1253 CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn);
1254 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1255
1256 if (cmp_p == 0 && DB_REDO(op)) {
1257 if (TYPE(pagep) == P_HASH)
1258 type = P_HASH;
1259 else
1260 type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
1261 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1262 P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
1263 PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
1264 pagep->lsn = *lsnp;
1265 } else if (cmp_n == 0 && DB_UNDO(op)) {
1266 /* Put the data back on the page. */
1267 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1268 memcpy(pagep, argp->header.data, argp->header.size);
1269 if (argp->data.size > 0)
1270 memcpy((u_int8_t*)pagep + HOFFSET(pagep),
1271 argp->data.data, argp->data.size);
1272 }
1273
1274 done: *lsnp = argp->prev_lsn;
1275 out:
1276 if (pagep != NULL && (t_ret =
1277 __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0 && ret == 0)
1278 ret = t_ret;
1279 REC_CLOSE;
1280 }
1281
/*
 * __db_pg_trunc_recover --
 *	Recovery function for pg_trunc.
 *
 *	The record carries a list of db_pglist_t entries (argp->list)
 *	describing the free pages involved in the truncation.
 *
 *	redo --	call __db_pg_truncate to redo the truncation, then fix the
 *		next pointer of the argp->last_free page (if there is one
 *		and its LSN matches argp->last_lsn) and the free/last_pgno
 *		fields of the argp->meta page (if its LSN matches
 *		argp->meta_lsn).
 *	undo --	re-create every listed page as a P_INVALID page chained by
 *		the logged next_pgno, then restore the last_free page and
 *		the meta page if they carry this record's LSN.
 *
 *	For DB_TXN_ABORT the pages are additionally re-inserted into the
 *	in-memory free list, when one is being kept.
 *
 *	On success *lsnp is replaced with the record's prev_lsn and 0 is
 *	returned.  Built without HAVE_FTRUNCATE the function only returns
 *	EINVAL.
 *
 * PUBLIC: int __db_pg_trunc_recover
 * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_trunc_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifdef HAVE_FTRUNCATE
	__db_pg_trunc_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	db_pglist_t *pglist, *lp;
	db_pgno_t last_pgno, *list;
	u_int32_t felem, nelem, pos;
	int ret;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	REC_PRINT(__db_pg_trunc_print);
	REC_INTRO(__db_pg_trunc_read, txnhead, 1);

	/* The logged list is a packed array of db_pglist_t entries. */
	pglist = (db_pglist_t *) argp->list.data;
	nelem = argp->list.size / sizeof(db_pglist_t);
	if (DB_REDO(op)) {
		/*
		 * First call __db_pg_truncate to find the truncation
		 * point, truncate the file and return the new last_pgno.
		 */
		last_pgno = argp->last_pgno;
		if ((ret = __db_pg_truncate(dbc, txn, pglist,
		    NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0)
			goto out;

		if (argp->last_free != PGNO_INVALID) {
			/*
			 * Update the next pointer of the last page in
			 * the freelist.  If the truncation point is
			 * beyond next_free then this is still in the freelist
			 * otherwise the last_free page is at the end.
			 */
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, ip, txn, 0, &meta)) == 0) {
				/* Only touch the page if it is unchanged. */
				if (LOG_COMPARE(&LSN(meta),
				     &argp->last_lsn) == 0) {
					REC_DIRTY(mpf,
					    txnhead, dbc->priority, &meta);
					if (pglist->pgno > last_pgno)
						NEXT_PGNO(meta) = PGNO_INVALID;
					else
						NEXT_PGNO(meta) = pglist->pgno;
					LSN(meta) = *lsnp;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
				meta = NULL;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
		}
		/* Now the real metadata page. */
		if ((ret = __memp_fget(mpf, &argp->meta, ip, txn,
		    0, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			if (argp->last_free == PGNO_INVALID) {
				if (nelem == 0)
					meta->free = PGNO_INVALID;
				else
					meta->free = pglist->pgno;
			}
			/*
			 * If this is part of a multi record truncate
			 * this could be just the last page of this record
			 * don't move the meta->last_pgno forward.
			 */
			if (meta->last_pgno > last_pgno)
				meta->last_pgno = last_pgno;
			LSN(meta) = *lsnp;
		}
	} else {
		/* Put the free list back in its original order. */
		for (lp = pglist; lp < &pglist[nelem]; lp++) {
			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
			    txn, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
			/*
			 * Rebuild the page only if it is brand new (zero
			 * LSN) or still carries this record's LSN.
			 */
			if (IS_ZERO_LSN(LSN(pagep)) ||
			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
				REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
				P_INIT(pagep, file_dbp->pgsize, lp->pgno,
				    PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
				LSN(pagep) = lp->lsn;
			}
			if ((ret = __memp_fput(mpf,
			    ip, pagep, file_dbp->priority)) != 0)
				goto out;
		}
		/*
		 * Link the truncated part back into the free list.
		 * It's either after the last_free page or directly
		 * linked to the metadata page.
		 */
		if (argp->last_free != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf, &argp->last_free,
			    ip, txn, DB_MPOOL_EDIT, &meta)) == 0) {
				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
					NEXT_PGNO(meta) = argp->next_free;
					LSN(meta) = argp->last_lsn;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
			meta = NULL;
		}
		if ((ret = __memp_fget(mpf, &argp->meta,
		    ip, txn, DB_MPOOL_EDIT, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			/*
			 * If we had to break up the list last_pgno
			 * may only represent the end of the block.
			 */
			if (meta->last_pgno < argp->last_pgno)
				meta->last_pgno = argp->last_pgno;
			if (argp->last_free == PGNO_INVALID)
				meta->free = argp->next_free;
			LSN(meta) = argp->meta_lsn;
		}
	}

	/* Both branches leave the metadata page pinned in meta. */
	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
		goto out;

	if (op == DB_TXN_ABORT) {
		/*
		 * Put the pages back on the in memory free list.
		 * If this is part of a multi-record truncate then
		 * we need to find this batch, it may not be at the end.
		 * If we aborted while writing one of the log records
		 * then this set may still be in the list.
		 */
		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
			goto out;
		if (list != NULL) {
			if (felem != 0 && list[felem - 1] > pglist->pgno) {
				/* The batch belongs somewhere in the middle. */
				__db_freelist_pos(
				    pglist->pgno, list, felem, &pos);
				DB_ASSERT(env, pos < felem);
				if (pglist->pgno == list[pos])
					goto done;
				pos++;
			} else if (felem != 0 &&
			    list[felem - 1] == pglist->pgno)
				/* Already present at the tail. */
				goto done;
			else
				/* Append after the current last entry. */
				pos = felem;
			if ((ret = __memp_extend_freelist(
			    mpf, felem + nelem, &list)) != 0)
				goto out;
			/* Open a gap of nelem slots at pos, then fill it. */
			if (pos != felem)
				memmove(&list[nelem + pos], &list[pos],
				    sizeof(*list) * (felem - pos));
			for (lp = pglist; lp < &pglist[nelem]; lp++)
				list[pos++] = lp->pgno;
		}
	}

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
#else
	/*
	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records
	 * to recover.
	 */
	COMPQUIET(env, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_ABORT);
	COMPQUIET(info, NULL);
	return (EINVAL);
#endif
}
1483 /*
1484 * __db_realloc_recover --
1485 * Recovery function for realloc.
1486 *
1487 * PUBLIC: int __db_realloc_recover
1488 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1489 */
1490 int
__db_realloc_recover(env,dbtp,lsnp,op,info)1491 __db_realloc_recover(env, dbtp, lsnp, op, info)
1492 ENV *env;
1493 DBT *dbtp;
1494 DB_LSN *lsnp;
1495 db_recops op;
1496 void *info;
1497 {
1498 __db_realloc_args *argp;
1499 DB *file_dbp;
1500 DBC *dbc;
1501 DB_MPOOLFILE *mpf;
1502 DB_THREAD_INFO *ip;
1503 DB_TXN *txn;
1504 DB_TXNHEAD *txnhead;
1505 PAGE *pagep;
1506 db_pglist_t *pglist, *lp;
1507 #ifdef HAVE_FTRUNCATE
1508 db_pgno_t *list;
1509 u_int32_t felem, pos;
1510 #endif
1511 u_int32_t nelem;
1512 int cmp_n, cmp_p, ret;
1513
1514 txnhead = info;
1515 ip = txnhead->thread_info;
1516 txn = txnhead->txn;
1517
1518 REC_PRINT(__db_realloc_print);
1519 REC_INTRO(__db_realloc_read, txnhead, 1);
1520 mpf = file_dbp->mpf;
1521
1522 /*
1523 * First, iterate over all the pages and make sure they are all in
1524 * their prior or new states (according to the op).
1525 */
1526 pglist = (db_pglist_t *) argp->list.data;
1527 nelem = argp->list.size / sizeof(db_pglist_t);
1528 for (lp = pglist; lp < &pglist[nelem]; lp++) {
1529 if ((ret = __memp_fget(mpf, &lp->pgno, ip,
1530 txn, DB_MPOOL_CREATE, &pagep)) != 0)
1531 goto out;
1532 if (DB_REDO(op) && LOG_COMPARE(&LSN(pagep), &lp->lsn) == 0) {
1533 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1534 P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1535 PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
1536 LSN(pagep) = *lsnp;
1537 } else if (DB_UNDO(op) && (IS_ZERO_LSN(LSN(pagep)) ||
1538 LOG_COMPARE(&LSN(pagep), lsnp) == 0)) {
1539 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1540 P_INIT(pagep, file_dbp->pgsize, lp->pgno,
1541 PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
1542 LSN(pagep) = lp->lsn;
1543 }
1544 if ((ret = __memp_fput(mpf,
1545 ip, pagep, file_dbp->priority)) != 0)
1546 goto out;
1547 }
1548
1549 /* Now, fix up the free list. */
1550 if ((ret = __memp_fget(mpf,
1551 &argp->prev_pgno, ip, txn, 0, &pagep)) != 0)
1552 goto out;
1553
1554 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1555 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
1556 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
1557 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1558
1559 if (DB_REDO(op) && cmp_p == 0) {
1560 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1561 if (argp->prev_pgno == PGNO_BASE_MD)
1562 ((DBMETA *)pagep)->free = argp->next_free;
1563 else
1564 NEXT_PGNO(pagep) = argp->next_free;
1565 LSN(pagep) = *lsnp;
1566 } else if (DB_UNDO(op) && cmp_n == 0) {
1567 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1568 if (argp->prev_pgno == PGNO_BASE_MD)
1569 ((DBMETA *)pagep)->free = pglist->pgno;
1570 else
1571 NEXT_PGNO(pagep) = pglist->pgno;
1572 LSN(pagep) = argp->page_lsn;
1573 }
1574 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1575 goto out;
1576
1577 #ifdef HAVE_FTRUNCATE
1578 if (op == DB_TXN_ABORT) {
1579 /* Put the pages back in the sorted list. */
1580 if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
1581 goto out;
1582 if (list != NULL) {
1583 __db_freelist_pos(pglist->pgno, list, felem, &pos);
1584 if (pglist->pgno == list[pos])
1585 goto done;
1586 if ((ret = __memp_extend_freelist(
1587 mpf, felem + nelem, &list)) != 0)
1588 goto out;
1589 pos++;
1590 if (pos != felem)
1591 memmove(&list[pos+nelem],
1592 &list[pos], nelem * sizeof(*list));
1593 for (lp = pglist; lp < &pglist[nelem]; lp++)
1594 list[pos++] = lp->pgno;
1595 }
1596 }
1597 #endif
1598
1599 done: *lsnp = argp->prev_lsn;
1600 ret = 0;
1601
1602 out: REC_CLOSE;
1603 }
/*
 * __db_pg_sort_44_recover --
 *	Recovery function for pg_sort.
 *	This is deprecated and kept for replication upgrades.
 *
 *	The record carries a list of db_pglist_t entries (argp->list).
 *
 *	redo --	sort the list, call __db_pg_truncate, terminate the chain
 *		at the argp->last_free page (if there is one and its LSN
 *		matches argp->last_lsn) and update the free/last_pgno
 *		fields of the argp->meta page (if its LSN matches
 *		argp->meta_lsn).
 *	undo --	re-create every listed page as a P_INVALID page chained to
 *		the next list entry, then restore the last_free page and
 *		the meta page if they carry this record's LSN.
 *
 *	For DB_TXN_ABORT the pages are also appended to the in-memory free
 *	list, when one is being kept.
 *
 *	On success *lsnp is replaced with the record's prev_lsn and 0 is
 *	returned.  Built without HAVE_FTRUNCATE the function only returns
 *	EINVAL.
 *
 * PUBLIC: int __db_pg_sort_44_recover
 * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_sort_44_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifdef HAVE_FTRUNCATE
	__db_pg_sort_44_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	DB_THREAD_INFO *ip;
	DB_TXN *txn;
	DB_TXNHEAD *txnhead;
	PAGE *pagep;
	db_pglist_t *pglist, *lp;
	db_pgno_t pgno, *list;
	u_int32_t felem, nelem;
	int ret;

	txnhead = info;
	ip = txnhead->thread_info;
	txn = txnhead->txn;
	REC_PRINT(__db_pg_sort_44_print);
	REC_INTRO(__db_pg_sort_44_read, txnhead, 1);

	/* The logged list is a packed array of db_pglist_t entries. */
	pglist = (db_pglist_t *) argp->list.data;
	nelem = argp->list.size / sizeof(db_pglist_t);
	if (DB_REDO(op)) {
		/* Sort, then truncate; pgno receives the new last page. */
		pgno = argp->last_pgno;
		__db_freelist_sort(pglist, nelem);
		if ((ret = __db_pg_truncate(dbc, txn,
		    pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0)
			goto out;

		if (argp->last_free != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, ip, txn, 0, &meta)) == 0) {
				/* Only touch the page if it is unchanged. */
				if (LOG_COMPARE(&LSN(meta),
				     &argp->last_lsn) == 0) {
					REC_DIRTY(mpf,
					    txnhead, dbc->priority, &meta);
					NEXT_PGNO(meta) = PGNO_INVALID;
					LSN(meta) = *lsnp;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
				meta = NULL;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
		}
		/* Now the real metadata page. */
		if ((ret = __memp_fget(mpf, &argp->meta, ip, txn,
		    0, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			if (argp->last_free == PGNO_INVALID) {
				if (nelem == 0)
					meta->free = PGNO_INVALID;
				else
					meta->free = pglist->pgno;
			}
			meta->last_pgno = pgno;
			LSN(meta) = *lsnp;
		}
	} else {
		/* Put the free list back in its original order. */
		for (lp = pglist; lp < &pglist[nelem]; lp++) {
			if ((ret = __memp_fget(mpf, &lp->pgno, ip,
			    txn, DB_MPOOL_CREATE, &pagep)) != 0)
				goto out;
			/*
			 * Rebuild the page only if it is brand new (zero
			 * LSN) or still carries this record's LSN.
			 */
			if (IS_ZERO_LSN(LSN(pagep)) ||
			     LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
				REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
				/* Chain to the next entry; last one ends it. */
				if (lp == &pglist[nelem - 1])
					pgno = PGNO_INVALID;
				else
					pgno = lp[1].pgno;

				P_INIT(pagep, file_dbp->pgsize,
				    lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
				LSN(pagep) = lp->lsn;
			}
			if ((ret = __memp_fput(mpf,
			    ip, pagep, file_dbp->priority)) != 0)
				goto out;
		}
		if (argp->last_free != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf, &argp->last_free,
			    ip, txn, DB_MPOOL_EDIT, &meta)) == 0) {
				if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
					NEXT_PGNO(meta) = pglist->pgno;
					LSN(meta) = argp->last_lsn;
				}
				if ((ret = __memp_fput(mpf, ip,
				    meta, file_dbp->priority)) != 0)
					goto out;
			} else if (ret != DB_PAGE_NOTFOUND)
				goto out;
			meta = NULL;
		}
		if ((ret = __memp_fget(mpf, &argp->meta,
		    ip, txn, DB_MPOOL_EDIT, &meta)) != 0)
			goto out;
		if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
			REC_DIRTY(mpf, txnhead, dbc->priority, &meta);
			meta->last_pgno = argp->last_pgno;
			if (argp->last_free == PGNO_INVALID)
				meta->free = pglist->pgno;
			LSN(meta) = argp->meta_lsn;
		}
	}
	if (op == DB_TXN_ABORT) {
		/*
		 * Append the pages to the in-memory free list.
		 *
		 * NOTE(review): the two "goto out" exits in this section
		 * skip the __memp_fput of meta just below; unless REC_CLOSE
		 * releases it, the metadata page stays pinned on those
		 * paths -- confirm.
		 */
		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
			goto out;
		if (list != NULL) {
			DB_ASSERT(env, felem == 0 ||
			    argp->last_free == list[felem - 1]);
			if ((ret = __memp_extend_freelist(
			    mpf, felem + nelem, &list)) != 0)
				goto out;
			for (lp = pglist; lp < &pglist[nelem]; lp++)
				list[felem++] = lp->pgno;
		}
	}

	/* Both branches above leave the metadata page pinned in meta. */
	if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
		goto out;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	REC_CLOSE;
#else
	/*
	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
	 * to recover.
	 */
	COMPQUIET(env, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_ABORT);
	COMPQUIET(info, NULL);
	return (EINVAL);
#endif
}
1762
1763 /*
1764 * __db_relink_recover --
1765 * Recovery function for relink.
1766 *
1767 * PUBLIC: int __db_relink_recover
1768 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1769 */
1770 int
__db_relink_recover(env,dbtp,lsnp,op,info)1771 __db_relink_recover(env, dbtp, lsnp, op, info)
1772 ENV *env;
1773 DBT *dbtp;
1774 DB_LSN *lsnp;
1775 db_recops op;
1776 void *info;
1777 {
1778 __db_relink_args *argp;
1779 DB *file_dbp;
1780 DBC *dbc;
1781 DB_MPOOLFILE *mpf;
1782 DB_THREAD_INFO *ip;
1783 DB_TXN *txn;
1784 DB_TXNHEAD *txnhead;
1785 PAGE *pagep;
1786 int cmp_n, cmp_p, ret;
1787
1788 txnhead = info;
1789 ip = txnhead->thread_info;
1790 txn = txnhead->txn;
1791 pagep = NULL;
1792 REC_PRINT(__db_relink_print);
1793 REC_INTRO(__db_relink_read, txnhead, 0);
1794
1795 /*
1796 * There are up to three pages we need to check -- the page, and the
1797 * previous and next pages, if they existed. For a page add operation,
1798 * the current page is the result of a split and is being recovered
1799 * elsewhere, so all we need do is recover the next page.
1800 */
1801 if (argp->next_pgno == PGNO_INVALID)
1802 goto prev;
1803 if ((ret = __memp_fget(mpf,
1804 &argp->next_pgno, ip, txn, 0, &pagep)) != 0) {
1805 if (ret != DB_PAGE_NOTFOUND) {
1806 ret = __db_pgerr(file_dbp, argp->next_pgno, ret);
1807 goto out;
1808 } else
1809 goto prev;
1810 }
1811
1812 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1813 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
1814 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
1815 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1816 if (cmp_p == 0 && DB_REDO(op)) {
1817 /* Redo the remove or replace. */
1818 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1819 if (argp->new_pgno == PGNO_INVALID)
1820 pagep->prev_pgno = argp->prev_pgno;
1821 else
1822 pagep->prev_pgno = argp->new_pgno;
1823
1824 pagep->lsn = *lsnp;
1825 } else if (cmp_n == 0 && DB_UNDO(op)) {
1826 /* Undo the remove or replace. */
1827 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1828 pagep->prev_pgno = argp->pgno;
1829
1830 pagep->lsn = argp->lsn_next;
1831 }
1832
1833 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
1834 goto out;
1835 pagep = NULL;
1836
1837 prev: if (argp->prev_pgno == PGNO_INVALID)
1838 goto done;
1839 if ((ret = __memp_fget(mpf,
1840 &argp->prev_pgno, ip, txn, 0, &pagep)) != 0) {
1841 if (ret != DB_PAGE_NOTFOUND) {
1842 ret = __db_pgerr(file_dbp, argp->prev_pgno, ret);
1843 goto out;
1844 } else
1845 goto done;
1846 }
1847
1848 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1849 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
1850 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
1851 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
1852 if (cmp_p == 0 && DB_REDO(op)) {
1853 /* Redo the relink. */
1854 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1855 if (argp->new_pgno == PGNO_INVALID)
1856 pagep->next_pgno = argp->next_pgno;
1857 else
1858 pagep->next_pgno = argp->new_pgno;
1859
1860 pagep->lsn = *lsnp;
1861 } else if (cmp_n == 0 && DB_UNDO(op)) {
1862 /* Undo the relink. */
1863 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
1864 pagep->next_pgno = argp->pgno;
1865 pagep->lsn = argp->lsn_prev;
1866 }
1867
1868 if ((ret = __memp_fput(mpf,
1869 ip, pagep, file_dbp->priority)) != 0)
1870 goto out;
1871 pagep = NULL;
1872
1873 done: *lsnp = argp->prev_lsn;
1874 ret = 0;
1875
1876 out: if (pagep != NULL)
1877 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
1878 REC_CLOSE;
1879 }
1880
1881 /*
1882 * __db_merge_recover --
1883 * Recovery function for merge.
1884 *
1885 * PUBLIC: int __db_merge_recover
1886 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1887 */
1888 int
__db_merge_recover(env,dbtp,lsnp,op,info)1889 __db_merge_recover(env, dbtp, lsnp, op, info)
1890 ENV *env;
1891 DBT *dbtp;
1892 DB_LSN *lsnp;
1893 db_recops op;
1894 void *info;
1895 {
1896 __db_merge_args *argp;
1897 BTREE *bt;
1898 BKEYDATA *bk;
1899 DB *file_dbp;
1900 DBC *dbc;
1901 DB_LOCK handle_lock;
1902 DB_LOCKREQ request;
1903 DB_MPOOLFILE *mpf;
1904 DB_THREAD_INFO *ip;
1905 DB_TXN *txn;
1906 DB_TXNHEAD *txnhead;
1907 HASH *ht;
1908 PAGE *pagep;
1909 db_indx_t indx, *ninp, *pinp;
1910 u_int32_t size;
1911 u_int8_t *bp;
1912 int cmp_n, cmp_p, i, ret, t_ret;
1913
1914 txnhead = info;
1915 ip = txnhead->thread_info;
1916 txn = txnhead->txn;
1917 REC_PRINT(__db_merge_print);
1918 REC_INTRO(__db_merge_read, txnhead, op != DB_TXN_APPLY);
1919
1920 /* Allocate our own cursor without DB_RECOVER as we need a locker. */
1921 if (op == DB_TXN_APPLY && (ret = __db_cursor_int(file_dbp, ip, NULL,
1922 DB_QUEUE, PGNO_INVALID, 0, NULL, &dbc)) != 0)
1923 goto out;
1924 F_SET(dbc, DBC_RECOVER);
1925
1926 /* XXX Use REG_FGET() here? */
1927 if ((ret = __memp_fget(mpf, &argp->pgno, ip, txn, 0, &pagep)) != 0) {
1928 if (ret != DB_PAGE_NOTFOUND) {
1929 ret = __db_pgerr(file_dbp, argp->pgno, ret);
1930 goto out;
1931 } else
1932 goto next;
1933 }
1934
1935 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1936 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
1937 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
1938 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
1939
1940 if (cmp_p == 0 && DB_REDO(op)) {
1941 /*
1942 * When pg_copy is set, we are copying onto a new page.
1943 */
1944 DB_ASSERT(env, !argp->pg_copy || NUM_ENT(pagep) == 0);
1945 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
1946 if (argp->pg_copy) {
1947 if (argp->data.size == 0) {
1948 memcpy(pagep, argp->hdr.data, argp->hdr.size);
1949 pagep->pgno = argp->pgno;
1950 goto do_lsn;
1951 }
1952 P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
1953 PREV_PGNO(argp->hdr.data),
1954 NEXT_PGNO(argp->hdr.data),
1955 LEVEL(argp->hdr.data), TYPE(argp->hdr.data));
1956 }
1957 if (TYPE(pagep) == P_OVERFLOW) {
1958 OV_REF(pagep) = OV_REF(argp->hdr.data);
1959 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
1960 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
1961 memcpy(bp, argp->data.data, argp->data.size);
1962 } else {
1963 /* Copy the data segment. */
1964 bp = (u_int8_t *)pagep +
1965 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
1966 memcpy(bp, argp->data.data, argp->data.size);
1967
1968 /* Copy index table offset past the current entries. */
1969 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
1970 ninp = P_INP(file_dbp, argp->hdr.data);
1971 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
1972 *pinp++ = *ninp++
1973 - (file_dbp->pgsize - HOFFSET(pagep));
1974 HOFFSET(pagep) -= argp->data.size;
1975 NUM_ENT(pagep) += i;
1976 }
1977 do_lsn: pagep->lsn = *lsnp;
1978 if (op == DB_TXN_APPLY) {
1979 /*
1980 * If applying to an active system we must bump
1981 * the revision number so that the db will get
1982 * reopened. We also need to move the handle
1983 * locks. Note that the dbp will not have a
1984 * locker in a replication client apply thread.
1985 */
1986 if (file_dbp->type == DB_HASH) {
1987 if (argp->npgno == file_dbp->meta_pgno)
1988 file_dbp->mpf->mfp->revision++;
1989 } else {
1990 bt = file_dbp->bt_internal;
1991 if (argp->npgno == bt->bt_meta ||
1992 argp->npgno == bt->bt_root)
1993 file_dbp->mpf->mfp->revision++;
1994 }
1995 if (argp->npgno == file_dbp->meta_pgno) {
1996 F_CLR(file_dbp, DB_AM_RECOVER);
1997 if ((ret = __fop_lock_handle(file_dbp->env,
1998 file_dbp, dbc->locker, DB_LOCK_READ,
1999 NULL, 0)) != 0)
2000 goto err;
2001 handle_lock = file_dbp->handle_lock;
2002
2003 file_dbp->meta_pgno = argp->pgno;
2004 if ((ret = __fop_lock_handle(file_dbp->env,
2005 file_dbp, dbc->locker, DB_LOCK_READ,
2006 NULL, 0)) != 0)
2007 goto err;
2008
2009 /* Move the other handles to the new lock. */
2010 ret = __lock_change(file_dbp->env,
2011 &handle_lock, &file_dbp->handle_lock);
2012
2013 err: memset(&request, 0, sizeof(request));
2014 request.op = DB_LOCK_PUT_ALL;
2015 if ((t_ret = __lock_vec(
2016 file_dbp->env, dbc->locker,
2017 0, &request, 1, NULL)) != 0 && ret == 0)
2018 ret = t_ret;
2019 F_SET(file_dbp, DB_AM_RECOVER);
2020 if (ret != 0)
2021 goto out;
2022 }
2023 }
2024
2025 } else if (cmp_n == 0 && !DB_REDO(op)) {
2026 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2027 if (TYPE(pagep) == P_OVERFLOW) {
2028 HOFFSET(pagep) = file_dbp->pgsize;
2029 goto setlsn;
2030 }
2031
2032 if (argp->pg_copy) {
2033 /* The page was empty when we started. */
2034 P_INIT(pagep, file_dbp->pgsize,
2035 pagep->pgno, PGNO_INVALID,
2036 PGNO_INVALID, 0, TYPE(argp->hdr.data));
2037 goto setlsn;
2038 }
2039
2040 /*
2041 * Since logging is logical at the page level we cannot just
2042 * truncate the data space. Delete the proper number of items
2043 * from the logical end of the page.
2044 */
2045 for (i = 0; i < NUM_ENT(argp->hdr.data); i++) {
2046 indx = NUM_ENT(pagep) - 1;
2047 if (TYPE(pagep) == P_LBTREE && indx != 0 &&
2048 P_INP(file_dbp, pagep)[indx] ==
2049 P_INP(file_dbp, pagep)[indx - P_INDX]) {
2050 NUM_ENT(pagep)--;
2051 continue;
2052 }
2053 switch (TYPE(pagep)) {
2054 case P_LBTREE:
2055 case P_LRECNO:
2056 case P_LDUP:
2057 bk = GET_BKEYDATA(file_dbp, pagep, indx);
2058 size = BITEM_SIZE(bk);
2059 break;
2060
2061 case P_IBTREE:
2062 size = BINTERNAL_SIZE(
2063 GET_BINTERNAL(file_dbp, pagep, indx)->len);
2064 break;
2065 case P_IRECNO:
2066 size = RINTERNAL_SIZE;
2067 break;
2068 case P_HASH:
2069 size = LEN_HITEM(file_dbp,
2070 pagep, file_dbp->pgsize, indx);
2071 break;
2072 default:
2073 ret = __db_pgfmt(env, PGNO(pagep));
2074 goto out;
2075 }
2076 if ((ret = __db_ditem(dbc, pagep, indx, size)) != 0)
2077 goto out;
2078 }
2079 setlsn: pagep->lsn = argp->lsn;
2080 }
2081
2082 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
2083 goto out;
2084
2085 next: if ((ret = __memp_fget(mpf, &argp->npgno, ip, txn, 0, &pagep)) != 0) {
2086 if (ret != DB_PAGE_NOTFOUND) {
2087 ret = __db_pgerr(file_dbp, argp->pgno, ret);
2088 goto out;
2089 } else
2090 goto done;
2091 }
2092
2093 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2094 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nlsn);
2095 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->nlsn);
2096
2097 if (cmp_p == 0 && DB_REDO(op)) {
2098 /* Need to truncate the page. */
2099 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2100 HOFFSET(pagep) = file_dbp->pgsize;
2101 NUM_ENT(pagep) = 0;
2102 pagep->lsn = *lsnp;
2103 } else if (cmp_n == 0 && !DB_REDO(op)) {
2104 /* Need to put the data back on the page. */
2105 REC_DIRTY(mpf, txnhead, dbc->priority, &pagep);
2106 if (TYPE(pagep) == P_OVERFLOW) {
2107 OV_REF(pagep) = OV_REF(argp->hdr.data);
2108 OV_LEN(pagep) = OV_LEN(argp->hdr.data);
2109 bp = (u_int8_t *)pagep + P_OVERHEAD(file_dbp);
2110 memcpy(bp, argp->data.data, argp->data.size);
2111 } else {
2112 bp = (u_int8_t *)pagep +
2113 (db_indx_t)(HOFFSET(pagep) - argp->data.size);
2114 memcpy(bp, argp->data.data, argp->data.size);
2115
2116 if (argp->pg_copy)
2117 memcpy(pagep, argp->hdr.data, argp->hdr.size);
2118 else {
2119 /* Copy index table. */
2120 pinp = P_INP(file_dbp, pagep) + NUM_ENT(pagep);
2121 ninp = P_INP(file_dbp, argp->hdr.data);
2122 for (i = 0; i < NUM_ENT(argp->hdr.data); i++)
2123 *pinp++ = *ninp++;
2124 HOFFSET(pagep) -= argp->data.size;
2125 NUM_ENT(pagep) += i;
2126 }
2127 }
2128 pagep->lsn = argp->nlsn;
2129 if (op == DB_TXN_ABORT) {
2130 /*
2131 * If we are undoing a meta/root page move we must
2132 * bump the revision number. Put the handle
2133 * locks back to their original state if we
2134 * moved the metadata page.
2135 */
2136 i = 0;
2137 if (file_dbp->type == DB_HASH) {
2138 ht = file_dbp->h_internal;
2139 if (argp->pgno == ht->meta_pgno) {
2140 ht->meta_pgno = argp->npgno;
2141 file_dbp->mpf->mfp->revision++;
2142 i = 1;
2143 }
2144 } else {
2145 bt = file_dbp->bt_internal;
2146 if (argp->pgno == bt->bt_meta) {
2147 file_dbp->mpf->mfp->revision++;
2148 bt->bt_meta = argp->npgno;
2149 i = 1;
2150 } else if (argp->pgno == bt->bt_root) {
2151 file_dbp->mpf->mfp->revision++;
2152 bt->bt_root = argp->npgno;
2153 }
2154 }
2155 if (argp->pgno == file_dbp->meta_pgno)
2156 file_dbp->meta_pgno = argp->npgno;
2157
2158 /*
2159 * If we detected a metadata page above, move
2160 * the handle locks to the new page.
2161 */
2162 if (i == 1) {
2163 handle_lock = file_dbp->handle_lock;
2164 if ((ret = __fop_lock_handle(file_dbp->env,
2165 file_dbp, file_dbp->locker, DB_LOCK_READ,
2166 NULL, 0)) != 0)
2167 goto out;
2168
2169 /* Move the other handles to the new lock. */
2170 if ((ret = __lock_change(file_dbp->env,
2171 &handle_lock, &file_dbp->handle_lock)) != 0)
2172 goto out;
2173 }
2174 }
2175 }
2176
2177 if ((ret = __memp_fput(mpf,
2178 ip, pagep, dbc->priority)) != 0)
2179 goto out;
2180 done:
2181 *lsnp = argp->prev_lsn;
2182 ret = 0;
2183
2184 out: REC_CLOSE;
2185 }
2186
2187 /*
2188 * __db_pgno_recover --
2189 * Recovery function for page number replacement.
2190 *
2191 * PUBLIC: int __db_pgno_recover
2192 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2193 */
2194 int
__db_pgno_recover(env,dbtp,lsnp,op,info)2195 __db_pgno_recover(env, dbtp, lsnp, op, info)
2196 ENV *env;
2197 DBT *dbtp;
2198 DB_LSN *lsnp;
2199 db_recops op;
2200 void *info;
2201 {
2202 BINTERNAL *bi;
2203 __db_pgno_args *argp;
2204 DB_THREAD_INFO *ip;
2205 DB *file_dbp;
2206 DBC *dbc;
2207 DB_MPOOLFILE *mpf;
2208 DB_TXNHEAD *txnhead;
2209 PAGE *pagep, *npagep;
2210 db_pgno_t pgno, *pgnop;
2211 int cmp_n, cmp_p, ret;
2212
2213 txnhead = info;
2214 ip = txnhead->thread_info;
2215 REC_PRINT(__db_pgno_print);
2216 REC_INTRO(__db_pgno_read, txnhead, 0);
2217
2218 REC_FGET(mpf, txnhead, argp->pgno, &pagep, done);
2219
2220 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
2221 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
2222 CHECK_LSN(file_dbp->env, op, cmp_p, &LSN(pagep), &argp->lsn);
2223 CHECK_ABORT(file_dbp->env, op, cmp_n, &LSN(pagep), lsnp);
2224
2225 if ((cmp_p == 0 && DB_REDO(op)) || (cmp_n == 0 && !DB_REDO(op))) {
2226 switch (TYPE(pagep)) {
2227 case P_IBTREE:
2228 /*
2229 * An internal record can have both a overflow
2230 * and child pointer. Fetch the page to see
2231 * which it is.
2232 */
2233 bi = GET_BINTERNAL(file_dbp, pagep, argp->indx);
2234 if (B_TYPE(bi->type) == B_OVERFLOW) {
2235 REC_FGET(mpf, txnhead,
2236 argp->npgno, &npagep, out);
2237
2238 if (TYPE(npagep) == P_OVERFLOW)
2239 pgnop =
2240 &((BOVERFLOW *)(bi->data))->pgno;
2241 else
2242 pgnop = &bi->pgno;
2243 if ((ret = __memp_fput(mpf, ip,
2244 npagep, file_dbp->priority)) != 0)
2245 goto out;
2246 break;
2247 }
2248 pgnop = &bi->pgno;
2249 break;
2250 case P_IRECNO:
2251 pgnop =
2252 &GET_RINTERNAL(file_dbp, pagep, argp->indx)->pgno;
2253 break;
2254 case P_HASH:
2255 pgnop = &pgno;
2256 break;
2257 default:
2258 pgnop =
2259 &GET_BOVERFLOW(file_dbp, pagep, argp->indx)->pgno;
2260 break;
2261 }
2262
2263 if (DB_REDO(op)) {
2264 /* Need to redo update described. */
2265 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
2266 *pgnop = argp->npgno;
2267 pagep->lsn = *lsnp;
2268 } else {
2269 REC_DIRTY(mpf, txnhead, file_dbp->priority, &pagep);
2270 *pgnop = argp->opgno;
2271 pagep->lsn = argp->lsn;
2272 }
2273 if (TYPE(pagep) == P_HASH)
2274 memcpy(HOFFDUP_PGNO(P_ENTRY(file_dbp,
2275 pagep, argp->indx)), pgnop, sizeof(db_pgno_t));
2276 }
2277
2278 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
2279 goto out;
2280
2281 done:
2282 *lsnp = argp->prev_lsn;
2283 ret = 0;
2284
2285 out: REC_CLOSE;
2286 }
2287
2288 /*
2289 * __db_pglist_swap -- swap a list of freelist pages.
2290 * PUBLIC: void __db_pglist_swap __P((u_int32_t, void *));
2291 */
2292 void
__db_pglist_swap(size,list)2293 __db_pglist_swap(size, list)
2294 u_int32_t size;
2295 void *list;
2296 {
2297 db_pglist_t *lp;
2298 u_int32_t nelem;
2299
2300 nelem = size / sizeof(db_pglist_t);
2301
2302 lp = (db_pglist_t *)list;
2303 while (nelem-- > 0) {
2304 P_32_SWAP(&lp->pgno);
2305 P_32_SWAP(&lp->lsn.file);
2306 P_32_SWAP(&lp->lsn.offset);
2307 lp++;
2308 }
2309 }
2310
2311 /*
2312 * __db_pglist_print -- print a list of freelist pages.
2313 * PUBLIC: void __db_pglist_print __P((ENV *, DB_MSGBUF *, DBT *));
2314 */
2315 void
__db_pglist_print(env,mbp,list)2316 __db_pglist_print(env, mbp, list)
2317 ENV *env;
2318 DB_MSGBUF *mbp;
2319 DBT *list;
2320 {
2321 db_pglist_t *lp;
2322 u_int32_t nelem;
2323
2324 nelem = list->size / sizeof(db_pglist_t);
2325 lp = (db_pglist_t *)list->data;
2326 __db_msgadd(env, mbp, "\t");
2327 while (nelem-- > 0) {
2328 __db_msgadd(env, mbp, "%lu [%lu][%lu]", (u_long)lp->pgno,
2329 (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
2330 if (nelem % 4 == 0)
2331 __db_msgadd(env, mbp, "\n\t");
2332 else
2333 __db_msgadd(env, mbp, " ");
2334 lp++;
2335 }
2336 }
2337