1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 2013 Oracle and/or its affiliates. All rights reserved.
5 */
6 /*
7 * Copyright (c) 1995, 1996
8 * Margo Seltzer. All rights reserved.
9 */
10 /*
11 * Copyright (c) 1995, 1996
12 * The President and Fellows of Harvard University. All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Margo Seltzer.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * $Id$
42 */
43
44 #include "db_config.h"
45
46 #include "db_int.h"
47 #include "dbinc/db_page.h"
48 #include "dbinc/btree.h"
49 #include "dbinc/hash.h"
50 #include "dbinc/mp.h"
51
52 static int __ham_alloc_pages __P((DBC *, __ham_groupalloc_args *, DB_LSN *));
53 static int __ham_alloc_pages_42
54 __P((DBC *, __ham_groupalloc_42_args *, DB_LSN *));
55 static int __ham_chgpg_recover_func
56 __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
57
58 /*
59 * __ham_insdel_recover --
60 *
61 * PUBLIC: int __ham_insdel_recover
62 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
63 */
64 int
__ham_insdel_recover(env,dbtp,lsnp,op,info)65 __ham_insdel_recover(env, dbtp, lsnp, op, info)
66 ENV *env;
67 DBT *dbtp;
68 DB_LSN *lsnp;
69 db_recops op;
70 void *info;
71 {
72 __ham_insdel_args *argp;
73 DB_THREAD_INFO *ip;
74 DB *file_dbp;
75 DBC *dbc;
76 DB_MPOOLFILE *mpf;
77 PAGE *pagep;
78 db_indx_t dindx;
79 int cmp_n, cmp_p, ret;
80
81 ip = ((DB_TXNHEAD *)info)->thread_info;
82 pagep = NULL;
83 REC_PRINT(__ham_insdel_print);
84 REC_INTRO(__ham_insdel_read, ip, 1);
85
86 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
87 0, &pagep)) != 0) {
88 if (DB_UNDO(op)) {
89 if (ret == DB_PAGE_NOTFOUND)
90 goto done;
91 else {
92 ret = __db_pgerr(file_dbp, argp->pgno, ret);
93 goto out;
94 }
95 }
96 /* If the page is not here then it was later truncated. */
97 if (!IS_ZERO_LSN(argp->pagelsn))
98 goto done;
99 /*
100 * This page was created by a group allocation and
101 * the file may not have been extend yet.
102 * Create the page if necessary.
103 */
104 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
105 DB_MPOOL_CREATE, &pagep)) != 0) {
106 ret = __db_pgerr(file_dbp, argp->pgno, ret);
107 goto out;
108 }
109 }
110
111 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
112 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
113 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
114
115 /*
116 * Two possible things going on:
117 * redo a delete/undo a put: delete the item from the page.
118 * redo a put/undo a delete: add the item to the page.
119 * If we are undoing a delete, then the information logged is the
120 * entire entry off the page, not just the data of a dbt. In
121 * this case, we want to copy it back onto the page verbatim.
122 * We do this by calling __insertpair with the type H_OFFPAGE instead
123 * of H_KEYDATA.
124 */
125 if ((argp->opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
126 (argp->opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
127 /*
128 * Need to redo a PUT or undo a delete.
129 */
130 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
131 dindx = (db_indx_t)argp->ndx;
132 if ((ret = __ham_insertpair(dbc, pagep, &dindx, &argp->key,
133 &argp->data, OP_MODE_GET(argp->keytype),
134 OP_MODE_GET(argp->datatype))) != 0)
135 goto out;
136 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
137 } else if ((argp->opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
138 (argp->opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
139 /* Need to undo a put or redo a delete. */
140 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
141 __ham_dpair(file_dbp, pagep, argp->ndx);
142 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
143 }
144
145 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
146 goto out;
147 pagep = NULL;
148
149 /* Return the previous LSN. */
150 done: *lsnp = argp->prev_lsn;
151 ret = 0;
152
153 out: if (pagep != NULL)
154 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
155 REC_CLOSE;
156 }
157
158 /*
159 * __ham_insdel_42_recover --
160 *
161 * PUBLIC: int __ham_insdel_42_recover
162 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
163 */
164 int
__ham_insdel_42_recover(env,dbtp,lsnp,op,info)165 __ham_insdel_42_recover(env, dbtp, lsnp, op, info)
166 ENV *env;
167 DBT *dbtp;
168 DB_LSN *lsnp;
169 db_recops op;
170 void *info;
171 {
172 __ham_insdel_42_args *argp;
173 DB_THREAD_INFO *ip;
174 DB *file_dbp;
175 DBC *dbc;
176 DB_MPOOLFILE *mpf;
177 PAGE *pagep;
178 db_indx_t dindx;
179 u_int32_t dtype, ktype, opcode;
180 int cmp_n, cmp_p, ret;
181
182 ip = ((DB_TXNHEAD *)info)->thread_info;
183 pagep = NULL;
184 REC_PRINT(__ham_insdel_print);
185 REC_INTRO(__ham_insdel_42_read, ip, 1);
186
187 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
188 0, &pagep)) != 0) {
189 if (DB_UNDO(op)) {
190 if (ret == DB_PAGE_NOTFOUND)
191 goto done;
192 else {
193 ret = __db_pgerr(file_dbp, argp->pgno, ret);
194 goto out;
195 }
196 }
197 /* If the page is not here then it was later truncated. */
198 if (!IS_ZERO_LSN(argp->pagelsn))
199 goto done;
200 /*
201 * This page was created by a group allocation and
202 * the file may not have been extend yet.
203 * Create the page if necessary.
204 */
205 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
206 DB_MPOOL_CREATE, &pagep)) != 0) {
207 ret = __db_pgerr(file_dbp, argp->pgno, ret);
208 goto out;
209 }
210 }
211
212 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
213 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
214 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
215
216 /*
217 * Two possible things going on:
218 * redo a delete/undo a put: delete the item from the page.
219 * redo a put/undo a delete: add the item to the page.
220 * If we are undoing a delete, then the information logged is the
221 * entire entry off the page, not just the data of a dbt. In
222 * this case, we want to copy it back onto the page verbatim.
223 * We do this by calling __insertpair with the type H_OFFPAGE instead
224 * of H_KEYDATA.
225 */
226 opcode = OPCODE_OF(argp->opcode);
227 if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
228 (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
229 /*
230 * Need to redo a PUT or undo a delete.
231 */
232 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
233 ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
234 H_OFFPAGE : H_KEYDATA;
235 if (PAIR_ISDATADUP(argp->opcode))
236 dtype = H_DUPLICATE;
237 else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
238 dtype = H_OFFPAGE;
239 else
240 dtype = H_KEYDATA;
241 dindx = (db_indx_t)argp->ndx;
242 if ((ret = __ham_insertpair(dbc, pagep, &dindx,
243 &argp->key, &argp->data, ktype, dtype)) != 0)
244 goto out;
245 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
246 } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
247 (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
248 /* Need to undo a put or redo a delete. */
249 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
250 __ham_dpair(file_dbp, pagep, argp->ndx);
251 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
252 }
253
254 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
255 goto out;
256 pagep = NULL;
257
258 /* Return the previous LSN. */
259 done: *lsnp = argp->prev_lsn;
260 ret = 0;
261
262 out: if (pagep != NULL)
263 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
264 REC_CLOSE;
265 }
266
267 /*
268 * __ham_newpage_recover --
269 * This log message is used when we add/remove overflow pages. This
270 * message takes care of the pointer chains, not the data on the pages.
271 *
272 * PUBLIC: int __ham_newpage_recover
273 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
274 */
275 int
__ham_newpage_recover(env,dbtp,lsnp,op,info)276 __ham_newpage_recover(env, dbtp, lsnp, op, info)
277 ENV *env;
278 DBT *dbtp;
279 DB_LSN *lsnp;
280 db_recops op;
281 void *info;
282 {
283 __ham_newpage_args *argp;
284 DB_THREAD_INFO *ip;
285 DB *file_dbp;
286 DBC *dbc;
287 DB_MPOOLFILE *mpf;
288 PAGE *pagep;
289 int change, cmp_n, cmp_p, ret;
290
291 ip = ((DB_TXNHEAD *)info)->thread_info;
292 pagep = NULL;
293 REC_PRINT(__ham_newpage_print);
294 REC_INTRO(__ham_newpage_read, ip, 0);
295
296 REC_FGET(mpf, ip, argp->new_pgno, &pagep, ppage);
297 change = 0;
298
299 /*
300 * There are potentially three pages we need to check: the one
301 * that we created/deleted, the one before it and the one after
302 * it.
303 */
304
305 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
306 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
307 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
308 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
309
310 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
311 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
312 /* Redo a create new page or undo a delete new page. */
313 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
314 P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
315 argp->prev_pgno, argp->next_pgno, 0, P_HASH);
316 change = 1;
317 } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
318 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
319 /*
320 * Redo a delete or undo a create new page. All we
321 * really need to do is change the LSN.
322 */
323 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
324 change = 1;
325 }
326
327 if (change)
328 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
329
330 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
331 goto out;
332 pagep = NULL;
333
334 /* Now do the prev page. */
335 ppage: if (argp->prev_pgno != PGNO_INVALID) {
336 REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
337
338 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
339 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
340 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
341 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
342 change = 0;
343
344 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
345 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
346 /* Redo a create new page or undo a delete new page. */
347 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
348 pagep->next_pgno = argp->new_pgno;
349 change = 1;
350 } else if ((cmp_p == 0 &&
351 DB_REDO(op) && argp->opcode == DELOVFL) ||
352 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
353 /* Redo a delete or undo a create new page. */
354 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
355 pagep->next_pgno = argp->next_pgno;
356 change = 1;
357 }
358
359 if (change)
360 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
361
362 if ((ret = __memp_fput(mpf,
363 ip, pagep, file_dbp->priority)) != 0)
364 goto out;
365 pagep = NULL;
366 }
367
368 /* Now time to do the next page */
369 npage: if (argp->next_pgno != PGNO_INVALID) {
370 REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
371
372 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
373 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
374 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
375 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
376 change = 0;
377
378 if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
379 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
380 /* Redo a create new page or undo a delete new page. */
381 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
382 pagep->prev_pgno = argp->new_pgno;
383 change = 1;
384 } else if ((cmp_p == 0 &&
385 DB_REDO(op) && argp->opcode == DELOVFL) ||
386 (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
387 /* Redo a delete or undo a create new page. */
388 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
389 pagep->prev_pgno = argp->prev_pgno;
390 change = 1;
391 }
392
393 if (change)
394 LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
395
396 if ((ret = __memp_fput(mpf,
397 ip, pagep, file_dbp->priority)) != 0)
398 goto out;
399 pagep = NULL;
400 }
401 done: *lsnp = argp->prev_lsn;
402 ret = 0;
403
404 out: if (pagep != NULL)
405 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
406 REC_CLOSE;
407 }
408
409 /*
410 * __ham_replace_recover --
411 * This log message refers to partial puts that are local to a single
412 * page. You can think of them as special cases of the more general
413 * insdel log message.
414 *
415 * PUBLIC: int __ham_replace_recover
416 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
417 */
418 int
__ham_replace_recover(env,dbtp,lsnp,op,info)419 __ham_replace_recover(env, dbtp, lsnp, op, info)
420 ENV *env;
421 DBT *dbtp;
422 DB_LSN *lsnp;
423 db_recops op;
424 void *info;
425 {
426 __ham_replace_args *argp;
427 DB_THREAD_INFO *ip;
428 DB *file_dbp;
429 DBC *dbc;
430 DB_MPOOLFILE *mpf;
431 DBT dbt;
432 PAGE *pagep;
433 u_int32_t change;
434 int cmp_n, cmp_p, is_plus, modified, off, ret;
435 u_int8_t *hk;
436
437 ip = ((DB_TXNHEAD *)info)->thread_info;
438 pagep = NULL;
439 REC_PRINT(__ham_replace_print);
440 REC_INTRO(__ham_replace_read, ip, 0);
441
442 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
443
444 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
445 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
446 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
447 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
448
449 memset(&dbt, 0, sizeof(dbt));
450 modified = 0;
451
452 /*
453 * Before we know the direction of the transformation we will
454 * determine the size differential; then once we know if we are
455 * redoing or undoing, we'll adjust the sign (is_plus) appropriately.
456 */
457 if (argp->newitem.size > argp->olditem.size) {
458 change = argp->newitem.size - argp->olditem.size;
459 is_plus = 1;
460 } else {
461 change = argp->olditem.size - argp->newitem.size;
462 is_plus = 0;
463 }
464 /*
465 * When chaining from a "regular" record to an off page record
466 * the old record does not contain a header while the new record
467 * does and is at an offset of -1 relative to the data part of
468 * the record. We add this to the amount of the change (which is
469 * an absolute value). If we are undoing then the offset is not
470 * used in the placement of the data.
471 */
472 off = argp->off;
473 if (off < 0 &&
474 (OP_MODE_GET(argp->oldtype) == H_DUPLICATE ||
475 OP_MODE_GET(argp->oldtype) == H_KEYDATA)) {
476 change -= (u_int32_t)off;
477 if (DB_UNDO(op))
478 off = 0;
479 }
480 if (cmp_p == 0 && DB_REDO(op)) {
481 /* Reapply the change as specified. */
482 dbt.data = argp->newitem.data;
483 dbt.size = argp->newitem.size;
484 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
485 LSN(pagep) = *lsnp;
486 /*
487 * The is_plus flag is set properly to reflect
488 * newitem.size - olditem.size.
489 */
490 modified = 1;
491 } else if (cmp_n == 0 && DB_UNDO(op)) {
492 /* Undo the already applied change. */
493 dbt.data = argp->olditem.data;
494 dbt.size = argp->olditem.size;
495 /*
496 * Invert is_plus to reflect sign of
497 * olditem.size - newitem.size.
498 */
499 is_plus = !is_plus;
500 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
501 LSN(pagep) = argp->pagelsn;
502 modified = 1;
503 }
504
505 if (modified) {
506 __ham_onpage_replace(file_dbp, pagep,
507 argp->ndx, off, change, is_plus, &dbt);
508 if (argp->oldtype != argp->newtype) {
509 hk = P_ENTRY(file_dbp, pagep, argp->ndx);
510 if (DB_REDO(op))
511 HPAGE_PTYPE(hk) = OP_MODE_GET(argp->newtype);
512 else
513 HPAGE_PTYPE(hk) = OP_MODE_GET(argp->oldtype);
514 }
515 }
516
517 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
518 goto out;
519 pagep = NULL;
520
521 done: *lsnp = argp->prev_lsn;
522 ret = 0;
523
524 out: if (pagep != NULL)
525 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
526 REC_CLOSE;
527 }
528
529 /*
530 * __ham_replace_42_recover --
531 * This log message refers to partial puts that are local to a single
532 * page. You can think of them as special cases of the more general
533 * insdel log message.
534 *
535 * PUBLIC: int __ham_replace_42_recover
536 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
537 */
538 int
__ham_replace_42_recover(env,dbtp,lsnp,op,info)539 __ham_replace_42_recover(env, dbtp, lsnp, op, info)
540 ENV *env;
541 DBT *dbtp;
542 DB_LSN *lsnp;
543 db_recops op;
544 void *info;
545 {
546 __ham_replace_42_args *argp;
547 DB_THREAD_INFO *ip;
548 DB *file_dbp;
549 DBC *dbc;
550 DB_MPOOLFILE *mpf;
551 DBT dbt;
552 PAGE *pagep;
553 u_int32_t change;
554 int cmp_n, cmp_p, is_plus, modified, ret;
555 u_int8_t *hk;
556
557 ip = ((DB_TXNHEAD *)info)->thread_info;
558 pagep = NULL;
559 REC_PRINT(__ham_replace_print);
560 REC_INTRO(__ham_replace_42_read, ip, 0);
561
562 REC_FGET(mpf, ip, argp->pgno, &pagep, done);
563
564 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
565 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
566 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
567 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
568
569 memset(&dbt, 0, sizeof(dbt));
570 modified = 0;
571
572 /*
573 * Before we know the direction of the transformation we will
574 * determine the size differential; then once we know if we are
575 * redoing or undoing, we'll adjust the sign (is_plus) appropriately.
576 */
577 if (argp->newitem.size > argp->olditem.size) {
578 change = argp->newitem.size - argp->olditem.size;
579 is_plus = 1;
580 } else {
581 change = argp->olditem.size - argp->newitem.size;
582 is_plus = 0;
583 }
584 if (cmp_p == 0 && DB_REDO(op)) {
585 /* Reapply the change as specified. */
586 dbt.data = argp->newitem.data;
587 dbt.size = argp->newitem.size;
588 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
589 LSN(pagep) = *lsnp;
590 /*
591 * The is_plus flag is set properly to reflect
592 * newitem.size - olditem.size.
593 */
594 modified = 1;
595 } else if (cmp_n == 0 && DB_UNDO(op)) {
596 /* Undo the already applied change. */
597 dbt.data = argp->olditem.data;
598 dbt.size = argp->olditem.size;
599 /*
600 * Invert is_plus to reflect sign of
601 * olditem.size - newitem.size.
602 */
603 is_plus = !is_plus;
604 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
605 LSN(pagep) = argp->pagelsn;
606 modified = 1;
607 }
608
609 if (modified) {
610 __ham_onpage_replace(file_dbp, pagep,
611 argp->ndx, argp->off, change, is_plus, &dbt);
612 if (argp->makedup) {
613 hk = P_ENTRY(file_dbp, pagep, argp->ndx);
614 if (DB_REDO(op))
615 HPAGE_PTYPE(hk) = H_DUPLICATE;
616 else
617 HPAGE_PTYPE(hk) = H_KEYDATA;
618 }
619 }
620
621 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
622 goto out;
623 pagep = NULL;
624
625 done: *lsnp = argp->prev_lsn;
626 ret = 0;
627
628 out: if (pagep != NULL)
629 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
630 REC_CLOSE;
631 }
632
633 /*
634 * __ham_splitdata_recover --
635 *
636 * PUBLIC: int __ham_splitdata_recover
637 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
638 */
639 int
__ham_splitdata_recover(env,dbtp,lsnp,op,info)640 __ham_splitdata_recover(env, dbtp, lsnp, op, info)
641 ENV *env;
642 DBT *dbtp;
643 DB_LSN *lsnp;
644 db_recops op;
645 void *info;
646 {
647 __ham_splitdata_args *argp;
648 DB_THREAD_INFO *ip;
649 DB *file_dbp;
650 DBC *dbc;
651 DB_MPOOLFILE *mpf;
652 PAGE *pagep;
653 int cmp_n, cmp_p, ret;
654
655 ip = ((DB_TXNHEAD *)info)->thread_info;
656 pagep = NULL;
657 REC_PRINT(__ham_splitdata_print);
658 REC_INTRO(__ham_splitdata_read, ip, 1);
659
660 if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
661 if (DB_UNDO(op)) {
662 if (ret == DB_PAGE_NOTFOUND)
663 goto done;
664 else {
665 ret = __db_pgerr(file_dbp, argp->pgno, ret);
666 goto out;
667 }
668 }
669 /* If the page is not here then it was later truncated. */
670 if (!IS_ZERO_LSN(argp->pagelsn))
671 goto done;
672 /*
673 * This page was created by a group allocation and
674 * the file may not have been extend yet.
675 * Create the page if necessary.
676 */
677 if ((ret = __memp_fget(mpf, &argp->pgno,
678 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
679 ret = __db_pgerr(file_dbp, argp->pgno, ret);
680 goto out;
681 }
682 }
683
684 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
685 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
686 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
687 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
688
689 /*
690 * There are three types of log messages here. Two are related
691 * to an actual page split operation, one for the old page
692 * and one for the new pages created. The original image in the
693 * SPLITOLD record is used for undo. The image in the SPLITNEW
694 * is used for redo. We should never have a case where there is
695 * a redo operation and the SPLITOLD record is on disk, but not
696 * the SPLITNEW record. Therefore, we only have work to do when
697 * redo NEW messages and undo OLD messages, but we have to update
698 * LSNs in both cases.
699 *
700 * The third message is generated when a page is sorted (SORTPAGE). In
701 * an undo the original image in the SORTPAGE is used. In a redo we
702 * recreate the sort operation by calling __ham_sort_page.
703 */
704 if (cmp_p == 0 && DB_REDO(op)) {
705 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
706 if (argp->opcode == SPLITNEW)
707 /* Need to redo the split described. */
708 memcpy(pagep, argp->pageimage.data,
709 argp->pageimage.size);
710 else if (argp->opcode == SORTPAGE) {
711 if ((ret = __ham_sort_page(dbc, NULL, pagep)) != 0)
712 goto out;
713 }
714 LSN(pagep) = *lsnp;
715 } else if (cmp_n == 0 && DB_UNDO(op)) {
716 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
717 if (argp->opcode == SPLITOLD || argp->opcode == SORTPAGE) {
718 /* Put back the old image. */
719 memcpy(pagep, argp->pageimage.data,
720 argp->pageimage.size);
721 } else
722 P_INIT(pagep, file_dbp->pgsize, argp->pgno,
723 PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
724 LSN(pagep) = argp->pagelsn;
725 }
726 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
727 goto out;
728 pagep = NULL;
729
730 done: *lsnp = argp->prev_lsn;
731 ret = 0;
732
733 out: if (pagep != NULL)
734 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
735 REC_CLOSE;
736 }
737
738 /*
739 * __ham_copypage_recover --
740 * Recovery function for copypage.
741 *
742 * PUBLIC: int __ham_copypage_recover
743 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
744 */
745 int
__ham_copypage_recover(env,dbtp,lsnp,op,info)746 __ham_copypage_recover(env, dbtp, lsnp, op, info)
747 ENV *env;
748 DBT *dbtp;
749 DB_LSN *lsnp;
750 db_recops op;
751 void *info;
752 {
753 __ham_copypage_args *argp;
754 DB_THREAD_INFO *ip;
755 DB *file_dbp;
756 DBC *dbc;
757 DB_MPOOLFILE *mpf;
758 PAGE *pagep;
759 int cmp_n, cmp_p, ret;
760
761 ip = ((DB_TXNHEAD *)info)->thread_info;
762 pagep = NULL;
763 REC_PRINT(__ham_copypage_print);
764 REC_INTRO(__ham_copypage_read, ip, 0);
765
766 /* This is the bucket page. */
767 REC_FGET(mpf, ip, argp->pgno, &pagep, donext);
768
769 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
770 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
771 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
772
773 if (cmp_p == 0 && DB_REDO(op)) {
774 /* Need to redo update described. */
775 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
776 memcpy(pagep, argp->page.data, argp->page.size);
777 PGNO(pagep) = argp->pgno;
778 PREV_PGNO(pagep) = PGNO_INVALID;
779 LSN(pagep) = *lsnp;
780 } else if (cmp_n == 0 && DB_UNDO(op)) {
781 /* Need to undo update described. */
782 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
783 P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
784 argp->next_pgno, 0, P_HASH);
785 LSN(pagep) = argp->pagelsn;
786 }
787 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
788 goto out;
789 pagep = NULL;
790
791 donext: /* Now fix up the "next" page. */
792 REC_FGET(mpf, ip, argp->next_pgno, &pagep, do_nn);
793
794 /* For REDO just update the LSN. For UNDO copy page back. */
795 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
796 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
797 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
798 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
799 if (cmp_p == 0 && DB_REDO(op)) {
800 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
801 LSN(pagep) = *lsnp;
802 } else if (cmp_n == 0 && DB_UNDO(op)) {
803 /* Need to undo update described. */
804 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
805 memcpy(pagep, argp->page.data, argp->page.size);
806 }
807 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
808 goto out;
809 pagep = NULL;
810
811 /* Now fix up the next's next page. */
812 do_nn: if (argp->nnext_pgno == PGNO_INVALID)
813 goto done;
814
815 REC_FGET(mpf, ip, argp->nnext_pgno, &pagep, done);
816
817 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
818 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nnextlsn);
819 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nnextlsn);
820 CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
821
822 if (cmp_p == 0 && DB_REDO(op)) {
823 /* Need to redo update described. */
824 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
825 PREV_PGNO(pagep) = argp->pgno;
826 LSN(pagep) = *lsnp;
827 } else if (cmp_n == 0 && DB_UNDO(op)) {
828 /* Need to undo update described. */
829 REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
830 PREV_PGNO(pagep) = argp->next_pgno;
831 LSN(pagep) = argp->nnextlsn;
832 }
833 if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
834 goto out;
835 pagep = NULL;
836
837 done: *lsnp = argp->prev_lsn;
838 ret = 0;
839
840 out: if (pagep != NULL)
841 (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
842 REC_CLOSE;
843 }
844
845 /*
846 * __ham_metagroup_recover --
847 * Recovery function for metagroup.
848 *
849 * PUBLIC: int __ham_metagroup_recover
850 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
851 */
852 int
__ham_metagroup_recover(env,dbtp,lsnp,op,info)853 __ham_metagroup_recover(env, dbtp, lsnp, op, info)
854 ENV *env;
855 DBT *dbtp;
856 DB_LSN *lsnp;
857 db_recops op;
858 void *info;
859 {
860 __ham_metagroup_args *argp;
861 DB_THREAD_INFO *ip;
862 HASH_CURSOR *hcp;
863 DB *file_dbp;
864 DBMETA *mmeta;
865 DBC *dbc;
866 DB_MPOOLFILE *mpf;
867 PAGE *pagep;
868 db_pgno_t pgno;
869 int cmp_n, cmp_p, did_alloc, groupgrow, ret;
870
871 ip = ((DB_TXNHEAD *)info)->thread_info;
872 mmeta = NULL;
873 did_alloc = 0;
874 REC_PRINT(__ham_metagroup_print);
875 REC_INTRO(__ham_metagroup_read, ip, 1);
876
877 /*
878 * This logs the virtual create of pages pgno to pgno + bucket.
879 * The log record contains:
880 * bucket: old maximum bucket
881 * pgno: page number of the new bucket.
882 * We round up on log calculations, so we can figure out if we are
883 * about to double the hash table if argp->bucket+1 is a power of 2.
884 * If it is, then we are allocating an entire doubling of pages,
885 * otherwise, we are simply allocated one new page.
886 */
887 groupgrow =
888 (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
889 pgno = argp->pgno;
890 if (argp->newalloc)
891 pgno += argp->bucket;
892
893 pagep = NULL;
894 ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep);
895
896 /* If we are undoing, then we don't want to create the page. */
897 if (ret != 0 && DB_REDO(op))
898 ret = __memp_fget(mpf,
899 &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep);
900 else if (ret == DB_PAGE_NOTFOUND)
901 goto do_meta;
902 if (ret != 0) {
903 if (ret != ENOSPC)
904 goto out;
905 pgno = 0;
906 goto do_meta;
907 }
908
909 /*
910 * When we get here then either we did not grow the file
911 * (groupgrow == 0) or we did grow the file and the allocation
912 * of those new pages succeeded.
913 */
914 did_alloc = groupgrow;
915
916 cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
917 cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
918 CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
919
920 if (cmp_p == 0 && DB_REDO(op)) {
921 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
922 pagep->lsn = *lsnp;
923 } else if (cmp_n == 0 && DB_UNDO(op)) {
924 /* If this record allocated the pages give them back. */
925 if (argp->newalloc) {
926 if (pagep != NULL && (ret = __memp_fput(mpf,
927 ip, pagep, DB_PRIORITY_VERY_LOW)) != 0)
928 goto out;
929 pagep = NULL;
930 if ((ret = __memp_ftruncate(mpf, NULL, ip,
931 argp->pgno, 0)) != 0)
932 goto out;
933 } else {
934 /*
935 * Otherwise just roll the page back to its
936 * previous state.
937 */
938 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
939 pagep->lsn = argp->pagelsn;
940 }
941 }
942 if (pagep != NULL &&
943 (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
944 goto out;
945
946 /*
947 * If a earlier aborted allocation used one of our pages it may
948 * be in the wrong state, read all the pages in the group and init
949 * them to be empty.
950 */
951 if (DB_REDO(op) && argp->newalloc) {
952 for (pgno = argp->pgno;
953 pgno < argp->pgno + argp->bucket; pgno++) {
954 if ((ret = __memp_fget(mpf,
955 &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
956 goto out;
957
958 if (IS_ZERO_LSN(LSN(pagep))) {
959 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
960 P_INIT(pagep, file_dbp->pgsize,
961 PGNO_INVALID, PGNO_INVALID, PGNO_INVALID,
962 0, P_HASH);
963 }
964 if ((ret =
965 __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
966 goto out;
967 }
968 }
969
970 do_meta:
971 /* Now we have to update the meta-data page. */
972 hcp = (HASH_CURSOR *)dbc->internal;
973 if ((ret = __ham_get_meta(dbc)) != 0)
974 goto out;
975 cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn);
976 cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
977 CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
978 CHECK_ABORT(env, op, cmp_n, &hcp->hdr->dbmeta.lsn, lsnp);
979 if (cmp_p == 0 && DB_REDO(op)) {
980 /* Redo the actual updating of bucket counts. */
981 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
982 ++hcp->hdr->max_bucket;
983 if (groupgrow) {
984 hcp->hdr->low_mask = hcp->hdr->high_mask;
985 hcp->hdr->high_mask =
986 (argp->bucket + 1) | hcp->hdr->low_mask;
987 }
988 hcp->hdr->dbmeta.lsn = *lsnp;
989 } else if (cmp_n == 0 && DB_UNDO(op)) {
990 /* Undo the actual updating of bucket counts. */
991 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
992 hcp->hdr->max_bucket = argp->bucket;
993 if (groupgrow) {
994 hcp->hdr->high_mask = argp->bucket;
995 hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
996 }
997 hcp->hdr->dbmeta.lsn = argp->metalsn;
998 }
999
1000 /*
1001 * Now we need to fix up the spares array. Each entry in the
1002 * spares array indicates the beginning page number for the
1003 * indicated doubling.
1004 */
1005 if (cmp_p == 0 && did_alloc && !DB_UNDO(op)) {
1006 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1007 hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
1008 (argp->pgno - argp->bucket) - 1;
1009 }
1010 if (cmp_n == 0 && groupgrow && DB_UNDO(op)) {
1011 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1012 hcp->hdr->spares[
1013 __db_log2(argp->bucket + 1) + 1] = PGNO_INVALID;
1014 }
1015
1016 /*
1017 * Finally, we need to potentially fix up the last_pgno field
1018 * in the master meta-data page (which may or may not be the
1019 * same as the hash header page).
1020 */
1021 if (argp->mmpgno != argp->mpgno) {
1022 if ((ret = __memp_fget(mpf,
1023 &argp->mmpgno, ip, NULL, DB_MPOOL_EDIT, &mmeta)) != 0) {
1024 if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND)
1025 ret = 0;
1026 goto out;
1027 }
1028 cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn);
1029 cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn);
1030 if (cmp_p == 0 && DB_REDO(op)) {
1031 REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1032 mmeta->lsn = *lsnp;
1033 } else if (cmp_n == 0 && DB_UNDO(op)) {
1034 REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1035 mmeta->lsn = argp->mmetalsn;
1036 }
1037 } else {
1038 mmeta = (DBMETA *)hcp->hdr;
1039 REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1040 }
1041
1042 if (cmp_n == 0 && DB_UNDO(op))
1043 mmeta->last_pgno = argp->last_pgno;
1044 else if (cmp_p == 0 && DB_REDO(op) && mmeta->last_pgno < pgno)
1045 mmeta->last_pgno = pgno;
1046
1047 if (argp->mmpgno != argp->mpgno &&
1048 (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0)
1049 goto out;
1050 mmeta = NULL;
1051
1052 done: *lsnp = argp->prev_lsn;
1053 ret = 0;
1054
1055 out: if (mmeta != NULL)
1056 (void)__memp_fput(mpf, ip, mmeta, dbc->priority);
1057 if (dbc != NULL)
1058 (void)__ham_release_meta(dbc);
1059
1060 REC_CLOSE;
1061 }
1062
1063 /*
1064 * __ham_contract_recover --
1065 * Recovery function for contracting a hash table
1066 *
1067 * PUBLIC: int __ham_contract_recover
1068 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1069 */
1070 int
__ham_contract_recover(env,dbtp,lsnp,op,info)1071 __ham_contract_recover(env, dbtp, lsnp, op, info)
1072 ENV *env;
1073 DBT *dbtp;
1074 DB_LSN *lsnp;
1075 db_recops op;
1076 void *info;
1077 {
1078 __ham_contract_args *argp;
1079 DB_THREAD_INFO *ip;
1080 DB_MPOOLFILE *mpf;
1081 DB *file_dbp;
1082 DBC *dbc;
1083 HASH_CURSOR *hcp;
1084 HMETA *meta;
1085 int cmp_n, cmp_p, ret, t_ret;
1086
1087 ip = ((DB_TXNHEAD *)info)->thread_info;
1088 REC_PRINT(__ham_contract_print);
1089 REC_INTRO(__ham_contract_read, ip, 1);
1090
1091 hcp = (HASH_CURSOR *)dbc->internal;
1092 if ((ret = __ham_get_meta(dbc)) != 0)
1093 goto done;
1094 meta = hcp->hdr;
1095 cmp_n = LOG_COMPARE(lsnp, &meta->dbmeta.lsn);
1096 cmp_p = LOG_COMPARE(&meta->dbmeta.lsn, &argp->meta_lsn);
1097 CHECK_LSN(env, op, cmp_p, &meta->dbmeta.lsn, &argp->meta_lsn);
1098 if (cmp_p == 0 && DB_REDO(op)) {
1099 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1100 meta = hcp->hdr;
1101 meta->max_bucket = argp->bucket - 1;
1102 if (argp->bucket == meta->low_mask + 1) {
1103 meta->spares[
1104 __db_log2(argp->bucket) + 1] = PGNO_INVALID;
1105 meta->high_mask = meta->low_mask;
1106 meta->low_mask >>= 1;
1107 }
1108 meta->dbmeta.lsn = *lsnp;
1109 } else if (cmp_n == 0 && DB_UNDO(op)) {
1110 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1111 meta = hcp->hdr;
1112 meta->max_bucket = argp->bucket;
1113 if (argp->bucket == meta->high_mask + 1) {
1114 meta->spares[__db_log2(argp->bucket) + 1] =
1115 argp->pgno - argp->bucket;
1116 meta->low_mask = meta->high_mask;
1117 meta->high_mask = meta->max_bucket | meta->low_mask;
1118 }
1119 meta->dbmeta.lsn = argp->meta_lsn;
1120 }
1121 *lsnp = argp->prev_lsn;
1122
1123 out: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
1124 ret = t_ret;
1125 done: REC_CLOSE;
1126 }
1127
1128 /*
1129 * __ham_groupalloc_recover --
1130 * Recover the batch creation of a set of pages for a new database.
1131 *
1132 * PUBLIC: int __ham_groupalloc_recover
1133 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1134 */
1135 int
__ham_groupalloc_recover(env,dbtp,lsnp,op,info)1136 __ham_groupalloc_recover(env, dbtp, lsnp, op, info)
1137 ENV *env;
1138 DBT *dbtp;
1139 DB_LSN *lsnp;
1140 db_recops op;
1141 void *info;
1142 {
1143 __ham_groupalloc_args *argp;
1144 DB_THREAD_INFO *ip;
1145 DBMETA *mmeta;
1146 DB_MPOOLFILE *mpf;
1147 DB *file_dbp;
1148 DBC *dbc;
1149 PAGE *pagep;
1150 db_pgno_t pgno;
1151 int cmp_n, cmp_p, ret;
1152
1153 ip = ((DB_TXNHEAD *)info)->thread_info;
1154 mmeta = NULL;
1155 REC_PRINT(__ham_groupalloc_print);
1156 REC_INTRO(__ham_groupalloc_read, ip, 1);
1157
1158 pgno = PGNO_BASE_MD;
1159 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) {
1160 if (DB_REDO(op)) {
1161 ret = __db_pgerr(file_dbp, pgno, ret);
1162 goto out;
1163 } else
1164 goto done;
1165 }
1166
1167 cmp_n = LOG_COMPARE(lsnp, &LSN(mmeta));
1168 cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn);
1169 CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
1170 CHECK_ABORT(env, op, cmp_n, &LSN(mmeta), lsnp);
1171
1172 /*
1173 * Basically, we used mpool to allocate a chunk of pages.
1174 * We need to either add those to a free list (in the undo
1175 * case) or initialize them (in the redo case).
1176 *
1177 * If we are redoing and this is a hash subdatabase, it's possible
1178 * that the pages were never allocated, so we'd better check for
1179 * that and handle it here.
1180 */
1181 pgno = argp->start_pgno + argp->num - 1;
1182 if (DB_REDO(op)) {
1183 if ((ret = __ham_alloc_pages(dbc, argp, lsnp)) != 0)
1184 goto out;
1185 if (cmp_p == 0) {
1186 REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1187 LSN(mmeta) = *lsnp;
1188 }
1189 } else if (DB_UNDO(op)) {
1190 /*
1191 * Fetch the last page and determine if it is in
1192 * the post allocation state.
1193 */
1194 pagep = NULL;
1195 if ((ret = __memp_fget(mpf, &pgno,
1196 ip, NULL, DB_MPOOL_EDIT, &pagep)) == 0) {
1197 if (LOG_COMPARE(&pagep->lsn, lsnp) != 0) {
1198 if ((ret = __memp_fput(mpf, ip,
1199 pagep, DB_PRIORITY_VERY_LOW)) != 0)
1200 goto out;
1201 pagep = NULL;
1202 }
1203 } else if (ret != DB_PAGE_NOTFOUND)
1204 goto out;
1205 /*
1206 * If the last page was allocated then truncate back
1207 * to the first page.
1208 */
1209 if (pagep != NULL) {
1210 if ((ret = __memp_fput(mpf, ip,
1211 pagep, DB_PRIORITY_VERY_LOW)) != 0)
1212 goto out;
1213 if ((ret = __memp_ftruncate(mpf, NULL,
1214 ip, argp->start_pgno, 0)) != 0)
1215 goto out;
1216 }
1217
1218 /*
1219 * If we are rolling back the metapage, then make
1220 * sure it reflects the the correct last_pgno.
1221 */
1222 if (cmp_n == 0) {
1223 REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1224 mmeta->last_pgno = argp->last_pgno;
1225 }
1226 pgno = 0;
1227 if (cmp_n == 0) {
1228 REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1229 LSN(mmeta) = argp->meta_lsn;
1230 }
1231 }
1232
1233 /*
1234 * Set the last page number to the current value.
1235 */
1236 if (pgno > mmeta->last_pgno) {
1237 REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1238 mmeta->last_pgno = pgno;
1239 }
1240
1241 done: if (ret == 0)
1242 *lsnp = argp->prev_lsn;
1243 ret = 0;
1244
1245 out: if (mmeta != NULL)
1246 (void)__memp_fput(mpf, ip, mmeta, file_dbp->priority);
1247
1248 REC_CLOSE;
1249 }
1250
1251 /*
1252 * __ham_alloc_pages --
1253 *
1254 * Called during redo of a file create. We create new pages in the file
1255 * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a
1256 * __crdel_metasub message. If we manage to crash without the newly written
1257 * pages getting to disk (I'm not sure this can happen anywhere except our
1258 * test suite?!), then we need to go through a recreate the final pages.
1259 * Hash normally has holes in its files and handles them appropriately.
1260 */
1261 static int
__ham_alloc_pages(dbc,argp,lsnp)1262 __ham_alloc_pages(dbc, argp, lsnp)
1263 DBC *dbc;
1264 __ham_groupalloc_args *argp;
1265 DB_LSN *lsnp;
1266 {
1267 DB *file_dbp;
1268 DB_MPOOLFILE *mpf;
1269 DB_THREAD_INFO *ip;
1270 PAGE *pagep;
1271 db_pgno_t pgno;
1272 int ret;
1273
1274 file_dbp = dbc->dbp;
1275 mpf = file_dbp->mpf;
1276 ip = dbc->thread_info;
1277
1278 /* Read the last page of the allocation. */
1279 pgno = argp->start_pgno + argp->num - 1;
1280
1281 /* If the page exists, and it has been initialized, then we're done. */
1282 if ((ret =
1283 __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) == 0) {
1284 if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
1285 goto reinit_page;
1286 return (__memp_fput(mpf, ip, pagep, dbc->priority));
1287 }
1288
1289 /* Had to create the page. */
1290 if ((ret = __memp_fget(mpf, &pgno,
1291 ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1292 return (__db_pgerr(dbc->dbp, pgno, ret));
1293
1294 reinit_page:
1295 /* Initialize the newly allocated page. */
1296 REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1297 P_INIT(pagep, dbc->dbp->pgsize,
1298 pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1299 pagep->lsn = *lsnp;
1300
1301 out: return (__memp_fput(mpf, ip, pagep, dbc->priority));
1302 }
1303
1304 /*
1305 * __ham_changeslot_recover --
1306 * Recovery function for changeslot.
1307 * When we compact a hash database we may change one of the spares slots
1308 * to point at a new block of pages.
1309 *
1310 * PUBLIC: int __ham_changeslot_recover
1311 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1312 */
1313 int
__ham_changeslot_recover(env,dbtp,lsnp,op,info)1314 __ham_changeslot_recover(env, dbtp, lsnp, op, info)
1315 ENV *env;
1316 DBT *dbtp;
1317 DB_LSN *lsnp;
1318 db_recops op;
1319 void *info;
1320 {
1321 __ham_changeslot_args *argp;
1322 DB *file_dbp;
1323 DBC *dbc;
1324 DB_MPOOLFILE *mpf;
1325 DB_THREAD_INFO *ip;
1326 HASH_CURSOR *hcp;
1327 HMETA *meta;
1328 u_int32_t bucket;
1329 int cmp_n, cmp_p, ret;
1330
1331 ip = ((DB_TXNHEAD *)info)->thread_info;
1332
1333 REC_PRINT(__ham_changeslot_print);
1334 REC_INTRO(__ham_changeslot_read, ip, 1);
1335
1336 hcp = (HASH_CURSOR *)dbc->internal;
1337 if ((ret = __ham_get_meta(dbc)) != 0)
1338 goto out;
1339 meta = hcp->hdr;
1340 cmp_n = log_compare(lsnp, &LSN(meta));
1341 cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
1342
1343 bucket = argp->slot == 0 ? 0 : 1 << (argp->slot - 1);
1344 if (cmp_p == 0 && DB_REDO(op)) {
1345 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1346 meta = hcp->hdr;
1347 meta->spares[argp->slot] = argp->new - bucket;
1348 LSN(meta) = *lsnp;
1349 } else if (cmp_n == 0 && !DB_REDO(op)) {
1350 REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1351 meta = hcp->hdr;
1352 meta->spares[argp->slot] = argp->old - bucket;
1353 LSN(meta) = argp->meta_lsn;
1354 }
1355 *lsnp = argp->prev_lsn;
1356 ret = __ham_release_meta(dbc);
1357
1358 done:
1359 out: REC_CLOSE;
1360 }
1361
1362 /*
1363 * __ham_curadj_recover --
1364 * Undo cursor adjustments if a subtransaction fails.
1365 *
1366 * PUBLIC: int __ham_curadj_recover
1367 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1368 */
1369 int
__ham_curadj_recover(env,dbtp,lsnp,op,info)1370 __ham_curadj_recover(env, dbtp, lsnp, op, info)
1371 ENV *env;
1372 DBT *dbtp;
1373 DB_LSN *lsnp;
1374 db_recops op;
1375 void *info;
1376 {
1377 __ham_curadj_args *argp;
1378 db_ham_curadj mode, hamc_mode;
1379 DB_THREAD_INFO *ip;
1380 DB_MPOOLFILE *mpf;
1381 DB *file_dbp;
1382 DBC *dbc;
1383 HASH_CURSOR *hcp;
1384 int ret;
1385
1386 ip = ((DB_TXNHEAD *)info)->thread_info;
1387 REC_PRINT(__ham_curadj_print);
1388 REC_INTRO(__ham_curadj_read, ip, 1);
1389
1390 if (op != DB_TXN_ABORT)
1391 goto done;
1392
1393 mode = (db_ham_curadj)argp->add;
1394
1395 /*
1396 * Reverse the logged operation, so that the consequences are reversed
1397 * by the __hamc_update code.
1398 */
1399 switch (mode) {
1400 case DB_HAM_CURADJ_DEL:
1401 hamc_mode = DB_HAM_CURADJ_ADD;
1402 break;
1403 case DB_HAM_CURADJ_ADD:
1404 hamc_mode = DB_HAM_CURADJ_DEL;
1405 break;
1406 case DB_HAM_CURADJ_ADDMOD:
1407 hamc_mode = DB_HAM_CURADJ_DELMOD;
1408 break;
1409 case DB_HAM_CURADJ_DELMOD:
1410 hamc_mode = DB_HAM_CURADJ_ADDMOD;
1411 break;
1412 default:
1413 __db_errx(env, DB_STR("1122",
1414 "Invalid flag in __ham_curadj_recover"));
1415 ret = EINVAL;
1416 goto out;
1417 }
1418
1419 /*
1420 * Undo the adjustment by reinitializing the the cursor to look like
1421 * the one that was used to do the adjustment, then we invert the
1422 * add so that undo the adjustment.
1423 */
1424 hcp = (HASH_CURSOR *)dbc->internal;
1425 hcp->pgno = argp->pgno;
1426 hcp->indx = argp->indx;
1427 hcp->dup_off = argp->dup_off;
1428 hcp->order = argp->order;
1429 if (mode == DB_HAM_CURADJ_DEL)
1430 F_SET(hcp, H_DELETED);
1431 (void)__hamc_update(dbc, argp->len, hamc_mode, argp->is_dup);
1432
1433 done: *lsnp = argp->prev_lsn;
1434 out: REC_CLOSE;
1435 }
1436
1437 static int
__ham_chgpg_recover_func(cp,my_dbc,countp,pgno,indx,vargs)1438 __ham_chgpg_recover_func(cp, my_dbc, countp, pgno, indx, vargs)
1439 DBC *cp, *my_dbc;
1440 u_int32_t *countp;
1441 db_pgno_t pgno;
1442 u_int32_t indx;
1443 void *vargs;
1444 {
1445 BTREE_CURSOR *opdcp;
1446 HASH_CURSOR *lcp;
1447 u_int32_t order;
1448 int ret;
1449 __ham_chgpg_args *argp;
1450
1451 COMPQUIET(my_dbc, NULL);
1452 COMPQUIET(countp, NULL);
1453 COMPQUIET(pgno, 0);
1454 lcp = (HASH_CURSOR *)cp->internal;
1455 argp = vargs;
1456
1457 /* Overloaded field for DB_HAM_DEL*PG */
1458 order = argp->new_indx;
1459
1460 switch (argp->mode) {
1461 case DB_HAM_DELFIRSTPG:
1462 if (lcp->pgno != argp->new_pgno ||
1463 MVCC_SKIP_CURADJ(cp, lcp->pgno))
1464 break;
1465 if (lcp->indx != indx ||
1466 !F_ISSET(lcp, H_DELETED) ||
1467 lcp->order >= order) {
1468 lcp->pgno = argp->old_pgno;
1469 if (lcp->indx == indx)
1470 lcp->order -= order;
1471 }
1472 break;
1473 case DB_HAM_DELMIDPG:
1474 case DB_HAM_DELLASTPG:
1475 if (lcp->pgno == argp->new_pgno &&
1476 lcp->indx == indx &&
1477 F_ISSET(lcp, H_DELETED) &&
1478 lcp->order >= order &&
1479 !MVCC_SKIP_CURADJ(cp, lcp->pgno)) {
1480 lcp->pgno = argp->old_pgno;
1481 lcp->order -= order;
1482 lcp->indx = 0;
1483 }
1484 break;
1485 case DB_HAM_CHGPG:
1486 /*
1487 * If we're doing a CHGPG, we're undoing
1488 * the move of a non-deleted item to a
1489 * new page. Any cursors with the deleted
1490 * flag set do not belong to this item;
1491 * don't touch them.
1492 */
1493 if (F_ISSET(lcp, H_DELETED))
1494 break;
1495 /* FALLTHROUGH */
1496 case DB_HAM_SPLIT:
1497 if (lcp->pgno == argp->new_pgno &&
1498 lcp->indx == argp->new_indx &&
1499 !MVCC_SKIP_CURADJ(cp, lcp->pgno)) {
1500 lcp->indx = argp->old_indx;
1501 lcp->pgno = argp->old_pgno;
1502 }
1503 break;
1504 case DB_HAM_DUP:
1505 if (lcp->opd == NULL)
1506 break;
1507 opdcp = (BTREE_CURSOR *)lcp->opd->internal;
1508 if (opdcp->pgno != argp->new_pgno ||
1509 opdcp->indx != argp->new_indx ||
1510 MVCC_SKIP_CURADJ(lcp->opd, opdcp->pgno))
1511 break;
1512
1513 if (F_ISSET(opdcp, C_DELETED))
1514 F_SET(lcp, H_DELETED);
1515 /*
1516 * We can't close a cursor while we have the
1517 * dbp mutex locked, since c_close reacquires
1518 * it. It should be safe to drop the mutex
1519 * here, though, since newly opened cursors
1520 * are put only at the end of the tailq and
1521 * the cursor we're adjusting can't be closed
1522 * under us.
1523 */
1524 MUTEX_UNLOCK(cp->dbp->env, cp->dbp->mutex);
1525 ret = __dbc_close(lcp->opd);
1526 MUTEX_LOCK(cp->dbp->env, cp->dbp->mutex);
1527 if (ret != 0)
1528 return (ret);
1529 lcp->opd = NULL;
1530 break;
1531 }
1532 return (0);
1533 }
1534 /*
1535 * __ham_chgpg_recover --
1536 * Undo cursor adjustments if a subtransaction fails.
1537 *
1538 * PUBLIC: int __ham_chgpg_recover
1539 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1540 */
1541 int
__ham_chgpg_recover(env,dbtp,lsnp,op,info)1542 __ham_chgpg_recover(env, dbtp, lsnp, op, info)
1543 ENV *env;
1544 DBT *dbtp;
1545 DB_LSN *lsnp;
1546 db_recops op;
1547 void *info;
1548 {
1549 __ham_chgpg_args *argp;
1550 DB_THREAD_INFO *ip;
1551 DB_MPOOLFILE *mpf;
1552 DB *file_dbp;
1553 DBC *dbc;
1554 int ret;
1555 u_int32_t count;
1556
1557 ip = ((DB_TXNHEAD *)info)->thread_info;
1558 REC_PRINT(__ham_chgpg_print);
1559 REC_INTRO(__ham_chgpg_read, ip, 0);
1560
1561 if (op != DB_TXN_ABORT)
1562 goto done;
1563
1564 ret = __db_walk_cursors(file_dbp, dbc,
1565 __ham_chgpg_recover_func, &count, 0, argp->old_indx, argp);
1566
1567 done: *lsnp = argp->prev_lsn;
1568 out: REC_CLOSE;
1569 }
1570
/*
 * __ham_metagroup_42_recover --
 *	Recovery function for metagroup_42 log records (presumably the
 *	record layout written by release 4.2 -- confirm against the log
 *	format definitions).
 *
 *	The routine works in three steps:
 *	  1. fetch (creating if needed) the page named by the record and
 *	     move its LSN forward (redo) or back (undo);
 *	  2. update max_bucket, the masks and the LSN on the hash meta page,
 *	     and fill in the spares entry for the doubling if it is still
 *	     PGNO_INVALID;
 *	  3. make sure last_pgno on the master meta page covers the page
 *	     touched in step 1.
 *
 *	On the normal path *lsnp is replaced by the record's prev_lsn and 0
 *	is returned; on failure the first error is returned.
 *
 * PUBLIC: int __ham_metagroup_42_recover
 * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__ham_metagroup_42_recover(env, dbtp, lsnp, op, info)
	ENV *env;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__ham_metagroup_42_args *argp;
	DB_THREAD_INFO *ip;
	HASH_CURSOR *hcp;
	DB *file_dbp;
	DBMETA *mmeta;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	db_pgno_t pgno;
	u_int32_t flags;
	int cmp_n, cmp_p, did_alloc, groupgrow, ret;

	ip = ((DB_TXNHEAD *)info)->thread_info;
	/* mmeta doubles as the "needs releasing at out:" indicator. */
	mmeta = NULL;
	did_alloc = 0;
	REC_PRINT(__ham_metagroup_42_print);
	REC_INTRO(__ham_metagroup_42_read, ip, 1);

	/*
	 * This logs the virtual create of pages pgno to pgno + bucket.
	 * If HAVE_FTRUNCATE is not supported the mpool page-allocation is not
	 * transaction protected, we can never undo it.  Even in an abort,
	 * we have to allocate these pages to the hash table if they
	 * were actually created.  In particular, during disaster
	 * recovery the metapage may be before this point if we
	 * are rolling backward.  If the file has not been extended
	 * then the metapage could not have been updated.
	 * The log record contains:
	 * bucket: old maximum bucket
	 * pgno: page number of the new bucket.
	 * We round up on log calculations, so we can figure out if we are
	 * about to double the hash table if argp->bucket+1 is a power of 2.
	 * If it is, then we are allocating an entire doubling of pages,
	 * otherwise, we are simply allocating one new page.
	 */
	groupgrow =
	    (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
	pgno = argp->pgno;
	/* For a new allocation, look at the last page of the group. */
	if (argp->newalloc)
		pgno += argp->bucket;

	flags = 0;
	pagep = NULL;
	LF_SET(DB_MPOOL_CREATE);
	ret = __memp_fget(mpf, &pgno, ip, NULL, flags, &pagep);

	if (ret != 0) {
		/* Only running out of space is tolerated here. */
		if (ret != ENOSPC)
			goto out;
		/* No page: zero pgno so last_pgno is not raised below. */
		pgno = 0;
		goto do_meta;
	}

	/*
	 * When we get here then either we did not grow the file
	 * (groupgrow == 0) or we did grow the file and the allocation
	 * of those new pages succeeded.
	 */
	did_alloc = groupgrow;

	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);

	if (cmp_p == 0 && DB_REDO(op)) {
		/* Page is in its pre-image state: stamp it with our LSN. */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		pagep->lsn = *lsnp;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/*
		 * Otherwise just roll the page back to its
		 * previous state.
		 */
		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
		pagep->lsn = argp->pagelsn;
	}
	if (pagep != NULL &&
	    (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
		goto out;

do_meta:
	/* Now we have to update the meta-data page. */
	hcp = (HASH_CURSOR *)dbc->internal;
	if ((ret = __ham_get_meta(dbc)) != 0)
		goto out;
	/* From here on cmp_n/cmp_p describe the hash meta page. */
	cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn);
	cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
	CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Redo the actual updating of bucket counts. */
		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
		++hcp->hdr->max_bucket;
		if (groupgrow) {
			hcp->hdr->low_mask = hcp->hdr->high_mask;
			hcp->hdr->high_mask =
			    (argp->bucket + 1) | hcp->hdr->low_mask;
		}
		hcp->hdr->dbmeta.lsn = *lsnp;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Undo the actual updating of bucket counts. */
		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
		hcp->hdr->max_bucket = argp->bucket;
		if (groupgrow) {
			hcp->hdr->high_mask = argp->bucket;
			hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
		}
		hcp->hdr->dbmeta.lsn = argp->metalsn;
	}

	/*
	 * Now we need to fix up the spares array.  Each entry in the
	 * spares array indicates the beginning page number for the
	 * indicated doubling.  We need to fill this in whenever the
	 * spares array is invalid, if we never reclaim pages then
	 * we have to allocate the pages to the spares array in both
	 * the redo and undo cases.
	 */
	if (did_alloc &&
	    hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) {
		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
		hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
		    (argp->pgno - argp->bucket) - 1;
	}

	/*
	 * Finally, we need to potentially fix up the last_pgno field
	 * in the master meta-data page (which may or may not be the
	 * same as the hash header page).
	 */
	if (argp->mmpgno != argp->mpgno) {
		/* Separate master meta page: fetch it and adjust its LSN. */
		if ((ret = __memp_fget(mpf, &argp->mmpgno, ip, NULL,
		    DB_MPOOL_EDIT, &mmeta)) != 0) {
			/* A missing master meta page is fine when undoing. */
			if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND)
				ret = 0;
			goto out;
		}
		cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn);
		cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn);
		if (cmp_p == 0 && DB_REDO(op)) {
			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
			mmeta->lsn = *lsnp;
		} else if (cmp_n == 0 && DB_UNDO(op)) {
			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
			mmeta->lsn = argp->mmetalsn;
		}
	} else {
		/* The hash meta page is also the master meta page. */
		mmeta = (DBMETA *)hcp->hdr;
		REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
	}

	/* pgno is 0 if the page fetch above hit ENOSPC, so this is a no-op. */
	if (mmeta->last_pgno < pgno)
		mmeta->last_pgno = pgno;

	/* Only a separately fetched master meta page is released here. */
	if (argp->mmpgno != argp->mpgno &&
	    (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0)
		goto out;
	/* Cleared so the cleanup at out: does not release it again. */
	mmeta = NULL;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	if (mmeta != NULL)
		(void)__memp_fput(mpf, ip, mmeta, dbc->priority);
	if (dbc != NULL)
		(void)__ham_release_meta(dbc);

	REC_CLOSE;
}
1753
1754 /*
1755 * __ham_groupalloc_42_recover --
1756 * Recover the batch creation of a set of pages for a new database.
1757 *
1758 * PUBLIC: int __ham_groupalloc_42_recover
1759 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1760 */
1761 int
__ham_groupalloc_42_recover(env,dbtp,lsnp,op,info)1762 __ham_groupalloc_42_recover(env, dbtp, lsnp, op, info)
1763 ENV *env;
1764 DBT *dbtp;
1765 DB_LSN *lsnp;
1766 db_recops op;
1767 void *info;
1768 {
1769 __ham_groupalloc_42_args *argp;
1770 DB_THREAD_INFO *ip;
1771 DBMETA *mmeta;
1772 DB_MPOOLFILE *mpf;
1773 DB *file_dbp;
1774 DBC *dbc;
1775 db_pgno_t pgno;
1776 int cmp_p, ret;
1777
1778 ip = ((DB_TXNHEAD *)info)->thread_info;
1779 mmeta = NULL;
1780 REC_PRINT(__ham_groupalloc_42_print);
1781 REC_INTRO(__ham_groupalloc_42_read, ip, 1);
1782
1783 pgno = PGNO_BASE_MD;
1784 if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) {
1785 if (DB_REDO(op)) {
1786 ret = __db_pgerr(file_dbp, pgno, ret);
1787 goto out;
1788 } else
1789 goto done;
1790 }
1791
1792 cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn);
1793 CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
1794
1795 /*
1796 * Basically, we used mpool to allocate a chunk of pages.
1797 * We need to either add those to a free list (in the undo
1798 * case) or initialize them (in the redo case).
1799 *
1800 * If we are redoing and this is a hash subdatabase, it's possible
1801 * that the pages were never allocated, so we'd better check for
1802 * that and handle it here.
1803 */
1804 pgno = argp->start_pgno + argp->num - 1;
1805 if (DB_REDO(op)) {
1806 if ((ret = __ham_alloc_pages_42(dbc, argp, lsnp)) != 0)
1807 goto out;
1808 if (cmp_p == 0) {
1809 REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1810 LSN(mmeta) = *lsnp;
1811 }
1812 } else if (DB_UNDO(op)) {
1813 /*
1814 * We cannot roll back 4.2 style allocations.
1815 */
1816 __db_errx(env, DB_STR("1123",
1817 "Cannot replicate prepared transactions from master running release 4.2."));
1818 ret = __env_panic(env, EINVAL);
1819 goto out;
1820 }
1821
1822 /*
1823 * In both REDO and UNDO, we have grown the file and need to make
1824 * sure that last_pgno is correct. If we HAVE_FTRUNCATE pgno
1825 * will only be valid on REDO.
1826 */
1827 if (pgno > mmeta->last_pgno) {
1828 REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1829 mmeta->last_pgno = pgno;
1830 }
1831
1832 done: if (ret == 0)
1833 *lsnp = argp->prev_lsn;
1834 ret = 0;
1835
1836 out: if (mmeta != NULL)
1837 (void)__memp_fput(mpf, ip, mmeta, dbc->priority);
1838
1839 REC_CLOSE;
1840 }
1841
1842 /*
1843 * __ham_alloc_pages_42 --
1844 *
1845 * Called during redo of a file create. We create new pages in the file
1846 * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a
1847 * __crdel_metasub message. If we manage to crash without the newly written
1848 * pages getting to disk (I'm not sure this can happen anywhere except our
1849 * test suite?!), then we need to go through a recreate the final pages.
1850 * Hash normally has holes in its files and handles them appropriately.
1851 */
1852 static int
__ham_alloc_pages_42(dbc,argp,lsnp)1853 __ham_alloc_pages_42(dbc, argp, lsnp)
1854 DBC *dbc;
1855 __ham_groupalloc_42_args *argp;
1856 DB_LSN *lsnp;
1857 {
1858 DB_MPOOLFILE *mpf;
1859 DB_THREAD_INFO *ip;
1860 PAGE *pagep;
1861 db_pgno_t pgno;
1862 int ret;
1863
1864 mpf = dbc->dbp->mpf;
1865 ip = dbc->thread_info;
1866
1867 /* Read the last page of the allocation. */
1868 pgno = argp->start_pgno + argp->num - 1;
1869
1870 /* If the page exists, and it has been initialized, then we're done. */
1871 if ((ret = __memp_fget(mpf,
1872 &pgno, ip, NULL, 0, &pagep)) == 0) {
1873 if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
1874 goto reinit_page;
1875 if ((ret = __memp_fput(mpf,
1876 ip, pagep, dbc->priority)) != 0)
1877 return (ret);
1878 return (0);
1879 }
1880
1881 /* Had to create the page. */
1882 if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
1883 DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) != 0)
1884 return (__db_pgerr(dbc->dbp, pgno, ret));
1885
1886 reinit_page:
1887 /* Initialize the newly allocated page. */
1888 P_INIT(pagep,
1889 dbc->dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1890 pagep->lsn = *lsnp;
1891
1892 if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1893 return (ret);
1894
1895 return (0);
1896 }
1897