1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  */
6 /*
7  * Copyright (c) 1995, 1996
8  *	Margo Seltzer.  All rights reserved.
9  */
10 /*
11  * Copyright (c) 1995, 1996
12  *	The President and Fellows of Harvard University.  All rights reserved.
13  *
14  * This code is derived from software contributed to Berkeley by
15  * Margo Seltzer.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  * $Id$
42  */
43 
44 #include "db_config.h"
45 
46 #include "db_int.h"
47 #include "dbinc/db_page.h"
48 #include "dbinc/btree.h"
49 #include "dbinc/hash.h"
50 #include "dbinc/mp.h"
51 
52 static int __ham_alloc_pages __P((DBC *, __ham_groupalloc_args *, DB_LSN *));
53 static int __ham_alloc_pages_42
54     __P((DBC *, __ham_groupalloc_42_args *, DB_LSN *));
55 static int __ham_chgpg_recover_func
56     __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
57 
58 /*
59  * __ham_insdel_recover --
60  *
61  * PUBLIC: int __ham_insdel_recover
62  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
63  */
64 int
__ham_insdel_recover(env,dbtp,lsnp,op,info)65 __ham_insdel_recover(env, dbtp, lsnp, op, info)
66 	ENV *env;
67 	DBT *dbtp;
68 	DB_LSN *lsnp;
69 	db_recops op;
70 	void *info;
71 {
72 	__ham_insdel_args *argp;
73 	DB_THREAD_INFO *ip;
74 	DB *file_dbp;
75 	DBC *dbc;
76 	DB_MPOOLFILE *mpf;
77 	PAGE *pagep;
78 	db_indx_t dindx;
79 	int cmp_n, cmp_p, ret;
80 
81 	ip = ((DB_TXNHEAD *)info)->thread_info;
82 	pagep = NULL;
83 	REC_PRINT(__ham_insdel_print);
84 	REC_INTRO(__ham_insdel_read, ip, 1);
85 
86 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
87 	    0, &pagep)) != 0) {
88 		if (DB_UNDO(op)) {
89 			if (ret == DB_PAGE_NOTFOUND)
90 				goto done;
91 			else {
92 				ret = __db_pgerr(file_dbp, argp->pgno, ret);
93 				goto out;
94 			}
95 		}
96 		/* If the page is not here then it was later truncated. */
97 		if (!IS_ZERO_LSN(argp->pagelsn))
98 			goto done;
99 		/*
100 		 * This page was created by a group allocation and
101 		 * the file may not have been extend yet.
102 		 * Create the page if necessary.
103 		 */
104 		if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
105 		    DB_MPOOL_CREATE, &pagep)) != 0) {
106 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
107 			goto out;
108 		}
109 	}
110 
111 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
112 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
113 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
114 
115 	/*
116 	 * Two possible things going on:
117 	 * redo a delete/undo a put: delete the item from the page.
118 	 * redo a put/undo a delete: add the item to the page.
119 	 * If we are undoing a delete, then the information logged is the
120 	 * entire entry off the page, not just the data of a dbt.  In
121 	 * this case, we want to copy it back onto the page verbatim.
122 	 * We do this by calling __insertpair with the type H_OFFPAGE instead
123 	 * of H_KEYDATA.
124 	 */
125 	if ((argp->opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
126 	    (argp->opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
127 		/*
128 		 * Need to redo a PUT or undo a delete.
129 		 */
130 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
131 		dindx = (db_indx_t)argp->ndx;
132 		if ((ret = __ham_insertpair(dbc, pagep, &dindx, &argp->key,
133 		    &argp->data, OP_MODE_GET(argp->keytype),
134 		    OP_MODE_GET(argp->datatype))) != 0)
135 			goto out;
136 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
137 	} else if ((argp->opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
138 	    (argp->opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
139 		/* Need to undo a put or redo a delete. */
140 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
141 		__ham_dpair(file_dbp, pagep, argp->ndx);
142 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
143 	}
144 
145 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
146 		goto out;
147 	pagep = NULL;
148 
149 	/* Return the previous LSN. */
150 done:	*lsnp = argp->prev_lsn;
151 	ret = 0;
152 
153 out:	if (pagep != NULL)
154 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
155 	REC_CLOSE;
156 }
157 
158 /*
159  * __ham_insdel_42_recover --
160  *
161  * PUBLIC: int __ham_insdel_42_recover
162  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
163  */
164 int
__ham_insdel_42_recover(env,dbtp,lsnp,op,info)165 __ham_insdel_42_recover(env, dbtp, lsnp, op, info)
166 	ENV *env;
167 	DBT *dbtp;
168 	DB_LSN *lsnp;
169 	db_recops op;
170 	void *info;
171 {
172 	__ham_insdel_42_args *argp;
173 	DB_THREAD_INFO *ip;
174 	DB *file_dbp;
175 	DBC *dbc;
176 	DB_MPOOLFILE *mpf;
177 	PAGE *pagep;
178 	db_indx_t dindx;
179 	u_int32_t dtype, ktype, opcode;
180 	int cmp_n, cmp_p, ret;
181 
182 	ip = ((DB_TXNHEAD *)info)->thread_info;
183 	pagep = NULL;
184 	REC_PRINT(__ham_insdel_print);
185 	REC_INTRO(__ham_insdel_42_read, ip, 1);
186 
187 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
188 	    0, &pagep)) != 0) {
189 		if (DB_UNDO(op)) {
190 			if (ret == DB_PAGE_NOTFOUND)
191 				goto done;
192 			else {
193 				ret = __db_pgerr(file_dbp, argp->pgno, ret);
194 				goto out;
195 			}
196 		}
197 		/* If the page is not here then it was later truncated. */
198 		if (!IS_ZERO_LSN(argp->pagelsn))
199 			goto done;
200 		/*
201 		 * This page was created by a group allocation and
202 		 * the file may not have been extend yet.
203 		 * Create the page if necessary.
204 		 */
205 		if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
206 		    DB_MPOOL_CREATE, &pagep)) != 0) {
207 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
208 			goto out;
209 		}
210 	}
211 
212 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
213 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
214 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
215 
216 	/*
217 	 * Two possible things going on:
218 	 * redo a delete/undo a put: delete the item from the page.
219 	 * redo a put/undo a delete: add the item to the page.
220 	 * If we are undoing a delete, then the information logged is the
221 	 * entire entry off the page, not just the data of a dbt.  In
222 	 * this case, we want to copy it back onto the page verbatim.
223 	 * We do this by calling __insertpair with the type H_OFFPAGE instead
224 	 * of H_KEYDATA.
225 	 */
226 	opcode = OPCODE_OF(argp->opcode);
227 	if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
228 	    (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
229 		/*
230 		 * Need to redo a PUT or undo a delete.
231 		 */
232 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
233 		ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
234 		    H_OFFPAGE : H_KEYDATA;
235 		if (PAIR_ISDATADUP(argp->opcode))
236 			dtype = H_DUPLICATE;
237 		else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
238 			dtype = H_OFFPAGE;
239 		else
240 			dtype = H_KEYDATA;
241 		dindx = (db_indx_t)argp->ndx;
242 		if ((ret = __ham_insertpair(dbc, pagep, &dindx,
243 		    &argp->key, &argp->data, ktype, dtype)) != 0)
244 			goto out;
245 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
246 	} else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
247 	    (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
248 		/* Need to undo a put or redo a delete. */
249 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
250 		__ham_dpair(file_dbp, pagep, argp->ndx);
251 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
252 	}
253 
254 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
255 		goto out;
256 	pagep = NULL;
257 
258 	/* Return the previous LSN. */
259 done:	*lsnp = argp->prev_lsn;
260 	ret = 0;
261 
262 out:	if (pagep != NULL)
263 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
264 	REC_CLOSE;
265 }
266 
267 /*
268  * __ham_newpage_recover --
269  *	This log message is used when we add/remove overflow pages.  This
270  *	message takes care of the pointer chains, not the data on the pages.
271  *
272  * PUBLIC: int __ham_newpage_recover
273  * PUBLIC:     __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
274  */
275 int
__ham_newpage_recover(env,dbtp,lsnp,op,info)276 __ham_newpage_recover(env, dbtp, lsnp, op, info)
277 	ENV *env;
278 	DBT *dbtp;
279 	DB_LSN *lsnp;
280 	db_recops op;
281 	void *info;
282 {
283 	__ham_newpage_args *argp;
284 	DB_THREAD_INFO *ip;
285 	DB *file_dbp;
286 	DBC *dbc;
287 	DB_MPOOLFILE *mpf;
288 	PAGE *pagep;
289 	int change, cmp_n, cmp_p, ret;
290 
291 	ip = ((DB_TXNHEAD *)info)->thread_info;
292 	pagep = NULL;
293 	REC_PRINT(__ham_newpage_print);
294 	REC_INTRO(__ham_newpage_read, ip, 0);
295 
296 	REC_FGET(mpf, ip, argp->new_pgno, &pagep, ppage);
297 	change = 0;
298 
299 	/*
300 	 * There are potentially three pages we need to check: the one
301 	 * that we created/deleted, the one before it and the one after
302 	 * it.
303 	 */
304 
305 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
306 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
307 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
308 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
309 
310 	if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
311 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
312 		/* Redo a create new page or undo a delete new page. */
313 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
314 		P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
315 		    argp->prev_pgno, argp->next_pgno, 0, P_HASH);
316 		change = 1;
317 	} else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
318 	    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
319 		/*
320 		 * Redo a delete or undo a create new page.  All we
321 		 * really need to do is change the LSN.
322 		 */
323 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
324 		change = 1;
325 	}
326 
327 	if (change)
328 		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
329 
330 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
331 		goto out;
332 	pagep = NULL;
333 
334 	/* Now do the prev page. */
335 ppage:	if (argp->prev_pgno != PGNO_INVALID) {
336 		REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
337 
338 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
339 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
340 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
341 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
342 		change = 0;
343 
344 		if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
345 		    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
346 			/* Redo a create new page or undo a delete new page. */
347 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
348 			pagep->next_pgno = argp->new_pgno;
349 			change = 1;
350 		} else if ((cmp_p == 0 &&
351 		    DB_REDO(op) && argp->opcode == DELOVFL) ||
352 		    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
353 			/* Redo a delete or undo a create new page. */
354 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
355 			pagep->next_pgno = argp->next_pgno;
356 			change = 1;
357 		}
358 
359 		if (change)
360 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
361 
362 		if ((ret = __memp_fput(mpf,
363 		    ip, pagep, file_dbp->priority)) != 0)
364 			goto out;
365 		pagep = NULL;
366 	}
367 
368 	/* Now time to do the next page */
369 npage:	if (argp->next_pgno != PGNO_INVALID) {
370 		REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
371 
372 		cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
373 		cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
374 		CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
375 		CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
376 		change = 0;
377 
378 		if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
379 		    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
380 			/* Redo a create new page or undo a delete new page. */
381 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
382 			pagep->prev_pgno = argp->new_pgno;
383 			change = 1;
384 		} else if ((cmp_p == 0 &&
385 		    DB_REDO(op) && argp->opcode == DELOVFL) ||
386 		    (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
387 			/* Redo a delete or undo a create new page. */
388 			REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
389 			pagep->prev_pgno = argp->prev_pgno;
390 			change = 1;
391 		}
392 
393 		if (change)
394 			LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
395 
396 		if ((ret = __memp_fput(mpf,
397 		    ip, pagep, file_dbp->priority)) != 0)
398 			goto out;
399 		pagep = NULL;
400 	}
401 done:	*lsnp = argp->prev_lsn;
402 	ret = 0;
403 
404 out:	if (pagep != NULL)
405 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
406 	REC_CLOSE;
407 }
408 
409 /*
410  * __ham_replace_recover --
411  *	This log message refers to partial puts that are local to a single
412  *	page.  You can think of them as special cases of the more general
413  *	insdel log message.
414  *
415  * PUBLIC: int __ham_replace_recover
416  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
417  */
418 int
__ham_replace_recover(env,dbtp,lsnp,op,info)419 __ham_replace_recover(env, dbtp, lsnp, op, info)
420 	ENV *env;
421 	DBT *dbtp;
422 	DB_LSN *lsnp;
423 	db_recops op;
424 	void *info;
425 {
426 	__ham_replace_args *argp;
427 	DB_THREAD_INFO *ip;
428 	DB *file_dbp;
429 	DBC *dbc;
430 	DB_MPOOLFILE *mpf;
431 	DBT dbt;
432 	PAGE *pagep;
433 	u_int32_t change;
434 	int cmp_n, cmp_p, is_plus, modified, off, ret;
435 	u_int8_t *hk;
436 
437 	ip = ((DB_TXNHEAD *)info)->thread_info;
438 	pagep = NULL;
439 	REC_PRINT(__ham_replace_print);
440 	REC_INTRO(__ham_replace_read, ip, 0);
441 
442 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
443 
444 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
445 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
446 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
447 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
448 
449 	memset(&dbt, 0, sizeof(dbt));
450 	modified = 0;
451 
452 	/*
453 	 * Before we know the direction of the transformation we will
454 	 * determine the size differential; then once we know if we are
455 	 * redoing or undoing, we'll adjust the sign (is_plus) appropriately.
456 	 */
457 	if (argp->newitem.size > argp->olditem.size) {
458 		change = argp->newitem.size - argp->olditem.size;
459 		is_plus = 1;
460 	} else {
461 		change = argp->olditem.size - argp->newitem.size;
462 		is_plus = 0;
463 	}
464 	/*
465 	 * When chaining from a "regular" record to an off page record
466 	 * the old record does not contain a header while the new record
467 	 * does and is at an offset of -1 relative to the data part of
468 	 * the record. We add this to the amount of the change (which is
469 	 * an absolute value).  If we are undoing then the offset is not
470 	 * used in the placement of the data.
471 	 */
472 	off = argp->off;
473 	if (off < 0 &&
474 	     (OP_MODE_GET(argp->oldtype) == H_DUPLICATE ||
475 	     OP_MODE_GET(argp->oldtype) == H_KEYDATA)) {
476 		change -= (u_int32_t)off;
477 		if (DB_UNDO(op))
478 			off = 0;
479 	}
480 	if (cmp_p == 0 && DB_REDO(op)) {
481 		/* Reapply the change as specified. */
482 		dbt.data = argp->newitem.data;
483 		dbt.size = argp->newitem.size;
484 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
485 		LSN(pagep) = *lsnp;
486 		/*
487 		 * The is_plus flag is set properly to reflect
488 		 * newitem.size - olditem.size.
489 		 */
490 		modified = 1;
491 	} else if (cmp_n == 0 && DB_UNDO(op)) {
492 		/* Undo the already applied change. */
493 		dbt.data = argp->olditem.data;
494 		dbt.size = argp->olditem.size;
495 		/*
496 		 * Invert is_plus to reflect sign of
497 		 * olditem.size - newitem.size.
498 		 */
499 		is_plus = !is_plus;
500 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
501 		LSN(pagep) = argp->pagelsn;
502 		modified = 1;
503 	}
504 
505 	if (modified) {
506 		__ham_onpage_replace(file_dbp, pagep,
507 		    argp->ndx, off, change, is_plus, &dbt);
508 		if (argp->oldtype != argp->newtype) {
509 			hk = P_ENTRY(file_dbp, pagep, argp->ndx);
510 			if (DB_REDO(op))
511 				HPAGE_PTYPE(hk) = OP_MODE_GET(argp->newtype);
512 			else
513 				HPAGE_PTYPE(hk) = OP_MODE_GET(argp->oldtype);
514 		}
515 	}
516 
517 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
518 		goto out;
519 	pagep = NULL;
520 
521 done:	*lsnp = argp->prev_lsn;
522 	ret = 0;
523 
524 out:	if (pagep != NULL)
525 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
526 	REC_CLOSE;
527 }
528 
529 /*
530  * __ham_replace_42_recover --
531  *	This log message refers to partial puts that are local to a single
532  *	page.  You can think of them as special cases of the more general
533  *	insdel log message.
534  *
535  * PUBLIC: int __ham_replace_42_recover
536  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
537  */
538 int
__ham_replace_42_recover(env,dbtp,lsnp,op,info)539 __ham_replace_42_recover(env, dbtp, lsnp, op, info)
540 	ENV *env;
541 	DBT *dbtp;
542 	DB_LSN *lsnp;
543 	db_recops op;
544 	void *info;
545 {
546 	__ham_replace_42_args *argp;
547 	DB_THREAD_INFO *ip;
548 	DB *file_dbp;
549 	DBC *dbc;
550 	DB_MPOOLFILE *mpf;
551 	DBT dbt;
552 	PAGE *pagep;
553 	u_int32_t change;
554 	int cmp_n, cmp_p, is_plus, modified, ret;
555 	u_int8_t *hk;
556 
557 	ip = ((DB_TXNHEAD *)info)->thread_info;
558 	pagep = NULL;
559 	REC_PRINT(__ham_replace_print);
560 	REC_INTRO(__ham_replace_42_read, ip, 0);
561 
562 	REC_FGET(mpf, ip, argp->pgno, &pagep, done);
563 
564 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
565 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
566 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
567 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
568 
569 	memset(&dbt, 0, sizeof(dbt));
570 	modified = 0;
571 
572 	/*
573 	 * Before we know the direction of the transformation we will
574 	 * determine the size differential; then once we know if we are
575 	 * redoing or undoing, we'll adjust the sign (is_plus) appropriately.
576 	 */
577 	if (argp->newitem.size > argp->olditem.size) {
578 		change = argp->newitem.size - argp->olditem.size;
579 		is_plus = 1;
580 	} else {
581 		change = argp->olditem.size - argp->newitem.size;
582 		is_plus = 0;
583 	}
584 	if (cmp_p == 0 && DB_REDO(op)) {
585 		/* Reapply the change as specified. */
586 		dbt.data = argp->newitem.data;
587 		dbt.size = argp->newitem.size;
588 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
589 		LSN(pagep) = *lsnp;
590 		/*
591 		 * The is_plus flag is set properly to reflect
592 		 * newitem.size - olditem.size.
593 		 */
594 		modified = 1;
595 	} else if (cmp_n == 0 && DB_UNDO(op)) {
596 		/* Undo the already applied change. */
597 		dbt.data = argp->olditem.data;
598 		dbt.size = argp->olditem.size;
599 		/*
600 		 * Invert is_plus to reflect sign of
601 		 * olditem.size - newitem.size.
602 		 */
603 		is_plus = !is_plus;
604 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
605 		LSN(pagep) = argp->pagelsn;
606 		modified = 1;
607 	}
608 
609 	if (modified) {
610 		__ham_onpage_replace(file_dbp, pagep,
611 		    argp->ndx, argp->off, change, is_plus, &dbt);
612 		if (argp->makedup) {
613 			hk = P_ENTRY(file_dbp, pagep, argp->ndx);
614 			if (DB_REDO(op))
615 				HPAGE_PTYPE(hk) = H_DUPLICATE;
616 			else
617 				HPAGE_PTYPE(hk) = H_KEYDATA;
618 		}
619 	}
620 
621 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
622 		goto out;
623 	pagep = NULL;
624 
625 done:	*lsnp = argp->prev_lsn;
626 	ret = 0;
627 
628 out:	if (pagep != NULL)
629 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
630 	REC_CLOSE;
631 }
632 
633 /*
634  * __ham_splitdata_recover --
635  *
636  * PUBLIC: int __ham_splitdata_recover
637  * PUBLIC:    __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
638  */
639 int
__ham_splitdata_recover(env,dbtp,lsnp,op,info)640 __ham_splitdata_recover(env, dbtp, lsnp, op, info)
641 	ENV *env;
642 	DBT *dbtp;
643 	DB_LSN *lsnp;
644 	db_recops op;
645 	void *info;
646 {
647 	__ham_splitdata_args *argp;
648 	DB_THREAD_INFO *ip;
649 	DB *file_dbp;
650 	DBC *dbc;
651 	DB_MPOOLFILE *mpf;
652 	PAGE *pagep;
653 	int cmp_n, cmp_p, ret;
654 
655 	ip = ((DB_TXNHEAD *)info)->thread_info;
656 	pagep = NULL;
657 	REC_PRINT(__ham_splitdata_print);
658 	REC_INTRO(__ham_splitdata_read, ip, 1);
659 
660 	if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
661 		if (DB_UNDO(op)) {
662 			if (ret == DB_PAGE_NOTFOUND)
663 				goto done;
664 			else {
665 				ret = __db_pgerr(file_dbp, argp->pgno, ret);
666 				goto out;
667 			}
668 		}
669 		/* If the page is not here then it was later truncated. */
670 		if (!IS_ZERO_LSN(argp->pagelsn))
671 			goto done;
672 		/*
673 		 * This page was created by a group allocation and
674 		 * the file may not have been extend yet.
675 		 * Create the page if necessary.
676 		 */
677 		if ((ret = __memp_fget(mpf, &argp->pgno,
678 		    ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
679 			ret = __db_pgerr(file_dbp, argp->pgno, ret);
680 			goto out;
681 		}
682 	}
683 
684 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
685 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
686 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
687 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
688 
689 	/*
690 	 * There are three types of log messages here. Two are related
691 	 * to an actual page split operation, one for the old page
692 	 * and one for the new pages created.  The original image in the
693 	 * SPLITOLD record is used for undo.  The image in the SPLITNEW
694 	 * is used for redo.  We should never have a case where there is
695 	 * a redo operation and the SPLITOLD record is on disk, but not
696 	 * the SPLITNEW record.  Therefore, we only have work to do when
697 	 * redo NEW messages and undo OLD messages, but we have to update
698 	 * LSNs in both cases.
699 	 *
700 	 * The third message is generated when a page is sorted (SORTPAGE). In
701 	 * an undo the original image in the SORTPAGE is used. In a redo we
702 	 * recreate the sort operation by calling __ham_sort_page.
703 	 */
704 	if (cmp_p == 0 && DB_REDO(op)) {
705 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
706 		if (argp->opcode == SPLITNEW)
707 			/* Need to redo the split described. */
708 			memcpy(pagep, argp->pageimage.data,
709 			    argp->pageimage.size);
710 		else if (argp->opcode == SORTPAGE) {
711 			if ((ret = __ham_sort_page(dbc, NULL, pagep)) != 0)
712 				goto out;
713 		}
714 		LSN(pagep) = *lsnp;
715 	} else if (cmp_n == 0 && DB_UNDO(op)) {
716 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
717 		if (argp->opcode == SPLITOLD || argp->opcode == SORTPAGE) {
718 			/* Put back the old image. */
719 			memcpy(pagep, argp->pageimage.data,
720 			    argp->pageimage.size);
721 		} else
722 			P_INIT(pagep, file_dbp->pgsize, argp->pgno,
723 			    PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
724 		LSN(pagep) = argp->pagelsn;
725 	}
726 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
727 		goto out;
728 	pagep = NULL;
729 
730 done:	*lsnp = argp->prev_lsn;
731 	ret = 0;
732 
733 out:	if (pagep != NULL)
734 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
735 	REC_CLOSE;
736 }
737 
738 /*
739  * __ham_copypage_recover --
740  *	Recovery function for copypage.
741  *
742  * PUBLIC: int __ham_copypage_recover
743  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
744  */
745 int
__ham_copypage_recover(env,dbtp,lsnp,op,info)746 __ham_copypage_recover(env, dbtp, lsnp, op, info)
747 	ENV *env;
748 	DBT *dbtp;
749 	DB_LSN *lsnp;
750 	db_recops op;
751 	void *info;
752 {
753 	__ham_copypage_args *argp;
754 	DB_THREAD_INFO *ip;
755 	DB *file_dbp;
756 	DBC *dbc;
757 	DB_MPOOLFILE *mpf;
758 	PAGE *pagep;
759 	int cmp_n, cmp_p, ret;
760 
761 	ip = ((DB_TXNHEAD *)info)->thread_info;
762 	pagep = NULL;
763 	REC_PRINT(__ham_copypage_print);
764 	REC_INTRO(__ham_copypage_read, ip, 0);
765 
766 	/* This is the bucket page. */
767 	REC_FGET(mpf, ip, argp->pgno, &pagep, donext);
768 
769 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
770 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
771 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
772 
773 	if (cmp_p == 0 && DB_REDO(op)) {
774 		/* Need to redo update described. */
775 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
776 		memcpy(pagep, argp->page.data, argp->page.size);
777 		PGNO(pagep) = argp->pgno;
778 		PREV_PGNO(pagep) = PGNO_INVALID;
779 		LSN(pagep) = *lsnp;
780 	} else if (cmp_n == 0 && DB_UNDO(op)) {
781 		/* Need to undo update described. */
782 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
783 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
784 		    argp->next_pgno, 0, P_HASH);
785 		LSN(pagep) = argp->pagelsn;
786 	}
787 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
788 		goto out;
789 	pagep = NULL;
790 
791 donext:	/* Now fix up the "next" page. */
792 	REC_FGET(mpf, ip, argp->next_pgno, &pagep, do_nn);
793 
794 	/* For REDO just update the LSN. For UNDO copy page back. */
795 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
796 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
797 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
798 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
799 	if (cmp_p == 0 && DB_REDO(op)) {
800 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
801 		LSN(pagep) = *lsnp;
802 	} else if (cmp_n == 0 && DB_UNDO(op)) {
803 		/* Need to undo update described. */
804 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
805 		memcpy(pagep, argp->page.data, argp->page.size);
806 	}
807 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
808 		goto out;
809 	pagep = NULL;
810 
811 	/* Now fix up the next's next page. */
812 do_nn:	if (argp->nnext_pgno == PGNO_INVALID)
813 		goto done;
814 
815 	REC_FGET(mpf, ip, argp->nnext_pgno, &pagep, done);
816 
817 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
818 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nnextlsn);
819 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nnextlsn);
820 	CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
821 
822 	if (cmp_p == 0 && DB_REDO(op)) {
823 		/* Need to redo update described. */
824 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
825 		PREV_PGNO(pagep) = argp->pgno;
826 		LSN(pagep) = *lsnp;
827 	} else if (cmp_n == 0 && DB_UNDO(op)) {
828 		/* Need to undo update described. */
829 		REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
830 		PREV_PGNO(pagep) = argp->next_pgno;
831 		LSN(pagep) = argp->nnextlsn;
832 	}
833 	if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
834 		goto out;
835 	pagep = NULL;
836 
837 done:	*lsnp = argp->prev_lsn;
838 	ret = 0;
839 
840 out:	if (pagep != NULL)
841 		(void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
842 	REC_CLOSE;
843 }
844 
845 /*
846  * __ham_metagroup_recover --
847  *	Recovery function for metagroup.
848  *
849  * PUBLIC: int __ham_metagroup_recover
850  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
851  */
852 int
__ham_metagroup_recover(env,dbtp,lsnp,op,info)853 __ham_metagroup_recover(env, dbtp, lsnp, op, info)
854 	ENV *env;
855 	DBT *dbtp;
856 	DB_LSN *lsnp;
857 	db_recops op;
858 	void *info;
859 {
860 	__ham_metagroup_args *argp;
861 	DB_THREAD_INFO *ip;
862 	HASH_CURSOR *hcp;
863 	DB *file_dbp;
864 	DBMETA *mmeta;
865 	DBC *dbc;
866 	DB_MPOOLFILE *mpf;
867 	PAGE *pagep;
868 	db_pgno_t pgno;
869 	int cmp_n, cmp_p, did_alloc, groupgrow, ret;
870 
871 	ip = ((DB_TXNHEAD *)info)->thread_info;
872 	mmeta = NULL;
873 	did_alloc = 0;
874 	REC_PRINT(__ham_metagroup_print);
875 	REC_INTRO(__ham_metagroup_read, ip, 1);
876 
877 	/*
878 	 * This logs the virtual create of pages pgno to pgno + bucket.
879 	 * The log record contains:
880 	 * bucket: old maximum bucket
881 	 * pgno: page number of the new bucket.
882 	 * We round up on log calculations, so we can figure out if we are
883 	 * about to double the hash table if argp->bucket+1 is a power of 2.
884 	 * If it is, then we are allocating an entire doubling of pages,
885 	 * otherwise, we are simply allocated one new page.
886 	 */
887 	groupgrow =
888 	    (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
889 	pgno = argp->pgno;
890 	if (argp->newalloc)
891 		pgno += argp->bucket;
892 
893 	pagep = NULL;
894 	ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep);
895 
896 	/* If we are undoing, then we don't want to create the page. */
897 	if (ret != 0 && DB_REDO(op))
898 		ret = __memp_fget(mpf,
899 		    &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep);
900 	else if (ret == DB_PAGE_NOTFOUND)
901 		goto do_meta;
902 	if (ret != 0) {
903 		if (ret != ENOSPC)
904 			goto out;
905 		pgno = 0;
906 		goto do_meta;
907 	}
908 
909 	/*
910 	 * When we get here then either we did not grow the file
911 	 * (groupgrow == 0) or we did grow the file and the allocation
912 	 * of those new pages succeeded.
913 	 */
914 	did_alloc = groupgrow;
915 
916 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
917 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
918 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
919 
920 	if (cmp_p == 0 && DB_REDO(op)) {
921 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
922 		pagep->lsn = *lsnp;
923 	} else if (cmp_n == 0 && DB_UNDO(op)) {
924 		/* If this record allocated the pages give them back. */
925 		if (argp->newalloc) {
926 			if (pagep != NULL && (ret = __memp_fput(mpf,
927 			    ip, pagep, DB_PRIORITY_VERY_LOW)) != 0)
928 				goto out;
929 			pagep = NULL;
930 			if ((ret = __memp_ftruncate(mpf, NULL, ip,
931 			    argp->pgno, 0)) != 0)
932 				goto out;
933 		} else {
934 			/*
935 			 * Otherwise just roll the page back to its
936 			 * previous state.
937 			 */
938 			REC_DIRTY(mpf, ip, dbc->priority, &pagep);
939 			pagep->lsn = argp->pagelsn;
940 		}
941 	}
942 	if (pagep != NULL &&
943 	    (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
944 		goto out;
945 
946 	/*
947 	 * If a earlier aborted allocation used one of our pages it may
948 	 * be in the wrong state, read all the pages in the group and init
949 	 * them to be empty.
950 	 */
951 	if (DB_REDO(op) && argp->newalloc) {
952 		for (pgno = argp->pgno;
953 		    pgno < argp->pgno + argp->bucket; pgno++) {
954 			if ((ret = __memp_fget(mpf,
955 			    &pgno, ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
956 				goto out;
957 
958 			if (IS_ZERO_LSN(LSN(pagep))) {
959 				REC_DIRTY(mpf, ip, dbc->priority, &pagep);
960 				P_INIT(pagep, file_dbp->pgsize,
961 				    PGNO_INVALID, PGNO_INVALID, PGNO_INVALID,
962 				    0, P_HASH);
963 			}
964 			if ((ret =
965 			    __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
966 				goto out;
967 		}
968 	}
969 
970 do_meta:
971 	/* Now we have to update the meta-data page. */
972 	hcp = (HASH_CURSOR *)dbc->internal;
973 	if ((ret = __ham_get_meta(dbc)) != 0)
974 		goto out;
975 	cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn);
976 	cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
977 	CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
978 	CHECK_ABORT(env, op, cmp_n, &hcp->hdr->dbmeta.lsn, lsnp);
979 	if (cmp_p == 0 && DB_REDO(op)) {
980 		/* Redo the actual updating of bucket counts. */
981 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
982 		++hcp->hdr->max_bucket;
983 		if (groupgrow) {
984 			hcp->hdr->low_mask = hcp->hdr->high_mask;
985 			hcp->hdr->high_mask =
986 			    (argp->bucket + 1) | hcp->hdr->low_mask;
987 		}
988 		hcp->hdr->dbmeta.lsn = *lsnp;
989 	} else if (cmp_n == 0 && DB_UNDO(op)) {
990 		/* Undo the actual updating of bucket counts. */
991 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
992 		hcp->hdr->max_bucket = argp->bucket;
993 		if (groupgrow) {
994 			hcp->hdr->high_mask = argp->bucket;
995 			hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
996 		}
997 		hcp->hdr->dbmeta.lsn = argp->metalsn;
998 	}
999 
1000 	/*
1001 	 * Now we need to fix up the spares array.  Each entry in the
1002 	 * spares array indicates the beginning page number for the
1003 	 * indicated doubling.
1004 	 */
1005 	if (cmp_p == 0 && did_alloc && !DB_UNDO(op)) {
1006 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1007 		hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
1008 		    (argp->pgno - argp->bucket) - 1;
1009 	}
1010 	if (cmp_n == 0 && groupgrow && DB_UNDO(op)) {
1011 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1012 		hcp->hdr->spares[
1013 		    __db_log2(argp->bucket + 1) + 1] = PGNO_INVALID;
1014 	}
1015 
1016 	/*
1017 	 * Finally, we need to potentially fix up the last_pgno field
1018 	 * in the master meta-data page (which may or may not be the
1019 	 * same as the hash header page).
1020 	 */
1021 	if (argp->mmpgno != argp->mpgno) {
1022 		if ((ret = __memp_fget(mpf,
1023 		    &argp->mmpgno, ip,  NULL, DB_MPOOL_EDIT, &mmeta)) != 0) {
1024 			if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND)
1025 				ret = 0;
1026 			goto out;
1027 		}
1028 		cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn);
1029 		cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn);
1030 		if (cmp_p == 0 && DB_REDO(op)) {
1031 			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1032 			mmeta->lsn = *lsnp;
1033 		} else if (cmp_n == 0 && DB_UNDO(op)) {
1034 			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1035 			mmeta->lsn = argp->mmetalsn;
1036 		}
1037 	} else {
1038 		mmeta = (DBMETA *)hcp->hdr;
1039 		REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1040 	}
1041 
1042 	if (cmp_n == 0 && DB_UNDO(op))
1043 		mmeta->last_pgno = argp->last_pgno;
1044 	else if (cmp_p == 0 && DB_REDO(op) && mmeta->last_pgno < pgno)
1045 		mmeta->last_pgno = pgno;
1046 
1047 	if (argp->mmpgno != argp->mpgno &&
1048 	    (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0)
1049 		goto out;
1050 	mmeta = NULL;
1051 
1052 done:	*lsnp = argp->prev_lsn;
1053 	ret = 0;
1054 
1055 out:	if (mmeta != NULL)
1056 		(void)__memp_fput(mpf, ip, mmeta, dbc->priority);
1057 	if (dbc != NULL)
1058 		(void)__ham_release_meta(dbc);
1059 
1060 	REC_CLOSE;
1061 }
1062 
1063 /*
1064  * __ham_contract_recover --
1065  *	Recovery function for contracting a hash table
1066  *
1067  * PUBLIC: int __ham_contract_recover
1068  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1069  */
1070 int
__ham_contract_recover(env,dbtp,lsnp,op,info)1071 __ham_contract_recover(env, dbtp, lsnp, op, info)
1072 	ENV *env;
1073 	DBT *dbtp;
1074 	DB_LSN *lsnp;
1075 	db_recops op;
1076 	void *info;
1077 {
1078 	__ham_contract_args *argp;
1079 	DB_THREAD_INFO *ip;
1080 	DB_MPOOLFILE *mpf;
1081 	DB *file_dbp;
1082 	DBC *dbc;
1083 	HASH_CURSOR *hcp;
1084 	HMETA *meta;
1085 	int cmp_n, cmp_p, ret, t_ret;
1086 
1087 	ip = ((DB_TXNHEAD *)info)->thread_info;
1088 	REC_PRINT(__ham_contract_print);
1089 	REC_INTRO(__ham_contract_read, ip, 1);
1090 
1091 	hcp = (HASH_CURSOR *)dbc->internal;
1092 	if ((ret = __ham_get_meta(dbc)) != 0)
1093 		goto done;
1094 	meta = hcp->hdr;
1095 	cmp_n = LOG_COMPARE(lsnp, &meta->dbmeta.lsn);
1096 	cmp_p = LOG_COMPARE(&meta->dbmeta.lsn, &argp->meta_lsn);
1097 	CHECK_LSN(env, op, cmp_p, &meta->dbmeta.lsn, &argp->meta_lsn);
1098 	if (cmp_p == 0 && DB_REDO(op)) {
1099 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1100 		meta = hcp->hdr;
1101 		meta->max_bucket = argp->bucket - 1;
1102 		if (argp->bucket == meta->low_mask + 1) {
1103 			meta->spares[
1104 			    __db_log2(argp->bucket) + 1] = PGNO_INVALID;
1105 			meta->high_mask = meta->low_mask;
1106 			meta->low_mask >>= 1;
1107 		}
1108 		meta->dbmeta.lsn = *lsnp;
1109 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1110 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1111 		meta = hcp->hdr;
1112 		meta->max_bucket = argp->bucket;
1113 		if (argp->bucket == meta->high_mask + 1) {
1114 			meta->spares[__db_log2(argp->bucket) + 1] =
1115 			    argp->pgno - argp->bucket;
1116 			meta->low_mask = meta->high_mask;
1117 			meta->high_mask = meta->max_bucket | meta->low_mask;
1118 		}
1119 		meta->dbmeta.lsn = argp->meta_lsn;
1120 	}
1121 	*lsnp = argp->prev_lsn;
1122 
1123 out:	if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
1124 		ret = t_ret;
1125 done:	REC_CLOSE;
1126 }
1127 
1128 /*
1129  * __ham_groupalloc_recover --
1130  *	Recover the batch creation of a set of pages for a new database.
1131  *
1132  * PUBLIC: int __ham_groupalloc_recover
1133  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1134  */
1135 int
__ham_groupalloc_recover(env,dbtp,lsnp,op,info)1136 __ham_groupalloc_recover(env, dbtp, lsnp, op, info)
1137 	ENV *env;
1138 	DBT *dbtp;
1139 	DB_LSN *lsnp;
1140 	db_recops op;
1141 	void *info;
1142 {
1143 	__ham_groupalloc_args *argp;
1144 	DB_THREAD_INFO *ip;
1145 	DBMETA *mmeta;
1146 	DB_MPOOLFILE *mpf;
1147 	DB *file_dbp;
1148 	DBC *dbc;
1149 	PAGE *pagep;
1150 	db_pgno_t pgno;
1151 	int cmp_n, cmp_p, ret;
1152 
1153 	ip = ((DB_TXNHEAD *)info)->thread_info;
1154 	mmeta = NULL;
1155 	REC_PRINT(__ham_groupalloc_print);
1156 	REC_INTRO(__ham_groupalloc_read, ip, 1);
1157 
1158 	pgno = PGNO_BASE_MD;
1159 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) {
1160 		if (DB_REDO(op)) {
1161 			ret = __db_pgerr(file_dbp, pgno, ret);
1162 			goto out;
1163 		} else
1164 			goto done;
1165 	}
1166 
1167 	cmp_n = LOG_COMPARE(lsnp, &LSN(mmeta));
1168 	cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn);
1169 	CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
1170 	CHECK_ABORT(env, op, cmp_n, &LSN(mmeta), lsnp);
1171 
1172 	/*
1173 	 * Basically, we used mpool to allocate a chunk of pages.
1174 	 * We need to either add those to a free list (in the undo
1175 	 * case) or initialize them (in the redo case).
1176 	 *
1177 	 * If we are redoing and this is a hash subdatabase, it's possible
1178 	 * that the pages were never allocated, so we'd better check for
1179 	 * that and handle it here.
1180 	 */
1181 	pgno = argp->start_pgno + argp->num - 1;
1182 	if (DB_REDO(op)) {
1183 		if ((ret = __ham_alloc_pages(dbc, argp, lsnp)) != 0)
1184 			goto out;
1185 		if (cmp_p == 0) {
1186 			REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1187 			LSN(mmeta) = *lsnp;
1188 		}
1189 	} else if (DB_UNDO(op)) {
1190 		/*
1191 		 * Fetch the last page and determine if it is in
1192 		 * the post allocation state.
1193 		 */
1194 		pagep = NULL;
1195 		if ((ret = __memp_fget(mpf, &pgno,
1196 		     ip,  NULL, DB_MPOOL_EDIT, &pagep)) == 0) {
1197 			if (LOG_COMPARE(&pagep->lsn, lsnp) != 0) {
1198 				if ((ret = __memp_fput(mpf, ip,
1199 				    pagep, DB_PRIORITY_VERY_LOW)) != 0)
1200 					goto out;
1201 				pagep = NULL;
1202 			}
1203 		} else if (ret != DB_PAGE_NOTFOUND)
1204 			goto out;
1205 		/*
1206 		 * If the last page was allocated then truncate back
1207 		 * to the first page.
1208 		 */
1209 		if (pagep != NULL) {
1210 			if ((ret = __memp_fput(mpf, ip,
1211 			    pagep, DB_PRIORITY_VERY_LOW)) != 0)
1212 				goto out;
1213 			if ((ret = __memp_ftruncate(mpf, NULL,
1214 			     ip, argp->start_pgno, 0)) != 0)
1215 				goto out;
1216 		}
1217 
1218 		/*
1219 		 * If we are rolling back the metapage, then make
1220 		 * sure it reflects the the correct last_pgno.
1221 		 */
1222 		if (cmp_n == 0) {
1223 			REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1224 			mmeta->last_pgno = argp->last_pgno;
1225 		}
1226 		pgno = 0;
1227 		if (cmp_n == 0) {
1228 			REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1229 			LSN(mmeta) = argp->meta_lsn;
1230 		}
1231 	}
1232 
1233 	/*
1234 	 * Set the last page number to the current value.
1235 	 */
1236 	if (pgno > mmeta->last_pgno) {
1237 		REC_DIRTY(mpf, ip, file_dbp->priority, &mmeta);
1238 		mmeta->last_pgno = pgno;
1239 	}
1240 
1241 done:	if (ret == 0)
1242 		*lsnp = argp->prev_lsn;
1243 	ret = 0;
1244 
1245 out:	if (mmeta != NULL)
1246 		(void)__memp_fput(mpf, ip, mmeta, file_dbp->priority);
1247 
1248 	REC_CLOSE;
1249 }
1250 
1251 /*
1252  * __ham_alloc_pages --
1253  *
1254  * Called during redo of a file create.  We create new pages in the file
1255  * using the MPOOL_NEW_GROUP flag.  We then log the meta-data page with a
1256  * __crdel_metasub message.  If we manage to crash without the newly written
1257  * pages getting to disk (I'm not sure this can happen anywhere except our
1258  * test suite?!), then we need to go through a recreate the final pages.
1259  * Hash normally has holes in its files and handles them appropriately.
1260  */
1261 static int
__ham_alloc_pages(dbc,argp,lsnp)1262 __ham_alloc_pages(dbc, argp, lsnp)
1263 	DBC *dbc;
1264 	__ham_groupalloc_args *argp;
1265 	DB_LSN *lsnp;
1266 {
1267 	DB *file_dbp;
1268 	DB_MPOOLFILE *mpf;
1269 	DB_THREAD_INFO *ip;
1270 	PAGE *pagep;
1271 	db_pgno_t pgno;
1272 	int ret;
1273 
1274 	file_dbp = dbc->dbp;
1275 	mpf = file_dbp->mpf;
1276 	ip = dbc->thread_info;
1277 
1278 	/* Read the last page of the allocation. */
1279 	pgno = argp->start_pgno + argp->num - 1;
1280 
1281 	/* If the page exists, and it has been initialized, then we're done. */
1282 	if ((ret =
1283 	    __memp_fget(mpf, &pgno, ip, NULL, 0, &pagep)) == 0) {
1284 		if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
1285 			goto reinit_page;
1286 		return (__memp_fput(mpf, ip, pagep, dbc->priority));
1287 	}
1288 
1289 	/* Had to create the page. */
1290 	if ((ret = __memp_fget(mpf, &pgno,
1291 	    ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
1292 		return (__db_pgerr(dbc->dbp, pgno, ret));
1293 
1294 reinit_page:
1295 	/* Initialize the newly allocated page. */
1296 	REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1297 	P_INIT(pagep, dbc->dbp->pgsize,
1298 	    pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1299 	pagep->lsn = *lsnp;
1300 
1301 out:	return (__memp_fput(mpf, ip, pagep, dbc->priority));
1302 }
1303 
1304 /*
1305  * __ham_changeslot_recover --
1306  *	Recovery function for changeslot.
1307  * When we compact a hash database we may change one of the spares slots
1308  * to point at a new block of pages.
1309  *
1310  * PUBLIC: int __ham_changeslot_recover
1311  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1312  */
1313 int
__ham_changeslot_recover(env,dbtp,lsnp,op,info)1314 __ham_changeslot_recover(env, dbtp, lsnp, op, info)
1315 	ENV *env;
1316 	DBT *dbtp;
1317 	DB_LSN *lsnp;
1318 	db_recops op;
1319 	void *info;
1320 {
1321 	__ham_changeslot_args *argp;
1322 	DB *file_dbp;
1323 	DBC *dbc;
1324 	DB_MPOOLFILE *mpf;
1325 	DB_THREAD_INFO *ip;
1326 	HASH_CURSOR *hcp;
1327 	HMETA *meta;
1328 	u_int32_t bucket;
1329 	int cmp_n, cmp_p, ret;
1330 
1331 	ip = ((DB_TXNHEAD *)info)->thread_info;
1332 
1333 	REC_PRINT(__ham_changeslot_print);
1334 	REC_INTRO(__ham_changeslot_read, ip, 1);
1335 
1336 	hcp = (HASH_CURSOR *)dbc->internal;
1337 	if ((ret = __ham_get_meta(dbc)) != 0)
1338 		goto out;
1339 	meta = hcp->hdr;
1340 	cmp_n = log_compare(lsnp, &LSN(meta));
1341 	cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
1342 
1343 	bucket = argp->slot == 0 ? 0 : 1 << (argp->slot - 1);
1344 	if (cmp_p == 0 && DB_REDO(op)) {
1345 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1346 		meta = hcp->hdr;
1347 		meta->spares[argp->slot] = argp->new - bucket;
1348 		LSN(meta) = *lsnp;
1349 	} else if (cmp_n == 0 && !DB_REDO(op)) {
1350 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1351 		meta = hcp->hdr;
1352 		meta->spares[argp->slot] = argp->old - bucket;
1353 		LSN(meta) = argp->meta_lsn;
1354 	}
1355 	*lsnp = argp->prev_lsn;
1356 	ret = __ham_release_meta(dbc);
1357 
1358 done:
1359 out:	REC_CLOSE;
1360 }
1361 
1362 /*
1363  * __ham_curadj_recover --
1364  *	Undo cursor adjustments if a subtransaction fails.
1365  *
1366  * PUBLIC: int __ham_curadj_recover
1367  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1368  */
1369 int
__ham_curadj_recover(env,dbtp,lsnp,op,info)1370 __ham_curadj_recover(env, dbtp, lsnp, op, info)
1371 	ENV *env;
1372 	DBT *dbtp;
1373 	DB_LSN *lsnp;
1374 	db_recops op;
1375 	void *info;
1376 {
1377 	__ham_curadj_args *argp;
1378 	db_ham_curadj mode, hamc_mode;
1379 	DB_THREAD_INFO *ip;
1380 	DB_MPOOLFILE *mpf;
1381 	DB *file_dbp;
1382 	DBC *dbc;
1383 	HASH_CURSOR *hcp;
1384 	int ret;
1385 
1386 	ip = ((DB_TXNHEAD *)info)->thread_info;
1387 	REC_PRINT(__ham_curadj_print);
1388 	REC_INTRO(__ham_curadj_read, ip, 1);
1389 
1390 	if (op != DB_TXN_ABORT)
1391 		goto done;
1392 
1393 	mode = (db_ham_curadj)argp->add;
1394 
1395 	/*
1396 	 * Reverse the logged operation, so that the consequences are reversed
1397 	 * by the __hamc_update code.
1398 	 */
1399 	switch (mode) {
1400 	case DB_HAM_CURADJ_DEL:
1401 		hamc_mode = DB_HAM_CURADJ_ADD;
1402 		break;
1403 	case DB_HAM_CURADJ_ADD:
1404 		hamc_mode = DB_HAM_CURADJ_DEL;
1405 		break;
1406 	case DB_HAM_CURADJ_ADDMOD:
1407 		hamc_mode = DB_HAM_CURADJ_DELMOD;
1408 		break;
1409 	case DB_HAM_CURADJ_DELMOD:
1410 		hamc_mode = DB_HAM_CURADJ_ADDMOD;
1411 		break;
1412 	default:
1413 		__db_errx(env, DB_STR("1122",
1414 		    "Invalid flag in __ham_curadj_recover"));
1415 		ret = EINVAL;
1416 		goto out;
1417 	}
1418 
1419 	/*
1420 	 * Undo the adjustment by reinitializing the the cursor to look like
1421 	 * the one that was used to do the adjustment, then we invert the
1422 	 * add so that undo the adjustment.
1423 	 */
1424 	hcp = (HASH_CURSOR *)dbc->internal;
1425 	hcp->pgno = argp->pgno;
1426 	hcp->indx = argp->indx;
1427 	hcp->dup_off = argp->dup_off;
1428 	hcp->order = argp->order;
1429 	if (mode == DB_HAM_CURADJ_DEL)
1430 		F_SET(hcp, H_DELETED);
1431 	(void)__hamc_update(dbc, argp->len, hamc_mode, argp->is_dup);
1432 
1433 done:	*lsnp = argp->prev_lsn;
1434 out:	REC_CLOSE;
1435 }
1436 
1437 static int
__ham_chgpg_recover_func(cp,my_dbc,countp,pgno,indx,vargs)1438 __ham_chgpg_recover_func(cp, my_dbc, countp, pgno, indx, vargs)
1439 	DBC *cp, *my_dbc;
1440 	u_int32_t *countp;
1441 	db_pgno_t pgno;
1442 	u_int32_t indx;
1443 	void *vargs;
1444 {
1445 	BTREE_CURSOR *opdcp;
1446 	HASH_CURSOR *lcp;
1447 	u_int32_t order;
1448 	int ret;
1449 	__ham_chgpg_args *argp;
1450 
1451 	COMPQUIET(my_dbc, NULL);
1452 	COMPQUIET(countp, NULL);
1453 	COMPQUIET(pgno, 0);
1454 	lcp = (HASH_CURSOR *)cp->internal;
1455 	argp = vargs;
1456 
1457 	/* Overloaded field for DB_HAM_DEL*PG */
1458 	order = argp->new_indx;
1459 
1460 	switch (argp->mode) {
1461 	case DB_HAM_DELFIRSTPG:
1462 		if (lcp->pgno != argp->new_pgno ||
1463 		    MVCC_SKIP_CURADJ(cp, lcp->pgno))
1464 			break;
1465 		if (lcp->indx != indx ||
1466 		    !F_ISSET(lcp, H_DELETED) ||
1467 		    lcp->order >= order) {
1468 			lcp->pgno = argp->old_pgno;
1469 			if (lcp->indx == indx)
1470 				lcp->order -= order;
1471 		}
1472 		break;
1473 	case DB_HAM_DELMIDPG:
1474 	case DB_HAM_DELLASTPG:
1475 		if (lcp->pgno == argp->new_pgno &&
1476 		    lcp->indx == indx &&
1477 		    F_ISSET(lcp, H_DELETED) &&
1478 		    lcp->order >= order &&
1479 		    !MVCC_SKIP_CURADJ(cp, lcp->pgno)) {
1480 			lcp->pgno = argp->old_pgno;
1481 			lcp->order -= order;
1482 			lcp->indx = 0;
1483 		}
1484 		break;
1485 	case DB_HAM_CHGPG:
1486 		/*
1487 		 * If we're doing a CHGPG, we're undoing
1488 		 * the move of a non-deleted item to a
1489 		 * new page.  Any cursors with the deleted
1490 		 * flag set do not belong to this item;
1491 		 * don't touch them.
1492 		 */
1493 		if (F_ISSET(lcp, H_DELETED))
1494 			break;
1495 		/* FALLTHROUGH */
1496 	case DB_HAM_SPLIT:
1497 		if (lcp->pgno == argp->new_pgno &&
1498 		    lcp->indx == argp->new_indx &&
1499 		    !MVCC_SKIP_CURADJ(cp, lcp->pgno)) {
1500 			lcp->indx = argp->old_indx;
1501 			lcp->pgno = argp->old_pgno;
1502 		}
1503 		break;
1504 	case DB_HAM_DUP:
1505 		if (lcp->opd == NULL)
1506 			break;
1507 		opdcp = (BTREE_CURSOR *)lcp->opd->internal;
1508 		if (opdcp->pgno != argp->new_pgno ||
1509 		    opdcp->indx != argp->new_indx ||
1510 		    MVCC_SKIP_CURADJ(lcp->opd, opdcp->pgno))
1511 			break;
1512 
1513 		if (F_ISSET(opdcp, C_DELETED))
1514 			F_SET(lcp, H_DELETED);
1515 		/*
1516 		 * We can't close a cursor while we have the
1517 		 * dbp mutex locked, since c_close reacquires
1518 		 * it.  It should be safe to drop the mutex
1519 		 * here, though, since newly opened cursors
1520 		 * are put only at the end of the tailq and
1521 		 * the cursor we're adjusting can't be closed
1522 		 * under us.
1523 		 */
1524 		MUTEX_UNLOCK(cp->dbp->env, cp->dbp->mutex);
1525 		ret = __dbc_close(lcp->opd);
1526 		MUTEX_LOCK(cp->dbp->env, cp->dbp->mutex);
1527 		if (ret != 0)
1528 			return (ret);
1529 		lcp->opd = NULL;
1530 		break;
1531 	}
1532 	return (0);
1533 }
1534 /*
1535  * __ham_chgpg_recover --
1536  *	Undo cursor adjustments if a subtransaction fails.
1537  *
1538  * PUBLIC: int __ham_chgpg_recover
1539  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1540  */
1541 int
__ham_chgpg_recover(env,dbtp,lsnp,op,info)1542 __ham_chgpg_recover(env, dbtp, lsnp, op, info)
1543 	ENV *env;
1544 	DBT *dbtp;
1545 	DB_LSN *lsnp;
1546 	db_recops op;
1547 	void *info;
1548 {
1549 	__ham_chgpg_args *argp;
1550 	DB_THREAD_INFO *ip;
1551 	DB_MPOOLFILE *mpf;
1552 	DB *file_dbp;
1553 	DBC *dbc;
1554 	int ret;
1555 	u_int32_t count;
1556 
1557 	ip = ((DB_TXNHEAD *)info)->thread_info;
1558 	REC_PRINT(__ham_chgpg_print);
1559 	REC_INTRO(__ham_chgpg_read, ip, 0);
1560 
1561 	if (op != DB_TXN_ABORT)
1562 		goto done;
1563 
1564 	ret = __db_walk_cursors(file_dbp, dbc,
1565 	    __ham_chgpg_recover_func, &count, 0, argp->old_indx, argp);
1566 
1567 done:	*lsnp = argp->prev_lsn;
1568 out:	REC_CLOSE;
1569 }
1570 
1571 /*
1572  * __ham_metagroup_recover --
1573  *	Recovery function for metagroup.
1574  *
1575  * PUBLIC: int __ham_metagroup_42_recover
1576  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1577  */
1578 int
__ham_metagroup_42_recover(env,dbtp,lsnp,op,info)1579 __ham_metagroup_42_recover(env, dbtp, lsnp, op, info)
1580 	ENV *env;
1581 	DBT *dbtp;
1582 	DB_LSN *lsnp;
1583 	db_recops op;
1584 	void *info;
1585 {
1586 	__ham_metagroup_42_args *argp;
1587 	DB_THREAD_INFO *ip;
1588 	HASH_CURSOR *hcp;
1589 	DB *file_dbp;
1590 	DBMETA *mmeta;
1591 	DBC *dbc;
1592 	DB_MPOOLFILE *mpf;
1593 	PAGE *pagep;
1594 	db_pgno_t pgno;
1595 	u_int32_t flags;
1596 	int cmp_n, cmp_p, did_alloc, groupgrow, ret;
1597 
1598 	ip = ((DB_TXNHEAD *)info)->thread_info;
1599 	mmeta = NULL;
1600 	did_alloc = 0;
1601 	REC_PRINT(__ham_metagroup_42_print);
1602 	REC_INTRO(__ham_metagroup_42_read, ip, 1);
1603 
1604 	/*
1605 	 * This logs the virtual create of pages pgno to pgno + bucket
1606 	 * If HAVE_FTRUNCATE is not supported the mpool page-allocation is not
1607 	 * transaction protected, we can never undo it.  Even in an abort,
1608 	 * we have to allocate these pages to the hash table if they
1609 	 * were actually created.  In particular, during disaster
1610 	 * recovery the metapage may be before this point if we
1611 	 * are rolling backward.  If the file has not been extended
1612 	 * then the metapage could not have been updated.
1613 	 * The log record contains:
1614 	 * bucket: old maximum bucket
1615 	 * pgno: page number of the new bucket.
1616 	 * We round up on log calculations, so we can figure out if we are
1617 	 * about to double the hash table if argp->bucket+1 is a power of 2.
1618 	 * If it is, then we are allocating an entire doubling of pages,
1619 	 * otherwise, we are simply allocated one new page.
1620 	 */
1621 	groupgrow =
1622 	    (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
1623 	pgno = argp->pgno;
1624 	if (argp->newalloc)
1625 		pgno += argp->bucket;
1626 
1627 	flags = 0;
1628 	pagep = NULL;
1629 	LF_SET(DB_MPOOL_CREATE);
1630 	ret = __memp_fget(mpf, &pgno, ip,  NULL, flags, &pagep);
1631 
1632 	if (ret != 0) {
1633 		if (ret != ENOSPC)
1634 			goto out;
1635 		pgno = 0;
1636 		goto do_meta;
1637 	}
1638 
1639 	/*
1640 	 * When we get here then either we did not grow the file
1641 	 * (groupgrow == 0) or we did grow the file and the allocation
1642 	 * of those new pages succeeded.
1643 	 */
1644 	did_alloc = groupgrow;
1645 
1646 	cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
1647 	cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
1648 	CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
1649 
1650 	if (cmp_p == 0 && DB_REDO(op)) {
1651 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1652 		pagep->lsn = *lsnp;
1653 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1654 		/*
1655 		 * Otherwise just roll the page back to its
1656 		 * previous state.
1657 		 */
1658 		REC_DIRTY(mpf, ip, dbc->priority, &pagep);
1659 		pagep->lsn = argp->pagelsn;
1660 	}
1661 	if (pagep != NULL &&
1662 	    (ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1663 		goto out;
1664 
1665 do_meta:
1666 	/* Now we have to update the meta-data page. */
1667 	hcp = (HASH_CURSOR *)dbc->internal;
1668 	if ((ret = __ham_get_meta(dbc)) != 0)
1669 		goto out;
1670 	cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn);
1671 	cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
1672 	CHECK_LSN(env, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
1673 	if (cmp_p == 0 && DB_REDO(op)) {
1674 		/* Redo the actual updating of bucket counts. */
1675 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1676 		++hcp->hdr->max_bucket;
1677 		if (groupgrow) {
1678 			hcp->hdr->low_mask = hcp->hdr->high_mask;
1679 			hcp->hdr->high_mask =
1680 			    (argp->bucket + 1) | hcp->hdr->low_mask;
1681 		}
1682 		hcp->hdr->dbmeta.lsn = *lsnp;
1683 	} else if (cmp_n == 0 && DB_UNDO(op)) {
1684 		/* Undo the actual updating of bucket counts. */
1685 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1686 		hcp->hdr->max_bucket = argp->bucket;
1687 		if (groupgrow) {
1688 			hcp->hdr->high_mask = argp->bucket;
1689 			hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
1690 		}
1691 		hcp->hdr->dbmeta.lsn = argp->metalsn;
1692 	}
1693 
1694 	/*
1695 	 * Now we need to fix up the spares array.  Each entry in the
1696 	 * spares array indicates the beginning page number for the
1697 	 * indicated doubling.  We need to fill this in whenever the
1698 	 * spares array is invalid, if we never reclaim pages then
1699 	 * we have to allocate the pages to the spares array in both
1700 	 * the redo and undo cases.
1701 	 */
1702 	if (did_alloc &&
1703 	    hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) {
1704 		REC_DIRTY(mpf, ip, dbc->priority, &hcp->hdr);
1705 		hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
1706 		    (argp->pgno - argp->bucket) - 1;
1707 	}
1708 
1709 	/*
1710 	 * Finally, we need to potentially fix up the last_pgno field
1711 	 * in the master meta-data page (which may or may not be the
1712 	 * same as the hash header page).
1713 	 */
1714 	if (argp->mmpgno != argp->mpgno) {
1715 		if ((ret = __memp_fget(mpf, &argp->mmpgno, ip, NULL,
1716 		    DB_MPOOL_EDIT, &mmeta)) != 0) {
1717 			if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND)
1718 				ret = 0;
1719 			goto out;
1720 		}
1721 		cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn);
1722 		cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn);
1723 		if (cmp_p == 0 && DB_REDO(op)) {
1724 			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1725 			mmeta->lsn = *lsnp;
1726 		} else if (cmp_n == 0 && DB_UNDO(op)) {
1727 			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1728 			mmeta->lsn = argp->mmetalsn;
1729 		}
1730 	} else {
1731 		mmeta = (DBMETA *)hcp->hdr;
1732 		REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1733 	}
1734 
1735 	if (mmeta->last_pgno < pgno)
1736 		mmeta->last_pgno = pgno;
1737 
1738 	if (argp->mmpgno != argp->mpgno &&
1739 	    (ret = __memp_fput(mpf, ip, mmeta, dbc->priority)) != 0)
1740 		goto out;
1741 	mmeta = NULL;
1742 
1743 done:	*lsnp = argp->prev_lsn;
1744 	ret = 0;
1745 
1746 out:	if (mmeta != NULL)
1747 		(void)__memp_fput(mpf, ip, mmeta, dbc->priority);
1748 	if (dbc != NULL)
1749 		(void)__ham_release_meta(dbc);
1750 
1751 	REC_CLOSE;
1752 }
1753 
1754 /*
1755  * __ham_groupalloc_42_recover --
1756  *	Recover the batch creation of a set of pages for a new database.
1757  *
1758  * PUBLIC: int __ham_groupalloc_42_recover
1759  * PUBLIC:   __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
1760  */
1761 int
__ham_groupalloc_42_recover(env,dbtp,lsnp,op,info)1762 __ham_groupalloc_42_recover(env, dbtp, lsnp, op, info)
1763 	ENV *env;
1764 	DBT *dbtp;
1765 	DB_LSN *lsnp;
1766 	db_recops op;
1767 	void *info;
1768 {
1769 	__ham_groupalloc_42_args *argp;
1770 	DB_THREAD_INFO *ip;
1771 	DBMETA *mmeta;
1772 	DB_MPOOLFILE *mpf;
1773 	DB *file_dbp;
1774 	DBC *dbc;
1775 	db_pgno_t pgno;
1776 	int cmp_p, ret;
1777 
1778 	ip = ((DB_TXNHEAD *)info)->thread_info;
1779 	mmeta = NULL;
1780 	REC_PRINT(__ham_groupalloc_42_print);
1781 	REC_INTRO(__ham_groupalloc_42_read, ip, 1);
1782 
1783 	pgno = PGNO_BASE_MD;
1784 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &mmeta)) != 0) {
1785 		if (DB_REDO(op)) {
1786 			ret = __db_pgerr(file_dbp, pgno, ret);
1787 			goto out;
1788 		} else
1789 			goto done;
1790 	}
1791 
1792 	cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn);
1793 	CHECK_LSN(env, op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
1794 
1795 	/*
1796 	 * Basically, we used mpool to allocate a chunk of pages.
1797 	 * We need to either add those to a free list (in the undo
1798 	 * case) or initialize them (in the redo case).
1799 	 *
1800 	 * If we are redoing and this is a hash subdatabase, it's possible
1801 	 * that the pages were never allocated, so we'd better check for
1802 	 * that and handle it here.
1803 	 */
1804 	pgno = argp->start_pgno + argp->num - 1;
1805 	if (DB_REDO(op)) {
1806 		if ((ret = __ham_alloc_pages_42(dbc, argp, lsnp)) != 0)
1807 			goto out;
1808 		if (cmp_p == 0) {
1809 			REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1810 			LSN(mmeta) = *lsnp;
1811 		}
1812 	} else if (DB_UNDO(op)) {
1813 		/*
1814 		 * We cannot roll back 4.2 style allocations.
1815 		 */
1816 		__db_errx(env, DB_STR("1123",
1817 "Cannot replicate prepared transactions from master running release 4.2."));
1818 		ret = __env_panic(env, EINVAL);
1819 		goto out;
1820 	}
1821 
1822 	/*
1823 	 * In both REDO and UNDO, we have grown the file and need to make
1824 	 * sure that last_pgno is correct.  If we HAVE_FTRUNCATE pgno
1825 	 * will only be valid on REDO.
1826 	 */
1827 	if (pgno > mmeta->last_pgno) {
1828 		REC_DIRTY(mpf, ip, dbc->priority, &mmeta);
1829 		mmeta->last_pgno = pgno;
1830 	}
1831 
1832 done:	if (ret == 0)
1833 		*lsnp = argp->prev_lsn;
1834 	ret = 0;
1835 
1836 out:	if (mmeta != NULL)
1837 		(void)__memp_fput(mpf, ip, mmeta, dbc->priority);
1838 
1839 	REC_CLOSE;
1840 }
1841 
1842 /*
1843  * __ham_alloc_pages_42 --
1844  *
1845  * Called during redo of a file create.  We create new pages in the file
1846  * using the MPOOL_NEW_GROUP flag.  We then log the meta-data page with a
1847  * __crdel_metasub message.  If we manage to crash without the newly written
1848  * pages getting to disk (I'm not sure this can happen anywhere except our
1849  * test suite?!), then we need to go through a recreate the final pages.
1850  * Hash normally has holes in its files and handles them appropriately.
1851  */
1852 static int
__ham_alloc_pages_42(dbc,argp,lsnp)1853 __ham_alloc_pages_42(dbc, argp, lsnp)
1854 	DBC *dbc;
1855 	__ham_groupalloc_42_args *argp;
1856 	DB_LSN *lsnp;
1857 {
1858 	DB_MPOOLFILE *mpf;
1859 	DB_THREAD_INFO *ip;
1860 	PAGE *pagep;
1861 	db_pgno_t pgno;
1862 	int ret;
1863 
1864 	mpf = dbc->dbp->mpf;
1865 	ip = dbc->thread_info;
1866 
1867 	/* Read the last page of the allocation. */
1868 	pgno = argp->start_pgno + argp->num - 1;
1869 
1870 	/* If the page exists, and it has been initialized, then we're done. */
1871 	if ((ret = __memp_fget(mpf,
1872 	    &pgno, ip, NULL, 0, &pagep)) == 0) {
1873 		if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
1874 			goto reinit_page;
1875 		if ((ret = __memp_fput(mpf,
1876 		    ip, pagep, dbc->priority)) != 0)
1877 			return (ret);
1878 		return (0);
1879 	}
1880 
1881 	/* Had to create the page. */
1882 	if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
1883 	    DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) != 0)
1884 		return (__db_pgerr(dbc->dbp, pgno, ret));
1885 
1886 reinit_page:
1887 	/* Initialize the newly allocated page. */
1888 	P_INIT(pagep,
1889 	    dbc->dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1890 	pagep->lsn = *lsnp;
1891 
1892 	if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
1893 		return (ret);
1894 
1895 	return (0);
1896 }
1897