1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998, 1999
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1995, 1996
9  *	Margo Seltzer.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1995, 1996
13  *	The President and Fellows of Harvard University.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Margo Seltzer.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 #include "db_config.h"
44 
45 #ifndef lint
46 static const char sccsid[] = "@(#)hash_rec.c	11.12 (Sleepycat) 10/19/99";
47 #endif /* not lint */
48 
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51 
52 #include <errno.h>
53 #include <string.h>
54 #endif
55 
56 #include "db_int.h"
57 #include "db_page.h"
58 #include "db_shash.h"
59 #include "btree.h"
60 #include "hash.h"
61 #include "lock.h"
62 #include "log.h"
63 #include "mp.h"
64 
/*
 * Forward declarations of the file-local helpers defined later in this
 * file; both are called from CDB___ham_groupalloc_recover.
 */
static int CDB___ham_alloc_pages __P((DB *, HMETA *, db_pgno_t, db_pgno_t));
static int CDB___ham_free_pages __P((DB *, __ham_groupalloc_args *));
67 
68 /*
69  * CDB___ham_insdel_recover --
70  *
71  * PUBLIC: int CDB___ham_insdel_recover
72  * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
73  */
74 int
CDB___ham_insdel_recover(dbenv,dbtp,lsnp,redo,info)75 CDB___ham_insdel_recover(dbenv, dbtp, lsnp, redo, info)
76 	DB_ENV *dbenv;
77 	DBT *dbtp;
78 	DB_LSN *lsnp;
79 	int redo;
80 	void *info;
81 {
82 	__ham_insdel_args *argp;
83 	DB *file_dbp;
84 	DBC *dbc;
85 	DB_MPOOLFILE *mpf;
86 	PAGE *pagep;
87 	u_int32_t op;
88 	int cmp_n, cmp_p, getmeta, ret;
89 
90 	COMPQUIET(info, NULL);
91 
92 	getmeta = 0;
93 	REC_PRINT(CDB___ham_insdel_print);
94 	REC_INTRO(CDB___ham_insdel_read, 1);
95 
96 	if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
97 		if (!redo) {
98 			/*
99 			 * We are undoing and the page doesn't exist.  That
100 			 * is equivalent to having a pagelsn of 0, so we
101 			 * would not have to undo anything.  In this case,
102 			 * don't bother creating a page.
103 			 */
104 			goto done;
105 		} else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
106 		    DB_MPOOL_CREATE, &pagep)) != 0)
107 			goto out;
108 	}
109 
110 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
111 		goto out;
112 	getmeta = 1;
113 
114 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
115 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
116 	/*
117 	 * Two possible things going on:
118 	 * redo a delete/undo a put: delete the item from the page.
119 	 * redo a put/undo a delete: add the item to the page.
120 	 * If we are undoing a delete, then the information logged is the
121 	 * entire entry off the page, not just the data of a dbt.  In
122 	 * this case, we want to copy it back onto the page verbatim.
123 	 * We do this by calling __putitem with the type H_OFFPAGE instead
124 	 * of H_KEYDATA.
125 	 */
126 	op = OPCODE_OF(argp->opcode);
127 
128 	if ((op == DELPAIR && cmp_n == 0 && !redo) ||
129 	    (op == PUTPAIR && cmp_p == 0 && redo)) {
130 		/*
131 		 * Need to redo a PUT or undo a delete.  If we are undoing a
132 		 * delete, we've got to restore the item back to its original
133 		 * position.  That's a royal pain in the butt (because we do
134 		 * not store item lengths on the page), but there's no choice.
135 		 */
136 		if (op != DELPAIR ||
137 		    argp->ndx == (u_int32_t)H_NUMPAIRS(pagep)) {
138 			CDB___ham_putitem(pagep, &argp->key,
139 			    !redo || PAIR_ISKEYBIG(argp->opcode) ?
140 			    H_OFFPAGE : H_KEYDATA);
141 			CDB___ham_putitem(pagep, &argp->data,
142 			    !redo || PAIR_ISDATABIG(argp->opcode) ?
143 			    H_OFFPAGE : H_KEYDATA);
144 		} else
145 			(void) CDB___ham_reputpair(pagep, file_dbp->pgsize,
146 			    argp->ndx, &argp->key, &argp->data);
147 
148 		LSN(pagep) = redo ? *lsnp : argp->pagelsn;
149 		if ((ret = CDB___ham_put_page(file_dbp, pagep, 1)) != 0)
150 			goto out;
151 
152 	} else if ((op == DELPAIR && cmp_p == 0 && redo)
153 	    || (op == PUTPAIR && cmp_n == 0 && !redo)) {
154 		/* Need to undo a put or redo a delete. */
155 		CDB___ham_dpair(file_dbp, pagep, argp->ndx);
156 		LSN(pagep) = redo ? *lsnp : argp->pagelsn;
157 		if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
158 			goto out;
159 	} else
160 		if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
161 			goto out;
162 
163 	/* Return the previous LSN. */
164 done:	*lsnp = argp->prev_lsn;
165 	ret = 0;
166 
167 out:	if (getmeta)
168 		(void)CDB___ham_release_meta(dbc);
169 	REC_CLOSE;
170 }
171 
172 /*
173  * CDB___ham_newpage_recover --
174  *	This log message is used when we add/remove overflow pages.  This
175  *	message takes care of the pointer chains, not the data on the pages.
176  *
177  * PUBLIC: int CDB___ham_newpage_recover
178  * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
179  */
180 int
CDB___ham_newpage_recover(dbenv,dbtp,lsnp,redo,info)181 CDB___ham_newpage_recover(dbenv, dbtp, lsnp, redo, info)
182 	DB_ENV *dbenv;
183 	DBT *dbtp;
184 	DB_LSN *lsnp;
185 	int redo;
186 	void *info;
187 {
188 	__ham_newpage_args *argp;
189 	DB *file_dbp;
190 	DBC *dbc;
191 	DB_MPOOLFILE *mpf;
192 	PAGE *pagep;
193 	int cmp_n, cmp_p, change, getmeta, ret;
194 
195 	COMPQUIET(info, NULL);
196 
197 	getmeta = 0;
198 	REC_PRINT(CDB___ham_newpage_print);
199 	REC_INTRO(CDB___ham_newpage_read, 1);
200 
201 	if ((ret = CDB_memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) {
202 		if (!redo) {
203 			/*
204 			 * We are undoing and the page doesn't exist.  That
205 			 * is equivalent to having a pagelsn of 0, so we
206 			 * would not have to undo anything.  In this case,
207 			 * don't bother creating a page.
208 			 */
209 			ret = 0;
210 			goto ppage;
211 		} else if ((ret = CDB_memp_fget(mpf, &argp->new_pgno,
212 		    DB_MPOOL_CREATE, &pagep)) != 0)
213 			goto out;
214 	}
215 
216 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
217 		goto out;
218 	getmeta = 1;
219 
220 	/*
221 	 * There are potentially three pages we need to check: the one
222 	 * that we created/deleted, the one before it and the one after
223 	 * it.
224 	 */
225 
226 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
227 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
228 	change = 0;
229 
230 	if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
231 	    (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
232 		/* Redo a create new page or undo a delete new page. */
233 		P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
234 		    argp->prev_pgno, argp->next_pgno, 0, P_HASH);
235 		change = 1;
236 	} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
237 	    (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
238 		/*
239 		 * Redo a delete or undo a create new page.  All we
240 		 * really need to do is change the LSN.
241 		 */
242 		change = 1;
243 	}
244 
245 	if (!change) {
246 		if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
247 			goto out;
248 	} else {
249 		LSN(pagep) = redo ? *lsnp : argp->pagelsn;
250 		if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
251 			goto out;
252 	}
253 
254 	/* Now do the prev page. */
255 ppage:	if (argp->prev_pgno != PGNO_INVALID) {
256 		if ((ret = CDB_memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) {
257 			if (!redo) {
258 				/*
259 				 * We are undoing and the page doesn't exist.
260 				 * That is equivalent to having a pagelsn of 0,
261 				 * so we would not have to undo anything.  In
262 				 * this case, don't bother creating a page.
263 				 */
264 				ret = 0;
265 				goto npage;
266 			} else if ((ret =
267 			    CDB_memp_fget(mpf, &argp->prev_pgno,
268 			    DB_MPOOL_CREATE, &pagep)) != 0)
269 				goto out;
270 		}
271 
272 		cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
273 		cmp_p = CDB_log_compare(&LSN(pagep), &argp->prevlsn);
274 		change = 0;
275 
276 		if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
277 		    (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
278 			/* Redo a create new page or undo a delete new page. */
279 			pagep->next_pgno = argp->new_pgno;
280 			change = 1;
281 		} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
282 		    (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
283 			/* Redo a delete or undo a create new page. */
284 			pagep->next_pgno = argp->next_pgno;
285 			change = 1;
286 		}
287 
288 		if (!change) {
289 			if ((ret =
290 			    CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
291 				goto out;
292 		} else {
293 			LSN(pagep) = redo ? *lsnp : argp->prevlsn;
294 			if ((ret =
295 			    CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
296 				goto out;
297 		}
298 	}
299 
300 	/* Now time to do the next page */
301 npage:	if (argp->next_pgno != PGNO_INVALID) {
302 		if ((ret = CDB_memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
303 			if (!redo) {
304 				/*
305 				 * We are undoing and the page doesn't exist.
306 				 * That is equivalent to having a pagelsn of 0,
307 				 * so we would not have to undo anything.  In
308 				 * this case, don't bother creating a page.
309 				 */
310 				goto done;
311 			} else if ((ret =
312 			    CDB_memp_fget(mpf, &argp->next_pgno,
313 			    DB_MPOOL_CREATE, &pagep)) != 0)
314 				goto out;
315 		}
316 
317 		cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
318 		cmp_p = CDB_log_compare(&LSN(pagep), &argp->nextlsn);
319 		change = 0;
320 
321 		if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
322 		    (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
323 			/* Redo a create new page or undo a delete new page. */
324 			pagep->prev_pgno = argp->new_pgno;
325 			change = 1;
326 		} else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
327 		    (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
328 			/* Redo a delete or undo a create new page. */
329 			pagep->prev_pgno = argp->prev_pgno;
330 			change = 1;
331 		}
332 
333 		if (!change) {
334 			if ((ret =
335 			    CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
336 				goto out;
337 		} else {
338 			LSN(pagep) = redo ? *lsnp : argp->nextlsn;
339 			if ((ret =
340 			    CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
341 				goto out;
342 		}
343 	}
344 done:	*lsnp = argp->prev_lsn;
345 	ret = 0;
346 
347 out:	if (getmeta)
348 		(void)CDB___ham_release_meta(dbc);
349 	REC_CLOSE;
350 }
351 
352 
353 /*
354  * CDB___ham_replace_recover --
355  *	This log message refers to partial puts that are local to a single
356  *	page.  You can think of them as special cases of the more general
357  *	insdel log message.
358  *
359  * PUBLIC: int CDB___ham_replace_recover
360  * PUBLIC:    __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
361  */
362 int
CDB___ham_replace_recover(dbenv,dbtp,lsnp,redo,info)363 CDB___ham_replace_recover(dbenv, dbtp, lsnp, redo, info)
364 	DB_ENV *dbenv;
365 	DBT *dbtp;
366 	DB_LSN *lsnp;
367 	int redo;
368 	void *info;
369 {
370 	__ham_replace_args *argp;
371 	DB *file_dbp;
372 	DBC *dbc;
373 	DB_MPOOLFILE *mpf;
374 	DBT dbt;
375 	PAGE *pagep;
376 	int32_t grow;
377 	int change, cmp_n, cmp_p, getmeta, ret;
378 	u_int8_t *hk;
379 
380 	COMPQUIET(info, NULL);
381 
382 	getmeta = 0;
383 	REC_PRINT(CDB___ham_replace_print);
384 	REC_INTRO(CDB___ham_replace_read, 1);
385 
386 	if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
387 		if (!redo) {
388 			/*
389 			 * We are undoing and the page doesn't exist.  That
390 			 * is equivalent to having a pagelsn of 0, so we
391 			 * would not have to undo anything.  In this case,
392 			 * don't bother creating a page.
393 			 */
394 			goto done;
395 		} else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
396 		    DB_MPOOL_CREATE, &pagep)) != 0)
397 			goto out;
398 	}
399 
400 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
401 		goto out;
402 	getmeta = 1;
403 
404 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
405 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
406 
407 	memset(&dbt, 0, sizeof(dbt));
408 	if (cmp_p == 0 && redo) {
409 		change = 1;
410 		/* Reapply the change as specified. */
411 		dbt.data = argp->newitem.data;
412 		dbt.size = argp->newitem.size;
413 		grow = argp->newitem.size - argp->olditem.size;
414 		LSN(pagep) = *lsnp;
415 	} else if (cmp_n == 0 && !redo) {
416 		change = 1;
417 		/* Undo the already applied change. */
418 		dbt.data = argp->olditem.data;
419 		dbt.size = argp->olditem.size;
420 		grow = argp->olditem.size - argp->newitem.size;
421 		LSN(pagep) = argp->pagelsn;
422 	} else {
423 		change = 0;
424 		grow = 0;
425 	}
426 
427 	if (change) {
428 		CDB___ham_onpage_replace(pagep,
429 		    file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
430 		if (argp->makedup) {
431 			hk = P_ENTRY(pagep, argp->ndx);
432 			if (redo)
433 				HPAGE_PTYPE(hk) = H_DUPLICATE;
434 			else
435 				HPAGE_PTYPE(hk) = H_KEYDATA;
436 		}
437 	}
438 
439 	if ((ret = CDB___ham_put_page(file_dbp, pagep, change)) != 0)
440 		goto out;
441 
442 done:	*lsnp = argp->prev_lsn;
443 	ret = 0;
444 
445 out:	if (getmeta)
446 		(void)CDB___ham_release_meta(dbc);
447 	REC_CLOSE;
448 }
449 
450 /*
451  * CDB___ham_newpgno_recover --
452  *	This log message is used when allocating or deleting an overflow
453  *	page.  It takes care of modifying the meta data.
454  *
455  * PUBLIC: int CDB___ham_newpgno_recover
456  * PUBLIC:    __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
457  */
458 int
CDB___ham_newpgno_recover(dbenv,dbtp,lsnp,redo,info)459 CDB___ham_newpgno_recover(dbenv, dbtp, lsnp, redo, info)
460 	DB_ENV *dbenv;
461 	DBT *dbtp;
462 	DB_LSN *lsnp;
463 	int redo;
464 	void *info;
465 {
466 	COMPQUIET(dbenv, NULL);
467 	COMPQUIET(dbtp, NULL);
468 	COMPQUIET(lsnp, NULL);
469 	COMPQUIET(redo, 0);
470 	COMPQUIET(info, NULL);
471 	return (EINVAL);
472 }
473 
474 /*
475  * CDB___ham_splitmeta_recover --
476  *	This is the meta-data part of the split.  Records the new and old
477  *	bucket numbers and the new/old mask information.
478  *
479  * PUBLIC: int CDB___ham_splitmeta_recover
480  * PUBLIC:    __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
481  */
482 int
CDB___ham_splitmeta_recover(dbenv,dbtp,lsnp,redo,info)483 CDB___ham_splitmeta_recover(dbenv, dbtp, lsnp, redo, info)
484 	DB_ENV *dbenv;
485 	DBT *dbtp;
486 	DB_LSN *lsnp;
487 	int redo;
488 	void *info;
489 {
490 	COMPQUIET(dbenv, NULL);
491 	COMPQUIET(dbtp, NULL);
492 	COMPQUIET(lsnp, NULL);
493 	COMPQUIET(redo, 0);
494 	COMPQUIET(info, NULL);
495 	return (EINVAL);
496 }
497 
498 /*
499  * CDB___ham_splitdata_recover --
500  *
501  * PUBLIC: int CDB___ham_splitdata_recover
502  * PUBLIC:    __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
503  */
504 int
CDB___ham_splitdata_recover(dbenv,dbtp,lsnp,redo,info)505 CDB___ham_splitdata_recover(dbenv, dbtp, lsnp, redo, info)
506 	DB_ENV *dbenv;
507 	DBT *dbtp;
508 	DB_LSN *lsnp;
509 	int redo;
510 	void *info;
511 {
512 	__ham_splitdata_args *argp;
513 	DB *file_dbp;
514 	DBC *dbc;
515 	DB_MPOOLFILE *mpf;
516 	PAGE *pagep;
517 	int change, cmp_n, cmp_p, getmeta, ret;
518 
519 	COMPQUIET(info, NULL);
520 
521 	getmeta = 0;
522 	REC_PRINT(CDB___ham_splitdata_print);
523 	REC_INTRO(CDB___ham_splitdata_read, 1);
524 
525 	if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
526 		if (!redo) {
527 			/*
528 			 * We are undoing and the page doesn't exist.  That
529 			 * is equivalent to having a pagelsn of 0, so we
530 			 * would not have to undo anything.  In this case,
531 			 * don't bother creating a page.
532 			 */
533 			goto done;
534 		} else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
535 		    DB_MPOOL_CREATE, &pagep)) != 0)
536 			goto out;
537 	}
538 
539 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
540 		goto out;
541 	getmeta = 1;
542 
543 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
544 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
545 
546 	/*
547 	 * There are two types of log messages here, one for the old page
548 	 * and one for the new pages created.  The original image in the
549 	 * SPLITOLD record is used for undo.  The image in the SPLITNEW
550 	 * is used for redo.  We should never have a case where there is
551 	 * a redo operation and the SPLITOLD record is on disk, but not
552 	 * the SPLITNEW record.  Therefore, we only have work to do when
553 	 * redo NEW messages and undo OLD messages, but we have to update
554 	 * LSNs in both cases.
555 	 */
556 	change = 0;
557 	if (cmp_p == 0 && redo) {
558 		if (argp->opcode == SPLITNEW)
559 			/* Need to redo the split described. */
560 			memcpy(pagep, argp->pageimage.data,
561 			    argp->pageimage.size);
562 		LSN(pagep) = *lsnp;
563 		change = 1;
564 	} else if (cmp_n == 0 && !redo) {
565 		if (argp->opcode == SPLITOLD) {
566 			/* Put back the old image. */
567 			memcpy(pagep, argp->pageimage.data,
568 			    argp->pageimage.size);
569 		} else
570 			P_INIT(pagep, file_dbp->pgsize, argp->pgno,
571 			    PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
572 		LSN(pagep) = argp->pagelsn;
573 		change = 1;
574 	}
575 	if ((ret = CDB___ham_put_page(file_dbp, pagep, change)) != 0)
576 		goto out;
577 
578 done:	*lsnp = argp->prev_lsn;
579 	ret = 0;
580 
581 out:	if (getmeta)
582 		(void)CDB___ham_release_meta(dbc);
583 	REC_CLOSE;
584 }
585 
586 /*
587  * CDB___ham_ovfl_recover --
588  *	This message is generated when we initialize a set of overflow pages.
589  *
590  * PUBLIC: int CDB___ham_ovfl_recover
591  * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
592  */
593 int
CDB___ham_ovfl_recover(dbenv,dbtp,lsnp,redo,info)594 CDB___ham_ovfl_recover(dbenv, dbtp, lsnp, redo, info)
595 	DB_ENV *dbenv;
596 	DBT *dbtp;
597 	DB_LSN *lsnp;
598 	int redo;
599 	void *info;
600 {
601 	COMPQUIET(dbenv, NULL);
602 	COMPQUIET(dbtp, NULL);
603 	COMPQUIET(lsnp, NULL);
604 	COMPQUIET(redo, 0);
605 	COMPQUIET(info, NULL);
606 	return (EINVAL);
607 }
608 
609 /*
610  * CDB___ham_copypage_recover --
611  *	Recovery function for copypage.
612  *
613  * PUBLIC: int CDB___ham_copypage_recover
614  * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
615  */
616 int
CDB___ham_copypage_recover(dbenv,dbtp,lsnp,redo,info)617 CDB___ham_copypage_recover(dbenv, dbtp, lsnp, redo, info)
618 	DB_ENV *dbenv;
619 	DBT *dbtp;
620 	DB_LSN *lsnp;
621 	int redo;
622 	void *info;
623 {
624 	__ham_copypage_args *argp;
625 	DB *file_dbp;
626 	DBC *dbc;
627 	DB_MPOOLFILE *mpf;
628 	PAGE *pagep;
629 	int cmp_n, cmp_p, getmeta, modified, ret;
630 
631 	COMPQUIET(info, NULL);
632 
633 	getmeta = 0;
634 	REC_PRINT(CDB___ham_copypage_print);
635 	REC_INTRO(CDB___ham_copypage_read, 1);
636 
637 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
638 		goto out;
639 	getmeta = 1;
640 	modified = 0;
641 
642 	/* This is the bucket page. */
643 	if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
644 		if (!redo) {
645 			/*
646 			 * We are undoing and the page doesn't exist.  That
647 			 * is equivalent to having a pagelsn of 0, so we
648 			 * would not have to undo anything.  In this case,
649 			 * don't bother creating a page.
650 			 */
651 			ret = 0;
652 			goto donext;
653 		} else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
654 		    DB_MPOOL_CREATE, &pagep)) != 0)
655 			goto out;
656 	}
657 
658 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
659 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
660 
661 	if (cmp_p == 0 && redo) {
662 		/* Need to redo update described. */
663 		memcpy(pagep, argp->page.data, argp->page.size);
664 		LSN(pagep) = *lsnp;
665 		modified = 1;
666 	} else if (cmp_n == 0 && !redo) {
667 		/* Need to undo update described. */
668 		P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
669 		    argp->next_pgno, 0, P_HASH);
670 		LSN(pagep) = argp->pagelsn;
671 		modified = 1;
672 	}
673 	if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
674 		goto out;
675 
676 donext:	/* Now fix up the "next" page. */
677 	if ((ret = CDB_memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
678 		if (!redo) {
679 			/*
680 			 * We are undoing and the page doesn't exist.  That
681 			 * is equivalent to having a pagelsn of 0, so we
682 			 * would not have to undo anything.  In this case,
683 			 * don't bother creating a page.
684 			 */
685 			ret = 0;
686 			goto do_nn;
687 		} else if ((ret = CDB_memp_fget(mpf, &argp->next_pgno,
688 		    DB_MPOOL_CREATE, &pagep)) != 0)
689 			goto out;
690 	}
691 
692 	/* There is nothing to do in the REDO case; only UNDO. */
693 
694 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
695 	if (cmp_n == 0 && !redo) {
696 		/* Need to undo update described. */
697 		memcpy(pagep, argp->page.data, argp->page.size);
698 		modified = 1;
699 	}
700 	if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
701 		goto out;
702 
703 	/* Now fix up the next's next page. */
704 do_nn:	if (argp->nnext_pgno == PGNO_INVALID)
705 		goto done;
706 
707 	if ((ret = CDB_memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) {
708 		if (!redo) {
709 			/*
710 			 * We are undoing and the page doesn't exist.  That
711 			 * is equivalent to having a pagelsn of 0, so we
712 			 * would not have to undo anything.  In this case,
713 			 * don't bother creating a page.
714 			 */
715 			goto done;
716 		} else if ((ret = CDB_memp_fget(mpf, &argp->nnext_pgno,
717 		    DB_MPOOL_CREATE, &pagep)) != 0)
718 			goto out;
719 	}
720 
721 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
722 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->nnextlsn);
723 
724 	if (cmp_p == 0 && redo) {
725 		/* Need to redo update described. */
726 		PREV_PGNO(pagep) = argp->pgno;
727 		LSN(pagep) = *lsnp;
728 		modified = 1;
729 	} else if (cmp_n == 0 && !redo) {
730 		/* Need to undo update described. */
731 		PREV_PGNO(pagep) = argp->next_pgno;
732 		LSN(pagep) = argp->nnextlsn;
733 		modified = 1;
734 	}
735 	if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
736 		goto out;
737 
738 done:	*lsnp = argp->prev_lsn;
739 	ret = 0;
740 
741 out:	if (getmeta)
742 		(void)CDB___ham_release_meta(dbc);
743 	REC_CLOSE;
744 }
745 
746 /*
747  * CDB___ham_metagroup_recover --
748  *	Recovery function for metagroup.
749  *
750  * PUBLIC: int CDB___ham_metagroup_recover
751  * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
752  */
753 int
CDB___ham_metagroup_recover(dbenv,dbtp,lsnp,redo,info)754 CDB___ham_metagroup_recover(dbenv, dbtp, lsnp, redo, info)
755 	DB_ENV *dbenv;
756 	DBT *dbtp;
757 	DB_LSN *lsnp;
758 	int redo;
759 	void *info;
760 {
761 	__ham_metagroup_args *argp;
762 	HASH_CURSOR *hcp;
763 	DB *file_dbp;
764 	DBC *dbc;
765 	DB_MPOOLFILE *mpf;
766 	PAGE *pagep;
767 	db_pgno_t last_pgno;
768 	int cmp_n, cmp_p, groupgrow, modified, ret;
769 
770 	COMPQUIET(info, NULL);
771 	REC_PRINT(CDB___ham_metagroup_print);
772 	REC_INTRO(CDB___ham_metagroup_read, 1);
773 
774 	/*
775 	 * This logs the virtual create of pages pgno to pgno + bucket
776 	 * Since the mpool page-allocation is not really able to be
777 	 * transaction protected, we can never undo it.  Even in an abort,
778 	 * we have to allocate these pages to the hash table.
779 	 * The log record contains:
780 	 * bucket: new bucket being allocated.
781 	 * pgno: page number of the new bucket.
782 	 * if bucket is a power of 2, then we allocated a whole batch of
783 	 * pages; if it's not, then we simply allocated one new page.
784 	 */
785 	groupgrow =
786 	    (u_int32_t)(1 << CDB___db_log2(argp->bucket + 1)) == argp->bucket + 1;
787 
788 	last_pgno = argp->pgno;
789 	if (groupgrow)
790 		/* Read the last page. */
791 		last_pgno += argp->bucket;
792 
793 	if ((ret = CDB_memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
794 		goto out;
795 
796 	modified = 0;
797 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
798 	cmp_p = CDB_log_compare(&argp->pagelsn, &LSN(pagep));
799 
800 	if ((cmp_p == 0 && redo) || (cmp_n == 0 && !redo)) {
801 		/*
802 		 * We need to make sure that we redo the allocation of the
803 		 * pages.
804 		 */
805 		if (redo)
806 			pagep->lsn = *lsnp;
807 		else
808 			pagep->lsn = argp->pagelsn;
809 		modified = 1;
810 	}
811 	if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
812 		goto out;
813 
814 	/* Now we have to update the meta-data page. */
815 	hcp = dbc->internal;
816 	if ((ret = CDB___ham_get_meta(dbc)) != 0)
817 		goto out;
818 	cmp_n = CDB_log_compare(lsnp, &hcp->hdr->dbmeta.lsn);
819 	cmp_p = CDB_log_compare(&argp->metalsn, &hcp->hdr->dbmeta.lsn);
820 	if ((cmp_p == 0 && redo) || (cmp_n == 0 && !redo)) {
821 		if (redo) {
822 			/* Redo the actual updating of bucket counts. */
823 			++hcp->hdr->max_bucket;
824 			if (groupgrow) {
825 				hcp->hdr->low_mask = hcp->hdr->high_mask;
826 				hcp->hdr->high_mask =
827 				    (argp->bucket + 1) | hcp->hdr->low_mask;
828 			}
829 			hcp->hdr->dbmeta.lsn = *lsnp;
830 		} else {
831 			/* Undo the actual updating of bucket counts. */
832 			--hcp->hdr->max_bucket;
833 			if (groupgrow) {
834 				hcp->hdr->high_mask = hcp->hdr->low_mask;
835 				hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
836 			}
837 			hcp->hdr->dbmeta.lsn = argp->metalsn;
838 		}
839 		if (groupgrow &&
840 		    hcp->hdr->spares[CDB___db_log2(argp->bucket + 1) + 1] ==
841 		    PGNO_INVALID)
842 			hcp->hdr->spares[CDB___db_log2(argp->bucket + 1) + 1] =
843 			    argp->pgno - argp->bucket - 1;
844 		F_SET(hcp, H_DIRTY);
845 	}
846 	if ((ret = CDB___ham_release_meta(dbc)) != 0)
847 		goto out;
848 
849 done:	*lsnp = argp->prev_lsn;
850 	ret = 0;
851 
852 out:	REC_CLOSE;
853 }
854 
855 /*
856  * CDB___ham_groupalloc_recover --
857  *	Recover the batch creation of a set of pages for a new database.
858  *
859  * PUBLIC: int CDB___ham_groupalloc_recover
860  * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
861  */
862 int
CDB___ham_groupalloc_recover(dbenv,dbtp,lsnp,redo,info)863 CDB___ham_groupalloc_recover(dbenv, dbtp, lsnp, redo, info)
864 	DB_ENV *dbenv;
865 	DBT *dbtp;
866 	DB_LSN *lsnp;
867 	int redo;
868 	void *info;
869 {
870 	__ham_groupalloc_args *argp;
871 	DBMETA *mmeta;
872 	DB_MPOOLFILE *mpf;
873 	DB *file_dbp;
874 	DBC *dbc;
875 	PAGE *pagep;
876 	db_pgno_t pgno;
877 	int cmp_n, cmp_p, modified, ret;
878 
879 	modified = 0;
880 	COMPQUIET(info, NULL);
881 	REC_PRINT(CDB___ham_groupalloc_print);
882 	REC_INTRO(CDB___ham_groupalloc_read, 0);
883 
884 	if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
885 		if (redo) {
886 			/* Page should have existed. */
887 			(void)CDB___db_pgerr(file_dbp, argp->pgno);
888 			goto out;
889 		} else {
890 			ret = 0;
891 			goto done;
892 		}
893 	}
894 
895 	cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
896 	cmp_p = CDB_log_compare(&LSN(pagep), &argp->metalsn);
897 
898 	if (cmp_p == 0 && redo) {
899 		LSN(pagep) = *lsnp;
900 		modified = 1;
901 	} else if (cmp_n == 0 && !redo) {
902 		LSN(pagep) = argp->metalsn;
903 		modified = 1;
904 	}
905 
906 	/*
907 	 * Basically, we used mpool to allocate a chunk of pages.
908 	 * We need to either add those to a free list (in the undo
909 	 * case) or initialize them (in the redo case).
910 	 *
911 	 * If we are redoing and this is a hash subdatabase, it's possible
912 	 * that the pages were never allocated, so we'd better check for
913 	 * that and handle it here.
914 	 */
915 	if (redo) {
916 		if ((ret = CDB___ham_alloc_pages(file_dbp,
917 		    (HMETA *)pagep, argp->start_pgno, argp->num)) != 0)
918 			goto out1;
919 
920 		/* Update the master meta data page LSN. */
921 		if (argp->pgno != PGNO_BASE_MD) {
922 			pgno = PGNO_BASE_MD;
923 			if ((ret = CDB_memp_fget(mpf, &pgno, 0, &mmeta)) != 0)
924 				goto out1;
925 			mmeta->lsn = *lsnp;
926 			if ((ret = CDB_memp_fput(mpf, mmeta, DB_MPOOL_DIRTY)) != 0)
927 				goto out1;
928 		}
929 	}
930 
931 	/*
932 	 * If we are undoing and this is a subdatabase then we need to
933 	 * put the pages on the free list.  If it's not a subdatabase,
934 	 * then we can simply do nothing because we're about to delete
935 	 * the file.
936 	 */
937 	if (!redo && argp->pgno != PGNO_BASE_MD) {
938 		if ((ret = CDB___ham_free_pages(file_dbp, argp)) != 0)
939 			goto out1;
940 		LSN(pagep) = argp->metalsn;
941 		modified = 1;
942 	}
943 
944 out1:	if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
945 		goto out;
946 
947 done:	if (ret == 0)
948 		*lsnp = argp->prev_lsn;
949 
950 out:	REC_CLOSE;
951 }
952 
953 /*
954  * CDB___ham_free_pages --
955  *
956  * Called during abort/undo of a file create.  We create new pages in the file
957  * using the MPOOL_NEW_GROUP flag.  We then log the meta-data page with a
958  * __crdel_metasub message.  If we fail we need to take those newly allocated
959  * pages and put them on a free list.  Normally this would happen in the
960  * recovery for CDB___db_new, but that doesn't get called in this case.
961  */
962 static int
CDB___ham_free_pages(dbp,argp)963 CDB___ham_free_pages(dbp, argp)
964 	DB *dbp;
965 	__ham_groupalloc_args *argp;
966 {
967 	DBMETA *mmeta;
968 	DB_MPOOLFILE *mpf;
969 	PAGE *pagep;
970 	u_int32_t i;
971 	db_pgno_t last_free, pgno;
972 	int mod_meta, ret, t_ret;
973 
974 	mod_meta = 0;
975 
976 	/* Get the master meta-data page. */
977 	mpf = dbp->mpf;
978 	pgno = PGNO_BASE_MD;
979 	if ((ret = CDB_memp_fget(mpf, &pgno, 0, &mmeta)) != 0)
980 		return (ret);
981 
982 	last_free = mmeta->free;
983 
984 	for (i = 0; i <= argp->num; i++) {
985 		pgno = argp->start_pgno + i;
986 		if ((ret =
987 		    CDB_memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
988 			(void)CDB___db_pgerr(dbp, pgno);
989 			goto out;
990 		}
991 
992 		/* Fix up the allocated page. */
993 		P_INIT(pagep,
994 		    dbp->pgsize, pgno, PGNO_INVALID, last_free, 0, P_INVALID);
995 		ZERO_LSN(pagep->lsn);
996 
997 		if ((ret = CDB_memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
998 			goto out;
999 	}
1000 
1001 	mmeta->free = last_free;
1002 	mmeta->lsn = argp->mmetalsn;
1003 	mod_meta = 1;
1004 
1005 out:	if ((t_ret = CDB_memp_fput(mpf, mmeta, mod_meta ? DB_MPOOL_DIRTY : 0)) != 0
1006 	    && ret == 0)
1007 		ret = t_ret;
1008 
1009 	return (ret);
1010 }
1011 
1012 /*
1013  * CDB___ham_alloc_pages --
1014  *
1015  * Called during redo of a file create.  We create new pages in the file
1016  * using the MPOOL_NEW_GROUP flag.  We then log the meta-data page with a
1017  * __crdel_metasub message.  If we manage to crash without the newly written
1018  * pages getting to disk (I'm not sure this can happen anywhere except our
1019  * test suite?!), then we need to go through a recreate the final pages.
1020  * Hash normally has holes in its files and handles them appropriately.
1021  */
1022 static int
CDB___ham_alloc_pages(dbp,meta,start,npages)1023 CDB___ham_alloc_pages(dbp, meta, start, npages)
1024 	DB *dbp;
1025 	HMETA *meta;
1026 	db_pgno_t start, npages;
1027 {
1028 	DB_MPOOLFILE *mpf;
1029 	PAGE *pagep;
1030 	db_pgno_t pgno;
1031 	int ret;
1032 
1033 	mpf = dbp->mpf;
1034 
1035 	/* Read the last page of the allocation. */
1036 	pgno = meta->spares[0] + meta->max_bucket;
1037 
1038 	/* If the page exists, and it has been initialized, then we're done. */
1039 	if ((ret = CDB_memp_fget(mpf, &pgno, 0, &pagep)) == 0) {
1040 		if (pagep->type == P_INVALID && pagep->lsn.file == 0)
1041 			goto reinit_page;
1042 		if ((ret = CDB_memp_fput(mpf, pagep, 0)) != 0)
1043 			return (ret);
1044 		return (0);
1045 	}
1046 
1047 	/*
1048 	 * Had to create the page.  On some systems (read "Windows"),
1049 	 * you can find random garbage on pages to which you haven't
1050 	 * yet written.  So, we have an os layer that will do the
1051 	 * right thing for group allocations.  We call that directly
1052 	 * to make sure all the pages are allocated and then continue
1053 	 * merrily on our way with normal recovery.
1054 	 */
1055 	if ((ret = CDB___os_fpinit(&mpf->fh, start, npages, dbp->pgsize)) != 0)
1056 		return (ret);
1057 
1058 	if ((ret = CDB_memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
1059 		(void)CDB___db_pgerr(dbp, pgno);
1060 		return (ret);
1061 	}
1062 
1063 reinit_page:
1064 	/* Initialize the newly allocated page. */
1065 	P_INIT(pagep,
1066 	    dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1067 	ZERO_LSN(pagep->lsn);
1068 
1069 	if ((ret = CDB_memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
1070 		return (ret);
1071 
1072 	return (0);
1073 }
1074