1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998, 1999
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1995, 1996
9 * Margo Seltzer. All rights reserved.
10 */
11 /*
12 * Copyright (c) 1995, 1996
13 * The President and Fellows of Harvard University. All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * Margo Seltzer.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42
43 #include "db_config.h"
44
45 #ifndef lint
46 static const char sccsid[] = "@(#)hash_rec.c 11.12 (Sleepycat) 10/19/99";
47 #endif /* not lint */
48
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51
52 #include <errno.h>
53 #include <string.h>
54 #endif
55
56 #include "db_int.h"
57 #include "db_page.h"
58 #include "db_shash.h"
59 #include "btree.h"
60 #include "hash.h"
61 #include "lock.h"
62 #include "log.h"
63 #include "mp.h"
64
65 static int CDB___ham_alloc_pages __P((DB *, HMETA *, db_pgno_t, db_pgno_t));
66 static int CDB___ham_free_pages __P((DB *, __ham_groupalloc_args *));
67
68 /*
69 * CDB___ham_insdel_recover --
70 *
71 * PUBLIC: int CDB___ham_insdel_recover
72 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
73 */
74 int
CDB___ham_insdel_recover(dbenv,dbtp,lsnp,redo,info)75 CDB___ham_insdel_recover(dbenv, dbtp, lsnp, redo, info)
76 DB_ENV *dbenv;
77 DBT *dbtp;
78 DB_LSN *lsnp;
79 int redo;
80 void *info;
81 {
82 __ham_insdel_args *argp;
83 DB *file_dbp;
84 DBC *dbc;
85 DB_MPOOLFILE *mpf;
86 PAGE *pagep;
87 u_int32_t op;
88 int cmp_n, cmp_p, getmeta, ret;
89
90 COMPQUIET(info, NULL);
91
92 getmeta = 0;
93 REC_PRINT(CDB___ham_insdel_print);
94 REC_INTRO(CDB___ham_insdel_read, 1);
95
96 if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
97 if (!redo) {
98 /*
99 * We are undoing and the page doesn't exist. That
100 * is equivalent to having a pagelsn of 0, so we
101 * would not have to undo anything. In this case,
102 * don't bother creating a page.
103 */
104 goto done;
105 } else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
106 DB_MPOOL_CREATE, &pagep)) != 0)
107 goto out;
108 }
109
110 if ((ret = CDB___ham_get_meta(dbc)) != 0)
111 goto out;
112 getmeta = 1;
113
114 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
115 cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
116 /*
117 * Two possible things going on:
118 * redo a delete/undo a put: delete the item from the page.
119 * redo a put/undo a delete: add the item to the page.
120 * If we are undoing a delete, then the information logged is the
121 * entire entry off the page, not just the data of a dbt. In
122 * this case, we want to copy it back onto the page verbatim.
123 * We do this by calling __putitem with the type H_OFFPAGE instead
124 * of H_KEYDATA.
125 */
126 op = OPCODE_OF(argp->opcode);
127
128 if ((op == DELPAIR && cmp_n == 0 && !redo) ||
129 (op == PUTPAIR && cmp_p == 0 && redo)) {
130 /*
131 * Need to redo a PUT or undo a delete. If we are undoing a
132 * delete, we've got to restore the item back to its original
133 * position. That's a royal pain in the butt (because we do
134 * not store item lengths on the page), but there's no choice.
135 */
136 if (op != DELPAIR ||
137 argp->ndx == (u_int32_t)H_NUMPAIRS(pagep)) {
138 CDB___ham_putitem(pagep, &argp->key,
139 !redo || PAIR_ISKEYBIG(argp->opcode) ?
140 H_OFFPAGE : H_KEYDATA);
141 CDB___ham_putitem(pagep, &argp->data,
142 !redo || PAIR_ISDATABIG(argp->opcode) ?
143 H_OFFPAGE : H_KEYDATA);
144 } else
145 (void) CDB___ham_reputpair(pagep, file_dbp->pgsize,
146 argp->ndx, &argp->key, &argp->data);
147
148 LSN(pagep) = redo ? *lsnp : argp->pagelsn;
149 if ((ret = CDB___ham_put_page(file_dbp, pagep, 1)) != 0)
150 goto out;
151
152 } else if ((op == DELPAIR && cmp_p == 0 && redo)
153 || (op == PUTPAIR && cmp_n == 0 && !redo)) {
154 /* Need to undo a put or redo a delete. */
155 CDB___ham_dpair(file_dbp, pagep, argp->ndx);
156 LSN(pagep) = redo ? *lsnp : argp->pagelsn;
157 if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
158 goto out;
159 } else
160 if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
161 goto out;
162
163 /* Return the previous LSN. */
164 done: *lsnp = argp->prev_lsn;
165 ret = 0;
166
167 out: if (getmeta)
168 (void)CDB___ham_release_meta(dbc);
169 REC_CLOSE;
170 }
171
172 /*
173 * CDB___ham_newpage_recover --
174 * This log message is used when we add/remove overflow pages. This
175 * message takes care of the pointer chains, not the data on the pages.
176 *
177 * PUBLIC: int CDB___ham_newpage_recover
178 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
179 */
180 int
CDB___ham_newpage_recover(dbenv,dbtp,lsnp,redo,info)181 CDB___ham_newpage_recover(dbenv, dbtp, lsnp, redo, info)
182 DB_ENV *dbenv;
183 DBT *dbtp;
184 DB_LSN *lsnp;
185 int redo;
186 void *info;
187 {
188 __ham_newpage_args *argp;
189 DB *file_dbp;
190 DBC *dbc;
191 DB_MPOOLFILE *mpf;
192 PAGE *pagep;
193 int cmp_n, cmp_p, change, getmeta, ret;
194
195 COMPQUIET(info, NULL);
196
197 getmeta = 0;
198 REC_PRINT(CDB___ham_newpage_print);
199 REC_INTRO(CDB___ham_newpage_read, 1);
200
201 if ((ret = CDB_memp_fget(mpf, &argp->new_pgno, 0, &pagep)) != 0) {
202 if (!redo) {
203 /*
204 * We are undoing and the page doesn't exist. That
205 * is equivalent to having a pagelsn of 0, so we
206 * would not have to undo anything. In this case,
207 * don't bother creating a page.
208 */
209 ret = 0;
210 goto ppage;
211 } else if ((ret = CDB_memp_fget(mpf, &argp->new_pgno,
212 DB_MPOOL_CREATE, &pagep)) != 0)
213 goto out;
214 }
215
216 if ((ret = CDB___ham_get_meta(dbc)) != 0)
217 goto out;
218 getmeta = 1;
219
220 /*
221 * There are potentially three pages we need to check: the one
222 * that we created/deleted, the one before it and the one after
223 * it.
224 */
225
226 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
227 cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
228 change = 0;
229
230 if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
231 (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
232 /* Redo a create new page or undo a delete new page. */
233 P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
234 argp->prev_pgno, argp->next_pgno, 0, P_HASH);
235 change = 1;
236 } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
237 (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
238 /*
239 * Redo a delete or undo a create new page. All we
240 * really need to do is change the LSN.
241 */
242 change = 1;
243 }
244
245 if (!change) {
246 if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
247 goto out;
248 } else {
249 LSN(pagep) = redo ? *lsnp : argp->pagelsn;
250 if ((ret = CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
251 goto out;
252 }
253
254 /* Now do the prev page. */
255 ppage: if (argp->prev_pgno != PGNO_INVALID) {
256 if ((ret = CDB_memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0) {
257 if (!redo) {
258 /*
259 * We are undoing and the page doesn't exist.
260 * That is equivalent to having a pagelsn of 0,
261 * so we would not have to undo anything. In
262 * this case, don't bother creating a page.
263 */
264 ret = 0;
265 goto npage;
266 } else if ((ret =
267 CDB_memp_fget(mpf, &argp->prev_pgno,
268 DB_MPOOL_CREATE, &pagep)) != 0)
269 goto out;
270 }
271
272 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
273 cmp_p = CDB_log_compare(&LSN(pagep), &argp->prevlsn);
274 change = 0;
275
276 if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
277 (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
278 /* Redo a create new page or undo a delete new page. */
279 pagep->next_pgno = argp->new_pgno;
280 change = 1;
281 } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
282 (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
283 /* Redo a delete or undo a create new page. */
284 pagep->next_pgno = argp->next_pgno;
285 change = 1;
286 }
287
288 if (!change) {
289 if ((ret =
290 CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
291 goto out;
292 } else {
293 LSN(pagep) = redo ? *lsnp : argp->prevlsn;
294 if ((ret =
295 CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
296 goto out;
297 }
298 }
299
300 /* Now time to do the next page */
301 npage: if (argp->next_pgno != PGNO_INVALID) {
302 if ((ret = CDB_memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
303 if (!redo) {
304 /*
305 * We are undoing and the page doesn't exist.
306 * That is equivalent to having a pagelsn of 0,
307 * so we would not have to undo anything. In
308 * this case, don't bother creating a page.
309 */
310 goto done;
311 } else if ((ret =
312 CDB_memp_fget(mpf, &argp->next_pgno,
313 DB_MPOOL_CREATE, &pagep)) != 0)
314 goto out;
315 }
316
317 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
318 cmp_p = CDB_log_compare(&LSN(pagep), &argp->nextlsn);
319 change = 0;
320
321 if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
322 (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
323 /* Redo a create new page or undo a delete new page. */
324 pagep->prev_pgno = argp->new_pgno;
325 change = 1;
326 } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
327 (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
328 /* Redo a delete or undo a create new page. */
329 pagep->prev_pgno = argp->prev_pgno;
330 change = 1;
331 }
332
333 if (!change) {
334 if ((ret =
335 CDB___ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
336 goto out;
337 } else {
338 LSN(pagep) = redo ? *lsnp : argp->nextlsn;
339 if ((ret =
340 CDB___ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
341 goto out;
342 }
343 }
344 done: *lsnp = argp->prev_lsn;
345 ret = 0;
346
347 out: if (getmeta)
348 (void)CDB___ham_release_meta(dbc);
349 REC_CLOSE;
350 }
351
352
353 /*
354 * CDB___ham_replace_recover --
355 * This log message refers to partial puts that are local to a single
356 * page. You can think of them as special cases of the more general
357 * insdel log message.
358 *
359 * PUBLIC: int CDB___ham_replace_recover
360 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
361 */
362 int
CDB___ham_replace_recover(dbenv,dbtp,lsnp,redo,info)363 CDB___ham_replace_recover(dbenv, dbtp, lsnp, redo, info)
364 DB_ENV *dbenv;
365 DBT *dbtp;
366 DB_LSN *lsnp;
367 int redo;
368 void *info;
369 {
370 __ham_replace_args *argp;
371 DB *file_dbp;
372 DBC *dbc;
373 DB_MPOOLFILE *mpf;
374 DBT dbt;
375 PAGE *pagep;
376 int32_t grow;
377 int change, cmp_n, cmp_p, getmeta, ret;
378 u_int8_t *hk;
379
380 COMPQUIET(info, NULL);
381
382 getmeta = 0;
383 REC_PRINT(CDB___ham_replace_print);
384 REC_INTRO(CDB___ham_replace_read, 1);
385
386 if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
387 if (!redo) {
388 /*
389 * We are undoing and the page doesn't exist. That
390 * is equivalent to having a pagelsn of 0, so we
391 * would not have to undo anything. In this case,
392 * don't bother creating a page.
393 */
394 goto done;
395 } else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
396 DB_MPOOL_CREATE, &pagep)) != 0)
397 goto out;
398 }
399
400 if ((ret = CDB___ham_get_meta(dbc)) != 0)
401 goto out;
402 getmeta = 1;
403
404 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
405 cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
406
407 memset(&dbt, 0, sizeof(dbt));
408 if (cmp_p == 0 && redo) {
409 change = 1;
410 /* Reapply the change as specified. */
411 dbt.data = argp->newitem.data;
412 dbt.size = argp->newitem.size;
413 grow = argp->newitem.size - argp->olditem.size;
414 LSN(pagep) = *lsnp;
415 } else if (cmp_n == 0 && !redo) {
416 change = 1;
417 /* Undo the already applied change. */
418 dbt.data = argp->olditem.data;
419 dbt.size = argp->olditem.size;
420 grow = argp->olditem.size - argp->newitem.size;
421 LSN(pagep) = argp->pagelsn;
422 } else {
423 change = 0;
424 grow = 0;
425 }
426
427 if (change) {
428 CDB___ham_onpage_replace(pagep,
429 file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
430 if (argp->makedup) {
431 hk = P_ENTRY(pagep, argp->ndx);
432 if (redo)
433 HPAGE_PTYPE(hk) = H_DUPLICATE;
434 else
435 HPAGE_PTYPE(hk) = H_KEYDATA;
436 }
437 }
438
439 if ((ret = CDB___ham_put_page(file_dbp, pagep, change)) != 0)
440 goto out;
441
442 done: *lsnp = argp->prev_lsn;
443 ret = 0;
444
445 out: if (getmeta)
446 (void)CDB___ham_release_meta(dbc);
447 REC_CLOSE;
448 }
449
450 /*
451 * CDB___ham_newpgno_recover --
452 * This log message is used when allocating or deleting an overflow
453 * page. It takes care of modifying the meta data.
454 *
455 * PUBLIC: int CDB___ham_newpgno_recover
456 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
457 */
458 int
CDB___ham_newpgno_recover(dbenv,dbtp,lsnp,redo,info)459 CDB___ham_newpgno_recover(dbenv, dbtp, lsnp, redo, info)
460 DB_ENV *dbenv;
461 DBT *dbtp;
462 DB_LSN *lsnp;
463 int redo;
464 void *info;
465 {
466 COMPQUIET(dbenv, NULL);
467 COMPQUIET(dbtp, NULL);
468 COMPQUIET(lsnp, NULL);
469 COMPQUIET(redo, 0);
470 COMPQUIET(info, NULL);
471 return (EINVAL);
472 }
473
474 /*
475 * CDB___ham_splitmeta_recover --
476 * This is the meta-data part of the split. Records the new and old
477 * bucket numbers and the new/old mask information.
478 *
479 * PUBLIC: int CDB___ham_splitmeta_recover
480 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
481 */
482 int
CDB___ham_splitmeta_recover(dbenv,dbtp,lsnp,redo,info)483 CDB___ham_splitmeta_recover(dbenv, dbtp, lsnp, redo, info)
484 DB_ENV *dbenv;
485 DBT *dbtp;
486 DB_LSN *lsnp;
487 int redo;
488 void *info;
489 {
490 COMPQUIET(dbenv, NULL);
491 COMPQUIET(dbtp, NULL);
492 COMPQUIET(lsnp, NULL);
493 COMPQUIET(redo, 0);
494 COMPQUIET(info, NULL);
495 return (EINVAL);
496 }
497
498 /*
499 * CDB___ham_splitdata_recover --
500 *
501 * PUBLIC: int CDB___ham_splitdata_recover
502 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
503 */
504 int
CDB___ham_splitdata_recover(dbenv,dbtp,lsnp,redo,info)505 CDB___ham_splitdata_recover(dbenv, dbtp, lsnp, redo, info)
506 DB_ENV *dbenv;
507 DBT *dbtp;
508 DB_LSN *lsnp;
509 int redo;
510 void *info;
511 {
512 __ham_splitdata_args *argp;
513 DB *file_dbp;
514 DBC *dbc;
515 DB_MPOOLFILE *mpf;
516 PAGE *pagep;
517 int change, cmp_n, cmp_p, getmeta, ret;
518
519 COMPQUIET(info, NULL);
520
521 getmeta = 0;
522 REC_PRINT(CDB___ham_splitdata_print);
523 REC_INTRO(CDB___ham_splitdata_read, 1);
524
525 if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
526 if (!redo) {
527 /*
528 * We are undoing and the page doesn't exist. That
529 * is equivalent to having a pagelsn of 0, so we
530 * would not have to undo anything. In this case,
531 * don't bother creating a page.
532 */
533 goto done;
534 } else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
535 DB_MPOOL_CREATE, &pagep)) != 0)
536 goto out;
537 }
538
539 if ((ret = CDB___ham_get_meta(dbc)) != 0)
540 goto out;
541 getmeta = 1;
542
543 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
544 cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
545
546 /*
547 * There are two types of log messages here, one for the old page
548 * and one for the new pages created. The original image in the
549 * SPLITOLD record is used for undo. The image in the SPLITNEW
550 * is used for redo. We should never have a case where there is
551 * a redo operation and the SPLITOLD record is on disk, but not
552 * the SPLITNEW record. Therefore, we only have work to do when
553 * redo NEW messages and undo OLD messages, but we have to update
554 * LSNs in both cases.
555 */
556 change = 0;
557 if (cmp_p == 0 && redo) {
558 if (argp->opcode == SPLITNEW)
559 /* Need to redo the split described. */
560 memcpy(pagep, argp->pageimage.data,
561 argp->pageimage.size);
562 LSN(pagep) = *lsnp;
563 change = 1;
564 } else if (cmp_n == 0 && !redo) {
565 if (argp->opcode == SPLITOLD) {
566 /* Put back the old image. */
567 memcpy(pagep, argp->pageimage.data,
568 argp->pageimage.size);
569 } else
570 P_INIT(pagep, file_dbp->pgsize, argp->pgno,
571 PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
572 LSN(pagep) = argp->pagelsn;
573 change = 1;
574 }
575 if ((ret = CDB___ham_put_page(file_dbp, pagep, change)) != 0)
576 goto out;
577
578 done: *lsnp = argp->prev_lsn;
579 ret = 0;
580
581 out: if (getmeta)
582 (void)CDB___ham_release_meta(dbc);
583 REC_CLOSE;
584 }
585
586 /*
587 * CDB___ham_ovfl_recover --
588 * This message is generated when we initialize a set of overflow pages.
589 *
590 * PUBLIC: int CDB___ham_ovfl_recover
591 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
592 */
593 int
CDB___ham_ovfl_recover(dbenv,dbtp,lsnp,redo,info)594 CDB___ham_ovfl_recover(dbenv, dbtp, lsnp, redo, info)
595 DB_ENV *dbenv;
596 DBT *dbtp;
597 DB_LSN *lsnp;
598 int redo;
599 void *info;
600 {
601 COMPQUIET(dbenv, NULL);
602 COMPQUIET(dbtp, NULL);
603 COMPQUIET(lsnp, NULL);
604 COMPQUIET(redo, 0);
605 COMPQUIET(info, NULL);
606 return (EINVAL);
607 }
608
609 /*
610 * CDB___ham_copypage_recover --
611 * Recovery function for copypage.
612 *
613 * PUBLIC: int CDB___ham_copypage_recover
614 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
615 */
616 int
CDB___ham_copypage_recover(dbenv,dbtp,lsnp,redo,info)617 CDB___ham_copypage_recover(dbenv, dbtp, lsnp, redo, info)
618 DB_ENV *dbenv;
619 DBT *dbtp;
620 DB_LSN *lsnp;
621 int redo;
622 void *info;
623 {
624 __ham_copypage_args *argp;
625 DB *file_dbp;
626 DBC *dbc;
627 DB_MPOOLFILE *mpf;
628 PAGE *pagep;
629 int cmp_n, cmp_p, getmeta, modified, ret;
630
631 COMPQUIET(info, NULL);
632
633 getmeta = 0;
634 REC_PRINT(CDB___ham_copypage_print);
635 REC_INTRO(CDB___ham_copypage_read, 1);
636
637 if ((ret = CDB___ham_get_meta(dbc)) != 0)
638 goto out;
639 getmeta = 1;
640 modified = 0;
641
642 /* This is the bucket page. */
643 if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
644 if (!redo) {
645 /*
646 * We are undoing and the page doesn't exist. That
647 * is equivalent to having a pagelsn of 0, so we
648 * would not have to undo anything. In this case,
649 * don't bother creating a page.
650 */
651 ret = 0;
652 goto donext;
653 } else if ((ret = CDB_memp_fget(mpf, &argp->pgno,
654 DB_MPOOL_CREATE, &pagep)) != 0)
655 goto out;
656 }
657
658 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
659 cmp_p = CDB_log_compare(&LSN(pagep), &argp->pagelsn);
660
661 if (cmp_p == 0 && redo) {
662 /* Need to redo update described. */
663 memcpy(pagep, argp->page.data, argp->page.size);
664 LSN(pagep) = *lsnp;
665 modified = 1;
666 } else if (cmp_n == 0 && !redo) {
667 /* Need to undo update described. */
668 P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
669 argp->next_pgno, 0, P_HASH);
670 LSN(pagep) = argp->pagelsn;
671 modified = 1;
672 }
673 if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
674 goto out;
675
676 donext: /* Now fix up the "next" page. */
677 if ((ret = CDB_memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0) {
678 if (!redo) {
679 /*
680 * We are undoing and the page doesn't exist. That
681 * is equivalent to having a pagelsn of 0, so we
682 * would not have to undo anything. In this case,
683 * don't bother creating a page.
684 */
685 ret = 0;
686 goto do_nn;
687 } else if ((ret = CDB_memp_fget(mpf, &argp->next_pgno,
688 DB_MPOOL_CREATE, &pagep)) != 0)
689 goto out;
690 }
691
692 /* There is nothing to do in the REDO case; only UNDO. */
693
694 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
695 if (cmp_n == 0 && !redo) {
696 /* Need to undo update described. */
697 memcpy(pagep, argp->page.data, argp->page.size);
698 modified = 1;
699 }
700 if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
701 goto out;
702
703 /* Now fix up the next's next page. */
704 do_nn: if (argp->nnext_pgno == PGNO_INVALID)
705 goto done;
706
707 if ((ret = CDB_memp_fget(mpf, &argp->nnext_pgno, 0, &pagep)) != 0) {
708 if (!redo) {
709 /*
710 * We are undoing and the page doesn't exist. That
711 * is equivalent to having a pagelsn of 0, so we
712 * would not have to undo anything. In this case,
713 * don't bother creating a page.
714 */
715 goto done;
716 } else if ((ret = CDB_memp_fget(mpf, &argp->nnext_pgno,
717 DB_MPOOL_CREATE, &pagep)) != 0)
718 goto out;
719 }
720
721 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
722 cmp_p = CDB_log_compare(&LSN(pagep), &argp->nnextlsn);
723
724 if (cmp_p == 0 && redo) {
725 /* Need to redo update described. */
726 PREV_PGNO(pagep) = argp->pgno;
727 LSN(pagep) = *lsnp;
728 modified = 1;
729 } else if (cmp_n == 0 && !redo) {
730 /* Need to undo update described. */
731 PREV_PGNO(pagep) = argp->next_pgno;
732 LSN(pagep) = argp->nnextlsn;
733 modified = 1;
734 }
735 if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
736 goto out;
737
738 done: *lsnp = argp->prev_lsn;
739 ret = 0;
740
741 out: if (getmeta)
742 (void)CDB___ham_release_meta(dbc);
743 REC_CLOSE;
744 }
745
746 /*
747 * CDB___ham_metagroup_recover --
748 * Recovery function for metagroup.
749 *
750 * PUBLIC: int CDB___ham_metagroup_recover
751 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
752 */
753 int
CDB___ham_metagroup_recover(dbenv,dbtp,lsnp,redo,info)754 CDB___ham_metagroup_recover(dbenv, dbtp, lsnp, redo, info)
755 DB_ENV *dbenv;
756 DBT *dbtp;
757 DB_LSN *lsnp;
758 int redo;
759 void *info;
760 {
761 __ham_metagroup_args *argp;
762 HASH_CURSOR *hcp;
763 DB *file_dbp;
764 DBC *dbc;
765 DB_MPOOLFILE *mpf;
766 PAGE *pagep;
767 db_pgno_t last_pgno;
768 int cmp_n, cmp_p, groupgrow, modified, ret;
769
770 COMPQUIET(info, NULL);
771 REC_PRINT(CDB___ham_metagroup_print);
772 REC_INTRO(CDB___ham_metagroup_read, 1);
773
774 /*
775 * This logs the virtual create of pages pgno to pgno + bucket
776 * Since the mpool page-allocation is not really able to be
777 * transaction protected, we can never undo it. Even in an abort,
778 * we have to allocate these pages to the hash table.
779 * The log record contains:
780 * bucket: new bucket being allocated.
781 * pgno: page number of the new bucket.
782 * if bucket is a power of 2, then we allocated a whole batch of
783 * pages; if it's not, then we simply allocated one new page.
784 */
785 groupgrow =
786 (u_int32_t)(1 << CDB___db_log2(argp->bucket + 1)) == argp->bucket + 1;
787
788 last_pgno = argp->pgno;
789 if (groupgrow)
790 /* Read the last page. */
791 last_pgno += argp->bucket;
792
793 if ((ret = CDB_memp_fget(mpf, &last_pgno, DB_MPOOL_CREATE, &pagep)) != 0)
794 goto out;
795
796 modified = 0;
797 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
798 cmp_p = CDB_log_compare(&argp->pagelsn, &LSN(pagep));
799
800 if ((cmp_p == 0 && redo) || (cmp_n == 0 && !redo)) {
801 /*
802 * We need to make sure that we redo the allocation of the
803 * pages.
804 */
805 if (redo)
806 pagep->lsn = *lsnp;
807 else
808 pagep->lsn = argp->pagelsn;
809 modified = 1;
810 }
811 if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
812 goto out;
813
814 /* Now we have to update the meta-data page. */
815 hcp = dbc->internal;
816 if ((ret = CDB___ham_get_meta(dbc)) != 0)
817 goto out;
818 cmp_n = CDB_log_compare(lsnp, &hcp->hdr->dbmeta.lsn);
819 cmp_p = CDB_log_compare(&argp->metalsn, &hcp->hdr->dbmeta.lsn);
820 if ((cmp_p == 0 && redo) || (cmp_n == 0 && !redo)) {
821 if (redo) {
822 /* Redo the actual updating of bucket counts. */
823 ++hcp->hdr->max_bucket;
824 if (groupgrow) {
825 hcp->hdr->low_mask = hcp->hdr->high_mask;
826 hcp->hdr->high_mask =
827 (argp->bucket + 1) | hcp->hdr->low_mask;
828 }
829 hcp->hdr->dbmeta.lsn = *lsnp;
830 } else {
831 /* Undo the actual updating of bucket counts. */
832 --hcp->hdr->max_bucket;
833 if (groupgrow) {
834 hcp->hdr->high_mask = hcp->hdr->low_mask;
835 hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
836 }
837 hcp->hdr->dbmeta.lsn = argp->metalsn;
838 }
839 if (groupgrow &&
840 hcp->hdr->spares[CDB___db_log2(argp->bucket + 1) + 1] ==
841 PGNO_INVALID)
842 hcp->hdr->spares[CDB___db_log2(argp->bucket + 1) + 1] =
843 argp->pgno - argp->bucket - 1;
844 F_SET(hcp, H_DIRTY);
845 }
846 if ((ret = CDB___ham_release_meta(dbc)) != 0)
847 goto out;
848
849 done: *lsnp = argp->prev_lsn;
850 ret = 0;
851
852 out: REC_CLOSE;
853 }
854
855 /*
856 * CDB___ham_groupalloc_recover --
857 * Recover the batch creation of a set of pages for a new database.
858 *
859 * PUBLIC: int CDB___ham_groupalloc_recover
860 * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
861 */
862 int
CDB___ham_groupalloc_recover(dbenv,dbtp,lsnp,redo,info)863 CDB___ham_groupalloc_recover(dbenv, dbtp, lsnp, redo, info)
864 DB_ENV *dbenv;
865 DBT *dbtp;
866 DB_LSN *lsnp;
867 int redo;
868 void *info;
869 {
870 __ham_groupalloc_args *argp;
871 DBMETA *mmeta;
872 DB_MPOOLFILE *mpf;
873 DB *file_dbp;
874 DBC *dbc;
875 PAGE *pagep;
876 db_pgno_t pgno;
877 int cmp_n, cmp_p, modified, ret;
878
879 modified = 0;
880 COMPQUIET(info, NULL);
881 REC_PRINT(CDB___ham_groupalloc_print);
882 REC_INTRO(CDB___ham_groupalloc_read, 0);
883
884 if ((ret = CDB_memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
885 if (redo) {
886 /* Page should have existed. */
887 (void)CDB___db_pgerr(file_dbp, argp->pgno);
888 goto out;
889 } else {
890 ret = 0;
891 goto done;
892 }
893 }
894
895 cmp_n = CDB_log_compare(lsnp, &LSN(pagep));
896 cmp_p = CDB_log_compare(&LSN(pagep), &argp->metalsn);
897
898 if (cmp_p == 0 && redo) {
899 LSN(pagep) = *lsnp;
900 modified = 1;
901 } else if (cmp_n == 0 && !redo) {
902 LSN(pagep) = argp->metalsn;
903 modified = 1;
904 }
905
906 /*
907 * Basically, we used mpool to allocate a chunk of pages.
908 * We need to either add those to a free list (in the undo
909 * case) or initialize them (in the redo case).
910 *
911 * If we are redoing and this is a hash subdatabase, it's possible
912 * that the pages were never allocated, so we'd better check for
913 * that and handle it here.
914 */
915 if (redo) {
916 if ((ret = CDB___ham_alloc_pages(file_dbp,
917 (HMETA *)pagep, argp->start_pgno, argp->num)) != 0)
918 goto out1;
919
920 /* Update the master meta data page LSN. */
921 if (argp->pgno != PGNO_BASE_MD) {
922 pgno = PGNO_BASE_MD;
923 if ((ret = CDB_memp_fget(mpf, &pgno, 0, &mmeta)) != 0)
924 goto out1;
925 mmeta->lsn = *lsnp;
926 if ((ret = CDB_memp_fput(mpf, mmeta, DB_MPOOL_DIRTY)) != 0)
927 goto out1;
928 }
929 }
930
931 /*
932 * If we are undoing and this is a subdatabase then we need to
933 * put the pages on the free list. If it's not a subdatabase,
934 * then we can simply do nothing because we're about to delete
935 * the file.
936 */
937 if (!redo && argp->pgno != PGNO_BASE_MD) {
938 if ((ret = CDB___ham_free_pages(file_dbp, argp)) != 0)
939 goto out1;
940 LSN(pagep) = argp->metalsn;
941 modified = 1;
942 }
943
944 out1: if ((ret = CDB_memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
945 goto out;
946
947 done: if (ret == 0)
948 *lsnp = argp->prev_lsn;
949
950 out: REC_CLOSE;
951 }
952
953 /*
954 * CDB___ham_free_pages --
955 *
956 * Called during abort/undo of a file create. We create new pages in the file
957 * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a
958 * __crdel_metasub message. If we fail we need to take those newly allocated
959 * pages and put them on a free list. Normally this would happen in the
960 * recovery for CDB___db_new, but that doesn't get called in this case.
961 */
962 static int
CDB___ham_free_pages(dbp,argp)963 CDB___ham_free_pages(dbp, argp)
964 DB *dbp;
965 __ham_groupalloc_args *argp;
966 {
967 DBMETA *mmeta;
968 DB_MPOOLFILE *mpf;
969 PAGE *pagep;
970 u_int32_t i;
971 db_pgno_t last_free, pgno;
972 int mod_meta, ret, t_ret;
973
974 mod_meta = 0;
975
976 /* Get the master meta-data page. */
977 mpf = dbp->mpf;
978 pgno = PGNO_BASE_MD;
979 if ((ret = CDB_memp_fget(mpf, &pgno, 0, &mmeta)) != 0)
980 return (ret);
981
982 last_free = mmeta->free;
983
984 for (i = 0; i <= argp->num; i++) {
985 pgno = argp->start_pgno + i;
986 if ((ret =
987 CDB_memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
988 (void)CDB___db_pgerr(dbp, pgno);
989 goto out;
990 }
991
992 /* Fix up the allocated page. */
993 P_INIT(pagep,
994 dbp->pgsize, pgno, PGNO_INVALID, last_free, 0, P_INVALID);
995 ZERO_LSN(pagep->lsn);
996
997 if ((ret = CDB_memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
998 goto out;
999 }
1000
1001 mmeta->free = last_free;
1002 mmeta->lsn = argp->mmetalsn;
1003 mod_meta = 1;
1004
1005 out: if ((t_ret = CDB_memp_fput(mpf, mmeta, mod_meta ? DB_MPOOL_DIRTY : 0)) != 0
1006 && ret == 0)
1007 ret = t_ret;
1008
1009 return (ret);
1010 }
1011
1012 /*
1013 * CDB___ham_alloc_pages --
1014 *
1015 * Called during redo of a file create. We create new pages in the file
1016 * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a
1017 * __crdel_metasub message. If we manage to crash without the newly written
1018 * pages getting to disk (I'm not sure this can happen anywhere except our
1019 * test suite?!), then we need to go through a recreate the final pages.
1020 * Hash normally has holes in its files and handles them appropriately.
1021 */
1022 static int
CDB___ham_alloc_pages(dbp,meta,start,npages)1023 CDB___ham_alloc_pages(dbp, meta, start, npages)
1024 DB *dbp;
1025 HMETA *meta;
1026 db_pgno_t start, npages;
1027 {
1028 DB_MPOOLFILE *mpf;
1029 PAGE *pagep;
1030 db_pgno_t pgno;
1031 int ret;
1032
1033 mpf = dbp->mpf;
1034
1035 /* Read the last page of the allocation. */
1036 pgno = meta->spares[0] + meta->max_bucket;
1037
1038 /* If the page exists, and it has been initialized, then we're done. */
1039 if ((ret = CDB_memp_fget(mpf, &pgno, 0, &pagep)) == 0) {
1040 if (pagep->type == P_INVALID && pagep->lsn.file == 0)
1041 goto reinit_page;
1042 if ((ret = CDB_memp_fput(mpf, pagep, 0)) != 0)
1043 return (ret);
1044 return (0);
1045 }
1046
1047 /*
1048 * Had to create the page. On some systems (read "Windows"),
1049 * you can find random garbage on pages to which you haven't
1050 * yet written. So, we have an os layer that will do the
1051 * right thing for group allocations. We call that directly
1052 * to make sure all the pages are allocated and then continue
1053 * merrily on our way with normal recovery.
1054 */
1055 if ((ret = CDB___os_fpinit(&mpf->fh, start, npages, dbp->pgsize)) != 0)
1056 return (ret);
1057
1058 if ((ret = CDB_memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
1059 (void)CDB___db_pgerr(dbp, pgno);
1060 return (ret);
1061 }
1062
1063 reinit_page:
1064 /* Initialize the newly allocated page. */
1065 P_INIT(pagep,
1066 dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1067 ZERO_LSN(pagep->lsn);
1068
1069 if ((ret = CDB_memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
1070 return (ret);
1071
1072 return (0);
1073 }
1074