1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998, 1999, 2000
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
10 */
11 /*
12 * Copyright (c) 1990, 1993, 1994, 1995
13 * The Regents of the University of California. All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * Mike Olson.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42
43 #include "config.h"
44
45 #ifndef lint
46 static const char revid[] = "$Id: bt_open.c,v 1.4 2000/06/28 16:47:00 loic Exp $";
47 #endif /* not lint */
48
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51
52 #include <errno.h>
53 #include <limits.h>
54 #include <string.h>
55 #endif
56
57 #include "db_int.h"
58 #include "db_page.h"
59 #include "db_swap.h"
60 #include "btree.h"
61 #include "db_shash.h"
62 #include "lock.h"
63 #include "log.h"
64 #include "mp.h"
65
66 /*
67 * CDB___bam_open --
68 * Open a btree.
69 *
70 * PUBLIC: int CDB___bam_open __P((DB *, const char *, db_pgno_t, u_int32_t));
71 */
72 int
CDB___bam_open(dbp,name,base_pgno,flags)73 CDB___bam_open(dbp, name, base_pgno, flags)
74 DB *dbp;
75 const char *name;
76 db_pgno_t base_pgno;
77 u_int32_t flags;
78 {
79 BTREE *t;
80
81 t = dbp->bt_internal;
82
83 /* Initialize the remaining fields/methods of the DB. */
84 dbp->del = CDB___bam_delete;
85 dbp->key_range = CDB___bam_key_range;
86 dbp->stat = CDB___bam_stat;
87
88 /*
89 * We don't permit the user to specify a prefix routine if they didn't
90 * also specify a comparison routine, they can't know enough about our
91 * comparison routine to get it right.
92 */
93 if (t->bt_compare == CDB___bam_defcmp && t->bt_prefix != CDB___bam_defpfx) {
94 CDB___db_err(dbp->dbenv,
95 "prefix comparison may not be specified for default comparison routine");
96 return (EINVAL);
97 }
98
99 /* Start up the tree. */
100 return (CDB___bam_read_root(dbp, name, base_pgno, flags));
101 }
102
103 /*
104 * CDB___bam_metachk --
105 *
106 * PUBLIC: int CDB___bam_metachk __P((DB *, const char *, BTMETA *));
107 */
108 int
CDB___bam_metachk(dbp,name,btm)109 CDB___bam_metachk(dbp, name, btm)
110 DB *dbp;
111 const char *name;
112 BTMETA *btm;
113 {
114 DB_ENV *dbenv;
115 u_int32_t vers;
116 int ret;
117
118 dbenv = dbp->dbenv;
119
120 /*
121 * At this point, all we know is that the magic number is for a Btree.
122 * Check the version, the database may be out of date.
123 */
124 vers = btm->dbmeta.version;
125 if (F_ISSET(dbp, DB_AM_SWAP))
126 M_32_SWAP(vers);
127 switch (vers) {
128 case 6:
129 case 7:
130 CDB___db_err(dbenv,
131 "%s: btree version %lu requires a version upgrade",
132 name, (u_long)vers);
133 return (DB_OLD_VERSION);
134 case 8:
135 break;
136 default:
137 CDB___db_err(dbenv,
138 "%s: unsupported btree version: %lu", name, (u_long)vers);
139 return (EINVAL);
140 }
141
142 /* Swap the page if we need to. */
143 if (F_ISSET(dbp, DB_AM_SWAP) && (ret = CDB___bam_mswap((PAGE *)btm)) != 0)
144 return (ret);
145
146 /*
147 * Check application info against metadata info, and set info, flags,
148 * and type based on metadata info.
149 */
150 if ((ret =
151 CDB___db_fchk(dbenv, "DB->open", btm->dbmeta.flags, BTM_MASK)) != 0)
152 return (ret);
153
154 if (F_ISSET(&btm->dbmeta, BTM_RECNO)) {
155 if (dbp->type == DB_BTREE)
156 goto wrong_type;
157 dbp->type = DB_RECNO;
158 DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
159 } else {
160 if (dbp->type == DB_RECNO)
161 goto wrong_type;
162 dbp->type = DB_BTREE;
163 DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
164 }
165
166 if (F_ISSET(&btm->dbmeta, BTM_DUP))
167 F_SET(dbp, DB_AM_DUP);
168 else
169 if (F_ISSET(dbp, DB_AM_DUP)) {
170 CDB___db_err(dbenv,
171 "%s: DB_DUP specified to open method but not set in database",
172 name);
173 return (EINVAL);
174 }
175
176 if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) {
177 if (dbp->type != DB_BTREE)
178 goto wrong_type;
179 F_SET(dbp, DB_BT_RECNUM);
180
181 if ((ret = CDB___db_fcchk(dbenv,
182 "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0)
183 return (ret);
184 } else
185 if (F_ISSET(dbp, DB_BT_RECNUM)) {
186 CDB___db_err(dbenv,
187 "%s: DB_RECNUM specified to open method but not set in database",
188 name);
189 return (EINVAL);
190 }
191
192 if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) {
193 if (dbp->type != DB_RECNO)
194 goto wrong_type;
195 F_SET(dbp, DB_RE_FIXEDLEN);
196 } else
197 if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
198 CDB___db_err(dbenv,
199 "%s: DB_FIXEDLEN specified to open method but not set in database",
200 name);
201 return (EINVAL);
202 }
203
204 if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) {
205 if (dbp->type != DB_RECNO)
206 goto wrong_type;
207 F_SET(dbp, DB_RE_RENUMBER);
208 } else
209 if (F_ISSET(dbp, DB_RE_RENUMBER)) {
210 CDB___db_err(dbenv,
211 "%s: DB_RENUMBER specified to open method but not set in database",
212 name);
213 return (EINVAL);
214 }
215
216 if (F_ISSET(&btm->dbmeta, BTM_SUBDB))
217 F_SET(dbp, DB_AM_SUBDB);
218 else
219 if (F_ISSET(dbp, DB_AM_SUBDB)) {
220 CDB___db_err(dbenv,
221 "%s: multiple databases specified but not supported by file",
222 name);
223 return (EINVAL);
224 }
225
226 if (F_ISSET(&btm->dbmeta, BTM_DUPSORT)) {
227 if (dbp->dup_compare == NULL)
228 dbp->dup_compare = CDB___bam_defcmp;
229 F_SET(dbp, DB_AM_DUPSORT);
230 } else
231 if (dbp->dup_compare != NULL) {
232 CDB___db_err(dbenv,
233 "%s: duplicate sort specified but not supported in database",
234 name);
235 return (EINVAL);
236 }
237
238 /* Set the page size. */
239 dbp->pgsize = btm->dbmeta.pagesize;
240
241 /* Copy the file's ID. */
242 memcpy(dbp->fileid, btm->dbmeta.uid, DB_FILE_ID_LEN);
243
244 return (0);
245
246 wrong_type:
247 if (dbp->type == DB_BTREE)
248 CDB___db_err(dbenv,
249 "open method type is Btree, database type is Recno");
250 else
251 CDB___db_err(dbenv,
252 "open method type is Recno, database type is Btree");
253 return (EINVAL);
254 }
255
256 /*
257 * CDB___bam_read_root --
258 * Check (and optionally create) a tree.
259 *
260 * PUBLIC: int CDB___bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t));
261 */
262 int
CDB___bam_read_root(dbp,name,base_pgno,flags)263 CDB___bam_read_root(dbp, name, base_pgno, flags)
264 DB *dbp;
265 const char *name;
266 db_pgno_t base_pgno;
267 u_int32_t flags;
268 {
269 BTMETA *meta;
270 BTREE *t;
271 DBC *dbc;
272 DB_LSN orig_lsn;
273 DB_LOCK metalock;
274 PAGE *root;
275 int locked, ret, t_ret;
276
277 ret = 0;
278 t = dbp->bt_internal;
279 meta = NULL;
280 root = NULL;
281 locked = 0;
282
283 /*
284 * Get a cursor. If DB_CREATE is specified, we may be creating
285 * the root page, and to do that safely in CDB we need a write
286 * cursor. In STD_LOCKING mode, we'll synchronize using the
287 * meta page lock instead.
288 */
289 if ((ret = dbp->cursor(dbp, dbp->open_txn,
290 &dbc, LF_ISSET(DB_CREATE) && LOCKING(dbp->dbenv) ?
291 DB_WRITECURSOR : 0)) != 0)
292 return (ret);
293
294 /* Get, and optionally create the metadata page. */
295 if ((ret =
296 CDB___db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
297 goto err;
298 if ((ret = CDB_memp_fget(
299 dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0)
300 goto err;
301
302 /*
303 * If the magic number is correct, we're not creating the tree.
304 * Correct any fields that may not be right. Note, all of the
305 * local flags were set by DB->open.
306 */
307 again: if (meta->dbmeta.magic != 0) {
308 t->bt_maxkey = meta->maxkey;
309 t->bt_minkey = meta->minkey;
310 t->re_pad = meta->re_pad;
311 t->re_len = meta->re_len;
312
313 t->bt_meta = base_pgno;
314 t->bt_root = meta->root;
315
316 (void)CDB_memp_fput(dbp->mpf, meta, 0);
317 meta = NULL;
318 goto done;
319 }
320
321 /* In recovery if it's not there it will be created elsewhere.*/
322 if (IS_RECOVERING(dbp->dbenv))
323 goto done;
324
325 /* If we're doing CDB; we now have to get the write lock. */
326 if (LOCKING(dbp->dbenv)) {
327 /*
328 * We'd better have DB_CREATE set if we're actually doing
329 * the create.
330 */
331 DB_ASSERT(LF_ISSET(DB_CREATE));
332 if ((ret = CDB_lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
333 &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
334 goto err;
335 }
336
337 /*
338 * If we are doing locking, relase the read lock and get a write lock.
339 * We want to avoid deadlock.
340 */
341 if (locked == 0 && STD_LOCKING(dbc)) {
342 if ((ret = __LPUT(dbc, metalock)) != 0)
343 goto err;
344 if ((ret = CDB___db_lget(dbc,
345 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
346 goto err;
347 locked = 1;
348 goto again;
349 }
350
351 /* Initialize the tree structure metadata information. */
352 orig_lsn = meta->dbmeta.lsn;
353 memset(meta, 0, sizeof(BTMETA));
354 meta->dbmeta.lsn = orig_lsn;
355 meta->dbmeta.pgno = base_pgno;
356 meta->dbmeta.magic = DB_BTREEMAGIC;
357 meta->dbmeta.version = DB_BTREEVERSION;
358 meta->dbmeta.pagesize = dbp->pgsize;
359 meta->dbmeta.type = P_BTREEMETA;
360 meta->dbmeta.free = PGNO_INVALID;
361 if (F_ISSET(dbp, DB_AM_DUP))
362 F_SET(&meta->dbmeta, BTM_DUP);
363 if (F_ISSET(dbp, DB_RE_FIXEDLEN))
364 F_SET(&meta->dbmeta, BTM_FIXEDLEN);
365 if (F_ISSET(dbp, DB_BT_RECNUM))
366 F_SET(&meta->dbmeta, BTM_RECNUM);
367 if (F_ISSET(dbp, DB_RE_RENUMBER))
368 F_SET(&meta->dbmeta, BTM_RENUMBER);
369 if (F_ISSET(dbp, DB_AM_SUBDB))
370 F_SET(&meta->dbmeta, BTM_SUBDB);
371 if (dbp->dup_compare != NULL)
372 F_SET(&meta->dbmeta, BTM_DUPSORT);
373 if (dbp->type == DB_RECNO)
374 F_SET(&meta->dbmeta, BTM_RECNO);
375 memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
376
377 meta->maxkey = t->bt_maxkey;
378 meta->minkey = t->bt_minkey;
379 meta->re_len = t->re_len;
380 meta->re_pad = t->re_pad;
381
382 /* If necessary, log the meta-data and root page creates. */
383 if ((ret = CDB___db_log_page(dbp,
384 name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0)
385 goto err;
386
387 /* Create and initialize a root page. */
388 if ((ret = CDB___db_new(dbc,
389 ((dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE) | dbp->tags), &root)) != 0)
390 goto err;
391 root->level = LEAFLEVEL;
392
393 if (dbp->open_txn != NULL && (ret = CDB___bam_root_log(dbp->dbenv,
394 dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid,
395 meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0)
396 goto err;
397
398 meta->root = root->pgno;
399
400 DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
401 if ((ret = CDB___db_log_page(dbp,
402 name, &root->lsn, root->pgno, root)) != 0)
403 goto err;
404 DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
405
406 t->bt_meta = base_pgno;
407 t->bt_root = root->pgno;
408
409 /* Release the metadata and root pages. */
410 if ((ret = CDB_memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0)
411 goto err;
412 meta = NULL;
413 if ((ret = CDB_memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
414 goto err;
415 root = NULL;
416
417 /*
418 * Flush the metadata and root pages to disk.
419 *
420 * !!!
421 * It's not useful to return not-yet-flushed here -- convert it to
422 * an error.
423 */
424 if ((ret = CDB_memp_fsync(dbp->mpf)) == DB_INCOMPLETE) {
425 CDB___db_err(dbp->dbenv, "Metapage flush failed");
426 ret = EINVAL;
427 }
428 DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
429
430 done: /*
431 * !!!
432 * We already did an insert and so the last-page-inserted has been
433 * set. I'm not sure where the *right* place to clear this value
434 * is, it's not intuitively obvious that it belongs here.
435 */
436 t->bt_lpgno = PGNO_INVALID;
437
438 err:
439 DB_TEST_RECOVERY_LABEL
440 /* Put any remaining pages back. */
441 if (meta != NULL)
442 if ((t_ret = CDB_memp_fput(dbp->mpf, meta, 0)) != 0 &&
443 ret == 0)
444 ret = t_ret;
445 if (root != NULL)
446 if ((t_ret = CDB_memp_fput(dbp->mpf, root, 0)) != 0 &&
447 ret == 0)
448 ret = t_ret;
449
450 /* We can release the metapage lock when we are done. */
451 (void)__LPUT(dbc, metalock);
452
453 if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
454 ret = t_ret;
455 return (ret);
456 }
457