1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998, 1999, 2000
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9  *	Keith Bostic.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994, 1995
13  *	The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Mike Olson.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 #include "config.h"
44 
45 #ifndef lint
46 static const char revid[] = "$Id: bt_open.c,v 1.4 2000/06/28 16:47:00 loic Exp $";
47 #endif /* not lint */
48 
49 #ifndef NO_SYSTEM_INCLUDES
50 #include <sys/types.h>
51 
52 #include <errno.h>
53 #include <limits.h>
54 #include <string.h>
55 #endif
56 
57 #include "db_int.h"
58 #include "db_page.h"
59 #include "db_swap.h"
60 #include "btree.h"
61 #include "db_shash.h"
62 #include "lock.h"
63 #include "log.h"
64 #include "mp.h"
65 
66 /*
67  * CDB___bam_open --
68  *	Open a btree.
69  *
70  * PUBLIC: int CDB___bam_open __P((DB *, const char *, db_pgno_t, u_int32_t));
71  */
72 int
CDB___bam_open(dbp,name,base_pgno,flags)73 CDB___bam_open(dbp, name, base_pgno, flags)
74 	DB *dbp;
75 	const char *name;
76 	db_pgno_t base_pgno;
77 	u_int32_t flags;
78 {
79 	BTREE *t;
80 
81 	t = dbp->bt_internal;
82 
83 	/* Initialize the remaining fields/methods of the DB. */
84 	dbp->del = CDB___bam_delete;
85 	dbp->key_range = CDB___bam_key_range;
86 	dbp->stat = CDB___bam_stat;
87 
88 	/*
89 	 * We don't permit the user to specify a prefix routine if they didn't
90 	 * also specify a comparison routine, they can't know enough about our
91 	 * comparison routine to get it right.
92 	 */
93 	if (t->bt_compare == CDB___bam_defcmp && t->bt_prefix != CDB___bam_defpfx) {
94 		CDB___db_err(dbp->dbenv,
95 "prefix comparison may not be specified for default comparison routine");
96 		return (EINVAL);
97 	}
98 
99 	/* Start up the tree. */
100 	return (CDB___bam_read_root(dbp, name, base_pgno, flags));
101 }
102 
103 /*
104  * CDB___bam_metachk --
105  *
106  * PUBLIC: int CDB___bam_metachk __P((DB *, const char *, BTMETA *));
107  */
108 int
CDB___bam_metachk(dbp,name,btm)109 CDB___bam_metachk(dbp, name, btm)
110 	DB *dbp;
111 	const char *name;
112 	BTMETA *btm;
113 {
114 	DB_ENV *dbenv;
115 	u_int32_t vers;
116 	int ret;
117 
118 	dbenv = dbp->dbenv;
119 
120 	/*
121 	 * At this point, all we know is that the magic number is for a Btree.
122 	 * Check the version, the database may be out of date.
123 	 */
124 	vers = btm->dbmeta.version;
125 	if (F_ISSET(dbp, DB_AM_SWAP))
126 		M_32_SWAP(vers);
127 	switch (vers) {
128 	case 6:
129 	case 7:
130 		CDB___db_err(dbenv,
131 		    "%s: btree version %lu requires a version upgrade",
132 		    name, (u_long)vers);
133 		return (DB_OLD_VERSION);
134 	case 8:
135 		break;
136 	default:
137 		CDB___db_err(dbenv,
138 		    "%s: unsupported btree version: %lu", name, (u_long)vers);
139 		return (EINVAL);
140 	}
141 
142 	/* Swap the page if we need to. */
143 	if (F_ISSET(dbp, DB_AM_SWAP) && (ret = CDB___bam_mswap((PAGE *)btm)) != 0)
144 		return (ret);
145 
146 	/*
147 	 * Check application info against metadata info, and set info, flags,
148 	 * and type based on metadata info.
149 	 */
150 	if ((ret =
151 	    CDB___db_fchk(dbenv, "DB->open", btm->dbmeta.flags, BTM_MASK)) != 0)
152 		return (ret);
153 
154 	if (F_ISSET(&btm->dbmeta, BTM_RECNO)) {
155 		if (dbp->type == DB_BTREE)
156 			goto wrong_type;
157 		dbp->type = DB_RECNO;
158 		DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
159 	} else {
160 		if (dbp->type == DB_RECNO)
161 			goto wrong_type;
162 		dbp->type = DB_BTREE;
163 		DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
164 	}
165 
166 	if (F_ISSET(&btm->dbmeta, BTM_DUP))
167 		F_SET(dbp, DB_AM_DUP);
168 	else
169 		if (F_ISSET(dbp, DB_AM_DUP)) {
170 			CDB___db_err(dbenv,
171 		"%s: DB_DUP specified to open method but not set in database",
172 			    name);
173 			return (EINVAL);
174 		}
175 
176 	if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) {
177 		if (dbp->type != DB_BTREE)
178 			goto wrong_type;
179 		F_SET(dbp, DB_BT_RECNUM);
180 
181 		if ((ret = CDB___db_fcchk(dbenv,
182 		    "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0)
183 			return (ret);
184 	} else
185 		if (F_ISSET(dbp, DB_BT_RECNUM)) {
186 			CDB___db_err(dbenv,
187 	    "%s: DB_RECNUM specified to open method but not set in database",
188 			    name);
189 			return (EINVAL);
190 		}
191 
192 	if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) {
193 		if (dbp->type != DB_RECNO)
194 			goto wrong_type;
195 		F_SET(dbp, DB_RE_FIXEDLEN);
196 	} else
197 		if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
198 			CDB___db_err(dbenv,
199 	"%s: DB_FIXEDLEN specified to open method but not set in database",
200 			    name);
201 			return (EINVAL);
202 		}
203 
204 	if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) {
205 		if (dbp->type != DB_RECNO)
206 			goto wrong_type;
207 		F_SET(dbp, DB_RE_RENUMBER);
208 	} else
209 		if (F_ISSET(dbp, DB_RE_RENUMBER)) {
210 			CDB___db_err(dbenv,
211 	    "%s: DB_RENUMBER specified to open method but not set in database",
212 			    name);
213 			return (EINVAL);
214 		}
215 
216 	if (F_ISSET(&btm->dbmeta, BTM_SUBDB))
217 		F_SET(dbp, DB_AM_SUBDB);
218 	else
219 		if (F_ISSET(dbp, DB_AM_SUBDB)) {
220 			CDB___db_err(dbenv,
221 	    "%s: multiple databases specified but not supported by file",
222 			    name);
223 			return (EINVAL);
224 		}
225 
226 	if (F_ISSET(&btm->dbmeta, BTM_DUPSORT)) {
227 		if (dbp->dup_compare == NULL)
228 			dbp->dup_compare = CDB___bam_defcmp;
229 		F_SET(dbp, DB_AM_DUPSORT);
230 	} else
231 		if (dbp->dup_compare != NULL) {
232 			CDB___db_err(dbenv,
233 		"%s: duplicate sort specified but not supported in database",
234 			    name);
235 			return (EINVAL);
236 		}
237 
238 	/* Set the page size. */
239 	dbp->pgsize = btm->dbmeta.pagesize;
240 
241 	/* Copy the file's ID. */
242 	memcpy(dbp->fileid, btm->dbmeta.uid, DB_FILE_ID_LEN);
243 
244 	return (0);
245 
246 wrong_type:
247 	if (dbp->type == DB_BTREE)
248 		CDB___db_err(dbenv,
249 		    "open method type is Btree, database type is Recno");
250 	else
251 		CDB___db_err(dbenv,
252 		    "open method type is Recno, database type is Btree");
253 	return (EINVAL);
254 }
255 
256 /*
257  * CDB___bam_read_root --
258  *	Check (and optionally create) a tree.
259  *
260  * PUBLIC: int CDB___bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t));
261  */
262 int
CDB___bam_read_root(dbp,name,base_pgno,flags)263 CDB___bam_read_root(dbp, name, base_pgno, flags)
264 	DB *dbp;
265 	const char *name;
266 	db_pgno_t base_pgno;
267 	u_int32_t flags;
268 {
269 	BTMETA *meta;
270 	BTREE *t;
271 	DBC *dbc;
272 	DB_LSN orig_lsn;
273 	DB_LOCK metalock;
274 	PAGE *root;
275 	int locked, ret, t_ret;
276 
277 	ret = 0;
278 	t = dbp->bt_internal;
279 	meta = NULL;
280 	root = NULL;
281 	locked = 0;
282 
283 	/*
284 	 * Get a cursor.  If DB_CREATE is specified, we may be creating
285 	 * the root page, and to do that safely in CDB we need a write
286 	 * cursor.  In STD_LOCKING mode, we'll synchronize using the
287 	 * meta page lock instead.
288 	 */
289 	if ((ret = dbp->cursor(dbp, dbp->open_txn,
290 	    &dbc, LF_ISSET(DB_CREATE) && LOCKING(dbp->dbenv) ?
291 	    DB_WRITECURSOR : 0)) != 0)
292 		return (ret);
293 
294 	/* Get, and optionally create the metadata page. */
295 	if ((ret =
296 	    CDB___db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
297 		goto err;
298 	if ((ret = CDB_memp_fget(
299 	    dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0)
300 		goto err;
301 
302 	/*
303 	 * If the magic number is correct, we're not creating the tree.
304 	 * Correct any fields that may not be right.  Note, all of the
305 	 * local flags were set by DB->open.
306 	 */
307 again:	if (meta->dbmeta.magic != 0) {
308 		t->bt_maxkey = meta->maxkey;
309 		t->bt_minkey = meta->minkey;
310 		t->re_pad = meta->re_pad;
311 		t->re_len = meta->re_len;
312 
313 		t->bt_meta = base_pgno;
314 		t->bt_root = meta->root;
315 
316 		(void)CDB_memp_fput(dbp->mpf, meta, 0);
317 		meta = NULL;
318 		goto done;
319 	}
320 
321 	/* In recovery if it's not there it will be created elsewhere.*/
322 	if (IS_RECOVERING(dbp->dbenv))
323 		goto done;
324 
325 	/* If we're doing CDB; we now have to get the write lock. */
326 	if (LOCKING(dbp->dbenv)) {
327 		/*
328 		 * We'd better have DB_CREATE set if we're actually doing
329 		 * the create.
330 		 */
331 		DB_ASSERT(LF_ISSET(DB_CREATE));
332 	    	if ((ret = CDB_lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
333 	    	    &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
334 			goto err;
335 	}
336 
337 	/*
338 	 * If we are doing locking, relase the read lock and get a write lock.
339 	 * We want to avoid deadlock.
340 	 */
341 	if (locked == 0 && STD_LOCKING(dbc)) {
342 		if ((ret = __LPUT(dbc, metalock)) != 0)
343 			goto err;
344 		if ((ret = CDB___db_lget(dbc,
345 		     0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
346 			goto err;
347 		locked = 1;
348 		goto again;
349 	}
350 
351 	/* Initialize the tree structure metadata information. */
352 	orig_lsn = meta->dbmeta.lsn;
353 	memset(meta, 0, sizeof(BTMETA));
354 	meta->dbmeta.lsn = orig_lsn;
355 	meta->dbmeta.pgno = base_pgno;
356 	meta->dbmeta.magic = DB_BTREEMAGIC;
357 	meta->dbmeta.version = DB_BTREEVERSION;
358 	meta->dbmeta.pagesize = dbp->pgsize;
359 	meta->dbmeta.type = P_BTREEMETA;
360 	meta->dbmeta.free = PGNO_INVALID;
361 	if (F_ISSET(dbp, DB_AM_DUP))
362 		F_SET(&meta->dbmeta, BTM_DUP);
363 	if (F_ISSET(dbp, DB_RE_FIXEDLEN))
364 		F_SET(&meta->dbmeta, BTM_FIXEDLEN);
365 	if (F_ISSET(dbp, DB_BT_RECNUM))
366 		F_SET(&meta->dbmeta, BTM_RECNUM);
367 	if (F_ISSET(dbp, DB_RE_RENUMBER))
368 		F_SET(&meta->dbmeta, BTM_RENUMBER);
369 	if (F_ISSET(dbp, DB_AM_SUBDB))
370 		F_SET(&meta->dbmeta, BTM_SUBDB);
371 	if (dbp->dup_compare != NULL)
372 		F_SET(&meta->dbmeta, BTM_DUPSORT);
373 	if (dbp->type == DB_RECNO)
374 		F_SET(&meta->dbmeta, BTM_RECNO);
375 	memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
376 
377 	meta->maxkey = t->bt_maxkey;
378 	meta->minkey = t->bt_minkey;
379 	meta->re_len = t->re_len;
380 	meta->re_pad = t->re_pad;
381 
382 	/* If necessary, log the meta-data and root page creates.  */
383 	if ((ret = CDB___db_log_page(dbp,
384 	    name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0)
385 		goto err;
386 
387 	/* Create and initialize a root page. */
388 	if ((ret = CDB___db_new(dbc,
389 	    ((dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE) | dbp->tags), &root)) != 0)
390 		goto err;
391 	root->level = LEAFLEVEL;
392 
393 	if (dbp->open_txn != NULL && (ret = CDB___bam_root_log(dbp->dbenv,
394 	    dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid,
395 	    meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0)
396 		goto err;
397 
398 	meta->root = root->pgno;
399 
400 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
401 	if ((ret = CDB___db_log_page(dbp,
402 	    name, &root->lsn, root->pgno, root)) != 0)
403 		goto err;
404 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
405 
406 	t->bt_meta = base_pgno;
407 	t->bt_root = root->pgno;
408 
409 	/* Release the metadata and root pages. */
410 	if ((ret = CDB_memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0)
411 		goto err;
412 	meta = NULL;
413 	if ((ret = CDB_memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
414 		goto err;
415 	root = NULL;
416 
417 	/*
418 	 * Flush the metadata and root pages to disk.
419 	 *
420 	 * !!!
421 	 * It's not useful to return not-yet-flushed here -- convert it to
422 	 * an error.
423 	 */
424 	if ((ret = CDB_memp_fsync(dbp->mpf)) == DB_INCOMPLETE) {
425 		CDB___db_err(dbp->dbenv, "Metapage flush failed");
426 		ret = EINVAL;
427 	}
428 	DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
429 
430 done:	/*
431 	 * !!!
432 	 * We already did an insert and so the last-page-inserted has been
433 	 * set.  I'm not sure where the *right* place to clear this value
434 	 * is, it's not intuitively obvious that it belongs here.
435 	 */
436 	t->bt_lpgno = PGNO_INVALID;
437 
438 err:
439 DB_TEST_RECOVERY_LABEL
440 	/* Put any remaining pages back. */
441 	if (meta != NULL)
442 		if ((t_ret = CDB_memp_fput(dbp->mpf, meta, 0)) != 0 &&
443 		    ret == 0)
444 			ret = t_ret;
445 	if (root != NULL)
446 		if ((t_ret = CDB_memp_fput(dbp->mpf, root, 0)) != 0 &&
447 		    ret == 0)
448 			ret = t_ret;
449 
450 	/* We can release the metapage lock when we are done. */
451 	(void)__LPUT(dbc, metalock);
452 
453 	if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
454 		ret = t_ret;
455 	return (ret);
456 }
457