1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  */
6 /*
7  * Copyright (c) 1990, 1993, 1994, 1995, 1996
8  *	Keith Bostic.  All rights reserved.
9  */
10 /*
11  * Copyright (c) 1990, 1993, 1994, 1995
12  *	The Regents of the University of California.  All rights reserved.
13  *
14  * This code is derived from software contributed to Berkeley by
15  * Mike Olson.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  * 3. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  * $Id$
42  */
43 #ifndef	_DB_BTREE_H_
44 #define	_DB_BTREE_H_
45 
46 #if defined(__cplusplus)
47 extern "C" {
48 #endif
49 
50 /* Forward structure declarations. */
51 struct __btree;		typedef struct __btree BTREE;
52 struct __cursor;	typedef struct __cursor BTREE_CURSOR;
53 struct __epg;		typedef struct __epg EPG;
54 
55 #define	DEFMINKEYPAGE	 (2)
56 
57 /*
58  * A recno order of 0 indicates that we don't have an order, not that we've
59  * an order less than 1.
60  */
61 #define	INVALID_ORDER	0
62 
63 #define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
64 #define	ISLEAF(p)	(TYPE(p) == P_LBTREE ||				\
65 			    TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP)
66 
67 /* Flags for __bam_cadjust_log(). */
68 #define	CAD_UPDATEROOT	0x01		/* Root page count was updated. */
69 
70 /* Flags for __bam_split_log(). */
71 #define	SPL_NRECS	0x01		/* Split tree has record count. */
72 #define	SPL_RECNO	0x02		/* This is a Recno cursor. */
73 
74 /* Flags for __bam_iitem(). */
75 #define	BI_DELETED	0x01		/* Key/data pair only placeholder. */
76 
77 /* Flags for __bam_stkrel(). */
78 #define	STK_CLRDBC	0x01		/* Clear dbc->page reference. */
79 #define	STK_NOLOCK	0x02		/* Don't retain locks. */
80 #define	STK_PGONLY	0x04
81 
82 /* Flags for __ram_ca(). These get logged, so make the values explicit. */
83 typedef enum {
84 	CA_DELETE = 0,			/* Delete the current record. */
85 	CA_IAFTER = 1,			/* Insert before the current record. */
86 	CA_IBEFORE = 2,			/* Insert after the current record. */
87 	CA_ICURRENT = 3			/* Overwrite the current record. */
88 } ca_recno_arg;
89 
90 /*
91  * Flags for __bam_search() and __bam_rsearch().
92  *
93  * Note, internal page searches must find the largest record less than key in
94  * the tree so that descents work.  Leaf page searches must find the smallest
95  * record greater than key so that the returned index is the record's correct
96  * position for insertion.
97  *
98  * The flags parameter to the search routines describes three aspects of the
99  * search: the type of locking required (including if we're locking a pair of
100  * pages), the item to return in the presence of duplicates and whether or not
101  * to return deleted entries.  To simplify both the mnemonic representation
102  * and the code that checks for various cases, we construct a set of bitmasks.
103  */
104 #define	SR_READ		0x00001		/* Read locks. */
105 #define	SR_WRITE	0x00002		/* Write locks. */
106 
107 #define	SR_APPEND	0x00040		/* Append to the tree. */
108 #define	SR_DELNO	0x00080		/* Don't return deleted items. */
109 #define	SR_DUPFIRST	0x00100		/* Return first duplicate. */
110 #define	SR_DUPLAST	0x00200		/* Return last duplicate. */
111 #define	SR_EXACT	0x00400		/* Exact items only. */
112 #define	SR_PARENT	0x00800		/* Lock page pair. */
113 #define	SR_STACK	0x01000		/* Need a complete stack. */
114 #define	SR_PAST_EOF	0x02000		/* If doing insert search (or keyfirst
115 					 * or keylast operations), or a split
116 					 * on behalf of an insert, it's okay to
117 					 * return an entry one past end-of-page.
118 					 */
119 #define	SR_STK_ONLY	0x04000		/* Just return info in the stack */
120 #define	SR_MAX		0x08000		/* Get the right most key */
121 #define	SR_MIN		0x10000		/* Get the left most key */
122 #define	SR_NEXT		0x20000		/* Get the page after this key */
123 #define	SR_DEL		0x40000		/* Get the tree to delete this key. */
124 #define	SR_START	0x80000		/* Level to start stack. */
125 #define	SR_BOTH		0x100000	/* Get this and the NEXT page */
126 
127 #define	SR_DELETE							\
128 	(SR_WRITE | SR_DUPFIRST | SR_DELNO | SR_EXACT | SR_STACK)
129 #define	SR_FIND		(SR_READ | SR_DUPFIRST | SR_DELNO)
130 #define	SR_FIND_WR	(SR_WRITE | SR_DUPFIRST | SR_DELNO)
131 #define	SR_INSERT	(SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK)
132 #define	SR_KEYFIRST	(SR_WRITE | SR_DUPFIRST | SR_PAST_EOF | SR_STACK)
133 #define	SR_KEYLAST	(SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_STACK)
134 #define	SR_WRPAIR	(SR_WRITE | SR_DUPLAST | SR_PAST_EOF | SR_PARENT)
135 
136 /*
137  * Various routines pass around page references.  A page reference is
138  * a pointer to the page, and the indx indicates an item on the page.
139  * Each page reference may include a lock.
140  */
141 struct __epg {
142 	PAGE	     *page;		/* The page. */
143 	db_indx_t     indx;		/* The index on the page. */
144 	db_indx_t     entries;		/* The number of entries on page */
145 	DB_LOCK	      lock;		/* The page's lock. */
146 	db_lockmode_t lock_mode;	/* The lock mode. */
147 };
148 
149 /*
150  * We maintain a stack of the pages that we're locking in the tree.  Grow
151  * the stack as necessary.
152  *
153  * XXX
154  * Temporary fix for #3243 -- clear the page and lock from the stack entry.
155  * The correct fix is to never release a stack that doesn't hold items.
156  */
157 #define	BT_STK_CLR(c) do {						\
158 	(c)->csp = (c)->sp;						\
159 	(c)->csp->page = NULL;						\
160 	LOCK_INIT((c)->csp->lock);					\
161 } while (0)
162 
163 #define	BT_STK_ENTER(env, c, pagep, page_indx, l, mode, ret) do {	\
164 	if ((ret = ((c)->csp == (c)->esp ?				\
165 	    __bam_stkgrow(env, c) : 0)) == 0) {				\
166 		(c)->csp->page = pagep;					\
167 		(c)->csp->indx = (page_indx);				\
168 		(c)->csp->entries = NUM_ENT(pagep);			\
169 		(c)->csp->lock = l;					\
170 		(c)->csp->lock_mode = mode;				\
171 	}								\
172 } while (0)
173 
174 #define	BT_STK_PUSH(env, c, pagep, page_indx, lock, mode, ret) do {	\
175 	BT_STK_ENTER(env, c, pagep, page_indx, lock, mode, ret);	\
176 	++(c)->csp;							\
177 } while (0)
178 
179 #define	BT_STK_NUM(env, c, pagep, page_indx, ret) do {		\
180 	if ((ret = ((c)->csp ==						\
181 	    (c)->esp ? __bam_stkgrow(env, c) : 0)) == 0) {		\
182 		(c)->csp->page = NULL;					\
183 		(c)->csp->indx = (page_indx);				\
184 		(c)->csp->entries = NUM_ENT(pagep);			\
185 		LOCK_INIT((c)->csp->lock);				\
186 		(c)->csp->lock_mode = DB_LOCK_NG;			\
187 	}								\
188 } while (0)
189 
190 #define	BT_STK_NUMPUSH(env, c, pagep, page_indx, ret) do {		\
191 	BT_STK_NUM(env, cp, pagep, page_indx, ret);			\
192 	++(c)->csp;							\
193 } while (0)
194 
195 #define	BT_STK_POP(c)							\
196 	((c)->csp == (c)->sp ? NULL : --(c)->csp)
197 
198 /*
199  * Flags for __bam_dpages.
200  */
201 #define	BTD_UPDATE	0x0001		/* Update parents. */
202 #define	BTD_RELINK	0x0002		/* Relink leaf pages. */
203 
204 /*
205  * TRY_LOCK
206  *	When holding a stack we have pages latched but not locked so
207  * we must avoid an undetectable deadlock by not then blocking on a
208  * lock.
209  */
210 #define	TRY_LOCK(dbc, pgno, saved_pgno, saved_lock, lock_mode, label) \
211 	TRY_LOCK2(dbc, NULL, pgno, saved_pgno, saved_lock, lock_mode, label)
212 /*
213  * TRY_LOCK2
214  *	This is a special call for __bam_compact_int which uses 2
215  * overlapping stacks.
216  */
217 
218 #ifdef BTREE_DEBUG
219 #define	TRY_LOCK2(dbc, ndbc, pgno,					\
220     saved_pgno, saved_lock, lock_mode, label) do {			\
221 	static int BTcount = 0;						\
222 	if ((pgno) != (saved_pgno) &&					\
223 	    ((BTcount++ % 5) == 0 ||					\
224 	    (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno,		\
225 	    lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0)) {		\
226 		if (ret != 0 && ret != DB_LOCK_NOTGRANTED &&		\
227 		     ret != DB_LOCK_DEADLOCK)				\
228 			break;						\
229 		if ((ndbc) != NULL) {					\
230 			BTREE_CURSOR *__cp;				\
231 			__cp = (BTREE_CURSOR *) (dbc)->internal;	\
232 			__cp->sp->page = NULL;				\
233 			LOCK_INIT(__cp->sp->lock);			\
234 			if ((ret = __bam_stkrel(ndbc, 0)) != 0)		\
235 				break;					\
236 		}							\
237 		if ((ret = __bam_stkrel(dbc, 0)) != 0)			\
238 			break;						\
239 		if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno,	\
240 		    lock_mode, 0, &(saved_lock))) != 0)			\
241 			break;						\
242 		saved_pgno = pgno;					\
243 		goto label;						\
244 	}								\
245 	saved_pgno = pgno;						\
246 } while (0)
247 #else
248 #define	TRY_LOCK2(dbc, ndbc, pgno,					\
249     saved_pgno, saved_lock, lock_mode, label) do {			\
250 	if ((pgno) != (saved_pgno) &&					\
251 	    (ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno,		\
252 	    lock_mode, DB_LOCK_NOWAIT, &(saved_lock))) != 0) {		\
253 		if (ret != DB_LOCK_NOTGRANTED &&			\
254 		     ret != DB_LOCK_DEADLOCK)				\
255 			break;						\
256 		if ((ndbc) != NULL) {					\
257 			BTREE_CURSOR *__cp;				\
258 			__cp = (BTREE_CURSOR *) (dbc)->internal;	\
259 			__cp->sp->page = NULL;				\
260 			LOCK_INIT(__cp->sp->lock);			\
261 			if ((ret = __bam_stkrel(ndbc, 0)) != 0)		\
262 				break;					\
263 		}							\
264 		if ((ret = __bam_stkrel(dbc, 0)) != 0)			\
265 			break;						\
266 		if ((ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, pgno,	\
267 		    lock_mode, 0, &(saved_lock))) != 0)	\
268 			break;						\
269 		saved_pgno = pgno;					\
270 		goto label;						\
271 	}								\
272 	saved_pgno = pgno;						\
273 } while (0)
274 #endif
275 
276 /* Btree/Recno cursor. */
277 struct __cursor {
278 	/* struct __dbc_internal */
279 	__DBC_INTERNAL
280 
281 	/* btree private part */
282 	EPG		*sp;		/* Stack pointer. */
283 	EPG		*csp;		/* Current stack entry. */
284 	EPG		*esp;		/* End stack pointer. */
285 	EPG		 stack[5];
286 
287 	db_indx_t	 ovflsize;	/* Maximum key/data on-page size. */
288 
289 	db_recno_t	 recno;		/* Current record number. */
290 	u_int32_t	 order;		/* Relative order among deleted curs. */
291 
292 #ifdef HAVE_COMPRESSION
293 	/*
294 	 * Compression:
295 	 *
296 	 * We need to hold the current compressed chunk, as well as the previous
297 	 * key/data, in order to decompress the next key/data. We do that by
298 	 * swapping whether prevKey/Data and currentKey/Data point to
299 	 * key1/data1, or key2/data2.
300 	 *
301 	 * We store prevcursor in order to be able to perform one level of
302 	 * DB_PREV by returning prevKey/prevData. We need prev2cursor to more
303 	 * efficiently do a subsequent DB_PREV with a linear search from the
304 	 * beginning of the compressed chunk.
305 	 *
306 	 * When we delete entries, we set the cursor to point to the next entry
307 	 * after the last deleted key, and set C_COMPRESS_DELETED. The del_key
308 	 * DBT holds the key of the deleted entry supposedly pointed to by a
309 	 * compressed cursor, and is used to implement DB_PREV_DUP,
310 	 * DB_PREV_NODUP, DB_NEXT_DUP, and DB_NEXT_NODUP on a deleted entry.
311 	 */
312 	DBT		 compressed;	/* Current compressed chunk */
313 	DBT		 key1;		/* Holds prevKey or currentKey */
314 	DBT		 key2;		/* Holds prevKey or currentKey */
315 	DBT		 data1;		/* Holds prevData or currentData */
316 	DBT		 data2;		/* Holds prevData or currentData */
317 	DBT		 del_key;	/* Holds key from the deleted entry */
318 	DBT		 del_data;	/* Holds data from the deleted entry */
319 	DBT		*prevKey;	/* Previous key decompressed */
320 	DBT		*prevData;	/* Previous data decompressed */
321 	DBT		*currentKey;	/* Current key decompressed */
322 	DBT		*currentData;	/* Current data decompressed */
323 	u_int8_t	*compcursor;	/* Current position in compressed */
324 	u_int8_t	*compend;	/* End of compressed */
325 	u_int8_t	*prevcursor;	/* Previous current position */
326 	u_int8_t	*prev2cursor;	/* Previous previous current position */
327 #endif
328 
329 	/*
330 	 * Btree:
331 	 * We set a flag in the cursor structure if the underlying object has
332 	 * been deleted.  It's not strictly necessary, we could get the same
333 	 * information by looking at the page itself, but this method doesn't
334 	 * require us to retrieve the page on cursor delete.
335 	 *
336 	 * Recno:
337 	 * When renumbering recno databases during deletes, cursors referencing
338 	 * "deleted" records end up positioned between two records, and so must
339 	 * be specially adjusted on the next operation.
340 	 */
341 #define	C_DELETED		0x0001	/* Record was deleted. */
342 	/*
343 	 * There are three tree types that require maintaining record numbers.
344 	 * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set,
345 	 * and Btree off-page duplicate trees.
346 	 */
347 #define	C_RECNUM		0x0002	/* Tree requires record counts. */
348 	/*
349 	 * Recno trees have immutable record numbers by default, but optionally
350 	 * support mutable record numbers.  Off-page duplicate Recno trees have
351 	 * mutable record numbers.  All Btrees with record numbers (including
352 	 * off-page duplicate trees) are mutable by design, no flag is needed.
353 	 */
354 #define	C_RENUMBER		0x0004	/* Tree records are mutable. */
355 	/*
356 	 * The current compressed key/data could be deleted, as well as the
357 	 * key/data that the underlying BTree cursor points to.
358 	 */
359 #define	C_COMPRESS_DELETED	0x0008	/* Compressed record was deleted. */
360 	/*
361 	 * The current compressed chunk has been modified by another DBC. A
362 	 * compressed cursor will have to seek it's position again if necessary
363 	 * when it is next accessed.
364 	 */
365 #define	C_COMPRESS_MODIFIED	0x0010	/* Compressed record was modified. */
366 	u_int32_t	 flags;
367 };
368 
369 /*
370  * Threshhold value, as a function of bt_minkey, of the number of
371  * bytes a key/data pair can use before being placed on an overflow
372  * page.  Assume every item requires the maximum alignment for
373  * padding, out of sheer paranoia.
374  */
375 #define	B_MINKEY_TO_OVFLSIZE(dbp, minkey, pgsize)			\
376 	((u_int16_t)(((pgsize) - P_OVERHEAD(dbp)) / ((minkey) * P_INDX) -\
377 	    (BKEYDATA_PSIZE(0) + DB_ALIGN(1, sizeof(int32_t)))))
378 
379 /*
380  * The maximum space that a single item can ever take up on one page.
381  * Used by __bam_split to determine whether a split is still necessary.
382  */
383 #define	B_MAX(a,b)	(((a) > (b)) ? (a) : (b))
384 #define	B_MAXSIZEONPAGE(ovflsize)					\
385 	(B_MAX(BOVERFLOW_PSIZE, BKEYDATA_PSIZE(ovflsize)))
386 
387 /*
388  * BAM_GET_ROOT --
389  *	This macro is used to isolate the fact that the root page of
390  * a subdatabase may move if DB->compact is called on it.
391  * The dbp->mpf->mfp->revision will be incremented every time
392  * a subdatabase root or meta page moves.  If this is the case then
393  * we must call __db_reopen to read the master database to find it.
394  * We leave the loop only by breaking out if we do not have a subdb
395  * or we are sure the have the right revision.
396  *
397  * It must be guaranteed that we cannot read an old root pgno and a
398  * current revision number.  We note that the global revision number
399  * and DB handle information are only updated while holding the latches
400  * and locks of the master database pages.
401  * If another thread is synchronizing the DB handle with the master
402  * database it will exclusively latch both the old and new pages so we will
403  * synchronize on that.
404  */
405 #define BAM_GET_ROOT(dbc, root_pgno, 					\
406 	     page, get_mode, lock_mode, lock, ret) do {			\
407 	BTREE *__t = (dbc)->dbp->bt_internal;				\
408 	BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal;		\
409 	db_pgno_t __root;						\
410 	u_int32_t __rev = 0;						\
411 	if ((root_pgno) == PGNO_INVALID) {				\
412 		if (__cp->root == PGNO_INVALID) {			\
413 			__root = __t->bt_root;				\
414 			__rev = __t->revision;				\
415 		} else 							\
416 			__root = root_pgno = __cp->root;		\
417 	} else								\
418 		__root = root_pgno;					\
419 	if (STD_LOCKING(dbc) &&						\
420 	    ((lock_mode) == DB_LOCK_WRITE || F_ISSET(dbc, DBC_DOWNREV)	\
421 	    || dbc->dbtype == DB_RECNO || F_ISSET(__cp, C_RECNUM)) &&	\
422 	    (ret =							\
423 	    __db_lget(dbc, 0, __root, lock_mode, 0, &(lock))) != 0)	\
424 		break;							\
425 	if ((ret = __memp_fget((dbc)->dbp->mpf, &__root,		\
426 	     (dbc)->thread_info, dbc->txn, get_mode, &page)) == 0) {	\
427 		if (__root == root_pgno)				\
428 			break;						\
429 		if (F_ISSET(dbc, DBC_OPD) ||				\
430 		    !F_ISSET((dbc)->dbp, DB_AM_SUBDB) ||		\
431 		     (__t->bt_root == __root &&				\
432 		     (LEVEL(page) == LEAFLEVEL || TYPE(page) == 	\
433 		     (dbc->dbtype == DB_BTREE ? P_IBTREE : P_IRECNO)) &&\
434 		     __rev == (dbc)->dbp->mpf->mfp->revision)) {	\
435 			root_pgno = __root;				\
436 			break;						\
437 		}							\
438 		if ((ret = __memp_fput((dbc)->dbp->mpf, 		\
439 		     (dbc)->thread_info, page, (dbc)->priority)) != 0)	\
440 			break;						\
441 	} else if (ret != DB_PAGE_NOTFOUND)				\
442 		break;							\
443 	if ((ret = __LPUT(dbc, lock)) != 0)				\
444 		break;							\
445 	if ((ret = __db_reopen(dbc)) != 0)				\
446 		break;							\
447 } while (1)
448 
449 /*
450  * Return the root of this tree. If this is an off page duplicate tree
451  * then its in the cursor, otherwise we must look in the db handle.
452  */
453 #define BAM_ROOT_PGNO(dbc)						\
454 	(((BTREE_CURSOR *)(dbc)->internal)->root == PGNO_INVALID ?	\
455 	    ((BTREE*)(dbc)->dbp->bt_internal)->bt_root :		\
456 	    ((BTREE_CURSOR *)(dbc)->internal)->root)
457 
458 
459 
460 /*
461  * The in-memory, per-tree btree/recno data structure.
462  */
463 struct __btree {			/* Btree access method. */
464 	/*
465 	 * These fields may change if this is a subdatabase and
466 	 * it gets compacted.
467 	 */
468 	db_pgno_t bt_meta;		/* Database meta-data page. */
469 	db_pgno_t bt_root;		/* Database root page. */
470 	u_int32_t revision;		/* Revision of root/meta. */
471 
472 	u_int32_t bt_minkey;		/* Minimum keys per page. */
473 
474 					/* Btree comparison function. */
475 	int (*bt_compare) __P((DB *, const DBT *, const DBT *));
476 					/* Btree prefix function. */
477 	size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *));
478 					/* Btree compress function. */
479 #ifdef HAVE_COMPRESSION
480 	int (*bt_compress) __P((DB *, const DBT *, const DBT *, const DBT *,
481 				       const DBT *, DBT *));
482 					/* Btree decompress function. */
483 	int (*bt_decompress) __P((DB *, const DBT *, const DBT *, DBT *, DBT *,
484 					 DBT *));
485 					/* dup_compare for compression */
486 	int (*compress_dup_compare) __P((DB *, const DBT *, const DBT *));
487 #endif
488 
489 					/* Recno access method. */
490 	int	  re_pad;		/* Fixed-length padding byte. */
491 	int	  re_delim;		/* Variable-length delimiting byte. */
492 	u_int32_t re_len;		/* Length for fixed-length records. */
493 	char	 *re_source;		/* Source file name. */
494 
495 	/*
496 	 * !!!
497 	 * The bt_lpgno field is NOT protected by any mutex, and for this
498 	 * reason must be advisory only, so, while it is read/written by
499 	 * multiple threads, DB is completely indifferent to the quality
500 	 * of its information.
501 	 */
502 	db_pgno_t bt_lpgno;		/* Last insert location. */
503 	DB_LSN	  bt_llsn;		/* Last insert LSN. */
504 
505 	/*
506 	 * !!!
507 	 * The re_modified field is NOT protected by any mutex, and for this
508 	 * reason cannot be anything more complicated than a zero/non-zero
509 	 * value.  The actual writing of the backing source file cannot be
510 	 * threaded, so clearing the flag isn't a problem.
511 	 */
512 	int	  re_modified;		/* If the tree was modified. */
513 
514 	/*
515 	 * !!!
516 	 * These fields are ignored as far as multi-threading is concerned.
517 	 * There are no transaction semantics associated with backing files,
518 	 * nor is there any thread protection.
519 	 */
520 	FILE		*re_fp;		/* Source file handle. */
521 	int		 re_eof;	/* Backing source file EOF reached. */
522 	db_recno_t	 re_last;	/* Last record number read. */
523 
524 };
525 
526 /*
527  * Modes for the __bam_curadj recovery records (btree_curadj).
528  * These appear in log records, so we wire the values and
529  * do not leave it up to the compiler.
530  */
531 typedef enum {
532 	DB_CA_DI	= 1,
533 	DB_CA_DUP	= 2,
534 	DB_CA_RSPLIT	= 3,
535 	DB_CA_SPLIT	= 4
536 } db_ca_mode;
537 
538 /*
539  * Flags for __bam_pinsert.
540  */
541 #define	BPI_SPACEONLY	0x01		/* Only check for space to update. */
542 #define	BPI_NORECNUM	0x02		/* Don't update the left's recnum. */
543 #define	BPI_NOLOGGING	0x04		/* Don't log the update. */
544 #define	BPI_REPLACE	0x08		/* Replace the record. */
545 
546 #if defined(__cplusplus)
547 }
548 #endif
549 
550 #include "dbinc_auto/btree_auto.h"
551 #include "dbinc_auto/btree_ext.h"
552 #include "dbinc/db_am.h"
553 #endif /* !_DB_BTREE_H_ */
554