xref: /dragonfly/sys/vfs/hammer/hammer_btree.h (revision cf37dc20)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $
35  */
36 
37 /*
38  * HAMMER B-Tree index
39  *
40  * HAMMER implements a modified B+Tree.   B+Trees store records only
41  * at their leaves and HAMMER's modification is to adjust the internal
42  * elements so there is a boundary element on each side instead of sub-tree
43  * pointers.
44  *
45  * We just call our modified B+Tree a 'B-Tree' in HAMMER documentation to
46  * reduce confusion.
47  *
48  * A B-Tree internal node looks like this:
49  *
50  *	B N N N N N N B   <-- boundary and internal elements
51  *       S S S S S S S    <-- subtree pointers
52  *
53  * A B-Tree leaf node looks like this:
54  *
55  *	L L L L L L L L   <-- leaf elemenets
56  *			      (there is also a previous and next-leaf pointer)
57  *
58  * The recursion radix of an internal node is reduced by 1 relative to
59  * a normal B-Tree in order to accomodate the right-hand boundary.
60  * The left-hand boundary (B in the left) is integrated into the first
61  * element so it doesn't require 2 elements to accomodate boundaries.
62  *
63  * The big benefit to using a B-Tree with built-in bounds information is
64  * that it makes it possible to cache pointers into the middle of the tree
65  * and not have to start searches, insertions, OR deletions at the root node.
66  * The boundary elements allow searches to progress in a definitive direction
67  * from any point in the tree without revisting nodes.  It is also possible
68  * to terminate searches early and make minor adjustments to the boundaries
69  * (within the confines of the parent's boundaries) on the fly.  This greatly
70  * improves the efficiency of many operations.
71  *
72  * All of the structures below are on-disk structures.
73  */
74 
75 /*
76  * Common base for all B-Tree element types (40 bytes)
77  */
78 struct hammer_base_elm {
79 	int64_t	obj_id;		/* 00 object record is associated with */
80 	int64_t key;		/* 08 indexing key (offset or namekey) */
81 
82 	hammer_tid_t create_tid; /* 10 transaction id for record creation */
83 	hammer_tid_t delete_tid; /* 18 transaction id for record update/del */
84 
85 	u_int16_t rec_type;	/* 20 _RECTYPE_ */
86 	u_int8_t obj_type;	/* 22 _OBJTYPE_ (restricted) */
87 	u_int8_t btype;		/* 23 B-Tree element type */
88 	u_int32_t localization;	/* 24 B-Tree localization parameter */
89 				/* 28 */
90 };
91 
92 typedef struct hammer_base_elm *hammer_base_elm_t;
93 
94 /*
95  * Localization has sorting priority over the obj_id,rec_type,key,tid
96  * and is used to localize inodes for very fast directory scans.
97  *
98  * Localization can also be used to create pseudo-filesystems within
99  * a HAMMER filesystem.  Pseudo-filesystems would be suitable
100  * replication targets.  Upper 16 bits of the localization field in
101  * struct hammer_base_elm represents pseudo-filesystem id, and lower
102  * 16 bits of this field represents its type (inode or misc).
103  *
104  * The root inode (not the PFS root inode but the real root) uses
105  * HAMMER_DEF_LOCALIZATION for its incore ip->obj_localization.
106  * HAMMER_DEF_LOCALIZATION implies PFS 0 and no localization type.
107  */
108 #define HAMMER_LOCALIZE_RESERVED00	0x00000000
109 #define HAMMER_LOCALIZE_INODE		0x00000001
110 #define HAMMER_LOCALIZE_MISC		0x00000002
111 #define HAMMER_LOCALIZE_RESERVED03	0x00000003
112 #define HAMMER_LOCALIZE_MASK		0x0000FFFF
113 #define HAMMER_LOCALIZE_PSEUDOFS_MASK	0xFFFF0000
114 
115 #define HAMMER_MIN_LOCALIZATION		0x00000000U
116 #define HAMMER_MAX_LOCALIZATION		0x0000FFFFU
117 #define HAMMER_DEF_LOCALIZATION		0x00000000U
118 
119 /*
120  * Internal element (40 + 24 = 64 bytes).
121  *
122  * An internal element contains a recursion to another B-Tree node.
123  */
124 struct hammer_btree_internal_elm {
125 	struct hammer_base_elm base;
126 	hammer_tid_t	mirror_tid;		/* mirroring support */
127 	hammer_off_t	subtree_offset;
128 	int32_t		unused02;
129 	int32_t		unused03;
130 };
131 
132 typedef struct hammer_btree_internal_elm *hammer_btree_internal_elm_t;
133 
134 /*
135  * Leaf B-Tree element (40 + 24 = 64 bytes).
136  *
137  * NOTE: create_ts/delete_ts are not used by any core algorithms, they are
138  *       used only by userland to present nominal real-time date strings
139  *	 to the user.
140  */
141 struct hammer_btree_leaf_elm {
142 	struct hammer_base_elm base;
143 	u_int32_t	create_ts;
144 	u_int32_t	delete_ts;
145 	hammer_off_t	data_offset;
146 	int32_t		data_len;
147 	hammer_crc_t	data_crc;
148 };
149 
150 typedef struct hammer_btree_leaf_elm *hammer_btree_leaf_elm_t;
151 
152 /*
153  * Rollup btree leaf element types - 64 byte structure
154  */
155 union hammer_btree_elm {
156 	struct hammer_base_elm		base;
157 	struct hammer_btree_leaf_elm	leaf;
158 	struct hammer_btree_internal_elm internal;
159 };
160 
161 typedef union hammer_btree_elm *hammer_btree_elm_t;
162 
163 /*
164  * B-Tree node (64x64 = 4K structure)
165  *
166  * Each node contains 63 elements.  The last element for an internal node
167  * is the right-boundary so internal nodes have one fewer logical elements
168  * then leaf nodes.
169  *
170  * 'count' always refers to the number of elements and is non-inclusive of
171  * the right-hand boundary for an internal node. For a leaf node, 'count'
172  * refers to the number of elements and there is no idea of boundaries.
173  *
174  * The use of a fairly large radix is designed to reduce the number of
175  * discrete disk accesses required to locate something.  Keep in mind
176  * that nodes are allocated out of 16K hammer buffers so supported values
177  * are (256-1), (128-1), (64-1), (32-1), or (16-1). HAMMER uses 63-way
178  * so the node size is (64x(1+(64-1))) = 4KB.
179  *
180  * NOTE: FUTURE EXPANSION: The reserved fields in hammer_node_ondisk are
181  * reserved for left/right leaf linkage fields, flags, and other future
182  * features.
183  */
184 #define HAMMER_BTREE_LEAF_ELMS	63
185 #define HAMMER_BTREE_INT_ELMS	(HAMMER_BTREE_LEAF_ELMS - 1)
186 
187 #define HAMMER_BTREE_TYPE_INTERNAL	((u_int8_t)'I')
188 #define HAMMER_BTREE_TYPE_LEAF		((u_int8_t)'L')
189 #define HAMMER_BTREE_TYPE_RECORD	((u_int8_t)'R')
190 #define HAMMER_BTREE_TYPE_DELETED	((u_int8_t)'D')
191 #define HAMMER_BTREE_TYPE_NONE		((u_int8_t)0)
192 
193 /*
194  * Return 1 if elm is a node element of an internal node,
195  * otherwise return 0.
196  */
197 static __inline
198 int
199 hammer_is_internal_node_elm(hammer_btree_elm_t elm)
200 {
201 	switch (elm->base.btype) {
202 	case HAMMER_BTREE_TYPE_INTERNAL:
203 	case HAMMER_BTREE_TYPE_LEAF:
204 		return(1);
205 	}
206 	return(0);
207 }
208 
209 /*
210  * Return 1 if elm is a node element of a leaf node,
211  * otherwise return 0.
212  */
213 static __inline
214 int
215 hammer_is_leaf_node_elm(hammer_btree_elm_t elm)
216 {
217 	switch (elm->base.btype) {
218 	case HAMMER_BTREE_TYPE_RECORD:
219 		return(1);
220 	}
221 	return(0);
222 }
223 
224 static __inline
225 int
226 hammer_node_max_elements(u_int8_t type)
227 {
228 	switch (type) {
229 	case HAMMER_BTREE_TYPE_LEAF:
230 		return(HAMMER_BTREE_LEAF_ELMS);
231 	case HAMMER_BTREE_TYPE_INTERNAL:
232 		return(HAMMER_BTREE_INT_ELMS);
233 	}
234 	return(-1);  /* invalid type */
235 }
236 
237 static __inline
238 char
239 hammer_elm_btype(hammer_btree_elm_t elm)
240 {
241 	switch(elm->base.btype) {
242 	case HAMMER_BTREE_TYPE_INTERNAL:
243 	case HAMMER_BTREE_TYPE_LEAF:
244 	case HAMMER_BTREE_TYPE_RECORD:
245 	case HAMMER_BTREE_TYPE_DELETED:
246 		return(elm->base.btype);  /* ascii */
247 	case HAMMER_BTREE_TYPE_NONE:
248 		return('*');
249 	default:
250 		return('?');
251 	}
252 }
253 
254 struct hammer_node_ondisk {
255 	/*
256 	 * B-Tree node header (64 bytes)
257 	 */
258 	hammer_crc_t	crc;		/* MUST BE FIRST FIELD OF STRUCTURE */
259 	u_int32_t	signature;	/* 0 or 0xB3A49586 but not used for anything */
260 	hammer_off_t	parent;		/* 0 if at root of B-Tree */
261 	int32_t		count;
262 	u_int8_t	type;
263 	u_int8_t	reserved01;
264 	u_int16_t	reserved02;
265 	hammer_off_t	reserved03;	/* future link_left */
266 	hammer_off_t	reserved04;	/* future link_right */
267 	hammer_off_t	reserved05;
268 	hammer_off_t	reserved06;
269 	hammer_tid_t	mirror_tid;	/* mirroring support (aggregator) */
270 
271 	/*
272 	 * Element array.  Internal nodes have one less logical element
273 	 * (meaning: the same number of physical elements) in order to
274 	 * accomodate the right-hand boundary.  The left-hand boundary
275 	 * is integrated into the first element.  Leaf nodes have no
276 	 * boundary elements.
277 	 */
278 	union hammer_btree_elm elms[HAMMER_BTREE_LEAF_ELMS];
279 };
280 
281 #define HAMMER_BTREE_SIGNATURE_GOOD		0xB3A49586
282 #define HAMMER_BTREE_SIGNATURE_DESTROYED	0x4A3B2C1D  /* not used */
283 #define HAMMER_BTREE_CRCSIZE	\
284 	(sizeof(struct hammer_node_ondisk) - sizeof(hammer_crc_t))
285 
286 typedef struct hammer_node_ondisk *hammer_node_ondisk_t;
287