1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $ 35 */ 36 37 /* 38 * HAMMER B-Tree index 39 * 40 * HAMMER implements a modified B+Tree. B+Trees store records only 41 * at their leaves and HAMMER's modification is to adjust the internal 42 * elements so there is a boundary element on each side instead of sub-tree 43 * pointers. 44 * 45 * We just call our modified B+Tree a 'B-Tree' in HAMMER documentation to 46 * reduce confusion. 47 * 48 * A B-Tree internal node looks like this: 49 * 50 * B N N N N N N B <-- boundary and internal elements 51 * S S S S S S S <-- subtree pointers 52 * 53 * A B-Tree leaf node looks like this: 54 * 55 * L L L L L L L L <-- leaf elemenets 56 * (there is also a previous and next-leaf pointer) 57 * 58 * The recursion radix of an internal node is reduced by 1 relative to 59 * a normal B-Tree in order to accomodate the right-hand boundary. 60 * The left-hand boundary (B in the left) is integrated into the first 61 * element so it doesn't require 2 elements to accomodate boundaries. 62 * 63 * The big benefit to using a B-Tree with built-in bounds information is 64 * that it makes it possible to cache pointers into the middle of the tree 65 * and not have to start searches, insertions, OR deletions at the root node. 66 * The boundary elements allow searches to progress in a definitive direction 67 * from any point in the tree without revisting nodes. It is also possible 68 * to terminate searches early and make minor adjustments to the boundaries 69 * (within the confines of the parent's boundaries) on the fly. This greatly 70 * improves the efficiency of many operations. 71 * 72 * All of the structures below are on-disk structures. 73 */ 74 75 /* 76 * Common base for all B-Tree element types (40 bytes) 77 */ 78 struct hammer_base_elm { 79 int64_t obj_id; /* 00 object record is associated with */ 80 int64_t key; /* 08 indexing key (offset or namekey) */ 81 82 hammer_tid_t create_tid; /* 10 transaction id for record creation */ 83 hammer_tid_t delete_tid; /* 18 transaction id for record update/del */ 84 85 u_int16_t rec_type; /* 20 _RECTYPE_ */ 86 u_int8_t obj_type; /* 22 _OBJTYPE_ (restricted) */ 87 u_int8_t btype; /* 23 B-Tree element type */ 88 u_int32_t localization; /* 24 B-Tree localization parameter */ 89 /* 28 */ 90 }; 91 92 typedef struct hammer_base_elm *hammer_base_elm_t; 93 94 /* 95 * Localization has sorting priority over the obj_id,rec_type,key,tid 96 * and is used to localize inodes for very fast directory scans. 97 * 98 * Localization can also be used to create pseudo-filesystems within 99 * a HAMMER filesystem. Pseudo-filesystems would be suitable 100 * replication targets. Upper 16 bits of the localization field in 101 * struct hammer_base_elm represents pseudo-filesystem id, and lower 102 * 16 bits of this field represents its type (inode or misc). 103 * 104 * The root inode (not the PFS root inode but the real root) uses 105 * HAMMER_DEF_LOCALIZATION for its incore ip->obj_localization. 106 * HAMMER_DEF_LOCALIZATION implies PFS 0 and no localization type. 107 */ 108 #define HAMMER_LOCALIZE_RESERVED00 0x00000000 109 #define HAMMER_LOCALIZE_INODE 0x00000001 110 #define HAMMER_LOCALIZE_MISC 0x00000002 111 #define HAMMER_LOCALIZE_RESERVED03 0x00000003 112 #define HAMMER_LOCALIZE_MASK 0x0000FFFF 113 #define HAMMER_LOCALIZE_PSEUDOFS_MASK 0xFFFF0000 114 115 #define HAMMER_MIN_LOCALIZATION 0x00000000U 116 #define HAMMER_MAX_LOCALIZATION 0x0000FFFFU 117 #define HAMMER_DEF_LOCALIZATION 0x00000000U 118 119 /* 120 * Internal element (40 + 24 = 64 bytes). 121 * 122 * An internal element contains a recursion to another B-Tree node. 123 */ 124 struct hammer_btree_internal_elm { 125 struct hammer_base_elm base; 126 hammer_tid_t mirror_tid; /* mirroring support */ 127 hammer_off_t subtree_offset; 128 int32_t unused02; 129 int32_t unused03; 130 }; 131 132 typedef struct hammer_btree_internal_elm *hammer_btree_internal_elm_t; 133 134 /* 135 * Leaf B-Tree element (40 + 24 = 64 bytes). 136 * 137 * NOTE: create_ts/delete_ts are not used by any core algorithms, they are 138 * used only by userland to present nominal real-time date strings 139 * to the user. 140 */ 141 struct hammer_btree_leaf_elm { 142 struct hammer_base_elm base; 143 u_int32_t create_ts; 144 u_int32_t delete_ts; 145 hammer_off_t data_offset; 146 int32_t data_len; 147 hammer_crc_t data_crc; 148 }; 149 150 typedef struct hammer_btree_leaf_elm *hammer_btree_leaf_elm_t; 151 152 /* 153 * Rollup btree leaf element types - 64 byte structure 154 */ 155 union hammer_btree_elm { 156 struct hammer_base_elm base; 157 struct hammer_btree_leaf_elm leaf; 158 struct hammer_btree_internal_elm internal; 159 }; 160 161 typedef union hammer_btree_elm *hammer_btree_elm_t; 162 163 /* 164 * B-Tree node (64x64 = 4K structure) 165 * 166 * Each node contains 63 elements. The last element for an internal node 167 * is the right-boundary so internal nodes have one fewer logical elements 168 * then leaf nodes. 169 * 170 * 'count' always refers to the number of elements and is non-inclusive of 171 * the right-hand boundary for an internal node. For a leaf node, 'count' 172 * refers to the number of elements and there is no idea of boundaries. 173 * 174 * The use of a fairly large radix is designed to reduce the number of 175 * discrete disk accesses required to locate something. Keep in mind 176 * that nodes are allocated out of 16K hammer buffers so supported values 177 * are (256-1), (128-1), (64-1), (32-1), or (16-1). HAMMER uses 63-way 178 * so the node size is (64x(1+(64-1))) = 4KB. 179 * 180 * NOTE: FUTURE EXPANSION: The reserved fields in hammer_node_ondisk are 181 * reserved for left/right leaf linkage fields, flags, and other future 182 * features. 183 */ 184 #define HAMMER_BTREE_LEAF_ELMS 63 185 #define HAMMER_BTREE_INT_ELMS (HAMMER_BTREE_LEAF_ELMS - 1) 186 187 #define HAMMER_BTREE_TYPE_INTERNAL ((u_int8_t)'I') 188 #define HAMMER_BTREE_TYPE_LEAF ((u_int8_t)'L') 189 #define HAMMER_BTREE_TYPE_RECORD ((u_int8_t)'R') 190 #define HAMMER_BTREE_TYPE_DELETED ((u_int8_t)'D') 191 #define HAMMER_BTREE_TYPE_NONE ((u_int8_t)0) 192 193 /* 194 * Return 1 if elm is a node element of an internal node, 195 * otherwise return 0. 196 */ 197 static __inline 198 int 199 hammer_is_internal_node_elm(hammer_btree_elm_t elm) 200 { 201 switch (elm->base.btype) { 202 case HAMMER_BTREE_TYPE_INTERNAL: 203 case HAMMER_BTREE_TYPE_LEAF: 204 return(1); 205 } 206 return(0); 207 } 208 209 /* 210 * Return 1 if elm is a node element of a leaf node, 211 * otherwise return 0. 212 */ 213 static __inline 214 int 215 hammer_is_leaf_node_elm(hammer_btree_elm_t elm) 216 { 217 switch (elm->base.btype) { 218 case HAMMER_BTREE_TYPE_RECORD: 219 return(1); 220 } 221 return(0); 222 } 223 224 static __inline 225 int 226 hammer_node_max_elements(u_int8_t type) 227 { 228 switch (type) { 229 case HAMMER_BTREE_TYPE_LEAF: 230 return(HAMMER_BTREE_LEAF_ELMS); 231 case HAMMER_BTREE_TYPE_INTERNAL: 232 return(HAMMER_BTREE_INT_ELMS); 233 } 234 return(-1); /* invalid type */ 235 } 236 237 static __inline 238 char 239 hammer_elm_btype(hammer_btree_elm_t elm) 240 { 241 switch(elm->base.btype) { 242 case HAMMER_BTREE_TYPE_INTERNAL: 243 case HAMMER_BTREE_TYPE_LEAF: 244 case HAMMER_BTREE_TYPE_RECORD: 245 case HAMMER_BTREE_TYPE_DELETED: 246 return(elm->base.btype); /* ascii */ 247 case HAMMER_BTREE_TYPE_NONE: 248 return('*'); 249 default: 250 return('?'); 251 } 252 } 253 254 struct hammer_node_ondisk { 255 /* 256 * B-Tree node header (64 bytes) 257 */ 258 hammer_crc_t crc; /* MUST BE FIRST FIELD OF STRUCTURE */ 259 u_int32_t signature; /* 0 or 0xB3A49586 but not used for anything */ 260 hammer_off_t parent; /* 0 if at root of B-Tree */ 261 int32_t count; 262 u_int8_t type; 263 u_int8_t reserved01; 264 u_int16_t reserved02; 265 hammer_off_t reserved03; /* future link_left */ 266 hammer_off_t reserved04; /* future link_right */ 267 hammer_off_t reserved05; 268 hammer_off_t reserved06; 269 hammer_tid_t mirror_tid; /* mirroring support (aggregator) */ 270 271 /* 272 * Element array. Internal nodes have one less logical element 273 * (meaning: the same number of physical elements) in order to 274 * accomodate the right-hand boundary. The left-hand boundary 275 * is integrated into the first element. Leaf nodes have no 276 * boundary elements. 277 */ 278 union hammer_btree_elm elms[HAMMER_BTREE_LEAF_ELMS]; 279 }; 280 281 #define HAMMER_BTREE_SIGNATURE_GOOD 0xB3A49586 282 #define HAMMER_BTREE_SIGNATURE_DESTROYED 0x4A3B2C1D /* not used */ 283 #define HAMMER_BTREE_CRCSIZE \ 284 (sizeof(struct hammer_node_ondisk) - sizeof(hammer_crc_t)) 285 286 typedef struct hammer_node_ondisk *hammer_node_ondisk_t; 287