1 /* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15 /* 16 * Copyright (c) 2019 by Delphix. All rights reserved. 17 */ 18 19 #ifndef _BTREE_H 20 #define _BTREE_H 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif 25 26 #include <sys/zfs_context.h> 27 28 /* 29 * This file defines the interface for a B-Tree implementation for ZFS. The 30 * tree can be used to store arbitrary sortable data types with low overhead 31 * and good operation performance. In addition the tree intelligently 32 * optimizes bulk in-order insertions to improve memory use and performance. 33 * 34 * Note that for all B-Tree functions, the values returned are pointers to the 35 * internal copies of the data in the tree. The internal data can only be 36 * safely mutated if the changes cannot change the ordering of the element 37 * with respect to any other elements in the tree. 38 * 39 * The major drawback of the B-Tree is that any returned elements or indexes 40 * are only valid until a side-effectful operation occurs, since these can 41 * result in reallocation or relocation of data. Side effectful operations are 42 * defined as insertion, removal, and zfs_btree_destroy_nodes. 43 * 44 * The B-Tree has two types of nodes: core nodes, and leaf nodes. Core 45 * nodes have an array of children pointing to other nodes, and an array of 46 * elements that act as separators between the elements of the subtrees rooted 47 * at its children. Leaf nodes only contain data elements, and form the bottom 48 * layer of the tree. Unlike B+ Trees, in this B-Tree implementation the 49 * elements in the core nodes are not copies of or references to leaf node 50 * elements. Each element occurs only once in the tree, no matter what kind 51 * of node it is in. 52 * 53 * The tree's height is the same throughout, unlike many other forms of search 54 * tree. Each node (except for the root) must be between half minus one and 55 * completely full of elements (and children) at all times. Any operation that 56 * would put the node outside of that range results in a rebalancing operation 57 * (taking, merging, or splitting). 58 * 59 * This tree was implemented using descriptions from Wikipedia's articles on 60 * B-Trees and B+ Trees. 61 */ 62 63 /* 64 * Decreasing these values results in smaller memmove operations, but more of 65 * them, and increased memory overhead. Increasing these values results in 66 * higher variance in operation time, and reduces memory overhead. 67 */ 68 #define BTREE_CORE_ELEMS 126 69 #define BTREE_LEAF_SIZE 4096 70 71 extern kmem_cache_t *zfs_btree_leaf_cache; 72 73 typedef struct zfs_btree_hdr { 74 struct zfs_btree_core *bth_parent; 75 /* 76 * Set to -1 to indicate core nodes. Other values represent first 77 * valid element offset for leaf nodes. 78 */ 79 uint32_t bth_first; 80 /* 81 * For both leaf and core nodes, represents the number of elements in 82 * the node. For core nodes, they will have bth_count + 1 children. 83 */ 84 uint32_t bth_count; 85 } zfs_btree_hdr_t; 86 87 typedef struct zfs_btree_core { 88 zfs_btree_hdr_t btc_hdr; 89 zfs_btree_hdr_t *btc_children[BTREE_CORE_ELEMS + 1]; 90 uint8_t btc_elems[]; 91 } zfs_btree_core_t; 92 93 typedef struct zfs_btree_leaf { 94 zfs_btree_hdr_t btl_hdr; 95 uint8_t btl_elems[]; 96 } zfs_btree_leaf_t; 97 98 typedef struct zfs_btree_index { 99 zfs_btree_hdr_t *bti_node; 100 uint32_t bti_offset; 101 /* 102 * True if the location is before the list offset, false if it's at 103 * the listed offset. 104 */ 105 boolean_t bti_before; 106 } zfs_btree_index_t; 107 108 typedef struct btree { 109 int (*bt_compar) (const void *, const void *); 110 size_t bt_elem_size; 111 size_t bt_leaf_size; 112 uint32_t bt_leaf_cap; 113 int32_t bt_height; 114 uint64_t bt_num_elems; 115 uint64_t bt_num_nodes; 116 zfs_btree_hdr_t *bt_root; 117 zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading 118 } zfs_btree_t; 119 120 /* 121 * Allocate and deallocate caches for btree nodes. 122 */ 123 void zfs_btree_init(void); 124 void zfs_btree_fini(void); 125 126 /* 127 * Initialize an B-Tree. Arguments are: 128 * 129 * tree - the tree to be initialized 130 * compar - function to compare two nodes, it must return exactly: -1, 0, or +1 131 * -1 for <, 0 for ==, and +1 for > 132 * size - the value of sizeof(struct my_type) 133 * lsize - custom leaf size 134 */ 135 void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *), 136 size_t); 137 void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *), 138 size_t, size_t); 139 140 /* 141 * Find a node with a matching value in the tree. Returns the matching node 142 * found. If not found, it returns NULL and then if "where" is not NULL it sets 143 * "where" for use with zfs_btree_add_idx() or zfs_btree_nearest(). 144 * 145 * node - node that has the value being looked for 146 * where - position for use with zfs_btree_nearest() or zfs_btree_add_idx(), 147 * may be NULL 148 */ 149 void *zfs_btree_find(zfs_btree_t *, const void *, zfs_btree_index_t *); 150 151 /* 152 * Insert a node into the tree. 153 * 154 * node - the node to insert 155 * where - position as returned from zfs_btree_find() 156 */ 157 void zfs_btree_add_idx(zfs_btree_t *, const void *, const zfs_btree_index_t *); 158 159 /* 160 * Return the first or last valued node in the tree. Will return NULL if the 161 * tree is empty. The index can be NULL if the location of the first or last 162 * element isn't required. 163 */ 164 void *zfs_btree_first(zfs_btree_t *, zfs_btree_index_t *); 165 void *zfs_btree_last(zfs_btree_t *, zfs_btree_index_t *); 166 167 /* 168 * Return the next or previous valued node in the tree. The second index can 169 * safely be NULL, if the location of the next or previous value isn't 170 * required. 171 */ 172 void *zfs_btree_next(zfs_btree_t *, const zfs_btree_index_t *, 173 zfs_btree_index_t *); 174 void *zfs_btree_prev(zfs_btree_t *, const zfs_btree_index_t *, 175 zfs_btree_index_t *); 176 177 /* 178 * Get a value from a tree and an index. 179 */ 180 void *zfs_btree_get(zfs_btree_t *, zfs_btree_index_t *); 181 182 /* 183 * Add a single value to the tree. The value must not compare equal to any 184 * other node already in the tree. Note that the value will be copied out, not 185 * inserted directly. It is safe to free or destroy the value once this 186 * function returns. 187 */ 188 void zfs_btree_add(zfs_btree_t *, const void *); 189 190 /* 191 * Remove a single value from the tree. The value must be in the tree. The 192 * pointer passed in may be a pointer into a tree-controlled buffer, but it 193 * need not be. 194 */ 195 void zfs_btree_remove(zfs_btree_t *, const void *); 196 197 /* 198 * Remove the value at the given location from the tree. 199 */ 200 void zfs_btree_remove_idx(zfs_btree_t *, zfs_btree_index_t *); 201 202 /* 203 * Return the number of nodes in the tree 204 */ 205 ulong_t zfs_btree_numnodes(zfs_btree_t *); 206 207 /* 208 * Used to destroy any remaining nodes in a tree. The cookie argument should 209 * be initialized to NULL before the first call. Returns a node that has been 210 * removed from the tree and may be free()'d. Returns NULL when the tree is 211 * empty. 212 * 213 * Once you call zfs_btree_destroy_nodes(), you can only continuing calling it 214 * and finally zfs_btree_destroy(). No other B-Tree routines will be valid. 215 * 216 * cookie - an index used to save state between calls to 217 * zfs_btree_destroy_nodes() 218 * 219 * EXAMPLE: 220 * zfs_btree_t *tree; 221 * struct my_data *node; 222 * zfs_btree_index_t *cookie; 223 * 224 * cookie = NULL; 225 * while ((node = zfs_btree_destroy_nodes(tree, &cookie)) != NULL) 226 * data_destroy(node); 227 * zfs_btree_destroy(tree); 228 */ 229 void *zfs_btree_destroy_nodes(zfs_btree_t *, zfs_btree_index_t **); 230 231 /* 232 * Destroys all nodes in the tree quickly. This doesn't give the caller an 233 * opportunity to iterate over each node and do its own cleanup; for that, use 234 * zfs_btree_destroy_nodes(). 235 */ 236 void zfs_btree_clear(zfs_btree_t *); 237 238 /* 239 * Final destroy of an B-Tree. Arguments are: 240 * 241 * tree - the empty tree to destroy 242 */ 243 void zfs_btree_destroy(zfs_btree_t *tree); 244 245 /* Runs a variety of self-checks on the btree to verify integrity. */ 246 void zfs_btree_verify(zfs_btree_t *tree); 247 248 #ifdef __cplusplus 249 } 250 #endif 251 252 #endif /* _BTREE_H */ 253