1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2013, 2019 by Delphix. All rights reserved. 28 */ 29 30 #ifndef _SYS_RANGE_TREE_H 31 #define _SYS_RANGE_TREE_H 32 33 #include <sys/btree.h> 34 #include <sys/dmu.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #define RANGE_TREE_HISTOGRAM_SIZE 64 41 42 typedef struct range_tree_ops range_tree_ops_t; 43 44 typedef enum range_seg_type { 45 RANGE_SEG32, 46 RANGE_SEG64, 47 RANGE_SEG_GAP, 48 RANGE_SEG_NUM_TYPES, 49 } range_seg_type_t; 50 51 /* 52 * Note: the range_tree may not be accessed concurrently; consumers 53 * must provide external locking if required. 54 */ 55 typedef struct range_tree { 56 zfs_btree_t rt_root; /* offset-ordered segment b-tree */ 57 uint64_t rt_space; /* sum of all segments in the map */ 58 range_seg_type_t rt_type; /* type of range_seg_t in use */ 59 /* 60 * All data that is stored in the range tree must have a start higher 61 * than or equal to rt_start, and all sizes and offsets must be 62 * multiples of 1 << rt_shift. 63 */ 64 uint8_t rt_shift; 65 uint64_t rt_start; 66 range_tree_ops_t *rt_ops; 67 68 /* rt_btree_compare should only be set if rt_arg is a b-tree */ 69 void *rt_arg; 70 int (*rt_btree_compare)(const void *, const void *); 71 72 uint64_t rt_gap; /* allowable inter-segment gap */ 73 74 /* 75 * The rt_histogram maintains a histogram of ranges. Each bucket, 76 * rt_histogram[i], contains the number of ranges whose size is: 77 * 2^i <= size of range in bytes < 2^(i+1) 78 */ 79 uint64_t rt_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 80 } range_tree_t; 81 82 typedef struct range_seg32 { 83 uint32_t rs_start; /* starting offset of this segment */ 84 uint32_t rs_end; /* ending offset (non-inclusive) */ 85 } range_seg32_t; 86 87 /* 88 * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may 89 * require 64-bit integers for ranges. 90 */ 91 typedef struct range_seg64 { 92 uint64_t rs_start; /* starting offset of this segment */ 93 uint64_t rs_end; /* ending offset (non-inclusive) */ 94 } range_seg64_t; 95 96 typedef struct range_seg_gap { 97 uint64_t rs_start; /* starting offset of this segment */ 98 uint64_t rs_end; /* ending offset (non-inclusive) */ 99 uint64_t rs_fill; /* actual fill if gap mode is on */ 100 } range_seg_gap_t; 101 102 /* 103 * This type needs to be the largest of the range segs, since it will be stack 104 * allocated and then cast the actual type to do tree operations. 105 */ 106 typedef range_seg_gap_t range_seg_max_t; 107 108 /* 109 * This is just for clarity of code purposes, so we can make it clear that a 110 * pointer is to a range seg of some type; when we need to do the actual math, 111 * we'll figure out the real type. 112 */ 113 typedef void range_seg_t; 114 115 struct range_tree_ops { 116 void (*rtop_create)(range_tree_t *rt, void *arg); 117 void (*rtop_destroy)(range_tree_t *rt, void *arg); 118 void (*rtop_add)(range_tree_t *rt, void *rs, void *arg); 119 void (*rtop_remove)(range_tree_t *rt, void *rs, void *arg); 120 void (*rtop_vacate)(range_tree_t *rt, void *arg); 121 }; 122 123 static inline uint64_t 124 rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt) 125 { 126 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 127 switch (rt->rt_type) { 128 case RANGE_SEG32: 129 return (((range_seg32_t *)rs)->rs_start); 130 case RANGE_SEG64: 131 return (((range_seg64_t *)rs)->rs_start); 132 case RANGE_SEG_GAP: 133 return (((range_seg_gap_t *)rs)->rs_start); 134 default: 135 VERIFY(0); 136 return (0); 137 } 138 } 139 140 static inline uint64_t 141 rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt) 142 { 143 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 144 switch (rt->rt_type) { 145 case RANGE_SEG32: 146 return (((range_seg32_t *)rs)->rs_end); 147 case RANGE_SEG64: 148 return (((range_seg64_t *)rs)->rs_end); 149 case RANGE_SEG_GAP: 150 return (((range_seg_gap_t *)rs)->rs_end); 151 default: 152 VERIFY(0); 153 return (0); 154 } 155 } 156 157 static inline uint64_t 158 rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt) 159 { 160 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 161 switch (rt->rt_type) { 162 case RANGE_SEG32: { 163 const range_seg32_t *r32 = rs; 164 return (r32->rs_end - r32->rs_start); 165 } 166 case RANGE_SEG64: { 167 const range_seg64_t *r64 = rs; 168 return (r64->rs_end - r64->rs_start); 169 } 170 case RANGE_SEG_GAP: 171 return (((range_seg_gap_t *)rs)->rs_fill); 172 default: 173 VERIFY(0); 174 return (0); 175 } 176 177 } 178 179 static inline uint64_t 180 rs_get_start(const range_seg_t *rs, const range_tree_t *rt) 181 { 182 return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start); 183 } 184 185 static inline uint64_t 186 rs_get_end(const range_seg_t *rs, const range_tree_t *rt) 187 { 188 return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start); 189 } 190 191 static inline uint64_t 192 rs_get_fill(const range_seg_t *rs, const range_tree_t *rt) 193 { 194 return (rs_get_fill_raw(rs, rt) << rt->rt_shift); 195 } 196 197 static inline void 198 rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start) 199 { 200 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 201 switch (rt->rt_type) { 202 case RANGE_SEG32: 203 ASSERT3U(start, <=, UINT32_MAX); 204 ((range_seg32_t *)rs)->rs_start = (uint32_t)start; 205 break; 206 case RANGE_SEG64: 207 ((range_seg64_t *)rs)->rs_start = start; 208 break; 209 case RANGE_SEG_GAP: 210 ((range_seg_gap_t *)rs)->rs_start = start; 211 break; 212 default: 213 VERIFY(0); 214 } 215 } 216 217 static inline void 218 rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end) 219 { 220 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 221 switch (rt->rt_type) { 222 case RANGE_SEG32: 223 ASSERT3U(end, <=, UINT32_MAX); 224 ((range_seg32_t *)rs)->rs_end = (uint32_t)end; 225 break; 226 case RANGE_SEG64: 227 ((range_seg64_t *)rs)->rs_end = end; 228 break; 229 case RANGE_SEG_GAP: 230 ((range_seg_gap_t *)rs)->rs_end = end; 231 break; 232 default: 233 VERIFY(0); 234 } 235 } 236 237 static inline void 238 rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill) 239 { 240 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES); 241 switch (rt->rt_type) { 242 case RANGE_SEG32: 243 /* fall through */ 244 case RANGE_SEG64: 245 ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs, 246 rt)); 247 break; 248 case RANGE_SEG_GAP: 249 ((range_seg_gap_t *)rs)->rs_fill = fill; 250 break; 251 default: 252 VERIFY(0); 253 } 254 } 255 256 static inline void 257 rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start) 258 { 259 ASSERT3U(start, >=, rt->rt_start); 260 ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift)); 261 rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift); 262 } 263 264 static inline void 265 rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end) 266 { 267 ASSERT3U(end, >=, rt->rt_start); 268 ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift)); 269 rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift); 270 } 271 272 static inline void 273 rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill) 274 { 275 ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift)); 276 rs_set_fill_raw(rs, rt, fill >> rt->rt_shift); 277 } 278 279 typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size); 280 281 range_tree_t *range_tree_create_impl(range_tree_ops_t *ops, 282 range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, 283 int (*zfs_btree_compare) (const void *, const void *), uint64_t gap); 284 range_tree_t *range_tree_create(range_tree_ops_t *ops, range_seg_type_t type, 285 void *arg, uint64_t start, uint64_t shift); 286 void range_tree_destroy(range_tree_t *rt); 287 boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size); 288 range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size); 289 boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size, 290 uint64_t *ostart, uint64_t *osize); 291 void range_tree_verify_not_present(range_tree_t *rt, 292 uint64_t start, uint64_t size); 293 void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs, 294 uint64_t newstart, uint64_t newsize); 295 uint64_t range_tree_space(range_tree_t *rt); 296 uint64_t range_tree_numsegs(range_tree_t *rt); 297 boolean_t range_tree_is_empty(range_tree_t *rt); 298 void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst); 299 void range_tree_stat_verify(range_tree_t *rt); 300 uint64_t range_tree_min(range_tree_t *rt); 301 uint64_t range_tree_max(range_tree_t *rt); 302 uint64_t range_tree_span(range_tree_t *rt); 303 304 void range_tree_add(void *arg, uint64_t start, uint64_t size); 305 void range_tree_remove(void *arg, uint64_t start, uint64_t size); 306 void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size); 307 void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta); 308 void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size); 309 310 void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg); 311 void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg); 312 range_seg_t *range_tree_first(range_tree_t *rt); 313 314 void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, 315 range_tree_t *removefrom, range_tree_t *addto); 316 void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom, 317 range_tree_t *addto); 318 319 void rt_btree_create(range_tree_t *rt, void *arg); 320 void rt_btree_destroy(range_tree_t *rt, void *arg); 321 void rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg); 322 void rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg); 323 void rt_btree_vacate(range_tree_t *rt, void *arg); 324 extern range_tree_ops_t rt_btree_ops; 325 326 #ifdef __cplusplus 327 } 328 #endif 329 330 #endif /* _SYS_RANGE_TREE_H */ 331