1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
28  */
29 
30 #ifndef _SYS_RANGE_TREE_H
31 #define	_SYS_RANGE_TREE_H
32 
33 #include <sys/btree.h>
34 #include <sys/dmu.h>
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 #define	RANGE_TREE_HISTOGRAM_SIZE	64
41 
42 typedef struct range_tree_ops range_tree_ops_t;
43 
44 typedef enum range_seg_type {
45 	RANGE_SEG32,
46 	RANGE_SEG64,
47 	RANGE_SEG_GAP,
48 	RANGE_SEG_NUM_TYPES,
49 } range_seg_type_t;
50 
51 /*
52  * Note: the range_tree may not be accessed concurrently; consumers
53  * must provide external locking if required.
54  */
55 typedef struct range_tree {
56 	zfs_btree_t	rt_root;	/* offset-ordered segment b-tree */
57 	uint64_t	rt_space;	/* sum of all segments in the map */
58 	range_seg_type_t rt_type;	/* type of range_seg_t in use */
59 	/*
60 	 * All data that is stored in the range tree must have a start higher
61 	 * than or equal to rt_start, and all sizes and offsets must be
62 	 * multiples of 1 << rt_shift.
63 	 */
64 	uint8_t		rt_shift;
65 	uint64_t	rt_start;
66 	range_tree_ops_t *rt_ops;
67 
68 	/* rt_btree_compare should only be set if rt_arg is a b-tree */
69 	void		*rt_arg;
70 	int (*rt_btree_compare)(const void *, const void *);
71 
72 	uint64_t	rt_gap;		/* allowable inter-segment gap */
73 
74 	/*
75 	 * The rt_histogram maintains a histogram of ranges. Each bucket,
76 	 * rt_histogram[i], contains the number of ranges whose size is:
77 	 * 2^i <= size of range in bytes < 2^(i+1)
78 	 */
79 	uint64_t	rt_histogram[RANGE_TREE_HISTOGRAM_SIZE];
80 } range_tree_t;
81 
82 typedef struct range_seg32 {
83 	uint32_t	rs_start;	/* starting offset of this segment */
84 	uint32_t	rs_end;		/* ending offset (non-inclusive) */
85 } range_seg32_t;
86 
87 /*
88  * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
89  * require 64-bit integers for ranges.
90  */
91 typedef struct range_seg64 {
92 	uint64_t	rs_start;	/* starting offset of this segment */
93 	uint64_t	rs_end;		/* ending offset (non-inclusive) */
94 } range_seg64_t;
95 
96 typedef struct range_seg_gap {
97 	uint64_t	rs_start;	/* starting offset of this segment */
98 	uint64_t	rs_end;		/* ending offset (non-inclusive) */
99 	uint64_t	rs_fill;	/* actual fill if gap mode is on */
100 } range_seg_gap_t;
101 
102 /*
103  * This type needs to be the largest of the range segs, since it will be stack
104  * allocated and then cast the actual type to do tree operations.
105  */
106 typedef range_seg_gap_t range_seg_max_t;
107 
108 /*
109  * This is just for clarity of code purposes, so we can make it clear that a
110  * pointer is to a range seg of some type; when we need to do the actual math,
111  * we'll figure out the real type.
112  */
113 typedef void range_seg_t;
114 
115 struct range_tree_ops {
116 	void    (*rtop_create)(range_tree_t *rt, void *arg);
117 	void    (*rtop_destroy)(range_tree_t *rt, void *arg);
118 	void	(*rtop_add)(range_tree_t *rt, void *rs, void *arg);
119 	void    (*rtop_remove)(range_tree_t *rt, void *rs, void *arg);
120 	void	(*rtop_vacate)(range_tree_t *rt, void *arg);
121 };
122 
123 static inline uint64_t
124 rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt)
125 {
126 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
127 	switch (rt->rt_type) {
128 	case RANGE_SEG32:
129 		return (((range_seg32_t *)rs)->rs_start);
130 	case RANGE_SEG64:
131 		return (((range_seg64_t *)rs)->rs_start);
132 	case RANGE_SEG_GAP:
133 		return (((range_seg_gap_t *)rs)->rs_start);
134 	default:
135 		VERIFY(0);
136 		return (0);
137 	}
138 }
139 
140 static inline uint64_t
141 rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt)
142 {
143 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
144 	switch (rt->rt_type) {
145 	case RANGE_SEG32:
146 		return (((range_seg32_t *)rs)->rs_end);
147 	case RANGE_SEG64:
148 		return (((range_seg64_t *)rs)->rs_end);
149 	case RANGE_SEG_GAP:
150 		return (((range_seg_gap_t *)rs)->rs_end);
151 	default:
152 		VERIFY(0);
153 		return (0);
154 	}
155 }
156 
157 static inline uint64_t
158 rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt)
159 {
160 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
161 	switch (rt->rt_type) {
162 	case RANGE_SEG32: {
163 		const range_seg32_t *r32 = rs;
164 		return (r32->rs_end - r32->rs_start);
165 	}
166 	case RANGE_SEG64: {
167 		const range_seg64_t *r64 = rs;
168 		return (r64->rs_end - r64->rs_start);
169 	}
170 	case RANGE_SEG_GAP:
171 		return (((range_seg_gap_t *)rs)->rs_fill);
172 	default:
173 		VERIFY(0);
174 		return (0);
175 	}
176 
177 }
178 
179 static inline uint64_t
180 rs_get_start(const range_seg_t *rs, const range_tree_t *rt)
181 {
182 	return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
183 }
184 
185 static inline uint64_t
186 rs_get_end(const range_seg_t *rs, const range_tree_t *rt)
187 {
188 	return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
189 }
190 
191 static inline uint64_t
192 rs_get_fill(const range_seg_t *rs, const range_tree_t *rt)
193 {
194 	return (rs_get_fill_raw(rs, rt) << rt->rt_shift);
195 }
196 
197 static inline void
198 rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start)
199 {
200 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
201 	switch (rt->rt_type) {
202 	case RANGE_SEG32:
203 		ASSERT3U(start, <=, UINT32_MAX);
204 		((range_seg32_t *)rs)->rs_start = (uint32_t)start;
205 		break;
206 	case RANGE_SEG64:
207 		((range_seg64_t *)rs)->rs_start = start;
208 		break;
209 	case RANGE_SEG_GAP:
210 		((range_seg_gap_t *)rs)->rs_start = start;
211 		break;
212 	default:
213 		VERIFY(0);
214 	}
215 }
216 
217 static inline void
218 rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end)
219 {
220 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
221 	switch (rt->rt_type) {
222 	case RANGE_SEG32:
223 		ASSERT3U(end, <=, UINT32_MAX);
224 		((range_seg32_t *)rs)->rs_end = (uint32_t)end;
225 		break;
226 	case RANGE_SEG64:
227 		((range_seg64_t *)rs)->rs_end = end;
228 		break;
229 	case RANGE_SEG_GAP:
230 		((range_seg_gap_t *)rs)->rs_end = end;
231 		break;
232 	default:
233 		VERIFY(0);
234 	}
235 }
236 
237 static inline void
238 rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
239 {
240 	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
241 	switch (rt->rt_type) {
242 	case RANGE_SEG32:
243 		/* fall through */
244 	case RANGE_SEG64:
245 		ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs,
246 		    rt));
247 		break;
248 	case RANGE_SEG_GAP:
249 		((range_seg_gap_t *)rs)->rs_fill = fill;
250 		break;
251 	default:
252 		VERIFY(0);
253 	}
254 }
255 
256 static inline void
257 rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start)
258 {
259 	ASSERT3U(start, >=, rt->rt_start);
260 	ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift));
261 	rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift);
262 }
263 
264 static inline void
265 rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end)
266 {
267 	ASSERT3U(end, >=, rt->rt_start);
268 	ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift));
269 	rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift);
270 }
271 
272 static inline void
273 rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
274 {
275 	ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
276 	rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
277 }
278 
279 typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
280 
281 range_tree_t *range_tree_create_impl(range_tree_ops_t *ops,
282     range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
283     int (*zfs_btree_compare) (const void *, const void *), uint64_t gap);
284 range_tree_t *range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
285     void *arg, uint64_t start, uint64_t shift);
286 void range_tree_destroy(range_tree_t *rt);
287 boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
288 range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
289 boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
290     uint64_t *ostart, uint64_t *osize);
291 void range_tree_verify_not_present(range_tree_t *rt,
292     uint64_t start, uint64_t size);
293 void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
294     uint64_t newstart, uint64_t newsize);
295 uint64_t range_tree_space(range_tree_t *rt);
296 uint64_t range_tree_numsegs(range_tree_t *rt);
297 boolean_t range_tree_is_empty(range_tree_t *rt);
298 void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
299 void range_tree_stat_verify(range_tree_t *rt);
300 uint64_t range_tree_min(range_tree_t *rt);
301 uint64_t range_tree_max(range_tree_t *rt);
302 uint64_t range_tree_span(range_tree_t *rt);
303 
304 void range_tree_add(void *arg, uint64_t start, uint64_t size);
305 void range_tree_remove(void *arg, uint64_t start, uint64_t size);
306 void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size);
307 void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta);
308 void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
309 
310 void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
311 void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
312 range_seg_t *range_tree_first(range_tree_t *rt);
313 
314 void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
315     range_tree_t *removefrom, range_tree_t *addto);
316 void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
317     range_tree_t *addto);
318 
319 void rt_btree_create(range_tree_t *rt, void *arg);
320 void rt_btree_destroy(range_tree_t *rt, void *arg);
321 void rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg);
322 void rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg);
323 void rt_btree_vacate(range_tree_t *rt, void *arg);
324 extern range_tree_ops_t rt_btree_ops;
325 
326 #ifdef	__cplusplus
327 }
328 #endif
329 
330 #endif	/* _SYS_RANGE_TREE_H */
331