xref: /freebsd/sys/contrib/openzfs/module/avl/avl.c (revision 1d386b48)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright (c) 2015 by Delphix. All rights reserved.
29  */
30 
31 /*
32  * AVL - generic AVL tree implementation for kernel use
33  *
34  * A complete description of AVL trees can be found in many CS textbooks.
35  *
36  * Here is a very brief overview. An AVL tree is a binary search tree that is
37  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
38  * any given node, the left and right subtrees are allowed to differ in height
39  * by at most 1 level.
40  *
41  * This relaxation from a perfectly balanced binary tree allows doing
42  * insertion and deletion relatively efficiently. Searching the tree is
43  * still a fast operation, roughly O(log(N)).
44  *
45  * The key to insertion and deletion is a set of tree manipulations called
46  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
47  *
48  * This implementation of AVL trees has the following peculiarities:
49  *
50  *	- The AVL specific data structures are physically embedded as fields
51  *	  in the "using" data structures.  To maintain generality the code
52  *	  must constantly translate between "avl_node_t *" and containing
53  *	  data structure "void *"s by adding/subtracting the avl_offset.
54  *
55  *	- Since the AVL data is always embedded in other structures, there is
56  *	  no locking or memory allocation in the AVL routines. This must be
57  *	  provided for by the enclosing data structure's semantics. Typically,
58  *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
59  *	  exclusive write lock. Other operations require a read lock.
60  *
61  *      - The implementation uses iteration instead of explicit recursion,
62  *	  since it is intended to run on limited size kernel stacks. Since
63  *	  there is no recursion stack present to move "up" in the tree,
64  *	  there is an explicit "parent" link in the avl_node_t.
65  *
66  *      - The left/right children pointers of a node are in an array.
67  *	  In the code, variables (instead of constants) are used to represent
68  *	  left and right indices.  The implementation is written as if it only
69  *	  dealt with left handed manipulations.  By changing the value assigned
70  *	  to "left", the code also works for right handed trees.  The
71  *	  following variables/terms are frequently used:
72  *
73  *		int left;	// 0 when dealing with left children,
74  *				// 1 for dealing with right children
75  *
76  *		int left_heavy;	// -1 when left subtree is taller at some node,
77  *				// +1 when right subtree is taller
78  *
79  *		int right;	// will be the opposite of left (0 or 1)
80  *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
81  *
82  *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
83  *
84  *	  Though it is a little more confusing to read the code, the approach
85  *	  allows using half as much code (and hence cache footprint) for tree
86  *	  manipulations and eliminates many conditional branches.
87  *
88  *	- The avl_index_t is an opaque "cookie" used to find nodes at or
89  *	  adjacent to where a new value would be inserted in the tree. The value
90  *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
91  *	  pointer) is set to indicate if that the new node has a value greater
92  *	  than the value of the indicated "avl_node_t *".
93  *
94  * Note - in addition to userland (e.g. libavl and libutil) and the kernel
95  * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
96  * which each have their own compilation environments and subsequent
97  * requirements. Each of these environments must be considered when adding
98  * dependencies from avl.c.
99  *
100  * Link to Illumos.org for more information on avl function:
101  * [1] https://illumos.org/man/9f/avl
102  */
103 
104 #include <sys/types.h>
105 #include <sys/param.h>
106 #include <sys/debug.h>
107 #include <sys/avl.h>
108 #include <sys/cmn_err.h>
109 #include <sys/mod.h>
110 
111 #ifndef _KERNEL
112 #include <string.h>
113 #endif
114 
115 /*
116  * Walk from one node to the previous valued node (ie. an infix walk
117  * towards the left). At any given node we do one of 2 things:
118  *
119  * - If there is a left child, go to it, then to it's rightmost descendant.
120  *
121  * - otherwise we return through parent nodes until we've come from a right
122  *   child.
123  *
124  * Return Value:
125  * NULL - if at the end of the nodes
126  * otherwise next node
127  */
128 void *
129 avl_walk(avl_tree_t *tree, void	*oldnode, int left)
130 {
131 	size_t off = tree->avl_offset;
132 	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
133 	int right = 1 - left;
134 	int was_child;
135 
136 
137 	/*
138 	 * nowhere to walk to if tree is empty
139 	 */
140 	if (node == NULL)
141 		return (NULL);
142 
143 	/*
144 	 * Visit the previous valued node. There are two possibilities:
145 	 *
146 	 * If this node has a left child, go down one left, then all
147 	 * the way right.
148 	 */
149 	if (node->avl_child[left] != NULL) {
150 		for (node = node->avl_child[left];
151 		    node->avl_child[right] != NULL;
152 		    node = node->avl_child[right])
153 			;
154 	/*
155 	 * Otherwise, return through left children as far as we can.
156 	 */
157 	} else {
158 		for (;;) {
159 			was_child = AVL_XCHILD(node);
160 			node = AVL_XPARENT(node);
161 			if (node == NULL)
162 				return (NULL);
163 			if (was_child == right)
164 				break;
165 		}
166 	}
167 
168 	return (AVL_NODE2DATA(node, off));
169 }
170 
171 /*
172  * Return the lowest valued node in a tree or NULL.
173  * (leftmost child from root of tree)
174  */
175 void *
176 avl_first(avl_tree_t *tree)
177 {
178 	avl_node_t *node;
179 	avl_node_t *prev = NULL;
180 	size_t off = tree->avl_offset;
181 
182 	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
183 		prev = node;
184 
185 	if (prev != NULL)
186 		return (AVL_NODE2DATA(prev, off));
187 	return (NULL);
188 }
189 
190 /*
191  * Return the highest valued node in a tree or NULL.
192  * (rightmost child from root of tree)
193  */
194 void *
195 avl_last(avl_tree_t *tree)
196 {
197 	avl_node_t *node;
198 	avl_node_t *prev = NULL;
199 	size_t off = tree->avl_offset;
200 
201 	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
202 		prev = node;
203 
204 	if (prev != NULL)
205 		return (AVL_NODE2DATA(prev, off));
206 	return (NULL);
207 }
208 
209 /*
210  * Access the node immediately before or after an insertion point.
211  *
212  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
213  *
214  * Return value:
215  *	NULL: no node in the given direction
216  *	"void *"  of the found tree node
217  */
218 void *
219 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
220 {
221 	int child = AVL_INDEX2CHILD(where);
222 	avl_node_t *node = AVL_INDEX2NODE(where);
223 	void *data;
224 	size_t off = tree->avl_offset;
225 
226 	if (node == NULL) {
227 		ASSERT(tree->avl_root == NULL);
228 		return (NULL);
229 	}
230 	data = AVL_NODE2DATA(node, off);
231 	if (child != direction)
232 		return (data);
233 
234 	return (avl_walk(tree, data, direction));
235 }
236 
237 
238 /*
239  * Search for the node which contains "value".  The algorithm is a
240  * simple binary tree search.
241  *
242  * return value:
243  *	NULL: the value is not in the AVL tree
244  *		*where (if not NULL)  is set to indicate the insertion point
245  *	"void *"  of the found tree node
246  */
247 void *
248 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
249 {
250 	avl_node_t *node;
251 	avl_node_t *prev = NULL;
252 	int child = 0;
253 	int diff;
254 	size_t off = tree->avl_offset;
255 
256 	for (node = tree->avl_root; node != NULL;
257 	    node = node->avl_child[child]) {
258 
259 		prev = node;
260 
261 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
262 		ASSERT(-1 <= diff && diff <= 1);
263 		if (diff == 0) {
264 #ifdef ZFS_DEBUG
265 			if (where != NULL)
266 				*where = 0;
267 #endif
268 			return (AVL_NODE2DATA(node, off));
269 		}
270 		child = (diff > 0);
271 	}
272 
273 	if (where != NULL)
274 		*where = AVL_MKINDEX(prev, child);
275 
276 	return (NULL);
277 }
278 
279 
280 /*
281  * Perform a rotation to restore balance at the subtree given by depth.
282  *
283  * This routine is used by both insertion and deletion. The return value
284  * indicates:
285  *	 0 : subtree did not change height
286  *	!0 : subtree was reduced in height
287  *
288  * The code is written as if handling left rotations, right rotations are
289  * symmetric and handled by swapping values of variables right/left[_heavy]
290  *
291  * On input balance is the "new" balance at "node". This value is either
292  * -2 or +2.
293  */
294 static int
295 avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
296 {
297 	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
298 	int right = 1 - left;
299 	int left_heavy = balance >> 1;
300 	int right_heavy = -left_heavy;
301 	avl_node_t *parent = AVL_XPARENT(node);
302 	avl_node_t *child = node->avl_child[left];
303 	avl_node_t *cright;
304 	avl_node_t *gchild;
305 	avl_node_t *gright;
306 	avl_node_t *gleft;
307 	int which_child = AVL_XCHILD(node);
308 	int child_bal = AVL_XBALANCE(child);
309 
310 	/*
311 	 * case 1 : node is overly left heavy, the left child is balanced or
312 	 * also left heavy. This requires the following rotation.
313 	 *
314 	 *                   (node bal:-2)
315 	 *                    /           \
316 	 *                   /             \
317 	 *              (child bal:0 or -1)
318 	 *              /    \
319 	 *             /      \
320 	 *                     cright
321 	 *
322 	 * becomes:
323 	 *
324 	 *              (child bal:1 or 0)
325 	 *              /        \
326 	 *             /          \
327 	 *                        (node bal:-1 or 0)
328 	 *                         /     \
329 	 *                        /       \
330 	 *                     cright
331 	 *
332 	 * we detect this situation by noting that child's balance is not
333 	 * right_heavy.
334 	 */
335 	if (child_bal != right_heavy) {
336 
337 		/*
338 		 * compute new balance of nodes
339 		 *
340 		 * If child used to be left heavy (now balanced) we reduced
341 		 * the height of this sub-tree -- used in "return...;" below
342 		 */
343 		child_bal += right_heavy; /* adjust towards right */
344 
345 		/*
346 		 * move "cright" to be node's left child
347 		 */
348 		cright = child->avl_child[right];
349 		node->avl_child[left] = cright;
350 		if (cright != NULL) {
351 			AVL_SETPARENT(cright, node);
352 			AVL_SETCHILD(cright, left);
353 		}
354 
355 		/*
356 		 * move node to be child's right child
357 		 */
358 		child->avl_child[right] = node;
359 		AVL_SETBALANCE(node, -child_bal);
360 		AVL_SETCHILD(node, right);
361 		AVL_SETPARENT(node, child);
362 
363 		/*
364 		 * update the pointer into this subtree
365 		 */
366 		AVL_SETBALANCE(child, child_bal);
367 		AVL_SETCHILD(child, which_child);
368 		AVL_SETPARENT(child, parent);
369 		if (parent != NULL)
370 			parent->avl_child[which_child] = child;
371 		else
372 			tree->avl_root = child;
373 
374 		return (child_bal == 0);
375 	}
376 
377 	/*
378 	 * case 2 : When node is left heavy, but child is right heavy we use
379 	 * a different rotation.
380 	 *
381 	 *                   (node b:-2)
382 	 *                    /   \
383 	 *                   /     \
384 	 *                  /       \
385 	 *             (child b:+1)
386 	 *              /     \
387 	 *             /       \
388 	 *                   (gchild b: != 0)
389 	 *                     /  \
390 	 *                    /    \
391 	 *                 gleft   gright
392 	 *
393 	 * becomes:
394 	 *
395 	 *              (gchild b:0)
396 	 *              /       \
397 	 *             /         \
398 	 *            /           \
399 	 *        (child b:?)   (node b:?)
400 	 *         /  \          /   \
401 	 *        /    \        /     \
402 	 *            gleft   gright
403 	 *
404 	 * computing the new balances is more complicated. As an example:
405 	 *	 if gchild was right_heavy, then child is now left heavy
406 	 *		else it is balanced
407 	 */
408 	gchild = child->avl_child[right];
409 	gleft = gchild->avl_child[left];
410 	gright = gchild->avl_child[right];
411 
412 	/*
413 	 * move gright to left child of node and
414 	 *
415 	 * move gleft to right child of node
416 	 */
417 	node->avl_child[left] = gright;
418 	if (gright != NULL) {
419 		AVL_SETPARENT(gright, node);
420 		AVL_SETCHILD(gright, left);
421 	}
422 
423 	child->avl_child[right] = gleft;
424 	if (gleft != NULL) {
425 		AVL_SETPARENT(gleft, child);
426 		AVL_SETCHILD(gleft, right);
427 	}
428 
429 	/*
430 	 * move child to left child of gchild and
431 	 *
432 	 * move node to right child of gchild and
433 	 *
434 	 * fixup parent of all this to point to gchild
435 	 */
436 	balance = AVL_XBALANCE(gchild);
437 	gchild->avl_child[left] = child;
438 	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
439 	AVL_SETPARENT(child, gchild);
440 	AVL_SETCHILD(child, left);
441 
442 	gchild->avl_child[right] = node;
443 	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
444 	AVL_SETPARENT(node, gchild);
445 	AVL_SETCHILD(node, right);
446 
447 	AVL_SETBALANCE(gchild, 0);
448 	AVL_SETPARENT(gchild, parent);
449 	AVL_SETCHILD(gchild, which_child);
450 	if (parent != NULL)
451 		parent->avl_child[which_child] = gchild;
452 	else
453 		tree->avl_root = gchild;
454 
455 	return (1);	/* the new tree is always shorter */
456 }
457 
458 
459 /*
460  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
461  *
462  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
463  * searches out to the leaf positions.  The avl_index_t indicates the node
464  * which will be the parent of the new node.
465  *
466  * After the node is inserted, a single rotation further up the tree may
467  * be necessary to maintain an acceptable AVL balance.
468  */
469 void
470 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
471 {
472 	avl_node_t *node;
473 	avl_node_t *parent = AVL_INDEX2NODE(where);
474 	int old_balance;
475 	int new_balance;
476 	int which_child = AVL_INDEX2CHILD(where);
477 	size_t off = tree->avl_offset;
478 
479 #ifdef _LP64
480 	ASSERT(((uintptr_t)new_data & 0x7) == 0);
481 #endif
482 
483 	node = AVL_DATA2NODE(new_data, off);
484 
485 	/*
486 	 * First, add the node to the tree at the indicated position.
487 	 */
488 	++tree->avl_numnodes;
489 
490 	node->avl_child[0] = NULL;
491 	node->avl_child[1] = NULL;
492 
493 	AVL_SETCHILD(node, which_child);
494 	AVL_SETBALANCE(node, 0);
495 	AVL_SETPARENT(node, parent);
496 	if (parent != NULL) {
497 		ASSERT(parent->avl_child[which_child] == NULL);
498 		parent->avl_child[which_child] = node;
499 	} else {
500 		ASSERT(tree->avl_root == NULL);
501 		tree->avl_root = node;
502 	}
503 	/*
504 	 * Now, back up the tree modifying the balance of all nodes above the
505 	 * insertion point. If we get to a highly unbalanced ancestor, we
506 	 * need to do a rotation.  If we back out of the tree we are done.
507 	 * If we brought any subtree into perfect balance (0), we are also done.
508 	 */
509 	for (;;) {
510 		node = parent;
511 		if (node == NULL)
512 			return;
513 
514 		/*
515 		 * Compute the new balance
516 		 */
517 		old_balance = AVL_XBALANCE(node);
518 		new_balance = old_balance + (which_child ? 1 : -1);
519 
520 		/*
521 		 * If we introduced equal balance, then we are done immediately
522 		 */
523 		if (new_balance == 0) {
524 			AVL_SETBALANCE(node, 0);
525 			return;
526 		}
527 
528 		/*
529 		 * If both old and new are not zero we went
530 		 * from -1 to -2 balance, do a rotation.
531 		 */
532 		if (old_balance != 0)
533 			break;
534 
535 		AVL_SETBALANCE(node, new_balance);
536 		parent = AVL_XPARENT(node);
537 		which_child = AVL_XCHILD(node);
538 	}
539 
540 	/*
541 	 * perform a rotation to fix the tree and return
542 	 */
543 	(void) avl_rotation(tree, node, new_balance);
544 }
545 
546 /*
547  * Insert "new_data" in "tree" in the given "direction" either after or
548  * before (AVL_AFTER, AVL_BEFORE) the data "here".
549  *
550  * Insertions can only be done at empty leaf points in the tree, therefore
551  * if the given child of the node is already present we move to either
552  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
553  * every other node in the tree is a leaf, this always works.
554  *
555  * To help developers using this interface, we assert that the new node
556  * is correctly ordered at every step of the way in DEBUG kernels.
557  */
558 void
559 avl_insert_here(
560 	avl_tree_t *tree,
561 	void *new_data,
562 	void *here,
563 	int direction)
564 {
565 	avl_node_t *node;
566 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
567 #ifdef ZFS_DEBUG
568 	int diff;
569 #endif
570 
571 	ASSERT(tree != NULL);
572 	ASSERT(new_data != NULL);
573 	ASSERT(here != NULL);
574 	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
575 
576 	/*
577 	 * If corresponding child of node is not NULL, go to the neighboring
578 	 * node and reverse the insertion direction.
579 	 */
580 	node = AVL_DATA2NODE(here, tree->avl_offset);
581 
582 #ifdef ZFS_DEBUG
583 	diff = tree->avl_compar(new_data, here);
584 	ASSERT(-1 <= diff && diff <= 1);
585 	ASSERT(diff != 0);
586 	ASSERT(diff > 0 ? child == 1 : child == 0);
587 #endif
588 
589 	if (node->avl_child[child] != NULL) {
590 		node = node->avl_child[child];
591 		child = 1 - child;
592 		while (node->avl_child[child] != NULL) {
593 #ifdef ZFS_DEBUG
594 			diff = tree->avl_compar(new_data,
595 			    AVL_NODE2DATA(node, tree->avl_offset));
596 			ASSERT(-1 <= diff && diff <= 1);
597 			ASSERT(diff != 0);
598 			ASSERT(diff > 0 ? child == 1 : child == 0);
599 #endif
600 			node = node->avl_child[child];
601 		}
602 #ifdef ZFS_DEBUG
603 		diff = tree->avl_compar(new_data,
604 		    AVL_NODE2DATA(node, tree->avl_offset));
605 		ASSERT(-1 <= diff && diff <= 1);
606 		ASSERT(diff != 0);
607 		ASSERT(diff > 0 ? child == 1 : child == 0);
608 #endif
609 	}
610 	ASSERT(node->avl_child[child] == NULL);
611 
612 	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
613 }
614 
615 /*
616  * Add a new node to an AVL tree.  Strictly enforce that no duplicates can
617  * be added to the tree with a VERIFY which is enabled for non-DEBUG builds.
618  */
619 void
620 avl_add(avl_tree_t *tree, void *new_node)
621 {
622 	avl_index_t where = 0;
623 
624 	VERIFY(avl_find(tree, new_node, &where) == NULL);
625 
626 	avl_insert(tree, new_node, where);
627 }
628 
629 /*
630  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
631  * with 2 complications.
632  *
633  * First, we may be deleting an interior node. Consider the following subtree:
634  *
635  *     d           c            c
636  *    / \         / \          / \
637  *   b   e       b   e        b   e
638  *  / \	        / \          /
639  * a   c       a            a
640  *
641  * When we are deleting node (d), we find and bring up an adjacent valued leaf
642  * node, say (c), to take the interior node's place. In the code this is
643  * handled by temporarily swapping (d) and (c) in the tree and then using
644  * common code to delete (d) from the leaf position.
645  *
646  * Secondly, an interior deletion from a deep tree may require more than one
647  * rotation to fix the balance. This is handled by moving up the tree through
648  * parents and applying rotations as needed. The return value from
649  * avl_rotation() is used to detect when a subtree did not change overall
650  * height due to a rotation.
651  */
652 void
653 avl_remove(avl_tree_t *tree, void *data)
654 {
655 	avl_node_t *delete;
656 	avl_node_t *parent;
657 	avl_node_t *node;
658 	avl_node_t tmp;
659 	int old_balance;
660 	int new_balance;
661 	int left;
662 	int right;
663 	int which_child;
664 	size_t off = tree->avl_offset;
665 
666 	delete = AVL_DATA2NODE(data, off);
667 
668 	/*
669 	 * Deletion is easiest with a node that has at most 1 child.
670 	 * We swap a node with 2 children with a sequentially valued
671 	 * neighbor node. That node will have at most 1 child. Note this
672 	 * has no effect on the ordering of the remaining nodes.
673 	 *
674 	 * As an optimization, we choose the greater neighbor if the tree
675 	 * is right heavy, otherwise the left neighbor. This reduces the
676 	 * number of rotations needed.
677 	 */
678 	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
679 
680 		/*
681 		 * choose node to swap from whichever side is taller
682 		 */
683 		old_balance = AVL_XBALANCE(delete);
684 		left = (old_balance > 0);
685 		right = 1 - left;
686 
687 		/*
688 		 * get to the previous value'd node
689 		 * (down 1 left, as far as possible right)
690 		 */
691 		for (node = delete->avl_child[left];
692 		    node->avl_child[right] != NULL;
693 		    node = node->avl_child[right])
694 			;
695 
696 		/*
697 		 * create a temp placeholder for 'node'
698 		 * move 'node' to delete's spot in the tree
699 		 */
700 		tmp = *node;
701 
702 		memcpy(node, delete, sizeof (*node));
703 		if (node->avl_child[left] == node)
704 			node->avl_child[left] = &tmp;
705 
706 		parent = AVL_XPARENT(node);
707 		if (parent != NULL)
708 			parent->avl_child[AVL_XCHILD(node)] = node;
709 		else
710 			tree->avl_root = node;
711 		AVL_SETPARENT(node->avl_child[left], node);
712 		AVL_SETPARENT(node->avl_child[right], node);
713 
714 		/*
715 		 * Put tmp where node used to be (just temporary).
716 		 * It always has a parent and at most 1 child.
717 		 */
718 		delete = &tmp;
719 		parent = AVL_XPARENT(delete);
720 		parent->avl_child[AVL_XCHILD(delete)] = delete;
721 		which_child = (delete->avl_child[1] != 0);
722 		if (delete->avl_child[which_child] != NULL)
723 			AVL_SETPARENT(delete->avl_child[which_child], delete);
724 	}
725 
726 
727 	/*
728 	 * Here we know "delete" is at least partially a leaf node. It can
729 	 * be easily removed from the tree.
730 	 */
731 	ASSERT(tree->avl_numnodes > 0);
732 	--tree->avl_numnodes;
733 	parent = AVL_XPARENT(delete);
734 	which_child = AVL_XCHILD(delete);
735 	if (delete->avl_child[0] != NULL)
736 		node = delete->avl_child[0];
737 	else
738 		node = delete->avl_child[1];
739 
740 	/*
741 	 * Connect parent directly to node (leaving out delete).
742 	 */
743 	if (node != NULL) {
744 		AVL_SETPARENT(node, parent);
745 		AVL_SETCHILD(node, which_child);
746 	}
747 	if (parent == NULL) {
748 		tree->avl_root = node;
749 		return;
750 	}
751 	parent->avl_child[which_child] = node;
752 
753 
754 	/*
755 	 * Since the subtree is now shorter, begin adjusting parent balances
756 	 * and performing any needed rotations.
757 	 */
758 	do {
759 
760 		/*
761 		 * Move up the tree and adjust the balance
762 		 *
763 		 * Capture the parent and which_child values for the next
764 		 * iteration before any rotations occur.
765 		 */
766 		node = parent;
767 		old_balance = AVL_XBALANCE(node);
768 		new_balance = old_balance - (which_child ? 1 : -1);
769 		parent = AVL_XPARENT(node);
770 		which_child = AVL_XCHILD(node);
771 
772 		/*
773 		 * If a node was in perfect balance but isn't anymore then
774 		 * we can stop, since the height didn't change above this point
775 		 * due to a deletion.
776 		 */
777 		if (old_balance == 0) {
778 			AVL_SETBALANCE(node, new_balance);
779 			break;
780 		}
781 
782 		/*
783 		 * If the new balance is zero, we don't need to rotate
784 		 * else
785 		 * need a rotation to fix the balance.
786 		 * If the rotation doesn't change the height
787 		 * of the sub-tree we have finished adjusting.
788 		 */
789 		if (new_balance == 0)
790 			AVL_SETBALANCE(node, new_balance);
791 		else if (!avl_rotation(tree, node, new_balance))
792 			break;
793 	} while (parent != NULL);
794 }
795 
796 #define	AVL_REINSERT(tree, obj)		\
797 	avl_remove((tree), (obj));	\
798 	avl_add((tree), (obj))
799 
800 boolean_t
801 avl_update_lt(avl_tree_t *t, void *obj)
802 {
803 	void *neighbor;
804 
805 	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
806 	    (t->avl_compar(obj, neighbor) <= 0));
807 
808 	neighbor = AVL_PREV(t, obj);
809 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
810 		AVL_REINSERT(t, obj);
811 		return (B_TRUE);
812 	}
813 
814 	return (B_FALSE);
815 }
816 
817 boolean_t
818 avl_update_gt(avl_tree_t *t, void *obj)
819 {
820 	void *neighbor;
821 
822 	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
823 	    (t->avl_compar(obj, neighbor) >= 0));
824 
825 	neighbor = AVL_NEXT(t, obj);
826 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
827 		AVL_REINSERT(t, obj);
828 		return (B_TRUE);
829 	}
830 
831 	return (B_FALSE);
832 }
833 
834 boolean_t
835 avl_update(avl_tree_t *t, void *obj)
836 {
837 	void *neighbor;
838 
839 	neighbor = AVL_PREV(t, obj);
840 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
841 		AVL_REINSERT(t, obj);
842 		return (B_TRUE);
843 	}
844 
845 	neighbor = AVL_NEXT(t, obj);
846 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
847 		AVL_REINSERT(t, obj);
848 		return (B_TRUE);
849 	}
850 
851 	return (B_FALSE);
852 }
853 
854 void
855 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
856 {
857 	avl_node_t *temp_node;
858 	ulong_t temp_numnodes;
859 
860 	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
861 	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
862 
863 	temp_node = tree1->avl_root;
864 	temp_numnodes = tree1->avl_numnodes;
865 	tree1->avl_root = tree2->avl_root;
866 	tree1->avl_numnodes = tree2->avl_numnodes;
867 	tree2->avl_root = temp_node;
868 	tree2->avl_numnodes = temp_numnodes;
869 }
870 
871 /*
872  * initialize a new AVL tree
873  */
874 void
875 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
876     size_t size, size_t offset)
877 {
878 	ASSERT(tree);
879 	ASSERT(compar);
880 	ASSERT(size > 0);
881 	ASSERT(size >= offset + sizeof (avl_node_t));
882 #ifdef _LP64
883 	ASSERT((offset & 0x7) == 0);
884 #endif
885 
886 	tree->avl_compar = compar;
887 	tree->avl_root = NULL;
888 	tree->avl_numnodes = 0;
889 	tree->avl_offset = offset;
890 }
891 
892 /*
893  * Delete a tree.
894  */
895 void
896 avl_destroy(avl_tree_t *tree)
897 {
898 	ASSERT(tree);
899 	ASSERT(tree->avl_numnodes == 0);
900 	ASSERT(tree->avl_root == NULL);
901 }
902 
903 
904 /*
905  * Return the number of nodes in an AVL tree.
906  */
907 ulong_t
908 avl_numnodes(avl_tree_t *tree)
909 {
910 	ASSERT(tree);
911 	return (tree->avl_numnodes);
912 }
913 
914 boolean_t
915 avl_is_empty(avl_tree_t *tree)
916 {
917 	ASSERT(tree);
918 	return (tree->avl_numnodes == 0);
919 }
920 
921 #define	CHILDBIT	(1L)
922 
923 /*
924  * Post-order tree walk used to visit all tree nodes and destroy the tree
925  * in post order. This is used for removing all the nodes from a tree without
926  * paying any cost for rebalancing it.
927  *
928  * example:
929  *
930  *	void *cookie = NULL;
931  *	my_data_t *node;
932  *
933  *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
934  *		free(node);
935  *	avl_destroy(tree);
936  *
937  * The cookie is really an avl_node_t to the current node's parent and
938  * an indication of which child you looked at last.
939  *
940  * On input, a cookie value of CHILDBIT indicates the tree is done.
941  */
942 void *
943 avl_destroy_nodes(avl_tree_t *tree, void **cookie)
944 {
945 	avl_node_t	*node;
946 	avl_node_t	*parent;
947 	int		child;
948 	void		*first;
949 	size_t		off = tree->avl_offset;
950 
951 	/*
952 	 * Initial calls go to the first node or it's right descendant.
953 	 */
954 	if (*cookie == NULL) {
955 		first = avl_first(tree);
956 
957 		/*
958 		 * deal with an empty tree
959 		 */
960 		if (first == NULL) {
961 			*cookie = (void *)CHILDBIT;
962 			return (NULL);
963 		}
964 
965 		node = AVL_DATA2NODE(first, off);
966 		parent = AVL_XPARENT(node);
967 		goto check_right_side;
968 	}
969 
970 	/*
971 	 * If there is no parent to return to we are done.
972 	 */
973 	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
974 	if (parent == NULL) {
975 		if (tree->avl_root != NULL) {
976 			ASSERT(tree->avl_numnodes == 1);
977 			tree->avl_root = NULL;
978 			tree->avl_numnodes = 0;
979 		}
980 		return (NULL);
981 	}
982 
983 	/*
984 	 * Remove the child pointer we just visited from the parent and tree.
985 	 */
986 	child = (uintptr_t)(*cookie) & CHILDBIT;
987 	parent->avl_child[child] = NULL;
988 	ASSERT(tree->avl_numnodes > 1);
989 	--tree->avl_numnodes;
990 
991 	/*
992 	 * If we just removed a right child or there isn't one, go up to parent.
993 	 */
994 	if (child == 1 || parent->avl_child[1] == NULL) {
995 		node = parent;
996 		parent = AVL_XPARENT(parent);
997 		goto done;
998 	}
999 
1000 	/*
1001 	 * Do parent's right child, then leftmost descendent.
1002 	 */
1003 	node = parent->avl_child[1];
1004 	while (node->avl_child[0] != NULL) {
1005 		parent = node;
1006 		node = node->avl_child[0];
1007 	}
1008 
1009 	/*
1010 	 * If here, we moved to a left child. It may have one
1011 	 * child on the right (when balance == +1).
1012 	 */
1013 check_right_side:
1014 	if (node->avl_child[1] != NULL) {
1015 		ASSERT(AVL_XBALANCE(node) == 1);
1016 		parent = node;
1017 		node = node->avl_child[1];
1018 		ASSERT(node->avl_child[0] == NULL &&
1019 		    node->avl_child[1] == NULL);
1020 	} else {
1021 		ASSERT(AVL_XBALANCE(node) <= 0);
1022 	}
1023 
1024 done:
1025 	if (parent == NULL) {
1026 		*cookie = (void *)CHILDBIT;
1027 		ASSERT(node == tree->avl_root);
1028 	} else {
1029 		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1030 	}
1031 
1032 	return (AVL_NODE2DATA(node, off));
1033 }
1034 
1035 EXPORT_SYMBOL(avl_create);
1036 EXPORT_SYMBOL(avl_find);
1037 EXPORT_SYMBOL(avl_insert);
1038 EXPORT_SYMBOL(avl_insert_here);
1039 EXPORT_SYMBOL(avl_walk);
1040 EXPORT_SYMBOL(avl_first);
1041 EXPORT_SYMBOL(avl_last);
1042 EXPORT_SYMBOL(avl_nearest);
1043 EXPORT_SYMBOL(avl_add);
1044 EXPORT_SYMBOL(avl_swap);
1045 EXPORT_SYMBOL(avl_is_empty);
1046 EXPORT_SYMBOL(avl_remove);
1047 EXPORT_SYMBOL(avl_numnodes);
1048 EXPORT_SYMBOL(avl_destroy_nodes);
1049 EXPORT_SYMBOL(avl_destroy);
1050 EXPORT_SYMBOL(avl_update_lt);
1051 EXPORT_SYMBOL(avl_update_gt);
1052 EXPORT_SYMBOL(avl_update);
1053