xref: /minix/external/bsd/bind/dist/lib/dns/rbt.c (revision fb9c64b2)
1 /*	$NetBSD: rbt.c,v 1.10 2015/07/08 17:28:59 christos Exp $	*/
2 
3 /*
4  * Copyright (C) 2004, 2005, 2007-2009, 2011-2015  Internet Systems Consortium, Inc. ("ISC")
5  * Copyright (C) 1999-2003  Internet Software Consortium.
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*! \file */
21 
22 /* Principal Authors: DCL */
23 
24 #include <config.h>
25 
26 #include <sys/stat.h>
27 #ifdef HAVE_INTTYPES_H
28 #include <inttypes.h> /* uintptr_t */
29 #endif
30 
31 #include <isc/crc64.h>
32 #include <isc/file.h>
33 #include <isc/hex.h>
34 #include <isc/mem.h>
35 #include <isc/platform.h>
36 #include <isc/print.h>
37 #include <isc/refcount.h>
38 #include <isc/socket.h>
39 #include <isc/stdio.h>
40 #include <isc/string.h>
41 #include <isc/util.h>
42 
43 /*%
44  * This define is so dns/name.h (included by dns/fixedname.h) uses more
45  * efficient macro calls instead of functions for a few operations.
46  */
47 #define DNS_NAME_USEINLINE 1
48 
49 #include <dns/fixedname.h>
50 #include <dns/log.h>
51 #include <dns/rbt.h>
52 #include <dns/result.h>
53 #include <dns/version.h>
54 
55 #include <unistd.h>
56 
57 #define CHECK(x) \
58 	do { \
59 		result = (x); \
60 		if (result != ISC_R_SUCCESS) \
61 			goto cleanup; \
62 	} while (/*CONSTCOND*/0)
63 
64 #define RBT_MAGIC               ISC_MAGIC('R', 'B', 'T', '+')
65 #define VALID_RBT(rbt)          ISC_MAGIC_VALID(rbt, RBT_MAGIC)
66 
67 /*
68  * XXXDCL Since parent pointers were added in again, I could remove all of the
69  * chain junk, and replace with dns_rbt_firstnode, _previousnode, _nextnode,
70  * _lastnode.  This would involve pretty major change to the API.
71  */
72 #define CHAIN_MAGIC             ISC_MAGIC('0', '-', '0', '-')
73 #define VALID_CHAIN(chain)      ISC_MAGIC_VALID(chain, CHAIN_MAGIC)
74 
75 #define RBT_HASH_SIZE           64
76 
77 #ifdef RBT_MEM_TEST
78 #undef RBT_HASH_SIZE
79 #define RBT_HASH_SIZE 2 /*%< To give the reallocation code a workout. */
80 #endif
81 
82 struct dns_rbt {
83 	unsigned int            magic;
84 	isc_mem_t *             mctx;
85 	dns_rbtnode_t *         root;
86 	void                    (*data_deleter)(void *, void *);
87 	void *                  deleter_arg;
88 	unsigned int            nodecount;
89 	unsigned int            hashsize;
90 	dns_rbtnode_t **        hashtable;
91 	void *                  mmap_location;
92 };
93 
94 #define RED 0
95 #define BLACK 1
96 
97 /*
98  * This is the header for map-format RBT images.  It is populated,
99  * and then written, as the LAST thing done to the file before returning.
100  * Writing this last (with zeros in the header area initially) will ensure
101  * that the header is only valid when the RBT image is also valid.
102  */
103 typedef struct file_header file_header_t;
104 
105 /* Pad to 32 bytes */
106 static char FILE_VERSION[32] = "\0";
107 
108 /* Header length, always the same size regardless of structure size */
109 #define HEADER_LENGTH		1024
110 
111 struct file_header {
112 	char version1[32];
113 	isc_uint64_t first_node_offset;	/* usually 1024 */
114 	/*
115 	 * information about the system on which the map file was generated
116 	 * will be used to tell if we can load the map file or not
117 	 */
118 	isc_uint32_t ptrsize;
119 	unsigned int bigendian:1;	/* big or little endian system */
120 	unsigned int rdataset_fixed:1;	/* compiled with --enable-rrset-fixed */
121 	unsigned int nodecount;		/* shadow from rbt structure */
122 	isc_uint64_t crc;
123 	char version2[32];  		/* repeated; must match version1 */
124 };
125 
126 /*
127  * The following declarations are for the serialization of an RBT:
128  *
129  * step one: write out a zeroed header of 1024 bytes
130  * step two: walk the tree in a depth-first, left-right-down order, writing
131  * out the nodes, reserving space as we go, correcting addresses to point
132  * at the proper offset in the file, and setting a flag for each pointer to
133  * indicate that it is a reference to a location in the file, rather than in
134  * memory.
135  * step three: write out the header, adding the information that will be
136  * needed to re-create the tree object itself.
137  *
138  * The RBTDB object will do this three times, once for each of the three
139  * RBT objects it contains.
140  *
141  * Note: 'file' must point an actual open file that can be mmapped
142  * and fseeked, not to a pipe or stream
143  */
144 
145 static isc_result_t
146 dns_rbt_zero_header(FILE *file);
147 
148 static isc_result_t
149 write_header(FILE *file, dns_rbt_t *rbt, isc_uint64_t first_node_offset,
150 	     isc_uint64_t crc);
151 
152 static isc_result_t
153 serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left,
154 	       uintptr_t right, uintptr_t down, uintptr_t parent,
155 	       uintptr_t data, isc_uint64_t *crc);
156 
157 static isc_result_t
158 serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent,
159 		dns_rbtdatawriter_t datawriter, void *writer_arg,
160 		uintptr_t *where, isc_uint64_t *crc);
161 /*
162  * The following functions allow you to get the actual address of a pointer
163  * without having to use an if statement to check to see if that address is
164  * relative or not
165  */
166 static inline dns_rbtnode_t *
167 getparent(dns_rbtnode_t *node, file_header_t *header) {
168 	char *adjusted_address = (char *)(node->parent);
169 	adjusted_address += node->parent_is_relative * (uintptr_t)header;
170 
171 	return ((dns_rbtnode_t *)adjusted_address);
172 }
173 
174 static inline dns_rbtnode_t *
175 getleft(dns_rbtnode_t *node, file_header_t *header) {
176 	char *adjusted_address = (char *)(node->left);
177 	adjusted_address += node->left_is_relative * (uintptr_t)header;
178 
179 	return ((dns_rbtnode_t *)adjusted_address);
180 }
181 
182 static inline dns_rbtnode_t *
183 getright(dns_rbtnode_t *node, file_header_t *header) {
184 	char *adjusted_address = (char *)(node->right);
185 	adjusted_address += node->right_is_relative * (uintptr_t)header;
186 
187 	return ((dns_rbtnode_t *)adjusted_address);
188 }
189 
190 static inline dns_rbtnode_t *
191 getdown(dns_rbtnode_t *node, file_header_t *header) {
192 	char *adjusted_address = (char *)(node->down);
193 	adjusted_address += node->down_is_relative * (uintptr_t)header;
194 
195 	return ((dns_rbtnode_t *)adjusted_address);
196 }
197 
198 static inline dns_rbtnode_t *
199 getdata(dns_rbtnode_t *node, file_header_t *header) {
200 	char *adjusted_address = (char *)(node->data);
201 	adjusted_address += node->data_is_relative * (uintptr_t)header;
202 
203 	return ((dns_rbtnode_t *)adjusted_address);
204 }
205 
206 /*%
207  * Elements of the rbtnode structure.
208  */
209 #define PARENT(node)            ((node)->parent)
210 #define LEFT(node)              ((node)->left)
211 #define RIGHT(node)             ((node)->right)
212 #define DOWN(node)              ((node)->down)
213 #define DATA(node)              ((node)->data)
214 #define IS_EMPTY(node)          ((node)->data == NULL)
215 #define HASHNEXT(node)          ((node)->hashnext)
216 #define HASHVAL(node)           ((node)->hashval)
217 #define COLOR(node)             ((node)->color)
218 #define NAMELEN(node)           ((node)->namelen)
219 #define OLDNAMELEN(node)        ((node)->oldnamelen)
220 #define OFFSETLEN(node)         ((node)->offsetlen)
221 #define ATTRS(node)             ((node)->attributes)
222 #define IS_ROOT(node)           ISC_TF((node)->is_root == 1)
223 #define FINDCALLBACK(node)      ISC_TF((node)->find_callback == 1)
224 
225 /*%
226  * Structure elements from the rbtdb.c, not
227  * used as part of the rbt.c algorithms.
228  */
229 #define DIRTY(node)     ((node)->dirty)
230 #define WILD(node)      ((node)->wild)
231 #define LOCKNUM(node)   ((node)->locknum)
232 
233 /*%
234  * The variable length stuff stored after the node has the following
235  * structure.
236  *
237  *	<name_data>{1..255}<oldoffsetlen>{1}<offsets>{1..128}
238  *
239  * <name_data> contains the name of the node when it was created.
240  * <oldoffsetlen> contains the length of <offsets> when the node was created.
241  * <offsets> contains the offets into name for each label when the node was
242  * created.
243  */
244 
245 #define NAME(node)      ((unsigned char *)((node) + 1))
246 #define OFFSETS(node)   (NAME(node) + OLDNAMELEN(node) + 1)
247 #define OLDOFFSETLEN(node) (OFFSETS(node)[-1])
248 
249 #define NODE_SIZE(node) (sizeof(*node) + \
250 			 OLDNAMELEN(node) + OLDOFFSETLEN(node) + 1)
251 
252 /*%
253  * Color management.
254  */
255 #define IS_RED(node)            ((node) != NULL && (node)->color == RED)
256 #define IS_BLACK(node)          ((node) == NULL || (node)->color == BLACK)
257 #define MAKE_RED(node)          ((node)->color = RED)
258 #define MAKE_BLACK(node)        ((node)->color = BLACK)
259 
260 /*%
261  * Chain management.
262  *
263  * The "ancestors" member of chains were removed, with their job now
264  * being wholly handled by parent pointers (which didn't exist, because
265  * of memory concerns, when chains were first implemented).
266  */
267 #define ADD_LEVEL(chain, node) \
268 	do { \
269 		INSIST((chain)->level_count < DNS_RBT_LEVELBLOCK); \
270 		(chain)->levels[(chain)->level_count++] = (node); \
271 	} while (0)
272 
273 /*%
274  * The following macros directly access normally private name variables.
275  * These macros are used to avoid a lot of function calls in the critical
276  * path of the tree traversal code.
277  */
278 
279 static inline void
280 NODENAME(dns_rbtnode_t *node, dns_name_t *name) {
281 	name->length = NAMELEN(node);
282 	name->labels = OFFSETLEN(node);
283 	name->ndata = NAME(node);
284 	name->offsets = OFFSETS(node);
285 	name->attributes = ATTRS(node);
286 	name->attributes |= DNS_NAMEATTR_READONLY;
287 }
288 
289 void
290 dns_rbtnode_nodename(dns_rbtnode_t *node, dns_name_t *name) {
291 	name->length = NAMELEN(node);
292 	name->labels = OFFSETLEN(node);
293 	name->ndata = NAME(node);
294 	name->offsets = OFFSETS(node);
295 	name->attributes = ATTRS(node);
296 	name->attributes |= DNS_NAMEATTR_READONLY;
297 }
298 
299 dns_rbtnode_t *
300 dns_rbt_root(dns_rbt_t *rbt) {
301   return rbt->root;
302 }
303 
304 #ifdef DNS_RBT_USEHASH
305 static isc_result_t
306 inithash(dns_rbt_t *rbt);
307 #endif
308 
309 #ifdef DEBUG
310 #define inline
311 /*
312  * A little something to help out in GDB.
313  */
314 dns_name_t Name(dns_rbtnode_t *node);
315 dns_name_t
316 Name(dns_rbtnode_t *node) {
317 	dns_name_t name;
318 
319 	dns_name_init(&name, NULL);
320 	if (node != NULL)
321 		NODENAME(node, &name);
322 
323 	return (name);
324 }
325 
326 static void
327 hexdump(const char *desc, void *blob, size_t size) {
328 	char hexdump[BUFSIZ * 2 + 1];
329 	isc_buffer_t b;
330 	isc_region_t r;
331 	isc_result_t result;
332 	size_t bytes;
333 	isc_uint8_t *data = blob;
334 
335 	fprintf(stderr, "%s: ", desc);
336 	do {
337 		isc_buffer_init(&b, hexdump, sizeof(hexdump));
338 		r.base = data;
339 		r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size;
340 		result = isc_hex_totext(&r, 0, "", &b);
341 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
342 		isc_buffer_putuint8(&b, 0);
343 		fprintf(stderr, "%s", hexdump);
344 		data += bytes;
345 		size -= bytes;
346 	} while (size > 0);
347 	fprintf(stderr, "\n");
348 }
349 #endif /* DEBUG */
350 
351 /* The passed node must not be NULL. */
352 static inline dns_rbtnode_t *
353 get_subtree_root(dns_rbtnode_t *node) {
354 	while (!IS_ROOT(node)) {
355 		node = PARENT(node);
356 	}
357 
358 	return (node);
359 }
360 
361 /* Upper node is the parent of the root of the passed node's
362  * subtree. The passed node must not be NULL.
363  */
364 static inline dns_rbtnode_t *
365 get_upper_node(dns_rbtnode_t *node) {
366 	dns_rbtnode_t *root = get_subtree_root(node);
367 
368 	/*
369 	 * Return the node in the level above the argument node that points
370 	 * to the level the argument node is in.  If the argument node is in
371 	 * the top level, the return value is NULL.
372 	 */
373 	return (PARENT(root));
374 }
375 
376 size_t
377 dns__rbtnode_getdistance(dns_rbtnode_t *node) {
378 	size_t nodes = 1;
379 
380 	while (node != NULL) {
381 		if (IS_ROOT(node))
382 			break;
383 		nodes++;
384 		node = PARENT(node);
385 	}
386 
387 	return (nodes);
388 }
389 
390 /*
391  * Forward declarations.
392  */
393 static isc_result_t
394 create_node(isc_mem_t *mctx, dns_name_t *name, dns_rbtnode_t **nodep);
395 
396 #ifdef DNS_RBT_USEHASH
397 static inline void
398 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, dns_name_t *name);
399 static inline void
400 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node);
401 static void
402 rehash(dns_rbt_t *rbt, unsigned int newcount);
403 #else
404 #define hash_node(rbt, node, name)
405 #define unhash_node(rbt, node)
406 #define rehash(rbt, newcount)
407 #endif
408 
409 static inline void
410 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
411 static inline void
412 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
413 
414 static void
415 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
416 	   dns_rbtnode_t **rootp);
417 
418 static void
419 deletefromlevel(dns_rbtnode_t *delete, dns_rbtnode_t **rootp);
420 
421 static isc_result_t
422 treefix(dns_rbt_t *rbt, void *base, size_t size,
423 	dns_rbtnode_t *n, dns_name_t *name,
424 	dns_rbtdatafixer_t datafixer, void *fixer_arg,
425 	isc_uint64_t *crc);
426 
427 static isc_result_t
428 deletetree(dns_rbt_t *rbt, dns_rbtnode_t *node);
429 
430 static void
431 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, dns_rbtnode_t **nodep);
432 
433 static void
434 printnodename(dns_rbtnode_t *node, isc_boolean_t quoted, FILE *f);
435 
436 static void
437 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep);
438 
439 static isc_result_t
440 dns_rbt_zero_header(FILE *file) {
441 	/*
442 	 * Write out a zeroed header as a placeholder.  Doing this ensures
443 	 * that the file will not read while it is partially written, should
444 	 * writing fail or be interrupted.
445 	 */
446 	char buffer[HEADER_LENGTH];
447 	isc_result_t result;
448 
449 	memset(buffer, 0, HEADER_LENGTH);
450 	result = isc_stdio_write(buffer, 1, HEADER_LENGTH, file, NULL);
451 	if (result != ISC_R_SUCCESS)
452 		return (result);
453 
454 	result = fflush(file);
455 	if (result != ISC_R_SUCCESS)
456 		return (result);
457 
458 	return (ISC_R_SUCCESS);
459 }
460 
461 /*
462  * Write out the real header, including NodeDump version information
463  * and the offset of the first node.
464  *
465  * Any information stored in the rbt object itself should be stored
466  * here.
467  */
468 static isc_result_t
469 write_header(FILE *file, dns_rbt_t *rbt, isc_uint64_t first_node_offset,
470 	     isc_uint64_t crc)
471 {
472 	file_header_t header;
473 	isc_result_t result;
474 	off_t location;
475 
476 	if (FILE_VERSION[0] == '\0') {
477 		memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
478 		snprintf(FILE_VERSION, sizeof(FILE_VERSION),
479 			 "RBT Image %s %s", dns_major, dns_mapapi);
480 	}
481 
482 	memset(&header, 0, sizeof(file_header_t));
483 	memmove(header.version1, FILE_VERSION, sizeof(header.version1));
484 	memmove(header.version2, FILE_VERSION, sizeof(header.version2));
485 	header.first_node_offset = first_node_offset;
486 	header.ptrsize = (isc_uint32_t) sizeof(void *);
487 	header.bigendian = (1 == htonl(1)) ? 1 : 0;
488 
489 #ifdef DNS_RDATASET_FIXED
490 	header.rdataset_fixed = 1;
491 #else
492 	header.rdataset_fixed = 0;
493 #endif
494 
495 	header.nodecount = rbt->nodecount;
496 
497 	header.crc = crc;
498 
499 	CHECK(isc_stdio_tell(file, &location));
500 	location = dns_rbt_serialize_align(location);
501 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
502 	CHECK(isc_stdio_write(&header, 1, sizeof(file_header_t), file, NULL));
503 	CHECK(fflush(file));
504 
505 	/* Ensure we are always at the end of the file. */
506 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
507 
508  cleanup:
509 	return (result);
510 }
511 
512 static isc_result_t
513 serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left,
514 	       uintptr_t right, uintptr_t down, uintptr_t parent,
515 	       uintptr_t data, isc_uint64_t *crc)
516 {
517 	dns_rbtnode_t temp_node;
518 	off_t file_position;
519 	unsigned char *node_data;
520 	size_t datasize;
521 	isc_result_t result;
522 #ifdef DEBUG
523 	dns_name_t nodename;
524 #endif
525 
526 	INSIST(node != NULL);
527 
528 	CHECK(isc_stdio_tell(file, &file_position));
529 	file_position = dns_rbt_serialize_align(file_position);
530 	CHECK(isc_stdio_seek(file, file_position, SEEK_SET));
531 
532 	temp_node = *node;
533 	temp_node.down_is_relative = 0;
534 	temp_node.left_is_relative = 0;
535 	temp_node.right_is_relative = 0;
536 	temp_node.parent_is_relative = 0;
537 	temp_node.data_is_relative = 0;
538 	temp_node.is_mmapped = 1;
539 
540 	/*
541 	 * If the next node is not NULL, calculate the next node's location
542 	 * in the file.  Note that this will have to change when the data
543 	 * structure changes, and it also assumes that we always write the
544 	 * nodes out in list order (which we currently do.)
545 	*/
546 	if (temp_node.parent != NULL) {
547 		temp_node.parent = (dns_rbtnode_t *)(parent);
548 		temp_node.parent_is_relative = 1;
549 	}
550 	if (temp_node.left != NULL) {
551 		temp_node.left = (dns_rbtnode_t *)(left);
552 		temp_node.left_is_relative = 1;
553 	}
554 	if (temp_node.right != NULL) {
555 		temp_node.right = (dns_rbtnode_t *)(right);
556 		temp_node.right_is_relative = 1;
557 	}
558 	if (temp_node.down != NULL) {
559 		temp_node.down = (dns_rbtnode_t *)(down);
560 		temp_node.down_is_relative = 1;
561 	}
562 	if (temp_node.data != NULL) {
563 		temp_node.data = (dns_rbtnode_t *)(data);
564 		temp_node.data_is_relative = 1;
565 	}
566 
567 	node_data = (unsigned char *) node + sizeof(dns_rbtnode_t);
568 	datasize = NODE_SIZE(node) - sizeof(dns_rbtnode_t);
569 
570 	CHECK(isc_stdio_write(&temp_node, 1, sizeof(dns_rbtnode_t),
571 			      file, NULL));
572 	CHECK(isc_stdio_write(node_data, 1, datasize, file, NULL));
573 
574 #ifdef DEBUG
575 	dns_name_init(&nodename, NULL);
576 	NODENAME(node, &nodename);
577 	fprintf(stderr, "serialize ");
578 	dns_name_print(&nodename, stderr);
579 	fprintf(stderr, "\n");
580 	hexdump("node header", &temp_node,
581 		sizeof(dns_rbtnode_t));
582 	hexdump("node data", node_data, datasize);
583 #endif
584 
585 	isc_crc64_update(crc, (const isc_uint8_t *) &temp_node,
586 			sizeof(dns_rbtnode_t));
587 	isc_crc64_update(crc, (const isc_uint8_t *) node_data, datasize);
588 
589  cleanup:
590 	return (result);
591 }
592 
593 static isc_result_t
594 serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent,
595 		dns_rbtdatawriter_t datawriter, void *writer_arg,
596 		uintptr_t *where, isc_uint64_t *crc)
597 {
598 	uintptr_t left = 0, right = 0, down = 0, data = 0;
599 	off_t location = 0, offset_adjust;
600 	isc_result_t result;
601 
602 	if (node == NULL) {
603 		if (where != NULL)
604 			*where = 0;
605 		return (ISC_R_SUCCESS);
606 	}
607 
608 	/* Reserve space for current node. */
609 	CHECK(isc_stdio_tell(file, &location));
610 	location = dns_rbt_serialize_align(location);
611 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
612 
613 	offset_adjust = dns_rbt_serialize_align(location + NODE_SIZE(node));
614 	CHECK(isc_stdio_seek(file, offset_adjust, SEEK_SET));
615 
616 	/*
617 	 * Serialize the rest of the tree.
618 	 *
619 	 * WARNING: A change in the order (from left, right, down)
620 	 * will break the way the crc hash is computed.
621 	 */
622 	CHECK(serialize_nodes(file, getleft(node, NULL), location,
623 			      datawriter, writer_arg, &left, crc));
624 	CHECK(serialize_nodes(file, getright(node, NULL), location,
625 			      datawriter, writer_arg, &right, crc));
626 	CHECK(serialize_nodes(file, getdown(node, NULL), location,
627 			      datawriter, writer_arg, &down, crc));
628 
629 	if (node->data != NULL) {
630 		off_t ret;
631 
632 		CHECK(isc_stdio_tell(file, &ret));
633 		ret = dns_rbt_serialize_align(ret);
634 		CHECK(isc_stdio_seek(file, ret, SEEK_SET));
635 		data = ret;
636 
637 		datawriter(file, node->data, writer_arg, crc);
638 	}
639 
640 	/* Seek back to reserved space. */
641 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
642 
643 	/* Serialize the current node. */
644 	CHECK(serialize_node(file, node, left, right, down, parent, data, crc));
645 
646 	/* Ensure we are always at the end of the file. */
647 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
648 
649 	if (where != NULL)
650 		*where = (uintptr_t) location;
651 
652  cleanup:
653 	return (result);
654 }
655 
656 off_t
657 dns_rbt_serialize_align(off_t target) {
658 	off_t offset = target % 8;
659 
660 	if (offset == 0)
661 		return (target);
662 	else
663 		return (target + 8 - offset);
664 }
665 
666 isc_result_t
667 dns_rbt_serialize_tree(FILE *file, dns_rbt_t *rbt,
668 		       dns_rbtdatawriter_t datawriter,
669 		       void *writer_arg, off_t *offset)
670 {
671 	isc_result_t result;
672 	off_t header_position, node_position, end_position;
673 	isc_uint64_t crc;
674 
675 	REQUIRE(file != NULL);
676 
677 	CHECK(isc_file_isplainfilefd(fileno(file)));
678 
679 	isc_crc64_init(&crc);
680 
681 	CHECK(isc_stdio_tell(file, &header_position));
682 
683 	/* Write dummy header */
684 	CHECK(dns_rbt_zero_header(file));
685 
686 	/* Serialize nodes */
687 	CHECK(isc_stdio_tell(file, &node_position));
688 	CHECK(serialize_nodes(file, rbt->root, 0, datawriter,
689 			      writer_arg, NULL, &crc));
690 
691 	CHECK(isc_stdio_tell(file, &end_position));
692 	if (node_position == end_position) {
693 		CHECK(isc_stdio_seek(file, header_position, SEEK_SET));
694 		*offset = 0;
695 		return (ISC_R_SUCCESS);
696 	}
697 
698 	isc_crc64_final(&crc);
699 #ifdef DEBUG
700 	hexdump("serializing CRC", &crc, sizeof(crc));
701 #endif
702 
703 	/* Serialize header */
704 	CHECK(isc_stdio_seek(file, header_position, SEEK_SET));
705 	CHECK(write_header(file, rbt, HEADER_LENGTH, crc));
706 
707 	/* Ensure we are always at the end of the file. */
708 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
709 	*offset = dns_rbt_serialize_align(header_position);
710 
711  cleanup:
712 	return (result);
713 }
714 
715 #define CONFIRM(a) do { \
716 	if (! (a)) { \
717 		result = ISC_R_INVALIDFILE; \
718 		goto cleanup; \
719 	} \
720 } while(/*CONSTCOND*/0);
721 
722 static isc_result_t
723 treefix(dns_rbt_t *rbt, void *base, size_t filesize, dns_rbtnode_t *n,
724 	dns_name_t *name, dns_rbtdatafixer_t datafixer,
725 	void *fixer_arg, isc_uint64_t *crc)
726 {
727 	isc_result_t result = ISC_R_SUCCESS;
728 	dns_fixedname_t fixed;
729 	dns_name_t nodename, *fullname;
730 	unsigned char *node_data;
731 	dns_rbtnode_t header;
732 	size_t datasize, nodemax = filesize - sizeof(dns_rbtnode_t);
733 
734 	if (n == NULL)
735 		return (ISC_R_SUCCESS);
736 
737 	CONFIRM((void *) n >= base);
738 	CONFIRM((char *) n - (char *) base <= (int) nodemax);
739 	CONFIRM(DNS_RBTNODE_VALID(n));
740 
741 	dns_name_init(&nodename, NULL);
742 	NODENAME(n, &nodename);
743 
744 	fullname = &nodename;
745 	CONFIRM(dns_name_isvalid(fullname));
746 
747 	if (!dns_name_isabsolute(&nodename)) {
748 		dns_fixedname_init(&fixed);
749 		fullname = dns_fixedname_name(&fixed);
750 		CHECK(dns_name_concatenate(&nodename, name, fullname, NULL));
751 	}
752 
753 	/* memorize header contents prior to fixup */
754 	memmove(&header, n, sizeof(header));
755 
756 	if (n->left_is_relative) {
757 		CONFIRM(n->left <= (dns_rbtnode_t *) nodemax);
758 		n->left = getleft(n, rbt->mmap_location);
759 		n->left_is_relative = 0;
760 		CONFIRM(DNS_RBTNODE_VALID(n->left));
761 	} else
762 		CONFIRM(n->left == NULL);
763 
764 	if (n->right_is_relative) {
765 		CONFIRM(n->right <= (dns_rbtnode_t *) nodemax);
766 		n->right = getright(n, rbt->mmap_location);
767 		n->right_is_relative = 0;
768 		CONFIRM(DNS_RBTNODE_VALID(n->right));
769 	} else
770 		CONFIRM(n->right == NULL);
771 
772 	if (n->down_is_relative) {
773 		CONFIRM(n->down <= (dns_rbtnode_t *) nodemax);
774 		n->down = getdown(n, rbt->mmap_location);
775 		n->down_is_relative = 0;
776 		CONFIRM(n->down > (dns_rbtnode_t *) n);
777 		CONFIRM(DNS_RBTNODE_VALID(n->down));
778 	} else
779 		CONFIRM(n->down == NULL);
780 
781 	if (n->parent_is_relative) {
782 		CONFIRM(n->parent <= (dns_rbtnode_t *) nodemax);
783 		n->parent = getparent(n, rbt->mmap_location);
784 		n->parent_is_relative = 0;
785 		CONFIRM(n->parent < (dns_rbtnode_t *) n);
786 		CONFIRM(DNS_RBTNODE_VALID(n->parent));
787 	} else
788 		CONFIRM(n->parent == NULL);
789 
790 	if (n->data_is_relative) {
791 		CONFIRM(n->data <= (void *) filesize);
792 		n->data = getdata(n, rbt->mmap_location);
793 		n->data_is_relative = 0;
794 		CONFIRM(n->data > (void *) n);
795 	} else
796 		CONFIRM(n->data == NULL);
797 
798 	hash_node(rbt, n, fullname);
799 
800 	/* a change in the order (from left, right, down) will break hashing*/
801 	if (n->left != NULL)
802 		CHECK(treefix(rbt, base, filesize, n->left, name,
803 			      datafixer, fixer_arg, crc));
804 	if (n->right != NULL)
805 		CHECK(treefix(rbt, base, filesize, n->right, name,
806 			      datafixer, fixer_arg, crc));
807 	if (n->down != NULL)
808 		CHECK(treefix(rbt, base, filesize, n->down, fullname,
809 			      datafixer, fixer_arg, crc));
810 
811 	if (datafixer != NULL && n->data != NULL)
812 		CHECK(datafixer(n, base, filesize, fixer_arg, crc));
813 
814 	rbt->nodecount++;
815 	node_data = (unsigned char *) n + sizeof(dns_rbtnode_t);
816 	datasize = NODE_SIZE(n) - sizeof(dns_rbtnode_t);
817 
818 #ifdef DEBUG
819 	fprintf(stderr, "deserialize ");
820 	dns_name_print(&nodename, stderr);
821 	fprintf(stderr, "\n");
822 	hexdump("node header", &header,
823 		sizeof(dns_rbtnode_t));
824 	hexdump("node data", node_data, datasize);
825 #endif
826 	isc_crc64_update(crc, (const isc_uint8_t *) &header,
827 			sizeof(dns_rbtnode_t));
828 	isc_crc64_update(crc, (const isc_uint8_t *) node_data,
829 			datasize);
830 
831  cleanup:
832 	return (result);
833 }
834 
835 isc_result_t
836 dns_rbt_deserialize_tree(void *base_address, size_t filesize,
837 			 off_t header_offset, isc_mem_t *mctx,
838 			 dns_rbtdeleter_t deleter, void *deleter_arg,
839 			 dns_rbtdatafixer_t datafixer, void *fixer_arg,
840 			 dns_rbtnode_t **originp, dns_rbt_t **rbtp)
841 {
842 	isc_result_t result = ISC_R_SUCCESS;
843 	file_header_t *header;
844 	dns_rbt_t *rbt = NULL;
845 	isc_uint64_t crc;
846 
847 	REQUIRE(originp == NULL || *originp == NULL);
848 	REQUIRE(rbtp != NULL && *rbtp == NULL);
849 
850 	isc_crc64_init(&crc);
851 
852 	CHECK(dns_rbt_create(mctx, deleter, deleter_arg, &rbt));
853 
854 	rbt->mmap_location = base_address;
855 
856 	header = (file_header_t *)((char *)base_address + header_offset);
857 
858 #ifdef DNS_RDATASET_FIXED
859 	if (header->rdataset_fixed != 1) {
860 		result = ISC_R_INVALIDFILE;
861 		goto cleanup;
862 	}
863 
864 #else
865 	if (header->rdataset_fixed != 0) {
866 		result = ISC_R_INVALIDFILE;
867 		goto cleanup;
868 	}
869 #endif
870 
871 	if (header->ptrsize != (isc_uint32_t) sizeof(void *)) {
872 		result = ISC_R_INVALIDFILE;
873 		goto cleanup;
874 	}
875 	if (header->bigendian != (1 == htonl(1)) ? 1 : 0) {
876 		result = ISC_R_INVALIDFILE;
877 		goto cleanup;
878 	}
879 
880 	/* Copy other data items from the header into our rbt. */
881 	rbt->root = (dns_rbtnode_t *)((char *)base_address +
882 				header_offset + header->first_node_offset);
883 
884 	if ((header->nodecount * sizeof(dns_rbtnode_t)) > filesize) {
885 		result = ISC_R_INVALIDFILE;
886 		goto cleanup;
887 	}
888 	rehash(rbt, header->nodecount);
889 
890 	CHECK(treefix(rbt, base_address, filesize, rbt->root,
891 		      dns_rootname, datafixer, fixer_arg, &crc));
892 
893 	isc_crc64_final(&crc);
894 #ifdef DEBUG
895 	hexdump("deserializing CRC", &crc, sizeof(crc));
896 #endif
897 
898 	/* Check file hash */
899 	if (header->crc != crc) {
900 		result = ISC_R_INVALIDFILE;
901 		goto cleanup;
902 	}
903 
904 	*rbtp = rbt;
905 	if (originp != NULL)
906 		*originp = rbt->root;
907 
908 	if (header->nodecount != rbt->nodecount)
909 		result = ISC_R_INVALIDFILE;
910 
911  cleanup:
912 	if (result != ISC_R_SUCCESS && rbt != NULL) {
913 		rbt->root = NULL;
914 		rbt->nodecount = 0;
915 		dns_rbt_destroy(&rbt);
916 	}
917 
918 	return (result);
919 }
920 
921 /*
922  * Initialize a red/black tree of trees.
923  */
924 isc_result_t
925 dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter,
926 	       void *deleter_arg, dns_rbt_t **rbtp)
927 {
928 #ifdef DNS_RBT_USEHASH
929 	isc_result_t result;
930 #endif
931 	dns_rbt_t *rbt;
932 
933 	REQUIRE(mctx != NULL);
934 	REQUIRE(rbtp != NULL && *rbtp == NULL);
935 	REQUIRE(deleter == NULL ? deleter_arg == NULL : 1);
936 
937 	rbt = (dns_rbt_t *)isc_mem_get(mctx, sizeof(*rbt));
938 	if (rbt == NULL)
939 		return (ISC_R_NOMEMORY);
940 
941 	rbt->mctx = NULL;
942 	isc_mem_attach(mctx, &rbt->mctx);
943 	rbt->data_deleter = deleter;
944 	rbt->deleter_arg = deleter_arg;
945 	rbt->root = NULL;
946 	rbt->nodecount = 0;
947 	rbt->hashtable = NULL;
948 	rbt->hashsize = 0;
949 	rbt->mmap_location = NULL;
950 
951 #ifdef DNS_RBT_USEHASH
952 	result = inithash(rbt);
953 	if (result != ISC_R_SUCCESS) {
954 		isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
955 		return (result);
956 	}
957 #endif
958 
959 	rbt->magic = RBT_MAGIC;
960 
961 	*rbtp = rbt;
962 
963 	return (ISC_R_SUCCESS);
964 }
965 
966 /*
967  * Deallocate a red/black tree of trees.
968  */
969 void
970 dns_rbt_destroy(dns_rbt_t **rbtp) {
971 	RUNTIME_CHECK(dns_rbt_destroy2(rbtp, 0) == ISC_R_SUCCESS);
972 }
973 
974 isc_result_t
975 dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) {
976 	dns_rbt_t *rbt;
977 
978 	REQUIRE(rbtp != NULL && VALID_RBT(*rbtp));
979 
980 	rbt = *rbtp;
981 
982 	deletetreeflat(rbt, quantum, &rbt->root);
983 	if (rbt->root != NULL)
984 		return (ISC_R_QUOTA);
985 
986 	INSIST(rbt->nodecount == 0);
987 
988 	rbt->mmap_location = NULL;
989 
990 	if (rbt->hashtable != NULL)
991 		isc_mem_put(rbt->mctx, rbt->hashtable,
992 			    rbt->hashsize * sizeof(dns_rbtnode_t *));
993 
994 	rbt->magic = 0;
995 
996 	isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
997 	*rbtp = NULL;
998 	return (ISC_R_SUCCESS);
999 }
1000 
1001 unsigned int
1002 dns_rbt_nodecount(dns_rbt_t *rbt) {
1003 
1004 	REQUIRE(VALID_RBT(rbt));
1005 
1006 	return (rbt->nodecount);
1007 }
1008 
1009 unsigned int
1010 dns_rbt_hashsize(dns_rbt_t *rbt) {
1011 
1012 	REQUIRE(VALID_RBT(rbt));
1013 
1014 	return (rbt->hashsize);
1015 }
1016 
1017 static inline isc_result_t
1018 chain_name(dns_rbtnodechain_t *chain, dns_name_t *name,
1019 	   isc_boolean_t include_chain_end)
1020 {
1021 	dns_name_t nodename;
1022 	isc_result_t result = ISC_R_SUCCESS;
1023 	int i;
1024 
1025 	dns_name_init(&nodename, NULL);
1026 
1027 	if (include_chain_end && chain->end != NULL) {
1028 		NODENAME(chain->end, &nodename);
1029 		result = dns_name_copy(&nodename, name, NULL);
1030 		if (result != ISC_R_SUCCESS)
1031 			return (result);
1032 	} else
1033 		dns_name_reset(name);
1034 
1035 	for (i = (int)chain->level_count - 1; i >= 0; i--) {
1036 		NODENAME(chain->levels[i], &nodename);
1037 		result = dns_name_concatenate(name, &nodename, name, NULL);
1038 
1039 		if (result != ISC_R_SUCCESS)
1040 			return (result);
1041 	}
1042 	return (result);
1043 }
1044 
1045 static inline isc_result_t
1046 move_chain_to_last(dns_rbtnodechain_t *chain, dns_rbtnode_t *node) {
1047 	do {
1048 		/*
1049 		 * Go as far right and then down as much as possible,
1050 		 * as long as the rightmost node has a down pointer.
1051 		 */
1052 		while (RIGHT(node) != NULL)
1053 			node = RIGHT(node);
1054 
1055 		if (DOWN(node) == NULL)
1056 			break;
1057 
1058 		ADD_LEVEL(chain, node);
1059 		node = DOWN(node);
1060 	} while (1);
1061 
1062 	chain->end = node;
1063 
1064 	return (ISC_R_SUCCESS);
1065 }
1066 
1067 /*
1068  * Add 'name' to tree, initializing its data pointer with 'data'.
1069  */
1070 
1071 isc_result_t
1072 dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) {
1073 	/*
1074 	 * Does this thing have too many variables or what?
1075 	 */
1076 	dns_rbtnode_t **root, *parent, *child, *current, *new_current;
1077 	dns_name_t *add_name, *new_name, current_name, *prefix, *suffix;
1078 	dns_fixedname_t fixedcopy, fixedprefix, fixedsuffix, fnewname;
1079 	dns_offsets_t current_offsets;
1080 	dns_namereln_t compared;
1081 	isc_result_t result = ISC_R_SUCCESS;
1082 	dns_rbtnodechain_t chain;
1083 	unsigned int common_labels;
1084 	unsigned int nlabels, hlabels;
1085 	int order;
1086 
1087 	REQUIRE(VALID_RBT(rbt));
1088 	REQUIRE(dns_name_isabsolute(name));
1089 	REQUIRE(nodep != NULL && *nodep == NULL);
1090 
1091 	/*
1092 	 * Create a copy of the name so the original name structure is
1093 	 * not modified.
1094 	 */
1095 	dns_fixedname_init(&fixedcopy);
1096 	add_name = dns_fixedname_name(&fixedcopy);
1097 	dns_name_clone(name, add_name);
1098 
1099 	if (rbt->root == NULL) {
1100 		result = create_node(rbt->mctx, add_name, &new_current);
1101 		if (result == ISC_R_SUCCESS) {
1102 			rbt->nodecount++;
1103 			new_current->is_root = 1;
1104 			rbt->root = new_current;
1105 			*nodep = new_current;
1106 			hash_node(rbt, new_current, name);
1107 		}
1108 		return (result);
1109 	}
1110 
1111 	dns_rbtnodechain_init(&chain, rbt->mctx);
1112 
1113 	dns_fixedname_init(&fixedprefix);
1114 	dns_fixedname_init(&fixedsuffix);
1115 	prefix = dns_fixedname_name(&fixedprefix);
1116 	suffix = dns_fixedname_name(&fixedsuffix);
1117 
1118 	root = &rbt->root;
1119 	INSIST(IS_ROOT(*root));
1120 	parent = NULL;
1121 	current = NULL;
1122 	child = *root;
1123 	dns_name_init(&current_name, current_offsets);
1124 	dns_fixedname_init(&fnewname);
1125 	new_name = dns_fixedname_name(&fnewname);
1126 	nlabels = dns_name_countlabels(name);
1127 	hlabels = 0;
1128 
1129 	do {
1130 		current = child;
1131 
1132 		NODENAME(current, &current_name);
1133 		compared = dns_name_fullcompare(add_name, &current_name,
1134 						&order, &common_labels);
1135 
1136 		if (compared == dns_namereln_equal) {
1137 			*nodep = current;
1138 			result = ISC_R_EXISTS;
1139 			break;
1140 
1141 		}
1142 
1143 		if (compared == dns_namereln_none) {
1144 
1145 			if (order < 0) {
1146 				parent = current;
1147 				child = LEFT(current);
1148 
1149 			} else if (order > 0) {
1150 				parent = current;
1151 				child = RIGHT(current);
1152 
1153 			}
1154 
1155 		} else {
1156 			/*
1157 			 * This name has some suffix in common with the
1158 			 * name at the current node.  If the name at
1159 			 * the current node is shorter, that means the
1160 			 * new name should be in a subtree.  If the
1161 			 * name at the current node is longer, that means
1162 			 * the down pointer to this tree should point
1163 			 * to a new tree that has the common suffix, and
1164 			 * the non-common parts of these two names should
1165 			 * start a new tree.
1166 			 */
1167 			hlabels += common_labels;
1168 			if (compared == dns_namereln_subdomain) {
1169 				/*
1170 				 * All of the existing labels are in common,
1171 				 * so the new name is in a subtree.
1172 				 * Whack off the common labels for the
1173 				 * not-in-common part to be searched for
1174 				 * in the next level.
1175 				 */
1176 				dns_name_split(add_name, common_labels,
1177 					       add_name, NULL);
1178 
1179 				/*
1180 				 * Follow the down pointer (possibly NULL).
1181 				 */
1182 				root = &DOWN(current);
1183 
1184 				INSIST(*root == NULL ||
1185 				       (IS_ROOT(*root) &&
1186 					PARENT(*root) == current));
1187 
1188 				parent = NULL;
1189 				child = DOWN(current);
1190 				ADD_LEVEL(&chain, current);
1191 
1192 			} else {
1193 				/*
1194 				 * The number of labels in common is fewer
1195 				 * than the number of labels at the current
1196 				 * node, so the current node must be adjusted
1197 				 * to have just the common suffix, and a down
1198 				 * pointer made to a new tree.
1199 				 */
1200 
1201 				INSIST(compared == dns_namereln_commonancestor
1202 				       || compared == dns_namereln_contains);
1203 
1204 				/*
1205 				 * Ensure the number of levels in the tree
1206 				 * does not exceed the number of logical
1207 				 * levels allowed by DNSSEC.
1208 				 *
1209 				 * XXXDCL need a better error result?
1210 				 *
1211 				 * XXXDCL Since chain ancestors were removed,
1212 				 * no longer used by addonlevel(),
1213 				 * this is the only real use of chains in the
1214 				 * function.  It could be done instead with
1215 				 * a simple integer variable, but I am pressed
1216 				 * for time.
1217 				 */
1218 				if (chain.level_count ==
1219 				    (sizeof(chain.levels) /
1220 				     sizeof(*chain.levels))) {
1221 					result = ISC_R_NOSPACE;
1222 					break;
1223 				}
1224 
1225 				/*
1226 				 * Split the name into two parts, a prefix
1227 				 * which is the not-in-common parts of the
1228 				 * two names and a suffix that is the common
1229 				 * parts of them.
1230 				 */
1231 				dns_name_split(&current_name, common_labels,
1232 					       prefix, suffix);
1233 				result = create_node(rbt->mctx, suffix,
1234 						     &new_current);
1235 
1236 				if (result != ISC_R_SUCCESS)
1237 					break;
1238 
1239 				/*
1240 				 * Reproduce the tree attributes of the
1241 				 * current node.
1242 				 */
1243 				new_current->is_root = current->is_root;
1244 				if (current->nsec == DNS_RBT_NSEC_HAS_NSEC)
1245 					new_current->nsec = DNS_RBT_NSEC_NORMAL;
1246 				else
1247 					new_current->nsec = current->nsec;
1248 				PARENT(new_current)  = PARENT(current);
1249 				LEFT(new_current)    = LEFT(current);
1250 				RIGHT(new_current)   = RIGHT(current);
1251 				COLOR(new_current)   = COLOR(current);
1252 
1253 				/*
1254 				 * Fix pointers that were to the current node.
1255 				 */
1256 				if (parent != NULL) {
1257 					if (LEFT(parent) == current)
1258 						LEFT(parent) = new_current;
1259 					else
1260 						RIGHT(parent) = new_current;
1261 				}
1262 				if (LEFT(new_current) != NULL)
1263 					PARENT(LEFT(new_current)) =
1264 						new_current;
1265 				if (RIGHT(new_current) != NULL)
1266 					PARENT(RIGHT(new_current)) =
1267 						new_current;
1268 				if (*root == current)
1269 					*root = new_current;
1270 
1271 				NAMELEN(current) = prefix->length;
1272 				OFFSETLEN(current) = prefix->labels;
1273 
1274 				/*
1275 				 * Set up the new root of the next level.
1276 				 * By definition it will not be the top
1277 				 * level tree, so clear DNS_NAMEATTR_ABSOLUTE.
1278 				 */
1279 				current->is_root = 1;
1280 				PARENT(current) = new_current;
1281 				DOWN(new_current) = current;
1282 				root = &DOWN(new_current);
1283 
1284 				ADD_LEVEL(&chain, new_current);
1285 
1286 				LEFT(current) = NULL;
1287 				RIGHT(current) = NULL;
1288 
1289 				MAKE_BLACK(current);
1290 				ATTRS(current) &= ~DNS_NAMEATTR_ABSOLUTE;
1291 
1292 				rbt->nodecount++;
1293 				dns_name_getlabelsequence(name,
1294 							  nlabels - hlabels,
1295 							  hlabels, new_name);
1296 				hash_node(rbt, new_current, new_name);
1297 
1298 				if (common_labels ==
1299 				    dns_name_countlabels(add_name)) {
1300 					/*
1301 					 * The name has been added by pushing
1302 					 * the not-in-common parts down to
1303 					 * a new level.
1304 					 */
1305 					*nodep = new_current;
1306 					return (ISC_R_SUCCESS);
1307 
1308 				} else {
1309 					/*
1310 					 * The current node has no data,
1311 					 * because it is just a placeholder.
1312 					 * Its data pointer is already NULL
1313 					 * from create_node()), so there's
1314 					 * nothing more to do to it.
1315 					 */
1316 
1317 					/*
1318 					 * The not-in-common parts of the new
1319 					 * name will be inserted into the new
1320 					 * level following this loop (unless
1321 					 * result != ISC_R_SUCCESS, which
1322 					 * is tested after the loop ends).
1323 					 */
1324 					dns_name_split(add_name, common_labels,
1325 						       add_name, NULL);
1326 
1327 					break;
1328 				}
1329 
1330 			}
1331 
1332 		}
1333 
1334 	} while (child != NULL);
1335 
1336 	if (result == ISC_R_SUCCESS)
1337 		result = create_node(rbt->mctx, add_name, &new_current);
1338 
1339 	if (result == ISC_R_SUCCESS) {
1340 		addonlevel(new_current, current, order, root);
1341 		rbt->nodecount++;
1342 		*nodep = new_current;
1343 		hash_node(rbt, new_current, name);
1344 	}
1345 
1346 	return (result);
1347 }
1348 
1349 /*
1350  * Add a name to the tree of trees, associating it with some data.
1351  */
1352 isc_result_t
1353 dns_rbt_addname(dns_rbt_t *rbt, dns_name_t *name, void *data) {
1354 	isc_result_t result;
1355 	dns_rbtnode_t *node;
1356 
1357 	REQUIRE(VALID_RBT(rbt));
1358 	REQUIRE(dns_name_isabsolute(name));
1359 
1360 	node = NULL;
1361 
1362 	result = dns_rbt_addnode(rbt, name, &node);
1363 
1364 	/*
1365 	 * dns_rbt_addnode will report the node exists even when
1366 	 * it does not have data associated with it, but the
1367 	 * dns_rbt_*name functions all behave depending on whether
1368 	 * there is data associated with a node.
1369 	 */
1370 	if (result == ISC_R_SUCCESS ||
1371 	    (result == ISC_R_EXISTS && DATA(node) == NULL)) {
1372 		DATA(node) = data;
1373 		result = ISC_R_SUCCESS;
1374 	}
1375 
1376 	return (result);
1377 }
1378 
1379 /*
1380  * Find the node for "name" in the tree of trees.
1381  */
1382 isc_result_t
1383 dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname,
1384 		 dns_rbtnode_t **node, dns_rbtnodechain_t *chain,
1385 		 unsigned int options, dns_rbtfindcallback_t callback,
1386 		 void *callback_arg)
1387 {
1388 	dns_rbtnode_t *current, *last_compared, *current_root;
1389 	dns_rbtnodechain_t localchain;
1390 	dns_name_t *search_name, current_name, *callback_name;
1391 	dns_fixedname_t fixedcallbackname, fixedsearchname;
1392 	dns_namereln_t compared;
1393 	isc_result_t result, saved_result;
1394 	unsigned int common_labels;
1395 	unsigned int hlabels = 0;
1396 	int order;
1397 
1398 	REQUIRE(VALID_RBT(rbt));
1399 	REQUIRE(dns_name_isabsolute(name));
1400 	REQUIRE(node != NULL && *node == NULL);
1401 	REQUIRE((options & (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR))
1402 		!=         (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR));
1403 
1404 	/*
1405 	 * If there is a chain it needs to appear to be in a sane state,
1406 	 * otherwise a chain is still needed to generate foundname and
1407 	 * callback_name.
1408 	 */
1409 	if (chain == NULL) {
1410 		options |= DNS_RBTFIND_NOPREDECESSOR;
1411 		chain = &localchain;
1412 		dns_rbtnodechain_init(chain, rbt->mctx);
1413 	} else
1414 		dns_rbtnodechain_reset(chain);
1415 
1416 	if (rbt->root == NULL)
1417 		return (ISC_R_NOTFOUND);
1418 
1419 	/*
1420 	 * Appease GCC about variables it incorrectly thinks are
1421 	 * possibly used uninitialized.
1422 	 */
1423 	compared = dns_namereln_none;
1424 	last_compared = NULL;
1425 	order = 0;
1426 
1427 	dns_fixedname_init(&fixedcallbackname);
1428 	callback_name = dns_fixedname_name(&fixedcallbackname);
1429 
1430 	/*
1431 	 * search_name is the name segment being sought in each tree level.
1432 	 * By using a fixedname, the search_name will definitely have offsets
1433 	 * for use by any splitting.
1434 	 * By using dns_name_clone, no name data should be copied thanks to
1435 	 * the lack of bitstring labels.
1436 	 */
1437 	dns_fixedname_init(&fixedsearchname);
1438 	search_name = dns_fixedname_name(&fixedsearchname);
1439 	dns_name_clone(name, search_name);
1440 
1441 	dns_name_init(&current_name, NULL);
1442 
1443 	saved_result = ISC_R_SUCCESS;
1444 	current = rbt->root;
1445 	current_root = rbt->root;
1446 
1447 	while (current != NULL) {
1448 		NODENAME(current, &current_name);
1449 		compared = dns_name_fullcompare(search_name, &current_name,
1450 						&order, &common_labels);
1451 		/*
1452 		 * last_compared is used as a shortcut to start (or
1453 		 * continue rather) finding the stop-node of the search
1454 		 * when hashing was used (see much below in this
1455 		 * function).
1456 		 */
1457 		last_compared = current;
1458 
1459 		if (compared == dns_namereln_equal)
1460 			break;
1461 
1462 		if (compared == dns_namereln_none) {
1463 #ifdef DNS_RBT_USEHASH
1464 			/*
1465 			 * Here, current is pointing at a subtree root
1466 			 * node. We try to find a matching node using
1467 			 * the hashtable. We can get one of 3 results
1468 			 * here: (a) we locate the matching node, (b) we
1469 			 * find a node to which the current node has a
1470 			 * subdomain relation, (c) we fail to find (a)
1471 			 * or (b).
1472 			 */
1473 
1474 			dns_name_t hash_name;
1475 			dns_rbtnode_t *hnode;
1476 			dns_rbtnode_t *up_current;
1477 			unsigned int nlabels;
1478 			unsigned int tlabels = 1;
1479 			unsigned int hash;
1480 
1481 			/*
1482 			 * If there is no hash table, hashing can't be done.
1483 			 */
1484 			if (rbt->hashtable == NULL)
1485 				goto nohash;
1486 
1487 			/*
1488 			 * The case of current != current_root, that
1489 			 * means a left or right pointer was followed,
1490 			 * only happens when the algorithm fell through to
1491 			 * the traditional binary search because of a
1492 			 * bitstring label.  Since we dropped the bitstring
1493 			 * support, this should not happen.
1494 			 */
1495 			INSIST(current == current_root);
1496 
1497 			nlabels = dns_name_countlabels(search_name);
1498 
1499 			/*
1500 			 * current_root is the root of the current level, so
1501 			 * it's parent is the same as it's "up" pointer.
1502 			 */
1503 			up_current = PARENT(current_root);
1504 			dns_name_init(&hash_name, NULL);
1505 
1506 		hashagain:
1507 			/*
1508 			 * Compute the hash over the full absolute
1509 			 * name. Look for the smallest suffix match at
1510 			 * this tree level (hlevel), and then at every
1511 			 * iteration, look for the next smallest suffix
1512 			 * match (add another subdomain label to the
1513 			 * absolute name being hashed).
1514 			 */
1515 			dns_name_getlabelsequence(name,
1516 						  nlabels - tlabels,
1517 						  hlabels + tlabels,
1518 						  &hash_name);
1519 			hash = dns_name_fullhash(&hash_name, ISC_FALSE);
1520 			dns_name_getlabelsequence(search_name,
1521 						  nlabels - tlabels,
1522 						  tlabels, &hash_name);
1523 
1524 			/*
1525 			 * Walk all the nodes in the hash bucket pointed
1526 			 * by the computed hash value.
1527 			 */
1528 			for (hnode = rbt->hashtable[hash % rbt->hashsize];
1529 			     hnode != NULL;
1530 			     hnode = hnode->hashnext)
1531 			{
1532 				dns_name_t hnode_name;
1533 
1534 				if (hash != HASHVAL(hnode))
1535 					continue;
1536 				/*
1537 				 * This checks that the hashed label
1538 				 * sequence being looked up is at the
1539 				 * same tree level, so that we don't
1540 				 * match a labelsequence from some other
1541 				 * subdomain.
1542 				 */
1543 				if (get_upper_node(hnode) != up_current)
1544 					continue;
1545 
1546 				dns_name_init(&hnode_name, NULL);
1547 				NODENAME(hnode, &hnode_name);
1548 				if (dns_name_equal(&hnode_name, &hash_name))
1549 					break;
1550 			}
1551 
1552 			if (hnode != NULL) {
1553 				current = hnode;
1554 				/*
1555 				 * This is an optimization.  If hashing found
1556 				 * the right node, the next call to
1557 				 * dns_name_fullcompare() would obviously
1558 				 * return _equal or _subdomain.  Determine
1559 				 * which of those would be the case by
1560 				 * checking if the full name was hashed.  Then
1561 				 * make it look like dns_name_fullcompare
1562 				 * was called and jump to the right place.
1563 				 */
1564 				if (tlabels == nlabels) {
1565 					compared = dns_namereln_equal;
1566 					break;
1567 				} else {
1568 					common_labels = tlabels;
1569 					compared = dns_namereln_subdomain;
1570 					goto subdomain;
1571 				}
1572 			}
1573 
1574 			if (tlabels++ < nlabels)
1575 				goto hashagain;
1576 
1577 			/*
1578 			 * All of the labels have been tried against the hash
1579 			 * table.  Since we dropped the support of bitstring
1580 			 * labels, the name isn't in the table.
1581 			 */
1582 			current = NULL;
1583 			continue;
1584 
1585 		nohash:
1586 #endif /* DNS_RBT_USEHASH */
1587 			/*
1588 			 * Standard binary search tree movement.
1589 			 */
1590 			if (order < 0)
1591 				current = LEFT(current);
1592 			else
1593 				current = RIGHT(current);
1594 
1595 		} else {
1596 			/*
1597 			 * The names have some common suffix labels.
1598 			 *
1599 			 * If the number in common are equal in length to
1600 			 * the current node's name length, then follow the
1601 			 * down pointer and search in the new tree.
1602 			 */
1603 			if (compared == dns_namereln_subdomain) {
1604 #ifdef DNS_RBT_USEHASH
1605 		subdomain:
1606 #endif
1607 				/*
1608 				 * Whack off the current node's common parts
1609 				 * for the name to search in the next level.
1610 				 */
1611 				dns_name_split(search_name, common_labels,
1612 					       search_name, NULL);
1613 				hlabels += common_labels;
1614 				/*
1615 				 * This might be the closest enclosing name.
1616 				 */
1617 				if (DATA(current) != NULL ||
1618 				    (options & DNS_RBTFIND_EMPTYDATA) != 0)
1619 					*node = current;
1620 
1621 				/*
1622 				 * Point the chain to the next level.   This
1623 				 * needs to be done before 'current' is pointed
1624 				 * there because the callback in the next
1625 				 * block of code needs the current 'current',
1626 				 * but in the event the callback requests that
1627 				 * the search be stopped then the
1628 				 * DNS_R_PARTIALMATCH code at the end of this
1629 				 * function needs the chain pointed to the
1630 				 * next level.
1631 				 */
1632 				ADD_LEVEL(chain, current);
1633 
1634 				/*
1635 				 * The caller may want to interrupt the
1636 				 * downward search when certain special nodes
1637 				 * are traversed.  If this is a special node,
1638 				 * the callback is used to learn what the
1639 				 * caller wants to do.
1640 				 */
1641 				if (callback != NULL &&
1642 				    FINDCALLBACK(current)) {
1643 					result = chain_name(chain,
1644 							    callback_name,
1645 							    ISC_FALSE);
1646 					if (result != ISC_R_SUCCESS) {
1647 						dns_rbtnodechain_reset(chain);
1648 						return (result);
1649 					}
1650 
1651 					result = (callback)(current,
1652 							    callback_name,
1653 							    callback_arg);
1654 					if (result != DNS_R_CONTINUE) {
1655 						saved_result = result;
1656 						/*
1657 						 * Treat this node as if it
1658 						 * had no down pointer.
1659 						 */
1660 						current = NULL;
1661 						break;
1662 					}
1663 				}
1664 
1665 				/*
1666 				 * Finally, head to the next tree level.
1667 				 */
1668 				current = DOWN(current);
1669 				current_root = current;
1670 
1671 			} else {
1672 				/*
1673 				 * Though there are labels in common, the
1674 				 * entire name at this node is not common
1675 				 * with the search name so the search
1676 				 * name does not exist in the tree.
1677 				 */
1678 				INSIST(compared == dns_namereln_commonancestor
1679 				       || compared == dns_namereln_contains);
1680 
1681 				current = NULL;
1682 			}
1683 		}
1684 	}
1685 
1686 	/*
1687 	 * If current is not NULL, NOEXACT is not disallowing exact matches,
1688 	 * and either the node has data or an empty node is ok, return
1689 	 * ISC_R_SUCCESS to indicate an exact match.
1690 	 */
1691 	if (current != NULL && (options & DNS_RBTFIND_NOEXACT) == 0 &&
1692 	    (DATA(current) != NULL ||
1693 	     (options & DNS_RBTFIND_EMPTYDATA) != 0)) {
1694 		/*
1695 		 * Found an exact match.
1696 		 */
1697 		chain->end = current;
1698 		chain->level_matches = chain->level_count;
1699 
1700 		if (foundname != NULL)
1701 			result = chain_name(chain, foundname, ISC_TRUE);
1702 		else
1703 			result = ISC_R_SUCCESS;
1704 
1705 		if (result == ISC_R_SUCCESS) {
1706 			*node = current;
1707 			result = saved_result;
1708 		} else
1709 			*node = NULL;
1710 	} else {
1711 		/*
1712 		 * Did not find an exact match (or did not want one).
1713 		 */
1714 		if (*node != NULL) {
1715 			/*
1716 			 * ... but found a partially matching superdomain.
1717 			 * Unwind the chain to the partial match node
1718 			 * to set level_matches to the level above the node,
1719 			 * and then to derive the name.
1720 			 *
1721 			 * chain->level_count is guaranteed to be at least 1
1722 			 * here because by definition of finding a superdomain,
1723 			 * the chain is pointed to at least the first subtree.
1724 			 */
1725 			chain->level_matches = chain->level_count - 1;
1726 
1727 			while (chain->levels[chain->level_matches] != *node) {
1728 				INSIST(chain->level_matches > 0);
1729 				chain->level_matches--;
1730 			}
1731 
1732 			if (foundname != NULL) {
1733 				unsigned int saved_count = chain->level_count;
1734 
1735 				chain->level_count = chain->level_matches + 1;
1736 
1737 				result = chain_name(chain, foundname,
1738 						    ISC_FALSE);
1739 
1740 				chain->level_count = saved_count;
1741 			} else
1742 				result = ISC_R_SUCCESS;
1743 
1744 			if (result == ISC_R_SUCCESS)
1745 				result = DNS_R_PARTIALMATCH;
1746 
1747 		} else
1748 			result = ISC_R_NOTFOUND;
1749 
1750 		if (current != NULL) {
1751 			/*
1752 			 * There was an exact match but either
1753 			 * DNS_RBTFIND_NOEXACT was set, or
1754 			 * DNS_RBTFIND_EMPTYDATA was set and the node had no
1755 			 * data.  A policy decision was made to set the
1756 			 * chain to the exact match, but this is subject
1757 			 * to change if it becomes apparent that something
1758 			 * else would be more useful.  It is important that
1759 			 * this case is handled here, because the predecessor
1760 			 * setting code below assumes the match was not exact.
1761 			 */
1762 			INSIST(((options & DNS_RBTFIND_NOEXACT) != 0) ||
1763 			       ((options & DNS_RBTFIND_EMPTYDATA) == 0 &&
1764 				DATA(current) == NULL));
1765 			chain->end = current;
1766 
1767 		} else if ((options & DNS_RBTFIND_NOPREDECESSOR) != 0) {
1768 			/*
1769 			 * Ensure the chain points nowhere.
1770 			 */
1771 			chain->end = NULL;
1772 
1773 		} else {
1774 			/*
1775 			 * Since there was no exact match, the chain argument
1776 			 * needs to be pointed at the DNSSEC predecessor of
1777 			 * the search name.
1778 			 */
1779 			if (compared == dns_namereln_subdomain) {
1780 				/*
1781 				 * Attempted to follow a down pointer that was
1782 				 * NULL, which means the searched for name was
1783 				 * a subdomain of a terminal name in the tree.
1784 				 * Since there are no existing subdomains to
1785 				 * order against, the terminal name is the
1786 				 * predecessor.
1787 				 */
1788 				INSIST(chain->level_count > 0);
1789 				INSIST(chain->level_matches <
1790 				       chain->level_count);
1791 				chain->end =
1792 					chain->levels[--chain->level_count];
1793 
1794 			} else {
1795 				isc_result_t result2;
1796 
1797 				/*
1798 				 * Point current to the node that stopped
1799 				 * the search.
1800 				 *
1801 				 * With the hashing modification that has been
1802 				 * added to the algorithm, the stop node of a
1803 				 * standard binary search is not known.  So it
1804 				 * has to be found.  There is probably a more
1805 				 * clever way of doing this.
1806 				 *
1807 				 * The assignment of current to NULL when
1808 				 * the relationship is *not* dns_namereln_none,
1809 				 * even though it later gets set to the same
1810 				 * last_compared anyway, is simply to not push
1811 				 * the while loop in one more level of
1812 				 * indentation.
1813 				 */
1814 				if (compared == dns_namereln_none)
1815 					current = last_compared;
1816 				else
1817 					current = NULL;
1818 
1819 				while (current != NULL) {
1820 					NODENAME(current, &current_name);
1821 					compared = dns_name_fullcompare(
1822 								search_name,
1823 								&current_name,
1824 								&order,
1825 								&common_labels);
1826 					POST(compared);
1827 
1828 					last_compared = current;
1829 
1830 					/*
1831 					 * Standard binary search movement.
1832 					 */
1833 					if (order < 0)
1834 						current = LEFT(current);
1835 					else
1836 						current = RIGHT(current);
1837 
1838 				}
1839 
1840 				current = last_compared;
1841 
1842 				/*
1843 				 * Reached a point within a level tree that
1844 				 * positively indicates the name is not
1845 				 * present, but the stop node could be either
1846 				 * less than the desired name (order > 0) or
1847 				 * greater than the desired name (order < 0).
1848 				 *
1849 				 * If the stop node is less, it is not
1850 				 * necessarily the predecessor.  If the stop
1851 				 * node has a down pointer, then the real
1852 				 * predecessor is at the end of a level below
1853 				 * (not necessarily the next level).
1854 				 * Move down levels until the rightmost node
1855 				 * does not have a down pointer.
1856 				 *
1857 				 * When the stop node is greater, it is
1858 				 * the successor.  All the logic for finding
1859 				 * the predecessor is handily encapsulated
1860 				 * in dns_rbtnodechain_prev.  In the event
1861 				 * that the search name is less than anything
1862 				 * else in the tree, the chain is reset.
1863 				 * XXX DCL What is the best way for the caller
1864 				 *         to know that the search name has
1865 				 *         no predecessor?
1866 				 */
1867 
1868 
1869 				if (order > 0) {
1870 					if (DOWN(current) != NULL) {
1871 						ADD_LEVEL(chain, current);
1872 
1873 						result2 =
1874 						      move_chain_to_last(chain,
1875 								DOWN(current));
1876 
1877 						if (result2 != ISC_R_SUCCESS)
1878 							result = result2;
1879 					} else
1880 						/*
1881 						 * Ah, the pure and simple
1882 						 * case.  The stop node is the
1883 						 * predecessor.
1884 						 */
1885 						chain->end = current;
1886 
1887 				} else {
1888 					INSIST(order < 0);
1889 
1890 					chain->end = current;
1891 
1892 					result2 = dns_rbtnodechain_prev(chain,
1893 									NULL,
1894 									NULL);
1895 					if (result2 == ISC_R_SUCCESS ||
1896 					    result2 == DNS_R_NEWORIGIN)
1897 						;       /* Nothing. */
1898 					else if (result2 == ISC_R_NOMORE)
1899 						/*
1900 						 * There is no predecessor.
1901 						 */
1902 						dns_rbtnodechain_reset(chain);
1903 					else
1904 						result = result2;
1905 				}
1906 
1907 			}
1908 		}
1909 	}
1910 
1911 	ENSURE(*node == NULL || DNS_RBTNODE_VALID(*node));
1912 
1913 	return (result);
1914 }
1915 
1916 /*
1917  * Get the data pointer associated with 'name'.
1918  */
1919 isc_result_t
1920 dns_rbt_findname(dns_rbt_t *rbt, dns_name_t *name, unsigned int options,
1921 		 dns_name_t *foundname, void **data) {
1922 	dns_rbtnode_t *node = NULL;
1923 	isc_result_t result;
1924 
1925 	REQUIRE(data != NULL && *data == NULL);
1926 
1927 	result = dns_rbt_findnode(rbt, name, foundname, &node, NULL,
1928 				  options, NULL, NULL);
1929 
1930 	if (node != NULL &&
1931 	    (DATA(node) != NULL || (options & DNS_RBTFIND_EMPTYDATA) != 0))
1932 		*data = DATA(node);
1933 	else
1934 		result = ISC_R_NOTFOUND;
1935 
1936 	return (result);
1937 }
1938 
1939 /*
1940  * Delete a name from the tree of trees.
1941  */
1942 isc_result_t
1943 dns_rbt_deletename(dns_rbt_t *rbt, dns_name_t *name, isc_boolean_t recurse) {
1944 	dns_rbtnode_t *node = NULL;
1945 	isc_result_t result;
1946 
1947 	REQUIRE(VALID_RBT(rbt));
1948 	REQUIRE(dns_name_isabsolute(name));
1949 
1950 	/*
1951 	 * First, find the node.
1952 	 *
1953 	 * When searching, the name might not have an exact match:
1954 	 * consider a.b.a.com, b.b.a.com and c.b.a.com as the only
1955 	 * elements of a tree, which would make layer 1 a single
1956 	 * node tree of "b.a.com" and layer 2 a three node tree of
1957 	 * a, b, and c.  Deleting a.com would find only a partial depth
1958 	 * match in the first layer.  Should it be a requirement that
1959 	 * that the name to be deleted have data?  For now, it is.
1960 	 *
1961 	 * ->dirty, ->locknum and ->references are ignored; they are
1962 	 * solely the province of rbtdb.c.
1963 	 */
1964 	result = dns_rbt_findnode(rbt, name, NULL, &node, NULL,
1965 				  DNS_RBTFIND_NOOPTIONS, NULL, NULL);
1966 
1967 	if (result == ISC_R_SUCCESS) {
1968 		if (DATA(node) != NULL)
1969 			result = dns_rbt_deletenode(rbt, node, recurse);
1970 		else
1971 			result = ISC_R_NOTFOUND;
1972 
1973 	} else if (result == DNS_R_PARTIALMATCH)
1974 		result = ISC_R_NOTFOUND;
1975 
1976 	return (result);
1977 }
1978 
1979 /*
1980  * Remove a node from the tree of trees.
1981  *
1982  * NOTE WELL: deletion is *not* symmetric with addition; that is, reversing
1983  * a sequence of additions to be deletions will not generally get the
1984  * tree back to the state it started in.  For example, if the addition
1985  * of "b.c" caused the node "a.b.c" to be split, pushing "a" to its own level,
1986  * then the subsequent deletion of "b.c" will not cause "a" to be pulled up,
1987  * restoring "a.b.c".  The RBT *used* to do this kind of rejoining, but it
1988  * turned out to be a bad idea because it could corrupt an active nodechain
1989  * that had "b.c" as one of its levels -- and the RBT has no idea what
1990  * nodechains are in use by callers, so it can't even *try* to helpfully
1991  * fix them up (which would probably be doomed to failure anyway).
1992  *
1993  * Similarly, it is possible to leave the tree in a state where a supposedly
1994  * deleted node still exists.  The first case of this is obvious; take
1995  * the tree which has "b.c" on one level, pointing to "a".  Now deleted "b.c".
1996  * It was just established in the previous paragraph why we can't pull "a"
1997  * back up to its parent level.  But what happens when "a" then gets deleted?
1998  * "b.c" is left hanging around without data or children.  This condition
1999  * is actually pretty easy to detect, but ... should it really be removed?
2000  * Is a chain pointing to it?  An iterator?  Who knows!  (Note that the
2001  * references structure member cannot be looked at because it is private to
2002  * rbtdb.)  This is ugly and makes me unhappy, but after hours of trying to
2003  * make it more aesthetically proper and getting nowhere, this is the way it
2004  * is going to stay until such time as it proves to be a *real* problem.
2005  *
2006  * Finally, for reference, note that the original routine that did node
2007  * joining was called join_nodes().  It has been excised, living now only
2008  * in the CVS history, but comments have been left behind that point to it just
2009  * in case someone wants to muck with this some more.
2010  *
2011  * The one positive aspect of all of this is that joining used to have a
2012  * case where it might fail.  Without trying to join, now this function always
2013  * succeeds. It still returns isc_result_t, though, so the API wouldn't change.
2014  */
2015 isc_result_t
2016 dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, isc_boolean_t recurse)
2017 {
2018 	dns_rbtnode_t *parent;
2019 
2020 	REQUIRE(VALID_RBT(rbt));
2021 	REQUIRE(DNS_RBTNODE_VALID(node));
2022 	INSIST(rbt->nodecount != 0);
2023 
2024 	if (DOWN(node) != NULL) {
2025 		if (recurse)
2026 			RUNTIME_CHECK(deletetree(rbt, DOWN(node))
2027 				      == ISC_R_SUCCESS);
2028 		else {
2029 			if (DATA(node) != NULL && rbt->data_deleter != NULL)
2030 				rbt->data_deleter(DATA(node), rbt->deleter_arg);
2031 			DATA(node) = NULL;
2032 
2033 			/*
2034 			 * Since there is at least one node below this one and
2035 			 * no recursion was requested, the deletion is
2036 			 * complete.  The down node from this node might be all
2037 			 * by itself on a single level, so join_nodes() could
2038 			 * be used to collapse the tree (with all the caveats
2039 			 * of the comment at the start of this function).
2040 			 */
2041 			return (ISC_R_SUCCESS);
2042 		}
2043 	}
2044 
2045 	/*
2046 	 * Note the node that points to the level of the node that is being
2047 	 * deleted.  If the deleted node is the top level, parent will be set
2048 	 * to NULL.
2049 	 */
2050 	parent = get_upper_node(node);
2051 
2052 	/*
2053 	 * This node now has no down pointer (either because it didn't
2054 	 * have one to start, or because it was recursively removed).
2055 	 * So now the node needs to be removed from this level.
2056 	 */
2057 	deletefromlevel(node, parent == NULL ? &rbt->root : &DOWN(parent));
2058 
2059 	if (DATA(node) != NULL && rbt->data_deleter != NULL)
2060 		rbt->data_deleter(DATA(node), rbt->deleter_arg);
2061 
2062 	unhash_node(rbt, node);
2063 #if DNS_RBT_USEMAGIC
2064 	node->magic = 0;
2065 #endif
2066 	dns_rbtnode_refdestroy(node);
2067 
2068 	freenode(rbt, &node);
2069 
2070 	/*
2071 	 * There are now two special cases that can exist that would
2072 	 * not have existed if the tree had been created using only
2073 	 * the names that now exist in it.  (This is all related to
2074 	 * join_nodes() as described in this function's introductory comment.)
2075 	 * Both cases exist when the deleted node's parent (the node
2076 	 * that pointed to the deleted node's level) is not null but
2077 	 * it has no data:  parent != NULL && DATA(parent) == NULL.
2078 	 *
2079 	 * The first case is that the deleted node was the last on its level:
2080 	 * DOWN(parent) == NULL.  This case can only exist if the parent was
2081 	 * previously deleted -- and so now, apparently, the parent should go
2082 	 * away.  That can't be done though because there might be external
2083 	 * references to it, such as through a nodechain.
2084 	 *
2085 	 * The other case also involves a parent with no data, but with the
2086 	 * deleted node being the next-to-last node instead of the last:
2087 	 * LEFT(DOWN(parent)) == NULL && RIGHT(DOWN(parent)) == NULL.
2088 	 * Presumably now the remaining node on the level should be joined
2089 	 * with the parent, but it's already been described why that can't be
2090 	 * done.
2091 	 */
2092 
2093 	/*
2094 	 * This function never fails.
2095 	 */
2096 	return (ISC_R_SUCCESS);
2097 }
2098 
2099 void
2100 dns_rbt_namefromnode(dns_rbtnode_t *node, dns_name_t *name) {
2101 
2102 	REQUIRE(DNS_RBTNODE_VALID(node));
2103 	REQUIRE(name != NULL);
2104 	REQUIRE(name->offsets == NULL);
2105 
2106 	NODENAME(node, name);
2107 }
2108 
2109 isc_result_t
2110 dns_rbt_fullnamefromnode(dns_rbtnode_t *node, dns_name_t *name) {
2111 	dns_name_t current;
2112 	isc_result_t result;
2113 
2114 	REQUIRE(DNS_RBTNODE_VALID(node));
2115 	REQUIRE(name != NULL);
2116 	REQUIRE(name->buffer != NULL);
2117 
2118 	dns_name_init(&current, NULL);
2119 	dns_name_reset(name);
2120 
2121 	do {
2122 		INSIST(node != NULL);
2123 
2124 		NODENAME(node, &current);
2125 
2126 		result = dns_name_concatenate(name, &current, name, NULL);
2127 		if (result != ISC_R_SUCCESS)
2128 			break;
2129 
2130 		node = get_upper_node(node);
2131 	} while (! dns_name_isabsolute(name));
2132 
2133 	return (result);
2134 }
2135 
2136 char *
2137 dns_rbt_formatnodename(dns_rbtnode_t *node, char *printname, unsigned int size)
2138 {
2139 	dns_fixedname_t fixedname;
2140 	dns_name_t *name;
2141 	isc_result_t result;
2142 
2143 	REQUIRE(DNS_RBTNODE_VALID(node));
2144 	REQUIRE(printname != NULL);
2145 
2146 	dns_fixedname_init(&fixedname);
2147 	name = dns_fixedname_name(&fixedname);
2148 	result = dns_rbt_fullnamefromnode(node, name);
2149 	if (result == ISC_R_SUCCESS)
2150 		dns_name_format(name, printname, size);
2151 	else
2152 		snprintf(printname, size, "<error building name: %s>",
2153 			 dns_result_totext(result));
2154 
2155 	return (printname);
2156 }
2157 
2158 static isc_result_t
2159 create_node(isc_mem_t *mctx, dns_name_t *name, dns_rbtnode_t **nodep) {
2160 	dns_rbtnode_t *node;
2161 	isc_region_t region;
2162 	unsigned int labels;
2163 	size_t nodelen;
2164 
2165 	REQUIRE(name->offsets != NULL);
2166 
2167 	dns_name_toregion(name, &region);
2168 	labels = dns_name_countlabels(name);
2169 	ENSURE(labels > 0);
2170 
2171 	/*
2172 	 * Allocate space for the node structure, the name, and the offsets.
2173 	 */
2174 	nodelen = sizeof(dns_rbtnode_t) + region.length + labels + 1;
2175 	node = (dns_rbtnode_t *)isc_mem_get(mctx, nodelen);
2176 	if (node == NULL)
2177 		return (ISC_R_NOMEMORY);
2178 	memset(node, 0, nodelen);
2179 
2180 	node->is_root = 0;
2181 	PARENT(node) = NULL;
2182 	RIGHT(node) = NULL;
2183 	LEFT(node) = NULL;
2184 	DOWN(node) = NULL;
2185 	DATA(node) = NULL;
2186 	node->is_mmapped = 0;
2187 	node->down_is_relative = 0;
2188 	node->left_is_relative = 0;
2189 	node->right_is_relative = 0;
2190 	node->parent_is_relative = 0;
2191 	node->data_is_relative = 0;
2192 	node->rpz = 0;
2193 
2194 #ifdef DNS_RBT_USEHASH
2195 	HASHNEXT(node) = NULL;
2196 	HASHVAL(node) = 0;
2197 #endif
2198 
2199 	ISC_LINK_INIT(node, deadlink);
2200 
2201 	LOCKNUM(node) = 0;
2202 	WILD(node) = 0;
2203 	DIRTY(node) = 0;
2204 	dns_rbtnode_refinit(node, 0);
2205 	node->find_callback = 0;
2206 	node->nsec = DNS_RBT_NSEC_NORMAL;
2207 
2208 	MAKE_BLACK(node);
2209 
2210 	/*
2211 	 * The following is stored to make reconstructing a name from the
2212 	 * stored value in the node easy:  the length of the name, the number
2213 	 * of labels, whether the name is absolute or not, the name itself,
2214 	 * and the name's offsets table.
2215 	 *
2216 	 * XXX RTH
2217 	 *      The offsets table could be made smaller by eliminating the
2218 	 *      first offset, which is always 0.  This requires changes to
2219 	 *      lib/dns/name.c.
2220 	 *
2221 	 * Note: OLDOFFSETLEN *must* be assigned *after* OLDNAMELEN is assigned
2222 	 * 	 as it uses OLDNAMELEN.
2223 	 */
2224 	OLDNAMELEN(node) = NAMELEN(node) = region.length;
2225 	OLDOFFSETLEN(node) = OFFSETLEN(node) = labels;
2226 	ATTRS(node) = name->attributes;
2227 
2228 	memmove(NAME(node), region.base, region.length);
2229 	memmove(OFFSETS(node), name->offsets, labels);
2230 
2231 #if DNS_RBT_USEMAGIC
2232 	node->magic = DNS_RBTNODE_MAGIC;
2233 #endif
2234 	*nodep = node;
2235 
2236 	return (ISC_R_SUCCESS);
2237 }
2238 
2239 #ifdef DNS_RBT_USEHASH
2240 static inline void
2241 hash_add_node(dns_rbt_t *rbt, dns_rbtnode_t *node, dns_name_t *name) {
2242 	unsigned int hash;
2243 
2244 	REQUIRE(name != NULL);
2245 
2246 	HASHVAL(node) = dns_name_fullhash(name, ISC_FALSE);
2247 
2248 	hash = HASHVAL(node) % rbt->hashsize;
2249 	HASHNEXT(node) = rbt->hashtable[hash];
2250 
2251 	rbt->hashtable[hash] = node;
2252 }
2253 
2254 static isc_result_t
2255 inithash(dns_rbt_t *rbt) {
2256 	unsigned int bytes;
2257 
2258 	rbt->hashsize = RBT_HASH_SIZE;
2259 	bytes = rbt->hashsize * sizeof(dns_rbtnode_t *);
2260 	rbt->hashtable = isc_mem_get(rbt->mctx, bytes);
2261 
2262 	if (rbt->hashtable == NULL)
2263 		return (ISC_R_NOMEMORY);
2264 
2265 	memset(rbt->hashtable, 0, bytes);
2266 
2267 	return (ISC_R_SUCCESS);
2268 }
2269 
2270 static void
2271 rehash(dns_rbt_t *rbt, unsigned int newcount) {
2272 	unsigned int oldsize;
2273 	dns_rbtnode_t **oldtable;
2274 	dns_rbtnode_t *node;
2275 	unsigned int hash;
2276 	unsigned int i;
2277 
2278 	oldsize = rbt->hashsize;
2279 	oldtable = rbt->hashtable;
2280 	do {
2281 		rbt->hashsize = rbt->hashsize * 2 + 1;
2282 	} while (newcount >= (rbt->hashsize * 3));
2283 	rbt->hashtable = isc_mem_get(rbt->mctx,
2284 				     rbt->hashsize * sizeof(dns_rbtnode_t *));
2285 	if (rbt->hashtable == NULL) {
2286 		rbt->hashtable = oldtable;
2287 		rbt->hashsize = oldsize;
2288 		return;
2289 	}
2290 
2291 	INSIST(rbt->hashsize > 0);
2292 
2293 	for (i = 0; i < rbt->hashsize; i++)
2294 		rbt->hashtable[i] = NULL;
2295 
2296 	for (i = 0; i < oldsize; i++) {
2297 		node = oldtable[i];
2298 		while (node != NULL) {
2299 			hash = HASHVAL(node) % rbt->hashsize;
2300 			oldtable[i] = HASHNEXT(node);
2301 			HASHNEXT(node) = rbt->hashtable[hash];
2302 			rbt->hashtable[hash] = node;
2303 			node = oldtable[i];
2304 		}
2305 	}
2306 
2307 	isc_mem_put(rbt->mctx, oldtable, oldsize * sizeof(dns_rbtnode_t *));
2308 }
2309 
2310 static inline void
2311 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, dns_name_t *name) {
2312 	REQUIRE(DNS_RBTNODE_VALID(node));
2313 
2314 	if (rbt->nodecount >= (rbt->hashsize * 3))
2315 		rehash(rbt, rbt->nodecount);
2316 
2317 	hash_add_node(rbt, node, name);
2318 }
2319 
2320 static inline void
2321 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) {
2322 	unsigned int bucket;
2323 	dns_rbtnode_t *bucket_node;
2324 
2325 	REQUIRE(DNS_RBTNODE_VALID(node));
2326 
2327 	if (rbt->hashtable != NULL) {
2328 		bucket = HASHVAL(node) % rbt->hashsize;
2329 		bucket_node = rbt->hashtable[bucket];
2330 
2331 		if (bucket_node == node)
2332 			rbt->hashtable[bucket] = HASHNEXT(node);
2333 		else {
2334 			while (HASHNEXT(bucket_node) != node) {
2335 				INSIST(HASHNEXT(bucket_node) != NULL);
2336 				bucket_node = HASHNEXT(bucket_node);
2337 			}
2338 			HASHNEXT(bucket_node) = HASHNEXT(node);
2339 		}
2340 	}
2341 }
2342 #endif /* DNS_RBT_USEHASH */
2343 
2344 static inline void
2345 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
2346 	dns_rbtnode_t *child;
2347 
2348 	REQUIRE(DNS_RBTNODE_VALID(node));
2349 	REQUIRE(rootp != NULL);
2350 
2351 	child = RIGHT(node);
2352 	INSIST(child != NULL);
2353 
2354 	RIGHT(node) = LEFT(child);
2355 	if (LEFT(child) != NULL)
2356 		PARENT(LEFT(child)) = node;
2357 	LEFT(child) = node;
2358 
2359 	if (child != NULL)
2360 		PARENT(child) = PARENT(node);
2361 
2362 	if (IS_ROOT(node)) {
2363 		*rootp = child;
2364 		child->is_root = 1;
2365 		node->is_root = 0;
2366 
2367 	} else {
2368 		if (LEFT(PARENT(node)) == node)
2369 			LEFT(PARENT(node)) = child;
2370 		else
2371 			RIGHT(PARENT(node)) = child;
2372 	}
2373 
2374 	PARENT(node) = child;
2375 }
2376 
2377 static inline void
2378 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
2379 	dns_rbtnode_t *child;
2380 
2381 	REQUIRE(DNS_RBTNODE_VALID(node));
2382 	REQUIRE(rootp != NULL);
2383 
2384 	child = LEFT(node);
2385 	INSIST(child != NULL);
2386 
2387 	LEFT(node) = RIGHT(child);
2388 	if (RIGHT(child) != NULL)
2389 		PARENT(RIGHT(child)) = node;
2390 	RIGHT(child) = node;
2391 
2392 	if (child != NULL)
2393 		PARENT(child) = PARENT(node);
2394 
2395 	if (IS_ROOT(node)) {
2396 		*rootp = child;
2397 		child->is_root = 1;
2398 		node->is_root = 0;
2399 
2400 	} else {
2401 		if (LEFT(PARENT(node)) == node)
2402 			LEFT(PARENT(node)) = child;
2403 		else
2404 			RIGHT(PARENT(node)) = child;
2405 	}
2406 
2407 	PARENT(node) = child;
2408 }
2409 
2410 /*
2411  * This is the real workhorse of the insertion code, because it does the
2412  * true red/black tree on a single level.
2413  */
2414 static void
2415 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
2416 	   dns_rbtnode_t **rootp)
2417 {
2418 	dns_rbtnode_t *child, *root, *parent, *grandparent;
2419 	dns_name_t add_name, current_name;
2420 	dns_offsets_t add_offsets, current_offsets;
2421 
2422 	REQUIRE(rootp != NULL);
2423 	REQUIRE(DNS_RBTNODE_VALID(node) && LEFT(node) == NULL &&
2424 		RIGHT(node) == NULL);
2425 	REQUIRE(current != NULL);
2426 
2427 	root = *rootp;
2428 	if (root == NULL) {
2429 		/*
2430 		 * First node of a level.
2431 		 */
2432 		MAKE_BLACK(node);
2433 		node->is_root = 1;
2434 		PARENT(node) = current;
2435 		*rootp = node;
2436 		return;
2437 	}
2438 
2439 	child = root;
2440 	POST(child);
2441 
2442 	dns_name_init(&add_name, add_offsets);
2443 	NODENAME(node, &add_name);
2444 
2445 	dns_name_init(&current_name, current_offsets);
2446 	NODENAME(current, &current_name);
2447 
2448 	if (order < 0) {
2449 		INSIST(LEFT(current) == NULL);
2450 		LEFT(current) = node;
2451 	} else {
2452 		INSIST(RIGHT(current) == NULL);
2453 		RIGHT(current) = node;
2454 	}
2455 
2456 	INSIST(PARENT(node) == NULL);
2457 	PARENT(node) = current;
2458 
2459 	MAKE_RED(node);
2460 
2461 	while (node != root && IS_RED(PARENT(node))) {
2462 		/*
2463 		 * XXXDCL could do away with separate parent and grandparent
2464 		 * variables.  They are vestiges of the days before parent
2465 		 * pointers.  However, they make the code a little clearer.
2466 		 */
2467 
2468 		parent = PARENT(node);
2469 		grandparent = PARENT(parent);
2470 
2471 		if (parent == LEFT(grandparent)) {
2472 			child = RIGHT(grandparent);
2473 			if (child != NULL && IS_RED(child)) {
2474 				MAKE_BLACK(parent);
2475 				MAKE_BLACK(child);
2476 				MAKE_RED(grandparent);
2477 				node = grandparent;
2478 			} else {
2479 				if (node == RIGHT(parent)) {
2480 					rotate_left(parent, &root);
2481 					node = parent;
2482 					parent = PARENT(node);
2483 					grandparent = PARENT(parent);
2484 				}
2485 				MAKE_BLACK(parent);
2486 				MAKE_RED(grandparent);
2487 				rotate_right(grandparent, &root);
2488 			}
2489 		} else {
2490 			child = LEFT(grandparent);
2491 			if (child != NULL && IS_RED(child)) {
2492 				MAKE_BLACK(parent);
2493 				MAKE_BLACK(child);
2494 				MAKE_RED(grandparent);
2495 				node = grandparent;
2496 			} else {
2497 				if (node == LEFT(parent)) {
2498 					rotate_right(parent, &root);
2499 					node = parent;
2500 					parent = PARENT(node);
2501 					grandparent = PARENT(parent);
2502 				}
2503 				MAKE_BLACK(parent);
2504 				MAKE_RED(grandparent);
2505 				rotate_left(grandparent, &root);
2506 			}
2507 		}
2508 	}
2509 
2510 	MAKE_BLACK(root);
2511 	ENSURE(IS_ROOT(root));
2512 	*rootp = root;
2513 
2514 	return;
2515 }
2516 
2517 /*
2518  * This is the real workhorse of the deletion code, because it does the
2519  * true red/black tree on a single level.
2520  */
2521 static void
2522 deletefromlevel(dns_rbtnode_t *delete, dns_rbtnode_t **rootp) {
2523 	dns_rbtnode_t *child, *sibling, *parent;
2524 	dns_rbtnode_t *successor;
2525 
2526 	REQUIRE(delete != NULL);
2527 
2528 	/*
2529 	 * Verify that the parent history is (apparently) correct.
2530 	 */
2531 	INSIST((IS_ROOT(delete) && *rootp == delete) ||
2532 	       (! IS_ROOT(delete) &&
2533 		(LEFT(PARENT(delete)) == delete ||
2534 		 RIGHT(PARENT(delete)) == delete)));
2535 
2536 	child = NULL;
2537 
2538 	if (LEFT(delete) == NULL) {
2539 		if (RIGHT(delete) == NULL) {
2540 			if (IS_ROOT(delete)) {
2541 				/*
2542 				 * This is the only item in the tree.
2543 				 */
2544 				*rootp = NULL;
2545 				return;
2546 			}
2547 		} else
2548 			/*
2549 			 * This node has one child, on the right.
2550 			 */
2551 			child = RIGHT(delete);
2552 
2553 	} else if (RIGHT(delete) == NULL)
2554 		/*
2555 		 * This node has one child, on the left.
2556 		 */
2557 		child = LEFT(delete);
2558 	else {
2559 		dns_rbtnode_t holder, *tmp = &holder;
2560 
2561 		/*
2562 		 * This node has two children, so it cannot be directly
2563 		 * deleted.  Find its immediate in-order successor and
2564 		 * move it to this location, then do the deletion at the
2565 		 * old site of the successor.
2566 		 */
2567 		successor = RIGHT(delete);
2568 		while (LEFT(successor) != NULL)
2569 			successor = LEFT(successor);
2570 
2571 		/*
2572 		 * The successor cannot possibly have a left child;
2573 		 * if there is any child, it is on the right.
2574 		 */
2575 		if (RIGHT(successor) != NULL)
2576 			child = RIGHT(successor);
2577 
2578 		/*
2579 		 * Swap the two nodes; it would be simpler to just replace
2580 		 * the value being deleted with that of the successor,
2581 		 * but this rigamarole is done so the caller has complete
2582 		 * control over the pointers (and memory allocation) of
2583 		 * all of nodes.  If just the key value were removed from
2584 		 * the tree, the pointer to the node would be unchanged.
2585 		 */
2586 
2587 		/*
2588 		 * First, put the successor in the tree location of the
2589 		 * node to be deleted.  Save its existing tree pointer
2590 		 * information, which will be needed when linking up
2591 		 * delete to the successor's old location.
2592 		 */
2593 		memmove(tmp, successor, sizeof(dns_rbtnode_t));
2594 
2595 		if (IS_ROOT(delete)) {
2596 			*rootp = successor;
2597 			successor->is_root = ISC_TRUE;
2598 			delete->is_root = ISC_FALSE;
2599 
2600 		} else
2601 			if (LEFT(PARENT(delete)) == delete)
2602 				LEFT(PARENT(delete)) = successor;
2603 			else
2604 				RIGHT(PARENT(delete)) = successor;
2605 
2606 		PARENT(successor) = PARENT(delete);
2607 		LEFT(successor)   = LEFT(delete);
2608 		RIGHT(successor)  = RIGHT(delete);
2609 		COLOR(successor)  = COLOR(delete);
2610 
2611 		if (LEFT(successor) != NULL)
2612 			PARENT(LEFT(successor)) = successor;
2613 		if (RIGHT(successor) != successor)
2614 			PARENT(RIGHT(successor)) = successor;
2615 
2616 		/*
2617 		 * Now relink the node to be deleted into the
2618 		 * successor's previous tree location.  PARENT(tmp)
2619 		 * is the successor's original parent.
2620 		 */
2621 		INSIST(! IS_ROOT(delete));
2622 
2623 		if (PARENT(tmp) == delete) {
2624 			/*
2625 			 * Node being deleted was successor's parent.
2626 			 */
2627 			RIGHT(successor) = delete;
2628 			PARENT(delete) = successor;
2629 
2630 		} else {
2631 			LEFT(PARENT(tmp)) = delete;
2632 			PARENT(delete) = PARENT(tmp);
2633 		}
2634 
2635 		/*
2636 		 * Original location of successor node has no left.
2637 		 */
2638 		LEFT(delete)   = NULL;
2639 		RIGHT(delete)  = RIGHT(tmp);
2640 		COLOR(delete)  = COLOR(tmp);
2641 	}
2642 
2643 	/*
2644 	 * Remove the node by removing the links from its parent.
2645 	 */
2646 	if (! IS_ROOT(delete)) {
2647 		if (LEFT(PARENT(delete)) == delete)
2648 			LEFT(PARENT(delete)) = child;
2649 		else
2650 			RIGHT(PARENT(delete)) = child;
2651 
2652 		if (child != NULL)
2653 			PARENT(child) = PARENT(delete);
2654 
2655 	} else {
2656 		/*
2657 		 * This is the root being deleted, and at this point
2658 		 * it is known to have just one child.
2659 		 */
2660 		*rootp = child;
2661 		child->is_root = 1;
2662 		PARENT(child) = PARENT(delete);
2663 	}
2664 
2665 	/*
2666 	 * Fix color violations.
2667 	 */
2668 	if (IS_BLACK(delete)) {
2669 		parent = PARENT(delete);
2670 
2671 		while (child != *rootp && IS_BLACK(child)) {
2672 			INSIST(child == NULL || ! IS_ROOT(child));
2673 
2674 			if (LEFT(parent) == child) {
2675 				sibling = RIGHT(parent);
2676 
2677 				if (IS_RED(sibling)) {
2678 					MAKE_BLACK(sibling);
2679 					MAKE_RED(parent);
2680 					rotate_left(parent, rootp);
2681 					sibling = RIGHT(parent);
2682 				}
2683 
2684 				INSIST(sibling != NULL);
2685 
2686 				if (IS_BLACK(LEFT(sibling)) &&
2687 				    IS_BLACK(RIGHT(sibling))) {
2688 					MAKE_RED(sibling);
2689 					child = parent;
2690 
2691 				} else {
2692 
2693 					if (IS_BLACK(RIGHT(sibling))) {
2694 						MAKE_BLACK(LEFT(sibling));
2695 						MAKE_RED(sibling);
2696 						rotate_right(sibling, rootp);
2697 						sibling = RIGHT(parent);
2698 					}
2699 
2700 					COLOR(sibling) = COLOR(parent);
2701 					MAKE_BLACK(parent);
2702 					INSIST(RIGHT(sibling) != NULL);
2703 					MAKE_BLACK(RIGHT(sibling));
2704 					rotate_left(parent, rootp);
2705 					child = *rootp;
2706 				}
2707 
2708 			} else {
2709 				/*
2710 				 * Child is parent's right child.
2711 				 * Everything is done the same as above,
2712 				 * except mirrored.
2713 				 */
2714 				sibling = LEFT(parent);
2715 
2716 				if (IS_RED(sibling)) {
2717 					MAKE_BLACK(sibling);
2718 					MAKE_RED(parent);
2719 					rotate_right(parent, rootp);
2720 					sibling = LEFT(parent);
2721 				}
2722 
2723 				INSIST(sibling != NULL);
2724 
2725 				if (IS_BLACK(LEFT(sibling)) &&
2726 				    IS_BLACK(RIGHT(sibling))) {
2727 					MAKE_RED(sibling);
2728 					child = parent;
2729 
2730 				} else {
2731 					if (IS_BLACK(LEFT(sibling))) {
2732 						MAKE_BLACK(RIGHT(sibling));
2733 						MAKE_RED(sibling);
2734 						rotate_left(sibling, rootp);
2735 						sibling = LEFT(parent);
2736 					}
2737 
2738 					COLOR(sibling) = COLOR(parent);
2739 					MAKE_BLACK(parent);
2740 					INSIST(LEFT(sibling) != NULL);
2741 					MAKE_BLACK(LEFT(sibling));
2742 					rotate_right(parent, rootp);
2743 					child = *rootp;
2744 				}
2745 			}
2746 
2747 			parent = PARENT(child);
2748 		}
2749 
2750 		if (IS_RED(child))
2751 			MAKE_BLACK(child);
2752 	}
2753 }
2754 
2755 /*
2756  * This should only be used on the root of a tree, because no color fixup
2757  * is done at all.
2758  *
2759  * NOTE: No root pointer maintenance is done, because the function is only
2760  * used for two cases:
2761  * + deleting everything DOWN from a node that is itself being deleted, and
2762  * + deleting the entire tree of trees from dns_rbt_destroy.
2763  * In each case, the root pointer is no longer relevant, so there
2764  * is no need for a root parameter to this function.
2765  *
2766  * If the function is ever intended to be used to delete something where
2767  * a pointer needs to be told that this tree no longer exists,
2768  * this function would need to adjusted accordingly.
2769  */
2770 static isc_result_t
2771 deletetree(dns_rbt_t *rbt, dns_rbtnode_t *node) {
2772 	isc_result_t result = ISC_R_SUCCESS;
2773 
2774 	REQUIRE(VALID_RBT(rbt));
2775 
2776 	if (node == NULL)
2777 		return (result);
2778 
2779 	if (LEFT(node) != NULL) {
2780 		result = deletetree(rbt, LEFT(node));
2781 		if (result != ISC_R_SUCCESS)
2782 			goto done;
2783 		LEFT(node) = NULL;
2784 	}
2785 	if (RIGHT(node) != NULL) {
2786 		result = deletetree(rbt, RIGHT(node));
2787 		if (result != ISC_R_SUCCESS)
2788 			goto done;
2789 		RIGHT(node) = NULL;
2790 	}
2791 	if (DOWN(node) != NULL) {
2792 		result = deletetree(rbt, DOWN(node));
2793 		if (result != ISC_R_SUCCESS)
2794 			goto done;
2795 		DOWN(node) = NULL;
2796 	}
2797  done:
2798 	if (result != ISC_R_SUCCESS)
2799 		return (result);
2800 
2801 	if (DATA(node) != NULL && rbt->data_deleter != NULL)
2802 		rbt->data_deleter(DATA(node), rbt->deleter_arg);
2803 
2804 	unhash_node(rbt, node);
2805 #if DNS_RBT_USEMAGIC
2806 	node->magic = 0;
2807 #endif
2808 
2809 	freenode(rbt, &node);
2810 	return (result);
2811 }
2812 
2813 static void
2814 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) {
2815 	dns_rbtnode_t *node = *nodep;
2816 
2817 	if (node->is_mmapped == 0) {
2818 		isc_mem_put(rbt->mctx, node, NODE_SIZE(node));
2819 	}
2820 	*nodep = NULL;
2821 
2822 	rbt->nodecount--;
2823 }
2824 
2825 static void
2826 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, dns_rbtnode_t **nodep) {
2827 	dns_rbtnode_t *parent;
2828 	dns_rbtnode_t *node = *nodep;
2829 
2830 	REQUIRE(VALID_RBT(rbt));
2831 
2832  again:
2833 	if (node == NULL) {
2834 		*nodep = NULL;
2835 		return;
2836 	}
2837 
2838  traverse:
2839 	if (LEFT(node) != NULL) {
2840 		node = LEFT(node);
2841 		goto traverse;
2842 	}
2843 	if (DOWN(node) != NULL) {
2844 		node = DOWN(node);
2845 		goto traverse;
2846 	}
2847 
2848 	if (DATA(node) != NULL && rbt->data_deleter != NULL)
2849 		rbt->data_deleter(DATA(node), rbt->deleter_arg);
2850 
2851 	/*
2852 	 * Note: we don't call unhash_node() here as we are destroying
2853 	 * the complete rbt tree.
2854 	 */
2855 #if DNS_RBT_USEMAGIC
2856 	node->magic = 0;
2857 #endif
2858 	parent = PARENT(node);
2859 	if (RIGHT(node) != NULL)
2860 		PARENT(RIGHT(node)) = parent;
2861 	if (parent != NULL) {
2862 		if (LEFT(parent) == node)
2863 			LEFT(parent) = RIGHT(node);
2864 		else if (DOWN(parent) == node)
2865 			DOWN(parent) = RIGHT(node);
2866 	} else
2867 		parent = RIGHT(node);
2868 
2869 	freenode(rbt, &node);
2870 
2871 	node = parent;
2872 	if (quantum != 0 && --quantum == 0) {
2873 		*nodep = node;
2874 		return;
2875 	}
2876 	goto again;
2877 }
2878 
2879 static size_t
2880 getheight_helper(dns_rbtnode_t *node) {
2881 	size_t dl, dr;
2882 	size_t this_height, down_height;
2883 
2884 	if (node == NULL)
2885 		return (0);
2886 
2887 	dl = getheight_helper(LEFT(node));
2888 	dr = getheight_helper(RIGHT(node));
2889 
2890 	this_height = ISC_MAX(dl + 1, dr + 1);
2891 	down_height = getheight_helper(DOWN(node));
2892 
2893 	return (ISC_MAX(this_height, down_height));
2894 }
2895 
2896 size_t
2897 dns__rbt_getheight(dns_rbt_t *rbt) {
2898 	return (getheight_helper(rbt->root));
2899 }
2900 
2901 static isc_boolean_t
2902 check_properties_helper(dns_rbtnode_t *node) {
2903 	if (node == NULL)
2904 		return (ISC_TRUE);
2905 
2906 	if (IS_RED(node)) {
2907 		/* Root nodes must be BLACK. */
2908 		if (IS_ROOT(node))
2909 			return (ISC_FALSE);
2910 
2911 		/* Both children of RED nodes must be BLACK. */
2912 		if (IS_RED(LEFT(node)) || IS_RED(RIGHT(node)))
2913 			return (ISC_FALSE);
2914 	}
2915 
2916 	/* If node is assigned to the down_ pointer of its parent, it is
2917 	 * a subtree root and must have the flag set.
2918 	 */
2919 	if (((!PARENT(node)) ||
2920 	     (DOWN(PARENT(node)) == node)) &&
2921 	    (!IS_ROOT(node)))
2922 	{
2923 		return (ISC_FALSE);
2924 	}
2925 
2926 	/* Repeat tests with this node's children. */
2927 	return (check_properties_helper(LEFT(node)) &&
2928 		check_properties_helper(RIGHT(node)) &&
2929 		check_properties_helper(DOWN(node)));
2930 }
2931 
2932 static isc_boolean_t
2933 check_black_distance_helper(dns_rbtnode_t *node, size_t *distance) {
2934 	size_t dl, dr, dd;
2935 
2936 	if (node == NULL) {
2937 		*distance = 1;
2938 		return (ISC_TRUE);
2939 	}
2940 
2941 	if (!check_black_distance_helper(LEFT(node), &dl))
2942 		return (ISC_FALSE);
2943 
2944 	if (!check_black_distance_helper(RIGHT(node), &dr))
2945 		return (ISC_FALSE);
2946 
2947 	if (!check_black_distance_helper(DOWN(node), &dd))
2948 		return (ISC_FALSE);
2949 
2950 	/* Left and right side black node counts must match. */
2951 	if (dl != dr)
2952 		return (ISC_FALSE);
2953 
2954 	if (IS_BLACK(node))
2955 		dl++;
2956 
2957 	*distance = dl;
2958 
2959 	return (ISC_TRUE);
2960 }
2961 
2962 isc_boolean_t
2963 dns__rbt_checkproperties(dns_rbt_t *rbt) {
2964 	size_t dd;
2965 
2966 	if (!check_properties_helper(rbt->root))
2967 		return (ISC_FALSE);
2968 
2969 	/* Path from a given node to all its leaves must contain the
2970 	 * same number of BLACK child nodes. This is done separately
2971 	 * here instead of inside check_properties_helper() as
2972 	 * it would take (n log n) complexity otherwise.
2973 	 */
2974 	return (check_black_distance_helper(rbt->root, &dd));
2975 }
2976 
2977 static void
2978 dns_rbt_indent(FILE *f, int depth) {
2979 	int i;
2980 
2981 	fprintf(f, "%4d ", depth);
2982 
2983 	for (i = 0; i < depth; i++)
2984 		fprintf(f, "- ");
2985 }
2986 
2987 void
2988 dns_rbt_printnodeinfo(dns_rbtnode_t *n, FILE *f) {
2989 	fprintf(f, "Node info for nodename: ");
2990 	printnodename(n, ISC_TRUE, f);
2991 	fprintf(f, "\n");
2992 
2993 	fprintf(f, "n = %p\n", n);
2994 
2995 	fprintf(f, "Relative pointers: %s%s%s%s%s\n",
2996 			(n->parent_is_relative == 1 ? " P" : ""),
2997 			(n->right_is_relative == 1 ? " R" : ""),
2998 			(n->left_is_relative == 1 ? " L" : ""),
2999 			(n->down_is_relative == 1 ? " D" : ""),
3000 			(n->data_is_relative == 1 ? " T" : ""));
3001 
3002 	fprintf(f, "node lock address = %d\n", n->locknum);
3003 
3004 	fprintf(f, "Parent: %p\n", n->parent);
3005 	fprintf(f, "Right: %p\n", n->right);
3006 	fprintf(f, "Left: %p\n", n->left);
3007 	fprintf(f, "Down: %p\n", n->down);
3008 	fprintf(f, "daTa: %p\n", n->data);
3009 }
3010 
3011 static void
3012 printnodename(dns_rbtnode_t *node, isc_boolean_t quoted, FILE *f) {
3013 	isc_region_t r;
3014 	dns_name_t name;
3015 	char buffer[DNS_NAME_FORMATSIZE];
3016 	dns_offsets_t offsets;
3017 
3018 	r.length = NAMELEN(node);
3019 	r.base = NAME(node);
3020 
3021 	dns_name_init(&name, offsets);
3022 	dns_name_fromregion(&name, &r);
3023 
3024 	dns_name_format(&name, buffer, sizeof(buffer));
3025 
3026 	if (quoted)
3027 		fprintf(f, "\"%s\"", buffer);
3028 	else
3029 		fprintf(f, "%s", buffer);
3030 }
3031 
3032 static void
3033 print_text_helper(dns_rbtnode_t *root, dns_rbtnode_t *parent,
3034 		  int depth, const char *direction,
3035 		  void (*data_printer)(FILE *, void *), FILE *f)
3036 {
3037 	dns_rbt_indent(f, depth);
3038 
3039 	if (root != NULL) {
3040 		printnodename(root, ISC_TRUE, f);
3041 		fprintf(f, " (%s, %s", direction,
3042 			IS_RED(root) ? "RED" : "BLACK");
3043 
3044 		if ((! IS_ROOT(root) && PARENT(root) != parent) ||
3045 		    (  IS_ROOT(root) && depth > 0 &&
3046 		       DOWN(PARENT(root)) != root)) {
3047 
3048 			fprintf(f, " (BAD parent pointer! -> ");
3049 			if (PARENT(root) != NULL)
3050 				printnodename(PARENT(root), ISC_TRUE, f);
3051 			else
3052 				fprintf(f, "NULL");
3053 			fprintf(f, ")");
3054 		}
3055 
3056 		fprintf(f, ")");
3057 
3058 		if (root->data != NULL && data_printer != NULL) {
3059 			fprintf(f, " data@%p: ", root->data);
3060 			data_printer(f, root->data);
3061 		}
3062 		fprintf(f, "\n");
3063 
3064 		depth++;
3065 
3066 		if (IS_RED(root) && IS_RED(LEFT(root)))
3067 			fprintf(f, "** Red/Red color violation on left\n");
3068 		print_text_helper(LEFT(root), root, depth, "left",
3069 					  data_printer, f);
3070 
3071 		if (IS_RED(root) && IS_RED(RIGHT(root)))
3072 			fprintf(f, "** Red/Red color violation on right\n");
3073 		print_text_helper(RIGHT(root), root, depth, "right",
3074 					  data_printer, f);
3075 
3076 		print_text_helper(DOWN(root), NULL, depth, "down",
3077 					  data_printer, f);
3078 	} else {
3079 		fprintf(f, "NULL (%s)\n", direction);
3080 	}
3081 }
3082 
3083 void
3084 dns_rbt_printtext(dns_rbt_t *rbt,
3085 		  void (*data_printer)(FILE *, void *), FILE *f)
3086 {
3087 	REQUIRE(VALID_RBT(rbt));
3088 
3089 	print_text_helper(rbt->root, NULL, 0, "root", data_printer, f);
3090 }
3091 
3092 static int
3093 print_dot_helper(dns_rbtnode_t *node, unsigned int *nodecount,
3094 		 isc_boolean_t show_pointers, FILE *f)
3095 {
3096 	unsigned int l, r, d;
3097 
3098 	if (node == NULL)
3099 		return (0);
3100 
3101 	l = print_dot_helper(LEFT(node), nodecount, show_pointers, f);
3102 	r = print_dot_helper(RIGHT(node), nodecount, show_pointers, f);
3103 	d = print_dot_helper(DOWN(node), nodecount, show_pointers, f);
3104 
3105 	*nodecount += 1;
3106 
3107 	fprintf(f, "node%u[label = \"<f0> |<f1> ", *nodecount);
3108 	printnodename(node, ISC_FALSE, f);
3109 	fprintf(f, "|<f2>");
3110 
3111 	if (show_pointers)
3112 		fprintf(f, "|<f3> n=%p|<f4> p=%p", node, PARENT(node));
3113 
3114 	fprintf(f, "\"] [");
3115 
3116 	if (IS_RED(node))
3117 		fprintf(f, "color=red");
3118 	else
3119 		fprintf(f, "color=black");
3120 
3121 	/* XXXMUKS: verify that IS_ROOT() indicates subtree root and not
3122 	 * forest root.
3123 	 */
3124 	if (IS_ROOT(node))
3125 		fprintf(f, ",penwidth=3");
3126 
3127 	if (IS_EMPTY(node))
3128 		fprintf(f, ",style=filled,fillcolor=lightgrey");
3129 
3130 	fprintf(f, "];\n");
3131 
3132 	if (LEFT(node) != NULL)
3133 		fprintf(f, "\"node%u\":f0 -> \"node%u\":f1;\n", *nodecount, l);
3134 
3135 	if (DOWN(node) != NULL)
3136 		fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n",
3137 			*nodecount, d);
3138 
3139 	if (RIGHT(node) != NULL)
3140 		fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r);
3141 
3142 	return (*nodecount);
3143 }
3144 
3145 void
3146 dns_rbt_printdot(dns_rbt_t *rbt, isc_boolean_t show_pointers, FILE *f) {
3147 	unsigned int nodecount = 0;
3148 
3149 	REQUIRE(VALID_RBT(rbt));
3150 
3151 	fprintf(f, "digraph g {\n");
3152 	fprintf(f, "node [shape = record,height=.1];\n");
3153 	print_dot_helper(rbt->root, &nodecount, show_pointers, f);
3154 	fprintf(f, "}\n");
3155 }
3156 
3157 /*
3158  * Chain Functions
3159  */
3160 
3161 void
3162 dns_rbtnodechain_init(dns_rbtnodechain_t *chain, isc_mem_t *mctx) {
3163 
3164 	REQUIRE(chain != NULL);
3165 
3166 	/*
3167 	 * Initialize 'chain'.
3168 	 */
3169 	chain->mctx = mctx;
3170 	chain->end = NULL;
3171 	chain->level_count = 0;
3172 	chain->level_matches = 0;
3173 	memset(chain->levels, 0, sizeof(chain->levels));
3174 
3175 	chain->magic = CHAIN_MAGIC;
3176 }
3177 
3178 isc_result_t
3179 dns_rbtnodechain_current(dns_rbtnodechain_t *chain, dns_name_t *name,
3180 			 dns_name_t *origin, dns_rbtnode_t **node)
3181 {
3182 	isc_result_t result = ISC_R_SUCCESS;
3183 
3184 	REQUIRE(VALID_CHAIN(chain));
3185 
3186 	if (node != NULL)
3187 		*node = chain->end;
3188 
3189 	if (chain->end == NULL)
3190 		return (ISC_R_NOTFOUND);
3191 
3192 	if (name != NULL) {
3193 		NODENAME(chain->end, name);
3194 
3195 		if (chain->level_count == 0) {
3196 			/*
3197 			 * Names in the top level tree are all absolute.
3198 			 * Always make 'name' relative.
3199 			 */
3200 			INSIST(dns_name_isabsolute(name));
3201 
3202 			/*
3203 			 * This is cheaper than dns_name_getlabelsequence().
3204 			 */
3205 			name->labels--;
3206 			name->length--;
3207 			name->attributes &= ~DNS_NAMEATTR_ABSOLUTE;
3208 		}
3209 	}
3210 
3211 	if (origin != NULL) {
3212 		if (chain->level_count > 0)
3213 			result = chain_name(chain, origin, ISC_FALSE);
3214 		else
3215 			result = dns_name_copy(dns_rootname, origin, NULL);
3216 	}
3217 
3218 	return (result);
3219 }
3220 
3221 isc_result_t
3222 dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
3223 		      dns_name_t *origin)
3224 {
3225 	dns_rbtnode_t *current, *previous, *predecessor;
3226 	isc_result_t result = ISC_R_SUCCESS;
3227 	isc_boolean_t new_origin = ISC_FALSE;
3228 
3229 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
3230 
3231 	predecessor = NULL;
3232 
3233 	current = chain->end;
3234 
3235 	if (LEFT(current) != NULL) {
3236 		/*
3237 		 * Moving left one then right as far as possible is the
3238 		 * previous node, at least for this level.
3239 		 */
3240 		current = LEFT(current);
3241 
3242 		while (RIGHT(current) != NULL)
3243 			current = RIGHT(current);
3244 
3245 		predecessor = current;
3246 
3247 	} else {
3248 		/*
3249 		 * No left links, so move toward the root.  If at any point on
3250 		 * the way there the link from parent to child is a right
3251 		 * link, then the parent is the previous node, at least
3252 		 * for this level.
3253 		 */
3254 		while (! IS_ROOT(current)) {
3255 			previous = current;
3256 			current = PARENT(current);
3257 
3258 			if (RIGHT(current) == previous) {
3259 				predecessor = current;
3260 				break;
3261 			}
3262 		}
3263 	}
3264 
3265 	if (predecessor != NULL) {
3266 		/*
3267 		 * Found a predecessor node in this level.  It might not
3268 		 * really be the predecessor, however.
3269 		 */
3270 		if (DOWN(predecessor) != NULL) {
3271 			/*
3272 			 * The predecessor is really down at least one level.
3273 			 * Go down and as far right as possible, and repeat
3274 			 * as long as the rightmost node has a down pointer.
3275 			 */
3276 			do {
3277 				/*
3278 				 * XXX DCL Need to do something about origins
3279 				 * here. See whether to go down, and if so
3280 				 * whether it is truly what Bob calls a
3281 				 * new origin.
3282 				 */
3283 				ADD_LEVEL(chain, predecessor);
3284 				predecessor = DOWN(predecessor);
3285 
3286 				/* XXX DCL duplicated from above; clever
3287 				 * way to unduplicate? */
3288 
3289 				while (RIGHT(predecessor) != NULL)
3290 					predecessor = RIGHT(predecessor);
3291 			} while (DOWN(predecessor) != NULL);
3292 
3293 			/* XXX DCL probably needs work on the concept */
3294 			if (origin != NULL)
3295 				new_origin = ISC_TRUE;
3296 		}
3297 
3298 	} else if (chain->level_count > 0) {
3299 		/*
3300 		 * Dang, didn't find a predecessor in this level.
3301 		 * Got to the root of this level without having traversed
3302 		 * any right links.  Ascend the tree one level; the
3303 		 * node that points to this tree is the predecessor.
3304 		 */
3305 		INSIST(chain->level_count > 0 && IS_ROOT(current));
3306 		predecessor = chain->levels[--chain->level_count];
3307 
3308 		/* XXX DCL probably needs work on the concept */
3309 		/*
3310 		 * Don't declare an origin change when the new origin is "."
3311 		 * at the top level tree, because "." is declared as the origin
3312 		 * for the second level tree.
3313 		 */
3314 		if (origin != NULL &&
3315 		    (chain->level_count > 0 || OFFSETLEN(predecessor) > 1))
3316 			new_origin = ISC_TRUE;
3317 	}
3318 
3319 	if (predecessor != NULL) {
3320 		chain->end = predecessor;
3321 
3322 		if (new_origin) {
3323 			result = dns_rbtnodechain_current(chain, name, origin,
3324 							  NULL);
3325 			if (result == ISC_R_SUCCESS)
3326 				result = DNS_R_NEWORIGIN;
3327 
3328 		} else
3329 			result = dns_rbtnodechain_current(chain, name, NULL,
3330 							  NULL);
3331 
3332 	} else
3333 		result = ISC_R_NOMORE;
3334 
3335 	return (result);
3336 }
3337 
3338 isc_result_t
3339 dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name,
3340 		      dns_name_t *origin)
3341 {
3342 	dns_rbtnode_t *current, *successor;
3343 	isc_result_t result = ISC_R_SUCCESS;
3344 	isc_boolean_t new_origin = ISC_FALSE;
3345 
3346 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
3347 
3348 	successor = NULL;
3349 
3350 	current = chain->end;
3351 
3352 	if (DOWN(current) != NULL) {
3353 		/*
3354 		 * Don't declare an origin change when the new origin is "."
3355 		 * at the second level tree, because "." is already declared
3356 		 * as the origin for the top level tree.
3357 		 */
3358 		if (chain->level_count > 0 ||
3359 		    OFFSETLEN(current) > 1)
3360 			new_origin = ISC_TRUE;
3361 
3362 		ADD_LEVEL(chain, current);
3363 		current = DOWN(current);
3364 
3365 		while (LEFT(current) != NULL)
3366 			current = LEFT(current);
3367 
3368 		successor = current;
3369 	}
3370 
3371 	if (successor != NULL) {
3372 		chain->end = successor;
3373 
3374 		/*
3375 		 * It is not necessary to use dns_rbtnodechain_current like
3376 		 * the other functions because this function will never
3377 		 * find a node in the topmost level.  This is because the
3378 		 * root level will never be more than one name, and everything
3379 		 * in the megatree is a successor to that node, down at
3380 		 * the second level or below.
3381 		 */
3382 
3383 		if (name != NULL)
3384 			NODENAME(chain->end, name);
3385 
3386 		if (new_origin) {
3387 			if (origin != NULL)
3388 				result = chain_name(chain, origin, ISC_FALSE);
3389 
3390 			if (result == ISC_R_SUCCESS)
3391 				result = DNS_R_NEWORIGIN;
3392 
3393 		} else
3394 			result = ISC_R_SUCCESS;
3395 
3396 	} else
3397 		result = ISC_R_NOMORE;
3398 
3399 	return (result);
3400 }
3401 
3402 isc_result_t
3403 dns_rbtnodechain_nextflat(dns_rbtnodechain_t *chain, dns_name_t *name) {
3404 	dns_rbtnode_t *current, *previous, *successor;
3405 	isc_result_t result = ISC_R_SUCCESS;
3406 
3407 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
3408 
3409 	successor = NULL;
3410 
3411 	current = chain->end;
3412 
3413 	if (RIGHT(current) == NULL) {
3414 		while (! IS_ROOT(current)) {
3415 			previous = current;
3416 			current = PARENT(current);
3417 
3418 			if (LEFT(current) == previous) {
3419 				successor = current;
3420 				break;
3421 			}
3422 		}
3423 	} else {
3424 		current = RIGHT(current);
3425 
3426 		while (LEFT(current) != NULL)
3427 			current = LEFT(current);
3428 
3429 		successor = current;
3430 	}
3431 
3432 	if (successor != NULL) {
3433 		chain->end = successor;
3434 
3435 		if (name != NULL)
3436 			NODENAME(chain->end, name);
3437 
3438 		result = ISC_R_SUCCESS;
3439 	} else
3440 		result = ISC_R_NOMORE;
3441 
3442 	return (result);
3443 }
3444 
3445 isc_result_t
3446 dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
3447 		      dns_name_t *origin)
3448 {
3449 	dns_rbtnode_t *current, *previous, *successor;
3450 	isc_result_t result = ISC_R_SUCCESS;
3451 	isc_boolean_t new_origin = ISC_FALSE;
3452 
3453 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
3454 
3455 	successor = NULL;
3456 
3457 	current = chain->end;
3458 
3459 	/*
3460 	 * If there is a level below this node, the next node is the leftmost
3461 	 * node of the next level.
3462 	 */
3463 	if (DOWN(current) != NULL) {
3464 		/*
3465 		 * Don't declare an origin change when the new origin is "."
3466 		 * at the second level tree, because "." is already declared
3467 		 * as the origin for the top level tree.
3468 		 */
3469 		if (chain->level_count > 0 ||
3470 		    OFFSETLEN(current) > 1)
3471 			new_origin = ISC_TRUE;
3472 
3473 		ADD_LEVEL(chain, current);
3474 		current = DOWN(current);
3475 
3476 		while (LEFT(current) != NULL)
3477 			current = LEFT(current);
3478 
3479 		successor = current;
3480 
3481 	} else if (RIGHT(current) == NULL) {
3482 		/*
3483 		 * The successor is up, either in this level or a previous one.
3484 		 * Head back toward the root of the tree, looking for any path
3485 		 * that was via a left link; the successor is the node that has
3486 		 * that left link.  In the event the root of the level is
3487 		 * reached without having traversed any left links, ascend one
3488 		 * level and look for either a right link off the point of
3489 		 * ascent, or search for a left link upward again, repeating
3490 		 * ascends until either case is true.
3491 		 */
3492 		do {
3493 			while (! IS_ROOT(current)) {
3494 				previous = current;
3495 				current = PARENT(current);
3496 
3497 				if (LEFT(current) == previous) {
3498 					successor = current;
3499 					break;
3500 				}
3501 			}
3502 
3503 			if (successor == NULL) {
3504 				/*
3505 				 * Reached the root without having traversed
3506 				 * any left pointers, so this level is done.
3507 				 */
3508 				if (chain->level_count == 0)
3509 					break;
3510 
3511 				current = chain->levels[--chain->level_count];
3512 				new_origin = ISC_TRUE;
3513 
3514 				if (RIGHT(current) != NULL)
3515 					break;
3516 			}
3517 		} while (successor == NULL);
3518 	}
3519 
3520 	if (successor == NULL && RIGHT(current) != NULL) {
3521 		current = RIGHT(current);
3522 
3523 		while (LEFT(current) != NULL)
3524 			current = LEFT(current);
3525 
3526 		successor = current;
3527 	}
3528 
3529 	if (successor != NULL) {
3530 		chain->end = successor;
3531 
3532 		/*
3533 		 * It is not necessary to use dns_rbtnodechain_current like
3534 		 * the other functions because this function will never
3535 		 * find a node in the topmost level.  This is because the
3536 		 * root level will never be more than one name, and everything
3537 		 * in the megatree is a successor to that node, down at
3538 		 * the second level or below.
3539 		 */
3540 
3541 		if (name != NULL)
3542 			NODENAME(chain->end, name);
3543 
3544 		if (new_origin) {
3545 			if (origin != NULL)
3546 				result = chain_name(chain, origin, ISC_FALSE);
3547 
3548 			if (result == ISC_R_SUCCESS)
3549 				result = DNS_R_NEWORIGIN;
3550 
3551 		} else
3552 			result = ISC_R_SUCCESS;
3553 
3554 	} else
3555 		result = ISC_R_NOMORE;
3556 
3557 	return (result);
3558 }
3559 
3560 isc_result_t
3561 dns_rbtnodechain_first(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
3562 		       dns_name_t *name, dns_name_t *origin)
3563 
3564 {
3565 	isc_result_t result;
3566 
3567 	REQUIRE(VALID_RBT(rbt));
3568 	REQUIRE(VALID_CHAIN(chain));
3569 
3570 	dns_rbtnodechain_reset(chain);
3571 
3572 	chain->end = rbt->root;
3573 
3574 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
3575 
3576 	if (result == ISC_R_SUCCESS)
3577 		result = DNS_R_NEWORIGIN;
3578 
3579 	return (result);
3580 }
3581 
3582 isc_result_t
3583 dns_rbtnodechain_last(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
3584 		       dns_name_t *name, dns_name_t *origin)
3585 
3586 {
3587 	isc_result_t result;
3588 
3589 	REQUIRE(VALID_RBT(rbt));
3590 	REQUIRE(VALID_CHAIN(chain));
3591 
3592 	dns_rbtnodechain_reset(chain);
3593 
3594 	result = move_chain_to_last(chain, rbt->root);
3595 	if (result != ISC_R_SUCCESS)
3596 		return (result);
3597 
3598 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
3599 
3600 	if (result == ISC_R_SUCCESS)
3601 		result = DNS_R_NEWORIGIN;
3602 
3603 	return (result);
3604 }
3605 
3606 
3607 void
3608 dns_rbtnodechain_reset(dns_rbtnodechain_t *chain) {
3609 
3610 	REQUIRE(VALID_CHAIN(chain));
3611 
3612 	/*
3613 	 * Free any dynamic storage associated with 'chain', and then
3614 	 * reinitialize 'chain'.
3615 	 */
3616 	chain->end = NULL;
3617 	chain->level_count = 0;
3618 	chain->level_matches = 0;
3619 }
3620 
3621 void
3622 dns_rbtnodechain_invalidate(dns_rbtnodechain_t *chain) {
3623 	/*
3624 	 * Free any dynamic storage associated with 'chain', and then
3625 	 * invalidate 'chain'.
3626 	 */
3627 
3628 	dns_rbtnodechain_reset(chain);
3629 
3630 	chain->magic = 0;
3631 }
3632 
3633 /* XXXMUKS:
3634  *
3635  * - worth removing inline as static functions are inlined automatically
3636  *   where suitable by modern compilers.
3637  * - bump the size of dns_rbt.nodecount to size_t.
3638  * - the dumpfile header also contains a nodecount that is unsigned
3639  *   int. If large files (> 2^32 nodes) are to be supported, the
3640  *   allocation for this field should be increased.
3641  */
3642