xref: /minix/external/bsd/bind/dist/lib/dns/rbtdb.c (revision 00b67f09)
1 /*	$NetBSD: rbtdb.c,v 1.20 2015/07/08 17:28:59 christos Exp $	*/
2 
3 /*
4  * Copyright (C) 2004-2015  Internet Systems Consortium, Inc. ("ISC")
5  * Copyright (C) 1999-2003  Internet Software Consortium.
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*! \file */
21 
22 /*
23  * Principal Author: Bob Halley
24  */
25 
26 #include <config.h>
27 
28 /* #define inline */
29 
30 #ifdef HAVE_INTTYPES_H
31 #include <inttypes.h> /* uintptr_t */
32 #endif
33 
34 #include <isc/crc64.h>
35 #include <isc/event.h>
36 #include <isc/heap.h>
37 #include <isc/file.h>
38 #include <isc/hex.h>
39 #include <isc/mem.h>
40 #include <isc/mutex.h>
41 #include <isc/once.h>
42 #include <isc/platform.h>
43 #include <isc/print.h>
44 #include <isc/random.h>
45 #include <isc/refcount.h>
46 #include <isc/rwlock.h>
47 #include <isc/serial.h>
48 #include <isc/socket.h>
49 #include <isc/stdio.h>
50 #include <isc/string.h>
51 #include <isc/task.h>
52 #include <isc/time.h>
53 #include <isc/util.h>
54 
55 #include <dns/acache.h>
56 #include <dns/callbacks.h>
57 #include <dns/db.h>
58 #include <dns/dbiterator.h>
59 #include <dns/events.h>
60 #include <dns/fixedname.h>
61 #include <dns/lib.h>
62 #include <dns/log.h>
63 #include <dns/masterdump.h>
64 #include <dns/nsec.h>
65 #include <dns/nsec3.h>
66 #include <dns/rbt.h>
67 #include <dns/rpz.h>
68 #include <dns/rdata.h>
69 #include <dns/rdataset.h>
70 #include <dns/rdatasetiter.h>
71 #include <dns/rdataslab.h>
72 #include <dns/rdatastruct.h>
73 #include <dns/result.h>
74 #include <dns/stats.h>
75 #include <dns/version.h>
76 #include <dns/view.h>
77 #include <dns/zone.h>
78 #include <dns/zonekey.h>
79 
80 #ifndef WIN32
81 #include <sys/mman.h>
82 #else
83 #define PROT_READ	0x01
84 #define PROT_WRITE	0x02
85 #define MAP_PRIVATE	0x0002
86 #define MAP_FAILED	((void *)-1)
87 #endif
88 
89 #ifdef DNS_RBTDB_VERSION64
90 #include "rbtdb64.h"
91 #else
92 #include "rbtdb.h"
93 #endif
94 
95 #ifdef DNS_RBTDB_VERSION64
96 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
97 #else
98 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
99 #endif
100 
/*
 * Evaluate 'op' exactly once and store its value in the enclosing
 * function's 'result' variable.  Any value other than ISC_R_SUCCESS
 * transfers control to the enclosing function's 'failure' label.
 */
#define CHECK(op)						\
	do {							\
		if ((result = (op)) != ISC_R_SUCCESS) {		\
			goto failure;				\
		}						\
	} while (/*CONSTCOND*/0)
105 
106 /*
107  * This is the map file header for RBTDB images.  It is populated, and then
108  * written, as the LAST thing done to the file.  Writing this last (with
109  * zeros in the header area initially) will ensure that the header is only
110  * valid when the RBTDB image is also valid.
111  */
112 typedef struct rbtdb_file_header rbtdb_file_header_t;
113 
114 /* Header length, always the same size regardless of structure size */
115 #define RBTDB_HEADER_LENGTH	1024
116 
117 struct rbtdb_file_header {
118 	char version1[32];
119 	isc_uint32_t ptrsize;
120 	unsigned int bigendian:1;
121 	isc_uint64_t tree;
122 	isc_uint64_t nsec;
123 	isc_uint64_t nsec3;
124 
125 	char version2[32];  		/* repeated; must match version1 */
126 };
127 
128 
129 /*%
130  * Note that "impmagic" is not the first four bytes of the struct, so
131  * ISC_MAGIC_VALID cannot be used.
132  */
133 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
134 				 (rbtdb)->common.impmagic == RBTDB_MAGIC)
135 
136 #ifdef DNS_RBTDB_VERSION64
137 typedef isc_uint64_t                    rbtdb_serial_t;
138 /*%
139  * Make casting easier in symbolic debuggers by using different names
140  * for the 64 bit version.
141  */
142 #define dns_rbtdb_t dns_rbtdb64_t
143 #define rdatasetheader_t rdatasetheader64_t
144 #define rbtdb_version_t rbtdb_version64_t
145 
146 #define once once64
147 #define FILE_VERSION FILE_VERSION64
148 #define init_count init_count64
149 
150 #define cache_methods cache_methods64
151 #define dbiterator_methods dbiterator_methods64
152 #define rdataset_methods rdataset_methods64
153 #define rdatasetiter_methods rdatasetiter_methods64
154 #define zone_methods zone_methods64
155 
156 #define acache_callback acache_callback64
157 #define acache_cancelentry acache_cancelentry64
158 #define activeempty activeempty64
159 #define activeemtpynode activeemtpynode64
160 #define add32 add64
161 #define add_changed add_changed64
162 #define add_empty_wildcards add_empty_wildcards64
163 #define add_wildcard_magic add_wildcard_magic64
164 #define addrdataset addrdataset64
165 #define allrdatasets allrdatasets64
166 #define attach attach64
167 #define attachnode attachnode64
168 #define attachversion attachversion64
169 #define beginload beginload64
170 #define bind_rdataset bind_rdataset64
171 #define cache_find cache_find64
172 #define cache_findrdataset cache_findrdataset64
173 #define cache_findzonecut cache_findzonecut64
174 #define cache_zonecut_callback cache_zonecut_callback64
175 #define cleanup_dead_nodes cleanup_dead_nodes64
176 #define cleanup_dead_nodes_callback cleanup_dead_nodes_callback64
177 #define closeversion closeversion64
178 #define createiterator createiterator64
179 #define currentversion currentversion64
180 #define dbiterator_current dbiterator_current64
181 #define dbiterator_destroy dbiterator_destroy64
182 #define dbiterator_first dbiterator_first64
183 #define dbiterator_last dbiterator_last64
184 #define dbiterator_next dbiterator_next64
185 #define dbiterator_origin dbiterator_origin64
186 #define dbiterator_pause dbiterator_pause64
187 #define dbiterator_prev dbiterator_prev64
188 #define dbiterator_seek dbiterator_seek64
189 #define decrement_reference decrement_reference64
190 #define delete_callback delete_callback64
191 #define delete_node delete_node64
192 #define deleterdataset deleterdataset64
193 #define deserialize32 deserialize64
194 #define detach detach64
195 #define detachnode detachnode64
196 #define dump dump64
197 #define endload endload64
198 #define expire_header expire_header64
199 #define expirenode expirenode64
200 #define find_closest_nsec find_closest_nsec64
201 #define find_coveringnsec find_coveringnsec64
202 #define find_deepest_zonecut find_deepest_zonecut64
203 #define findnode findnode64
204 #define findnodeintree findnodeintree64
205 #define findnsec3node findnsec3node64
206 #define flush_deletions flush_deletions64
207 #define free_acachearray free_acachearray64
208 #define free_noqname free_noqname64
209 #define free_rbtdb free_rbtdb64
210 #define free_rbtdb_callback free_rbtdb_callback64
211 #define free_rdataset free_rdataset64
212 #define getnsec3parameters getnsec3parameters64
213 #define getoriginnode getoriginnode64
214 #define getrrsetstats getrrsetstats64
215 #define getsigningtime getsigningtime64
216 #define hashsize hashsize64
217 #define init_file_version init_file_version64
218 #define isdnssec isdnssec64
219 #define ispersistent ispersistent64
220 #define issecure issecure64
221 #define iszonesecure iszonesecure64
222 #define loading_addrdataset loading_addrdataset64
223 #define loadnode loadnode64
224 #define matchparams matchparams64
225 #define maybe_free_rbtdb maybe_free_rbtdb64
226 #define new_reference new_reference64
227 #define newversion newversion64
228 #define nodecount nodecount64
229 #define overmem overmem64
230 #define previous_closest_nsec previous_closest_nsec64
231 #define printnode printnode64
232 #define prune_tree prune_tree64
233 #define rbt_datafixer rbt_datafixer64
234 #define rbt_datawriter rbt_datawriter64
235 #define rdataset_clearprefetch rdataset_clearprefetch64
236 #define rdataset_clone rdataset_clone64
237 #define rdataset_count rdataset_count64
238 #define rdataset_current rdataset_current64
239 #define rdataset_disassociate rdataset_disassociate64
240 #define rdataset_expire rdataset_expire64
241 #define rdataset_first rdataset_first64
242 #define rdataset_getadditional rdataset_getadditional64
243 #define rdataset_getclosest rdataset_getclosest64
244 #define rdataset_getnoqname rdataset_getnoqname64
245 #define rdataset_next rdataset_next64
246 #define rdataset_putadditional rdataset_putadditional64
247 #define rdataset_setadditional rdataset_setadditional64
248 #define rdataset_settrust rdataset_settrust64
249 #define rdatasetiter_current rdatasetiter_current64
250 #define rdatasetiter_destroy rdatasetiter_destroy64
251 #define rdatasetiter_first rdatasetiter_first64
252 #define rdatasetiter_next rdatasetiter_next64
253 #define reactivate_node reactivate_node64
254 #define resign_delete resign_delete64
255 #define resign_insert resign_insert64
256 #define resign_sooner resign_sooner64
257 #define resigned resigned64
258 #define rpz_attach rpz_attach64
259 #define rpz_ready rpz_ready64
260 #define serialize serialize64
261 #define set_index set_index64
262 #define set_ttl set_ttl64
263 #define setcachestats setcachestats64
264 #define setsigningtime setsigningtime64
265 #define settask settask64
266 #define setup_delegation setup_delegation64
267 #define subtractrdataset subtractrdataset64
268 #define ttl_sooner ttl_sooner64
269 #define update_cachestats update_cachestats64
270 #define update_header update_header64
271 #define update_newheader update_newheader64
272 #define update_rrsetstats update_rrsetstats64
273 #define zone_find zone_find64
274 #define zone_findrdataset zone_findrdataset64
275 #define zone_findzonecut zone_findzonecut64
276 #define zone_zonecut_callback zone_zonecut_callback64
277 
278 #else
279 typedef isc_uint32_t                    rbtdb_serial_t;
280 #endif
281 
282 typedef isc_uint32_t                    rbtdb_rdatatype_t;
283 
/*
 * An rbtdb_rdatatype_t packs two rdata types into one 32-bit value: the
 * "base" type in the low 16 bits and the "extension" type in the high
 * 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)	extracts the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)	extracts the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(b, e)	builds the packed value from base 'b' and
 *				extension 'e'.
 *
 * In RBTDB_RDATATYPE_VALUE both operands are converted to
 * rbtdb_rdatatype_t *before* shifting.  Shifting the int-promoted value
 * of a narrower type would overflow a signed int for any extension type
 * >= 0x8000, which is undefined behaviour.  The expansion is fully
 * parenthesized so it can be used inside larger expressions.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
			     (rbtdb_rdatatype_t)(b)))
287 
288 #define RBTDB_RDATATYPE_SIGNSEC \
289 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
290 #define RBTDB_RDATATYPE_SIGNSEC3 \
291 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
292 #define RBTDB_RDATATYPE_SIGNS \
293 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
294 #define RBTDB_RDATATYPE_SIGCNAME \
295 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
296 #define RBTDB_RDATATYPE_SIGDNAME \
297 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
298 #define RBTDB_RDATATYPE_SIGDDS \
299 		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
300 #define RBTDB_RDATATYPE_NCACHEANY \
301 		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
302 
303 /*
304  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
305  * Using rwlock is effective with regard to lookup performance only when
306  * it is implemented in an efficient way.
307  * Otherwise, it is generally wise to stick to the simple locking since rwlock
308  * would require more memory or can even make lookups slower due to its own
309  * overhead (when it internally calls mutex locks).
310  */
311 #ifdef ISC_RWLOCK_USEATOMIC
312 #define DNS_RBTDB_USERWLOCK 1
313 #else
314 #define DNS_RBTDB_USERWLOCK 0
315 #endif
316 
317 #if DNS_RBTDB_USERWLOCK
318 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
319 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
320 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
321 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
322 #else
323 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
324 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
325 #define RBTDB_LOCK(l, t)        LOCK(l)
326 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
327 #endif
328 
329 /*
330  * Since node locking is sensitive to both performance and memory footprint,
331  * we need some trick here.  If we have both high-performance rwlock and
332  * high performance and small-memory reference counters, we use rwlock for
333  * node lock and isc_refcount for node references.  In this case, we don't have
334  * to protect the access to the counters by locks.
335  * Otherwise, we simply use ordinary mutex lock for node locking, and use
336  * simple integers as reference counters which is protected by the lock.
337  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
338  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
339  * counters first and then protect other parts of a node as read-only data.
340  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
341  * provided for these special cases.  When we can use the efficient backend
342  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
343  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
344  * section including the access to the reference counter.
345  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
346  * section is also protected by NODE_STRONGLOCK().
347  */
348 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
349 typedef isc_rwlock_t nodelock_t;
350 
351 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
352 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
353 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
354 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
355 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
356 
357 #define NODE_STRONGLOCK(l)      ((void)0)
358 #define NODE_STRONGUNLOCK(l)    ((void)0)
359 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
360 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
361 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
362 #else
363 typedef isc_mutex_t nodelock_t;
364 
365 #define NODE_INITLOCK(l)        isc_mutex_init(l)
366 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
367 #define NODE_LOCK(l, t)         LOCK(l)
368 #define NODE_UNLOCK(l, t)       UNLOCK(l)
369 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
370 
371 #define NODE_STRONGLOCK(l)      LOCK(l)
372 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
373 #define NODE_WEAKLOCK(l, t)     ((void)0)
374 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
375 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
376 #endif
377 
378 /*%
379  * Whether to rate-limit updating the LRU to avoid possible thread contention.
380  * Our performance measurement has shown the cost is marginal, so it's defined
381  * to be 0 by default either with or without threads.
382  */
383 #ifndef DNS_RBTDB_LIMITLRUUPDATE
384 #define DNS_RBTDB_LIMITLRUUPDATE 0
385 #endif
386 
387 /*
388  * Allow clients with a virtual time of up to 5 minutes in the past to see
389  * records that would otherwise have expired.
390  */
391 #define RBTDB_VIRTUAL 300
392 
393 struct noqname {
394 	dns_name_t 	name;
395 	void *     	neg;
396 	void *     	negsig;
397 	dns_rdatatype_t	type;
398 };
399 
400 typedef struct acachectl acachectl_t;
401 
402 typedef struct rdatasetheader {
403 	/*%
404 	 * Locked by the owning node's lock.
405 	 */
406 	rbtdb_serial_t                  serial;
407 	dns_ttl_t                       rdh_ttl;
408 	rbtdb_rdatatype_t               type;
409 	isc_uint16_t                    attributes;
410 	dns_trust_t                     trust;
411 	struct noqname                  *noqname;
412 	struct noqname                  *closest;
413 	unsigned int 			is_mmapped : 1;
414 	unsigned int 			next_is_relative : 1;
415 	unsigned int 			node_is_relative : 1;
416 	/*%<
417 	 * We don't use the LIST macros, because the LIST structure has
418 	 * both head and tail pointers, and is doubly linked.
419 	 */
420 
421 	struct rdatasetheader           *next;
422 	/*%<
423 	 * If this is the top header for an rdataset, 'next' points
424 	 * to the top header for the next rdataset (i.e., the next type).
425 	 * Otherwise, it points up to the header whose down pointer points
426 	 * at this header.
427 	 */
428 
429 	struct rdatasetheader           *down;
430 	/*%<
431 	 * Points to the header for the next older version of
432 	 * this rdataset.
433 	 */
434 
435 	isc_uint32_t                    count;
436 	/*%<
437 	 * Monotonously increased every time this rdataset is bound so that
438 	 * it is used as the base of the starting point in DNS responses
439 	 * when the "cyclic" rrset-order is required.  Since the ordering
440 	 * should not be so crucial, no lock is set for the counter for
441 	 * performance reasons.
442 	 */
443 
444 	acachectl_t                     *additional_auth;
445 	acachectl_t                     *additional_glue;
446 
447 	dns_rbtnode_t                   *node;
448 	isc_stdtime_t                   last_used;
449 	ISC_LINK(struct rdatasetheader) link;
450 
451 	unsigned int                    heap_index;
452 	/*%<
453 	 * Used for TTL-based cache cleaning.
454 	 */
455 	isc_stdtime_t                   resign;
456 } rdatasetheader_t;
457 
458 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
459 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
460 
461 #define RDATASET_ATTR_NONEXISTENT       0x0001
462 #define RDATASET_ATTR_STALE             0x0002
463 #define RDATASET_ATTR_IGNORE            0x0004
464 #define RDATASET_ATTR_RETAIN            0x0008
465 #define RDATASET_ATTR_NXDOMAIN          0x0010
466 #define RDATASET_ATTR_RESIGN            0x0020
467 #define RDATASET_ATTR_STATCOUNT         0x0040
468 #define RDATASET_ATTR_OPTOUT            0x0080
469 #define RDATASET_ATTR_NEGATIVE          0x0100
470 #define RDATASET_ATTR_PREFETCH          0x0200
471 
/*
 * Argument record referenced by the 'cbarg' member of struct acachectl.
 * It names a database, a node and an rdataset header together with an
 * additional-data type and a count.
 *
 * NOTE(review): presumably this is the argument handed to the acache
 * callback for an entry -- confirm against acache_callback().
 */
472 typedef struct acache_cbarg {
473 	dns_rdatasetadditional_t        type;
474 	unsigned int                    count;
475 	dns_db_t                        *db;
476 	dns_dbnode_t                    *node;
477 	rdatasetheader_t                *header;
478 } acache_cbarg_t;
479 
/*
 * Pairs an acache entry with its callback argument.  An rdatasetheader
 * refers to records of this type through its 'additional_auth' and
 * 'additional_glue' members.
 */
480 struct acachectl {
481 	dns_acacheentry_t               *entry;
482 	acache_cbarg_t                  *cbarg;
483 };
484 
485 /*
486  * XXX
487  * When the cache will pre-expire data (due to memory low or other
488  * situations) before the rdataset's TTL has expired, it MUST
489  * respect the RETAIN bit and not expire the data until its TTL is
490  * expired.
491  */
492 
493 #undef IGNORE                   /* WIN32 winbase.h defines this. */
494 
/*
 * Attribute predicates.  Each takes a pointer to an object with an
 * 'attributes' bit mask (an rdatasetheader_t) and evaluates to 1 or 0.
 * All of them are thin wrappers around a single bit test; EXISTS() is
 * the negation of NONEXISTENT().
 */
#define RDH_ATTR_ISSET(header, attr) \
	(((header)->attributes & (attr)) != 0)

#define EXISTS(header) \
	(!RDH_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_OPTOUT)
#define NEGATIVE(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_NEGATIVE)
#define PREFETCH(header) \
	RDH_ATTR_ISSET(header, RDATASET_ATTR_PREFETCH)
513 
514 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
515 
516 /*%
517  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
518  * There is a tradeoff issue about configuring this value: if this is too
519  * small, it may cause heavier contention between threads; if this is too large,
520  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
521  * The default value should work well for most environments, but this can
522  * also be configurable at compilation time via the
523  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
524  * 1 due to the assumption of overmem_purge().
525  */
526 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
527 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
528 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
529 #else
530 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
531 #endif
532 #else
533 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
534 #endif	/* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
535 
/*
 * A node lock together with the state it guards: a reference counter and
 * an 'exiting' flag.  struct dns_rbtdb refers to these through its
 * 'node_locks' pointer and records how many there are in
 * 'node_lock_count'.
 */
536 typedef struct {
537 	nodelock_t                      lock;
538 	/* Protected in the refcount routines. */
539 	isc_refcount_t                  references;
540 	/* Locked by lock. */
541 	isc_boolean_t                   exiting;
542 } rbtdb_nodelock_t;
543 
/*
 * List element that records a tree node together with a 'dirty' flag.
 * Each rbtdb_version keeps a list of these in its 'changed_list' member.
 */
544 typedef struct rbtdb_changed {
545 	dns_rbtnode_t *                 node;
546 	isc_boolean_t                   dirty;
547 	ISC_LINK(struct rbtdb_changed)  link;
548 } rbtdb_changed_t;
549 
550 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
551 
/*
 * Security state stored in the 'secure' member of an rbtdb_version.
 * The enumerators are numbered in ascending order from insecure to
 * secure.
 */
typedef enum {
	dns_db_insecure = 0,
	dns_db_partial = 1,
	dns_db_secure = 2
} dns_db_secure_t;
557 
558 typedef struct dns_rbtdb dns_rbtdb_t;
559 
/* Why a record is being expired from the cache. */
typedef enum {
	expire_lru = 0,
	expire_ttl = 1,
	expire_flush = 2
} expire_t;
566 
567 typedef struct rbtdb_version {
568 	/* Not locked */
569 	rbtdb_serial_t                  serial;
570 	dns_rbtdb_t *			rbtdb;
571 	/*
572 	 * Protected in the refcount routines.
573 	 * XXXJT: should we change the lock policy based on the refcount
574 	 * performance?
575 	 */
576 	isc_refcount_t                  references;
577 	/* Locked by database lock. */
578 	isc_boolean_t                   writer;
579 	isc_boolean_t                   commit_ok;
580 	rbtdb_changedlist_t             changed_list;
581 	rdatasetheaderlist_t		resigned_list;
582 	ISC_LINK(struct rbtdb_version)  link;
583 	dns_db_secure_t			secure;
584 	isc_boolean_t			havensec3;
585 	/* NSEC3 parameters */
586 	dns_hash_t			hash;
587 	isc_uint8_t			flags;
588 	isc_uint16_t			iterations;
589 	isc_uint8_t			salt_length;
590 	unsigned char			salt[DNS_NSEC3_SALTSIZE];
591 } rbtdb_version_t;
592 
593 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
594 
595 struct dns_rbtdb {
596 	/* Unlocked. */
597 	dns_db_t                        common;
598 	/* Locks the data in this struct */
599 #if DNS_RBTDB_USERWLOCK
600 	isc_rwlock_t                    lock;
601 #else
602 	isc_mutex_t                     lock;
603 #endif
604 	/* Locks the tree structure (prevents nodes appearing/disappearing) */
605 	isc_rwlock_t                    tree_lock;
606 	/* Locks for individual tree nodes */
607 	unsigned int                    node_lock_count;
608 	rbtdb_nodelock_t *              node_locks;
609 	dns_rbtnode_t *                 origin_node;
610 	dns_stats_t *			rrsetstats; /* cache DB only */
611 	isc_stats_t *			cachestats; /* cache DB only */
612 	/* Locked by lock. */
613 	unsigned int                    active;
614 	isc_refcount_t                  references;
615 	unsigned int                    attributes;
616 	rbtdb_serial_t                  current_serial;
617 	rbtdb_serial_t                  least_serial;
618 	rbtdb_serial_t                  next_serial;
619 	rbtdb_version_t *               current_version;
620 	rbtdb_version_t *               future_version;
621 	rbtdb_versionlist_t             open_versions;
622 	isc_task_t *                    task;
623 	dns_dbnode_t                    *soanode;
624 	dns_dbnode_t                    *nsnode;
625 
626 	/*
627 	 * This is a linked list used to implement the LRU cache.  There will
628 	 * be node_lock_count linked lists here.  Nodes in bucket 1 will be
629 	 * placed on the linked list rdatasets[1].
630 	 */
631 	rdatasetheaderlist_t            *rdatasets;
632 
633 	/*%
634 	 * Temporary storage for stale cache nodes and dynamically deleted
635 	 * nodes that await being cleaned up.
636 	 */
637 	rbtnodelist_t                   *deadnodes;
638 
639 	/*
640 	 * Heaps.  These are used for TTL based expiry in a cache,
641 	 * or for zone resigning in a zone DB.  hmctx is the memory
642 	 * context to use for the heap (which differs from the main
643 	 * database memory context in the case of a cache).
644 	 */
645 	isc_mem_t *			hmctx;
646 	isc_heap_t                      **heaps;
647 
648 	/*
649 	 * Base values for the mmap() code.
650 	 */
651 	void *				mmap_location;
652 	size_t				mmap_size;
653 
654 	/* Locked by tree_lock. */
655 	dns_rbt_t *                     tree;
656 	dns_rbt_t *			nsec;
657 	dns_rbt_t *			nsec3;
658 	dns_rpz_zones_t			*rpzs;
659 	dns_rpz_num_t			rpz_num;
660 	dns_rpz_zones_t			*load_rpzs;
661 
662 	/* Unlocked */
663 	unsigned int                    quantum;
664 };
665 
666 #define RBTDB_ATTR_LOADED               0x01
667 #define RBTDB_ATTR_LOADING              0x02
668 
669 /*%
670  * Search Context
671  */
672 typedef struct {
673 	dns_rbtdb_t *           rbtdb;
674 	rbtdb_version_t *       rbtversion;
675 	rbtdb_serial_t          serial;
676 	unsigned int            options;
677 	dns_rbtnodechain_t      chain;
678 	isc_boolean_t           copy_name;
679 	isc_boolean_t           need_cleanup;
680 	isc_boolean_t           wild;
681 	dns_rbtnode_t *         zonecut;
682 	rdatasetheader_t *      zonecut_rdataset;
683 	rdatasetheader_t *      zonecut_sigrdataset;
684 	dns_fixedname_t         zonecut_name;
685 	isc_stdtime_t           now;
686 } rbtdb_search_t;
687 
688 /*%
689  * Load Context
690  */
/*
 * Pairs the database being loaded with a time stamp.
 * NOTE(review): 'now' is presumably the time the load was started --
 * confirm against beginload().
 */
691 typedef struct {
692 	dns_rbtdb_t *           rbtdb;
693 	isc_stdtime_t           now;
694 } rbtdb_load_t;
695 
696 static void delete_callback(void *data, void *arg);
697 static void rdataset_disassociate(dns_rdataset_t *rdataset);
698 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
699 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
700 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
701 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
702 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
703 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
704 					dns_name_t *name,
705 					dns_rdataset_t *neg,
706 					dns_rdataset_t *negsig);
707 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
708 					dns_name_t *name,
709 					dns_rdataset_t *neg,
710 					dns_rdataset_t *negsig);
711 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
712 					   dns_rdatasetadditional_t type,
713 					   dns_rdatatype_t qtype,
714 					   dns_acache_t *acache,
715 					   dns_zone_t **zonep,
716 					   dns_db_t **dbp,
717 					   dns_dbversion_t **versionp,
718 					   dns_dbnode_t **nodep,
719 					   dns_name_t *fname,
720 					   dns_message_t *msg,
721 					   isc_stdtime_t now);
722 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
723 					   dns_rdatasetadditional_t type,
724 					   dns_rdatatype_t qtype,
725 					   dns_acache_t *acache,
726 					   dns_zone_t *zone,
727 					   dns_db_t *db,
728 					   dns_dbversion_t *version,
729 					   dns_dbnode_t *node,
730 					   dns_name_t *fname);
731 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
732 					   dns_rdataset_t *rdataset,
733 					   dns_rdatasetadditional_t type,
734 					   dns_rdatatype_t qtype);
735 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
736 					      isc_stdtime_t now);
737 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
738 			  isc_stdtime_t now);
739 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
740 			  isc_boolean_t tree_locked, expire_t reason);
741 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
742 			  isc_stdtime_t now, isc_boolean_t tree_locked);
743 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
744 				  rdatasetheader_t *newheader);
745 static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
746 			  rdatasetheader_t *header);
747 static void prune_tree(isc_task_t *task, isc_event_t *event);
748 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
749 static void rdataset_expire(dns_rdataset_t *rdataset);
750 static void rdataset_clearprefetch(dns_rdataset_t *rdataset);
751 
/*
 * Table of the rdataset method implementations declared above.  The
 * initializer is positional, so the entries must stay in the member order
 * of dns_rdatasetmethods_t, which is declared outside this file.  Two
 * slots are deliberately left NULL.
 */
752 static dns_rdatasetmethods_t rdataset_methods = {
753 	rdataset_disassociate,
754 	rdataset_first,
755 	rdataset_next,
756 	rdataset_current,
757 	rdataset_clone,
758 	rdataset_count,
759 	NULL,	/* no implementation; NOTE(review): presumably addnoqname -- confirm */
760 	rdataset_getnoqname,
761 	NULL,	/* no implementation; NOTE(review): presumably addclosest -- confirm */
762 	rdataset_getclosest,
763 	rdataset_getadditional,
764 	rdataset_setadditional,
765 	rdataset_putadditional,
766 	rdataset_settrust,
767 	rdataset_expire,
768 	rdataset_clearprefetch
769 };
770 
771 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
772 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
773 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
774 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
775 				 dns_rdataset_t *rdataset);
776 
/*
 * Table of the rdataset iterator method implementations declared above.
 * The initializer is positional, so the entries must stay in the member
 * order of dns_rdatasetitermethods_t, which is declared outside this
 * file.
 */
777 static dns_rdatasetitermethods_t rdatasetiter_methods = {
778 	rdatasetiter_destroy,
779 	rdatasetiter_first,
780 	rdatasetiter_next,
781 	rdatasetiter_current
782 };
783 
/*
 * Rdataset iterator for this database: the generic iterator followed by
 * a pointer to the rdataset header the iterator is currently positioned
 * on.
 */
784 typedef struct rbtdb_rdatasetiter {
785 	dns_rdatasetiter_t              common;
786 	rdatasetheader_t *              current;
787 } rbtdb_rdatasetiter_t;
788 
789 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
790 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
791 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
792 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
793 					dns_name_t *name);
794 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
795 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
796 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
797 					   dns_dbnode_t **nodep,
798 					   dns_name_t *name);
799 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
800 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
801 					  dns_name_t *name);
802 
/*
 * Table of the database iterator method implementations declared above.
 * The initializer is positional, so the entries must stay in the member
 * order of dns_dbiteratormethods_t, which is declared outside this file.
 */
803 static dns_dbiteratormethods_t dbiterator_methods = {
804 	dbiterator_destroy,
805 	dbiterator_first,
806 	dbiterator_last,
807 	dbiterator_seek,
808 	dbiterator_prev,
809 	dbiterator_next,
810 	dbiterator_current,
811 	dbiterator_pause,
812 	dbiterator_origin
813 };
814 
815 #define DELETION_BATCH_MAX 64
816 
817 /*
818  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
819  */
820 typedef struct rbtdb_dbiterator {
821 	dns_dbiterator_t                common;
822 	isc_boolean_t                   paused;
823 	isc_boolean_t                   new_origin;
824 	isc_rwlocktype_t                tree_locked;
825 	isc_result_t                    result;
826 	dns_fixedname_t                 name;
827 	dns_fixedname_t                 origin;
828 	dns_rbtnodechain_t              chain;
829 	dns_rbtnodechain_t		nsec3chain;
830 	dns_rbtnodechain_t		*current;
831 	dns_rbtnode_t                   *node;
832 	dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
833 	int                             delete;
834 	isc_boolean_t			nsec3only;
835 	isc_boolean_t			nonsec3;
836 } rbtdb_dbiterator_t;
837 
838 
/*
 * Test whether the DNS_DBATTR_STUB / DNS_DBATTR_CACHE bit is set in the
 * database's common attribute word.
 */
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)

/* Forward declarations. */
static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
		       isc_event_t *event);
static void overmem(dns_db_t *db, isc_boolean_t overmem);
static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);

/* Pad to 32 bytes */
static char FILE_VERSION[32] = "\0";
849 
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be ISC_UINT32_MAX.
 *      The count on the returned rdataset however can't be, as
 *      that indicates that the database does not implement cyclic
 *      processing.
 */
static unsigned int init_count;
862 
863 /*
864  * Locking
865  *
866  * If a routine is going to lock more than one lock in this module, then
867  * the locking must be done in the following order:
868  *
869  *      Tree Lock
870  *
871  *      Node Lock       (Only one from the set may be locked at one time by
872  *                       any caller)
873  *
874  *      Database Lock
875  *
876  * Failure to follow this hierarchy can result in deadlock.
877  */
878 
879 /*
880  * Deleting Nodes
881  *
882  * For zone databases the node for the origin of the zone MUST NOT be deleted.
883  */
884 
885 /*
886  * Debugging routines
887  */
888 #ifdef DEBUG
889 static void
hexdump(const char * desc,unsigned char * data,size_t size)890 hexdump(const char *desc, unsigned char *data, size_t size) {
891 	char hexdump[BUFSIZ * 2 + 1];
892 	isc_buffer_t b;
893 	isc_region_t r;
894 	isc_result_t result;
895 	size_t bytes;
896 
897 	fprintf(stderr, "%s: ", desc);
898 	do {
899 		isc_buffer_init(&b, hexdump, sizeof(hexdump));
900 		r.base = data;
901 		r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size;
902 		result = isc_hex_totext(&r, 0, "", &b);
903 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
904 		isc_buffer_putuint8(&b, 0);
905 		fprintf(stderr, "%s", hexdump);
906 		data += bytes;
907 		size -= bytes;
908 	} while (size > 0);
909 	fprintf(stderr, "\n");
910 }
911 #endif
912 
913 
914 /*
915  * DB Routines
916  */
917 
918 static void
attach(dns_db_t * source,dns_db_t ** targetp)919 attach(dns_db_t *source, dns_db_t **targetp) {
920 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
921 
922 	REQUIRE(VALID_RBTDB(rbtdb));
923 
924 	isc_refcount_increment(&rbtdb->references, NULL);
925 
926 	*targetp = source;
927 }
928 
929 static void
free_rbtdb_callback(isc_task_t * task,isc_event_t * event)930 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
931 	dns_rbtdb_t *rbtdb = event->ev_arg;
932 
933 	UNUSED(task);
934 
935 	free_rbtdb(rbtdb, ISC_TRUE, event);
936 }
937 
938 static void
update_cachestats(dns_rbtdb_t * rbtdb,isc_result_t result)939 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
940 	INSIST(IS_CACHE(rbtdb));
941 
942 	if (rbtdb->cachestats == NULL)
943 		return;
944 
945 	switch (result) {
946 	case ISC_R_SUCCESS:
947 	case DNS_R_CNAME:
948 	case DNS_R_DNAME:
949 	case DNS_R_DELEGATION:
950 	case DNS_R_NCACHENXDOMAIN:
951 	case DNS_R_NCACHENXRRSET:
952 		isc_stats_increment(rbtdb->cachestats,
953 				    dns_cachestatscounter_hits);
954 		break;
955 	default:
956 		isc_stats_increment(rbtdb->cachestats,
957 				    dns_cachestatscounter_misses);
958 	}
959 }
960 
961 static void
update_rrsetstats(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_boolean_t increment)962 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
963 		  isc_boolean_t increment)
964 {
965 	dns_rdatastatstype_t statattributes = 0;
966 	dns_rdatastatstype_t base = 0;
967 	dns_rdatastatstype_t type;
968 
969 	/* At the moment we count statistics only for cache DB */
970 	INSIST(IS_CACHE(rbtdb));
971 
972 	if (NEGATIVE(header)) {
973 		if (NXDOMAIN(header))
974 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
975 		else {
976 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
977 			base = RBTDB_RDATATYPE_EXT(header->type);
978 		}
979 	} else
980 		base = RBTDB_RDATATYPE_BASE(header->type);
981 
982 	type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
983 	if (increment)
984 		dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
985 	else
986 		dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
987 }
988 
989 static void
set_ttl(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,dns_ttl_t newttl)990 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
991 	int idx;
992 	isc_heap_t *heap;
993 	dns_ttl_t oldttl;
994 
995 	oldttl = header->rdh_ttl;
996 	header->rdh_ttl = newttl;
997 
998 	if (!IS_CACHE(rbtdb))
999 		return;
1000 
1001 	/*
1002 	 * It's possible the rbtdb is not a cache.  If this is the case,
1003 	 * we will not have a heap, and we move on.  If we do, though,
1004 	 * we might need to adjust things.
1005 	 */
1006 	if (header->heap_index == 0 || newttl == oldttl)
1007 		return;
1008 	idx = header->node->locknum;
1009 	if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
1010 	    return;
1011 	heap = rbtdb->heaps[idx];
1012 
1013 	if (newttl < oldttl)
1014 		isc_heap_increased(heap, header->heap_index);
1015 	else
1016 		isc_heap_decreased(heap, header->heap_index);
1017 }
1018 
1019 /*%
 * These functions allow the heap code to rank the priority of each
 * element.  Each returns ISC_TRUE if v1 happens "sooner" than v2.
1022  */
1023 static isc_boolean_t
ttl_sooner(void * v1,void * v2)1024 ttl_sooner(void *v1, void *v2) {
1025 	rdatasetheader_t *h1 = v1;
1026 	rdatasetheader_t *h2 = v2;
1027 
1028 	if (h1->rdh_ttl < h2->rdh_ttl)
1029 		return (ISC_TRUE);
1030 	return (ISC_FALSE);
1031 }
1032 
1033 static isc_boolean_t
resign_sooner(void * v1,void * v2)1034 resign_sooner(void *v1, void *v2) {
1035 	rdatasetheader_t *h1 = v1;
1036 	rdatasetheader_t *h2 = v2;
1037 
1038 	if (isc_serial_lt(h1->resign, h2->resign))
1039 		return (ISC_TRUE);
1040 	return (ISC_FALSE);
1041 }
1042 
1043 /*%
1044  * This function sets the heap index into the header.
1045  */
1046 static void
set_index(void * what,unsigned int index)1047 set_index(void *what, unsigned int index) {
1048 	rdatasetheader_t *h = what;
1049 
1050 	h->heap_index = index;
1051 }
1052 
1053 /*%
1054  * Work out how many nodes can be deleted in the time between two
1055  * requests to the nameserver.  Smooth the resulting number and use it
1056  * as a estimate for the number of nodes to be deleted in the next
1057  * iteration.
1058  */
1059 static unsigned int
adjust_quantum(unsigned int old,isc_time_t * start)1060 adjust_quantum(unsigned int old, isc_time_t *start) {
1061 	unsigned int pps = dns_pps;     /* packets per second */
1062 	unsigned int interval;
1063 	isc_uint64_t usecs;
1064 	isc_time_t end;
1065 	unsigned int new;
1066 
1067 	if (pps < 100)
1068 		pps = 100;
1069 	isc_time_now(&end);
1070 
1071 	interval = 1000000 / pps;       /* interval in usec */
1072 	if (interval == 0)
1073 		interval = 1;
1074 	usecs = isc_time_microdiff(&end, start);
1075 	if (usecs == 0) {
1076 		/*
1077 		 * We were unable to measure the amount of time taken.
1078 		 * Double the nodes deleted next time.
1079 		 */
1080 		old *= 2;
1081 		if (old > 1000)
1082 			old = 1000;
1083 		return (old);
1084 	}
1085 	new = old * interval;
1086 	new /= (unsigned int)usecs;
1087 	if (new == 0)
1088 		new = 1;
1089 	else if (new > 1000)
1090 		new = 1000;
1091 
1092 	/* Smooth */
1093 	new = (new + old * 3) / 4;
1094 
1095 	if (new != old)
1096 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1097 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1098 			      "adjust_quantum: old=%d, new=%d", old, new);
1099 
1100 	return (new);
1101 }
1102 
/*
 * Destroy the database 'rbtdb' and release everything it owns.
 *
 * 'log' selects whether a "done free_rbtdb(...)" debug message is
 * written once the trees are gone.  'event' is NULL on a direct call;
 * free_rbtdb_callback() passes the event it was invoked with.
 *
 * The three trees (tree, nsec, nsec3) are destroyed with
 * dns_rbt_destroy2() using rbtdb->quantum.  When that returns
 * ISC_R_QUOTA the function sends an event to rbtdb->task and returns
 * early; free_rbtdb_callback() re-enters this function later to
 * continue.  Only when all trees are gone does the remaining teardown
 * run.
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
	unsigned int i;
	isc_ondestroy_t ondest;
	isc_result_t result;
	char buf[DNS_NAME_FORMATSIZE];
	dns_rbt_t **treep;
	isc_time_t start;

	/*
	 * NOTE(review): the (isc_boolean_t)-1 argument is presumably a
	 * special value understood by overmem() -- confirm there.
	 */
	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
		overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	/*
	 * Drop the last reference to the current version, unlink it from
	 * the open versions list and free it.
	 */
	if (rbtdb->current_version != NULL) {
		unsigned int refs;

		isc_refcount_decrement(&rbtdb->current_version->references,
				       &refs);
		INSIST(refs == 0);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}

	/*
	 * We assume the number of remaining dead nodes is reasonably small;
	 * the overhead of unlinking all nodes here should be negligible.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		dns_rbtnode_t *node;

		node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		while (node != NULL) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
			node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		}
	}

	/*
	 * On a direct call (no event) choose the initial quantum: 100 if
	 * the database has a task, otherwise 0.
	 */
	if (event == NULL)
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;

	for (;;) {
		/*
		 * pick the next tree to (start to) destroy
		 */
		treep = &rbtdb->tree;
		if (*treep == NULL) {
			treep = &rbtdb->nsec;
			if (*treep == NULL) {
				treep = &rbtdb->nsec3;
				/*
				 * we're finished after clear cutting
				 */
				if (*treep == NULL)
					break;
			}
		}

		isc_time_now(&start);
		result = dns_rbt_destroy2(treep, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			INSIST(rbtdb->task != NULL);
			if (rbtdb->quantum != 0)
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			if (event == NULL)
				event = isc_event_allocate(rbtdb->common.mctx,
							   NULL,
							 DNS_EVENT_FREESTORAGE,
							   free_rbtdb_callback,
							   rbtdb,
							   sizeof(isc_event_t));
			/*
			 * If no event could be allocated, keep destroying
			 * in this call instead of deferring to the task.
			 */
			if (event == NULL)
				continue;
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && *treep == NULL);
	}

	/* All trees are gone; the event (if any) is no longer needed. */
	if (event != NULL)
		isc_event_free(&event);
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin))
			dns_name_format(&rbtdb->common.origin, buf,
					sizeof(buf));
		else
			strcpy(buf, "<UNKNOWN>");
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", buf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin))
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		isc_refcount_destroy(&rbtdb->node_locks[i].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
	}

	/*
	 * Clean up LRU / re-signing order lists.
	 */
	if (rbtdb->rdatasets != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
			    rbtdb->node_lock_count *
			    sizeof(rdatasetheaderlist_t));
	}
	/*
	 * Clean up dead node buckets.
	 */
	if (rbtdb->deadnodes != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
		    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
	}
	/*
	 * Clean up heap objects.  Note that the heap array is returned to
	 * rbtdb->hmctx rather than rbtdb->common.mctx.
	 */
	if (rbtdb->heaps != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			isc_heap_destroy(&rbtdb->heaps[i]);
		isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
	}

	if (rbtdb->rrsetstats != NULL)
		dns_stats_detach(&rbtdb->rrsetstats);
	if (rbtdb->cachestats != NULL)
		isc_stats_detach(&rbtdb->cachestats);

	if (rbtdb->load_rpzs != NULL) {
		/*
		 * We must be cleaning up after a failed zone loading.
		 */
		REQUIRE(rbtdb->rpzs != NULL &&
			rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->load_rpzs);
	}
	if (rbtdb->rpzs != NULL) {
		REQUIRE(rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->rpzs);
	}

	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL)
		isc_task_detach(&rbtdb->task);

	RBTDB_DESTROYLOCK(&rbtdb->lock);
	/* Invalidate the magic numbers before the structure is freed. */
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	/* Copy 'ondest' out first: 'rbtdb' itself is freed below. */
	ondest = rbtdb->common.ondest;
	isc_mem_detach(&rbtdb->hmctx);

	if (rbtdb->mmap_location != NULL)
		isc_file_munmap(rbtdb->mmap_location,
				(size_t) rbtdb->mmap_size);

	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
	isc_ondestroy_notify(&ondest, rbtdb);
}
1272 
1273 static inline void
maybe_free_rbtdb(dns_rbtdb_t * rbtdb)1274 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
1275 	isc_boolean_t want_free = ISC_FALSE;
1276 	unsigned int i;
1277 	unsigned int inactive = 0;
1278 
1279 	/* XXX check for open versions here */
1280 
1281 	if (rbtdb->soanode != NULL)
1282 		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1283 	if (rbtdb->nsnode != NULL)
1284 		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1285 
1286 	/*
1287 	 * Even though there are no external direct references, there still
1288 	 * may be nodes in use.
1289 	 */
1290 	for (i = 0; i < rbtdb->node_lock_count; i++) {
1291 		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1292 		rbtdb->node_locks[i].exiting = ISC_TRUE;
1293 		NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1294 		if (isc_refcount_current(&rbtdb->node_locks[i].references)
1295 		    == 0) {
1296 			inactive++;
1297 		}
1298 	}
1299 
1300 	if (inactive != 0) {
1301 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1302 		rbtdb->active -= inactive;
1303 		if (rbtdb->active == 0)
1304 			want_free = ISC_TRUE;
1305 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1306 		if (want_free) {
1307 			char buf[DNS_NAME_FORMATSIZE];
1308 			if (dns_name_dynamic(&rbtdb->common.origin))
1309 				dns_name_format(&rbtdb->common.origin, buf,
1310 						sizeof(buf));
1311 			else
1312 				strcpy(buf, "<UNKNOWN>");
1313 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1314 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1315 				      "calling free_rbtdb(%s)", buf);
1316 			free_rbtdb(rbtdb, ISC_TRUE, NULL);
1317 		}
1318 	}
1319 }
1320 
1321 static void
detach(dns_db_t ** dbp)1322 detach(dns_db_t **dbp) {
1323 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1324 	unsigned int refs;
1325 
1326 	REQUIRE(VALID_RBTDB(rbtdb));
1327 
1328 	isc_refcount_decrement(&rbtdb->references, &refs);
1329 
1330 	if (refs == 0)
1331 		maybe_free_rbtdb(rbtdb);
1332 
1333 	*dbp = NULL;
1334 }
1335 
1336 static void
currentversion(dns_db_t * db,dns_dbversion_t ** versionp)1337 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1338 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1339 	rbtdb_version_t *version;
1340 	unsigned int refs;
1341 
1342 	REQUIRE(VALID_RBTDB(rbtdb));
1343 
1344 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1345 	version = rbtdb->current_version;
1346 	isc_refcount_increment(&version->references, &refs);
1347 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1348 
1349 	*versionp = (dns_dbversion_t *)version;
1350 }
1351 
1352 static inline rbtdb_version_t *
allocate_version(isc_mem_t * mctx,rbtdb_serial_t serial,unsigned int references,isc_boolean_t writer)1353 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1354 		 unsigned int references, isc_boolean_t writer)
1355 {
1356 	isc_result_t result;
1357 	rbtdb_version_t *version;
1358 
1359 	version = isc_mem_get(mctx, sizeof(*version));
1360 	if (version == NULL)
1361 		return (NULL);
1362 	version->serial = serial;
1363 	result = isc_refcount_init(&version->references, references);
1364 	if (result != ISC_R_SUCCESS) {
1365 		isc_mem_put(mctx, version, sizeof(*version));
1366 		return (NULL);
1367 	}
1368 	version->writer = writer;
1369 	version->commit_ok = ISC_FALSE;
1370 	ISC_LIST_INIT(version->changed_list);
1371 	ISC_LIST_INIT(version->resigned_list);
1372 	ISC_LINK_INIT(version, link);
1373 
1374 	return (version);
1375 }
1376 
1377 static isc_result_t
newversion(dns_db_t * db,dns_dbversion_t ** versionp)1378 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1379 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1380 	rbtdb_version_t *version;
1381 
1382 	REQUIRE(VALID_RBTDB(rbtdb));
1383 	REQUIRE(versionp != NULL && *versionp == NULL);
1384 	REQUIRE(rbtdb->future_version == NULL);
1385 
1386 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1387 	RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1388 	version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1389 				   ISC_TRUE);
1390 	if (version != NULL) {
1391 		version->rbtdb = rbtdb;
1392 		version->commit_ok = ISC_TRUE;
1393 		version->secure = rbtdb->current_version->secure;
1394 		version->havensec3 = rbtdb->current_version->havensec3;
1395 		if (version->havensec3) {
1396 			version->flags = rbtdb->current_version->flags;
1397 			version->iterations =
1398 				rbtdb->current_version->iterations;
1399 			version->hash = rbtdb->current_version->hash;
1400 			version->salt_length =
1401 				rbtdb->current_version->salt_length;
1402 			memmove(version->salt, rbtdb->current_version->salt,
1403 				version->salt_length);
1404 		} else {
1405 			version->flags = 0;
1406 			version->iterations = 0;
1407 			version->hash = 0;
1408 			version->salt_length = 0;
1409 			memset(version->salt, 0, sizeof(version->salt));
1410 		}
1411 		rbtdb->next_serial++;
1412 		rbtdb->future_version = version;
1413 	}
1414 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1415 
1416 	if (version == NULL)
1417 		return (ISC_R_NOMEMORY);
1418 
1419 	*versionp = version;
1420 
1421 	return (ISC_R_SUCCESS);
1422 }
1423 
1424 static void
attachversion(dns_db_t * db,dns_dbversion_t * source,dns_dbversion_t ** targetp)1425 attachversion(dns_db_t *db, dns_dbversion_t *source,
1426 	      dns_dbversion_t **targetp)
1427 {
1428 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1429 	rbtdb_version_t *rbtversion = source;
1430 	unsigned int refs;
1431 
1432 	REQUIRE(VALID_RBTDB(rbtdb));
1433 	INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1434 
1435 	isc_refcount_increment(&rbtversion->references, &refs);
1436 	INSIST(refs > 1);
1437 
1438 	*targetp = rbtversion;
1439 }
1440 
1441 static rbtdb_changed_t *
add_changed(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,dns_rbtnode_t * node)1442 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1443 	    dns_rbtnode_t *node)
1444 {
1445 	rbtdb_changed_t *changed;
1446 	unsigned int refs;
1447 
1448 	/*
1449 	 * Caller must be holding the node lock if its reference must be
1450 	 * protected by the lock.
1451 	 */
1452 
1453 	changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1454 
1455 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1456 
1457 	REQUIRE(version->writer);
1458 
1459 	if (changed != NULL) {
1460 		dns_rbtnode_refincrement(node, &refs);
1461 		INSIST(refs != 0);
1462 		changed->node = node;
1463 		changed->dirty = ISC_FALSE;
1464 		ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1465 	} else
1466 		version->commit_ok = ISC_FALSE;
1467 
1468 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1469 
1470 	return (changed);
1471 }
1472 
1473 static void
free_acachearray(isc_mem_t * mctx,rdatasetheader_t * header,acachectl_t * array)1474 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1475 		 acachectl_t *array)
1476 {
1477 	unsigned int count;
1478 	unsigned int i;
1479 	unsigned char *raw;     /* RDATASLAB */
1480 
1481 	/*
1482 	 * The caller must be holding the corresponding node lock.
1483 	 */
1484 
1485 	if (array == NULL)
1486 		return;
1487 
1488 	raw = (unsigned char *)header + sizeof(*header);
1489 	count = raw[0] * 256 + raw[1];
1490 
1491 	/*
1492 	 * Sanity check: since an additional cache entry has a reference to
1493 	 * the original DB node (in the callback arg), there should be no
1494 	 * acache entries when the node can be freed.
1495 	 */
1496 	for (i = 0; i < count; i++)
1497 		INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1498 
1499 	isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1500 }
1501 
1502 static inline void
free_noqname(isc_mem_t * mctx,struct noqname ** noqname)1503 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1504 
1505 	if (dns_name_dynamic(&(*noqname)->name))
1506 		dns_name_free(&(*noqname)->name, mctx);
1507 	if ((*noqname)->neg != NULL)
1508 		isc_mem_put(mctx, (*noqname)->neg,
1509 			    dns_rdataslab_size((*noqname)->neg, 0));
1510 	if ((*noqname)->negsig != NULL)
1511 		isc_mem_put(mctx, (*noqname)->negsig,
1512 			    dns_rdataslab_size((*noqname)->negsig, 0));
1513 	isc_mem_put(mctx, *noqname, sizeof(**noqname));
1514 	*noqname = NULL;
1515 }
1516 
1517 static inline void
init_rdataset(dns_rbtdb_t * rbtdb,rdatasetheader_t * h)1518 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
1519 	ISC_LINK_INIT(h, link);
1520 	h->heap_index = 0;
1521 	h->is_mmapped = 0;
1522 	h->next_is_relative = 0;
1523 	h->node_is_relative = 0;
1524 
1525 #if TRACE_HEADER
1526 	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1527 		fprintf(stderr, "initialized header: %p\n", h);
1528 #else
1529 	UNUSED(rbtdb);
1530 #endif
1531 }
1532 
1533 /*
1534  * Update the copied values of 'next' and 'node' if they are relative.
1535  */
1536 static void
update_newheader(rdatasetheader_t * new,rdatasetheader_t * old)1537 update_newheader(rdatasetheader_t *new, rdatasetheader_t *old) {
1538 	char *p;
1539 
1540 	if (old->next_is_relative) {
1541 		p = (char *) old;
1542 		p += (uintptr_t)old->next;
1543 		new->next = (rdatasetheader_t *)p;
1544 	}
1545 	if (old->node_is_relative) {
1546 		p = (char *) old;
1547 		p += (uintptr_t)old->node;
1548 		new->node = (dns_rbtnode_t *)p;
1549 	}
1550 }
1551 
1552 static inline rdatasetheader_t *
new_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx)1553 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
1554 	rdatasetheader_t *h;
1555 
1556 	h = isc_mem_get(mctx, sizeof(*h));
1557 	if (h == NULL)
1558 		return (NULL);
1559 
1560 #if TRACE_HEADER
1561 	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1562 		fprintf(stderr, "allocated header: %p\n", h);
1563 #endif
1564 	init_rdataset(rbtdb, h);
1565 	return (h);
1566 }
1567 
1568 static inline void
free_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * rdataset)1569 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
1570 	unsigned int size;
1571 	int idx;
1572 
1573 	if (EXISTS(rdataset) &&
1574 	    (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1575 		update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1576 	}
1577 
1578 	idx = rdataset->node->locknum;
1579 	if (ISC_LINK_LINKED(rdataset, link)) {
1580 		INSIST(IS_CACHE(rbtdb));
1581 		ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1582 	}
1583 
1584 	if (rdataset->heap_index != 0)
1585 		isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1586 	rdataset->heap_index = 0;
1587 
1588 	if (rdataset->noqname != NULL)
1589 		free_noqname(mctx, &rdataset->noqname);
1590 	if (rdataset->closest != NULL)
1591 		free_noqname(mctx, &rdataset->closest);
1592 
1593 	free_acachearray(mctx, rdataset, rdataset->additional_auth);
1594 	free_acachearray(mctx, rdataset, rdataset->additional_glue);
1595 
1596 	if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1597 		size = sizeof(*rdataset);
1598 	else
1599 		size = dns_rdataslab_size((unsigned char *)rdataset,
1600 					  sizeof(*rdataset));
1601 
1602 	if (rdataset->is_mmapped == 1)
1603 		return;
1604 
1605 	isc_mem_put(mctx, rdataset, size);
1606 }
1607 
1608 static inline void
rollback_node(dns_rbtnode_t * node,rbtdb_serial_t serial)1609 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1610 	rdatasetheader_t *header, *dcurrent;
1611 	isc_boolean_t make_dirty = ISC_FALSE;
1612 
1613 	/*
1614 	 * Caller must hold the node lock.
1615 	 */
1616 
1617 	/*
1618 	 * We set the IGNORE attribute on rdatasets with serial number
1619 	 * 'serial'.  When the reference count goes to zero, these rdatasets
1620 	 * will be cleaned up; until that time, they will be ignored.
1621 	 */
1622 	for (header = node->data; header != NULL; header = header->next) {
1623 		if (header->serial == serial) {
1624 			header->attributes |= RDATASET_ATTR_IGNORE;
1625 			make_dirty = ISC_TRUE;
1626 		}
1627 		for (dcurrent = header->down;
1628 		     dcurrent != NULL;
1629 		     dcurrent = dcurrent->down) {
1630 			if (dcurrent->serial == serial) {
1631 				dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1632 				make_dirty = ISC_TRUE;
1633 			}
1634 		}
1635 	}
1636 	if (make_dirty)
1637 		node->dirty = 1;
1638 }
1639 
1640 static inline void
mark_stale_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header)1641 mark_stale_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
1642 
1643 	/*
1644 	 * If we are already stale there is nothing to do.
1645 	 */
1646 	if ((header->attributes & RDATASET_ATTR_STALE) != 0)
1647 		return;
1648 
1649 	header->attributes |= RDATASET_ATTR_STALE;
1650 	header->node->dirty = 1;
1651 
1652 	/*
1653 	 * If we have not been counted then there is nothing to do.
1654 	 */
1655 	if ((header->attributes & RDATASET_ATTR_STATCOUNT) == 0)
1656 		return;
1657 
1658 	if (EXISTS(header))
1659 		update_rrsetstats(rbtdb, header, ISC_TRUE);
1660 }
1661 
1662 static inline void
clean_stale_headers(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * top)1663 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1664 {
1665 	rdatasetheader_t *d, *down_next;
1666 
1667 	for (d = top->down; d != NULL; d = down_next) {
1668 		down_next = d->down;
1669 		free_rdataset(rbtdb, mctx, d);
1670 	}
1671 	top->down = NULL;
1672 }
1673 
1674 static inline void
clean_cache_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1675 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1676 	rdatasetheader_t *current, *top_prev, *top_next;
1677 	isc_mem_t *mctx = rbtdb->common.mctx;
1678 
1679 	/*
1680 	 * Caller must be holding the node lock.
1681 	 */
1682 
1683 	top_prev = NULL;
1684 	for (current = node->data; current != NULL; current = top_next) {
1685 		top_next = current->next;
1686 		clean_stale_headers(rbtdb, mctx, current);
1687 		/*
1688 		 * If current is nonexistent or stale, we can clean it up.
1689 		 */
1690 		if ((current->attributes &
1691 		     (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1692 			if (top_prev != NULL)
1693 				top_prev->next = current->next;
1694 			else
1695 				node->data = current->next;
1696 			free_rdataset(rbtdb, mctx, current);
1697 		} else
1698 			top_prev = current;
1699 	}
1700 	node->dirty = 0;
1701 }
1702 
/*
 * Clean up a zone node.  For each top-level header this removes
 * 'down' entries that duplicate their parent's serial or carry the
 * IGNORE attribute, replaces an IGNOREd top-level header by its first
 * 'down' entry (or drops it if there is none), frees every 'down'
 * entry older than 'least_serial', and finally frees a top-level
 * header that is NONEXISTENT and has no 'down' chain left.
 *
 * The node's dirty flag is cleared only if no top-level header is left
 * with a non-empty 'down' chain.
 *
 * Caller must be holding the node lock.  'least_serial' must be
 * non-zero.
 */
static inline void
clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		rbtdb_serial_t least_serial)
{
	rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
	rdatasetheader_t *top_prev, *top_next;
	isc_mem_t *mctx = rbtdb->common.mctx;
	isc_boolean_t still_dirty = ISC_FALSE;

	/*
	 * Caller must be holding the node lock.
	 */
	REQUIRE(least_serial != 0);

	top_prev = NULL;
	for (current = node->data; current != NULL; current = top_next) {
		/* Saved up front: 'current' may be freed or replaced below. */
		top_next = current->next;

		/*
		 * First, we clean up any instances of multiple rdatasets
		 * with the same serial number, or that have the IGNORE
		 * attribute.
		 */
		dparent = current;
		for (dcurrent = current->down;
		     dcurrent != NULL;
		     dcurrent = down_next) {
			down_next = dcurrent->down;
			INSIST(dcurrent->serial <= dparent->serial);
			if (dcurrent->serial == dparent->serial ||
			    IGNORE(dcurrent)) {
				/* Splice 'dcurrent' out of the down chain. */
				if (down_next != NULL)
					down_next->next = dparent;
				dparent->down = down_next;
				free_rdataset(rbtdb, mctx, dcurrent);
			} else
				dparent = dcurrent;
		}

		/*
		 * We've now eliminated all IGNORE datasets with the possible
		 * exception of current, which we now check.
		 */
		if (IGNORE(current)) {
			down_next = current->down;
			if (down_next == NULL) {
				if (top_prev != NULL)
					top_prev->next = current->next;
				else
					node->data = current->next;
				free_rdataset(rbtdb, mctx, current);
				/*
				 * current no longer exists, so we can
				 * just continue with the loop.
				 */
				continue;
			} else {
				/*
				 * Pull up current->down, making it the new
				 * current.
				 */
				if (top_prev != NULL)
					top_prev->next = down_next;
				else
					node->data = down_next;
				down_next->next = top_next;
				free_rdataset(rbtdb, mctx, current);
				current = down_next;
			}
		}

		/*
		 * We now try to find the first down node less than the
		 * least serial.
		 */
		dparent = current;
		for (dcurrent = current->down;
		     dcurrent != NULL;
		     dcurrent = down_next) {
			down_next = dcurrent->down;
			if (dcurrent->serial < least_serial)
				break;
			dparent = dcurrent;
		}

		/*
		 * If there is such an rdataset, delete it and any older
		 * versions.
		 */
		if (dcurrent != NULL) {
			do {
				down_next = dcurrent->down;
				INSIST(dcurrent->serial <= least_serial);
				free_rdataset(rbtdb, mctx, dcurrent);
				dcurrent = down_next;
			} while (dcurrent != NULL);
			/* 'dparent' is the last surviving entry. */
			dparent->down = NULL;
		}

		/*
		 * Note.  The serial number of 'current' might be less than
		 * least_serial too, but we cannot delete it because it is
		 * the most recent version, unless it is a NONEXISTENT
		 * rdataset.
		 */
		if (current->down != NULL) {
			still_dirty = ISC_TRUE;
			top_prev = current;
		} else {
			/*
			 * If this is a NONEXISTENT rdataset, we can delete it.
			 */
			if (NONEXISTENT(current)) {
				if (top_prev != NULL)
					top_prev->next = current->next;
				else
					node->data = current->next;
				free_rdataset(rbtdb, mctx, current);
			} else
				top_prev = current;
		}
	}
	if (!still_dirty)
		node->dirty = 0;
}
1828 
1829 static void
delete_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1830 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1831 	dns_rbtnode_t *nsecnode;
1832 	dns_fixedname_t fname;
1833 	dns_name_t *name;
1834 	isc_result_t result = ISC_R_UNEXPECTED;
1835 	unsigned int node_has_rpz;
1836 
1837 	INSIST(!ISC_LINK_LINKED(node, deadlink));
1838 
1839 	switch (node->nsec) {
1840 	case DNS_RBT_NSEC_NORMAL:
1841 		/*
1842 		 * Though this may be wasteful, it has to be done before
1843 		 * node is deleted.
1844 		 */
1845 		dns_fixedname_init(&fname);
1846 		name = dns_fixedname_name(&fname);
1847 		dns_rbt_fullnamefromnode(node, name);
1848 
1849 		node_has_rpz = node->rpz;
1850 		result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1851 		if (result == ISC_R_SUCCESS &&
1852 		    rbtdb->rpzs != NULL && node_has_rpz)
1853 			dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
1854 		break;
1855 	case DNS_RBT_NSEC_HAS_NSEC:
1856 		dns_fixedname_init(&fname);
1857 		name = dns_fixedname_name(&fname);
1858 		dns_rbt_fullnamefromnode(node, name);
1859 		/*
1860 		 * Delete the corresponding node from the auxiliary NSEC
1861 		 * tree before deleting from the main tree.
1862 		 */
1863 		nsecnode = NULL;
1864 		result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1865 					  NULL, DNS_RBTFIND_EMPTYDATA,
1866 					  NULL, NULL);
1867 		if (result != ISC_R_SUCCESS) {
1868 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1869 				      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1870 				      "delete_node: "
1871 				      "dns_rbt_findnode(nsec): %s",
1872 				      isc_result_totext(result));
1873 		} else {
1874 			result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1875 						    ISC_FALSE);
1876 			if (result != ISC_R_SUCCESS) {
1877 				isc_log_write(dns_lctx,
1878 					      DNS_LOGCATEGORY_DATABASE,
1879 					      DNS_LOGMODULE_CACHE,
1880 					      ISC_LOG_WARNING,
1881 					      "delete_node(): "
1882 					      "dns_rbt_deletenode(nsecnode): %s",
1883 					      isc_result_totext(result));
1884 			}
1885 		}
1886 		node_has_rpz = node->rpz;
1887 		result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1888 		if (result == ISC_R_SUCCESS &&
1889 		    rbtdb->rpzs != NULL && node_has_rpz)
1890 			dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
1891 		break;
1892 	case DNS_RBT_NSEC_NSEC:
1893 		result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1894 		break;
1895 	case DNS_RBT_NSEC_NSEC3:
1896 		result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1897 		break;
1898 	}
1899 	if (result != ISC_R_SUCCESS) {
1900 		isc_log_write(dns_lctx,
1901 			      DNS_LOGCATEGORY_DATABASE,
1902 			      DNS_LOGMODULE_CACHE,
1903 			      ISC_LOG_WARNING,
1904 			      "delete_node(): "
1905 			      "dns_rbt_deletenode: %s",
1906 			      isc_result_totext(result));
1907 	}
1908 }
1909 
1910 /*%
1911  * Clean up dead nodes.  These are nodes which have no references, and
1912  * have no data.  They are dead but we could not or chose not to delete
1913  * them when we deleted all the data at that node because we did not want
1914  * to wait for the tree write lock.
1915  *
1916  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1917  */
1918 static void
cleanup_dead_nodes(dns_rbtdb_t * rbtdb,int bucketnum)1919 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1920 	dns_rbtnode_t *node;
1921 	int count = 10;         /* XXXJT: should be adjustable */
1922 
1923 	node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1924 	while (node != NULL && count > 0) {
1925 		ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1926 
1927 		/*
1928 		 * Since we're holding a tree write lock, it should be
1929 		 * impossible for this node to be referenced by others.
1930 		 */
1931 		INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1932 		       node->data == NULL);
1933 
1934 		delete_node(rbtdb, node);
1935 
1936 		node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1937 		count--;
1938 	}
1939 }
1940 
1941 /*
1942  * Caller must be holding the node lock.
1943  */
1944 static inline void
new_reference(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1945 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1946 	unsigned int lockrefs, noderefs;
1947 	isc_refcount_t *lockref;
1948 
1949 	INSIST(!ISC_LINK_LINKED(node, deadlink));
1950 	dns_rbtnode_refincrement0(node, &noderefs);
1951 	if (noderefs == 1) {    /* this is the first reference to the node */
1952 		lockref = &rbtdb->node_locks[node->locknum].references;
1953 		isc_refcount_increment0(lockref, &lockrefs);
1954 		INSIST(lockrefs != 0);
1955 	}
1956 	INSIST(noderefs != 0);
1957 }
1958 
1959 /*
1960  * This function is assumed to be called when a node is newly referenced
1961  * and can be in the deadnode list.  In that case the node must be retrieved
1962  * from the list because it is going to be used.  In addition, if the caller
1963  * happens to hold a write lock on the tree, it's a good chance to purge dead
1964  * nodes.
1965  * Note: while a new reference is gained in multiple places, there are only very
1966  * few cases where the node can be in the deadnode list (only empty nodes can
1967  * have been added to the list).
1968  */
1969 static inline void
reactivate_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,isc_rwlocktype_t treelocktype)1970 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1971 		isc_rwlocktype_t treelocktype)
1972 {
1973 	isc_rwlocktype_t locktype = isc_rwlocktype_read;
1974 	nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1975 	isc_boolean_t maybe_cleanup = ISC_FALSE;
1976 
1977 	POST(locktype);
1978 
1979 	NODE_STRONGLOCK(nodelock);
1980 	NODE_WEAKLOCK(nodelock, locktype);
1981 
1982 	/*
1983 	 * Check if we can possibly cleanup the dead node.  If so, upgrade
1984 	 * the node lock below to perform the cleanup.
1985 	 */
1986 	if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1987 	    treelocktype == isc_rwlocktype_write) {
1988 		maybe_cleanup = ISC_TRUE;
1989 	}
1990 
1991 	if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1992 		/*
1993 		 * Upgrade the lock and test if we still need to unlink.
1994 		 */
1995 		NODE_WEAKUNLOCK(nodelock, locktype);
1996 		locktype = isc_rwlocktype_write;
1997 		POST(locktype);
1998 		NODE_WEAKLOCK(nodelock, locktype);
1999 		if (ISC_LINK_LINKED(node, deadlink))
2000 			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
2001 					node, deadlink);
2002 		if (maybe_cleanup)
2003 			cleanup_dead_nodes(rbtdb, node->locknum);
2004 	}
2005 
2006 	new_reference(rbtdb, node);
2007 
2008 	NODE_WEAKUNLOCK(nodelock, locktype);
2009 	NODE_STRONGUNLOCK(nodelock);
2010 }
2011 
2012 /*
2013  * Caller must be holding the node lock; either the "strong", read or write
2014  * lock.  Note that the lock must be held even when node references are
2015  * atomically modified; in that case the decrement operation itself does not
2016  * have to be protected, but we must avoid a race condition where multiple
2017  * threads are decreasing the reference to zero simultaneously and at least
2018  * one of them is going to free the node.
2019  * This function returns ISC_TRUE if and only if the node reference decreases
2020  * to zero.
2021  */
2022 static isc_boolean_t
decrement_reference(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rbtdb_serial_t least_serial,isc_rwlocktype_t nlock,isc_rwlocktype_t tlock,isc_boolean_t pruning)2023 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2024 		    rbtdb_serial_t least_serial,
2025 		    isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
2026 		    isc_boolean_t pruning)
2027 {
2028 	isc_result_t result;
2029 	isc_boolean_t write_locked;
2030 	rbtdb_nodelock_t *nodelock;
2031 	unsigned int refs, nrefs;
2032 	int bucket = node->locknum;
2033 	isc_boolean_t no_reference = ISC_TRUE;
2034 
2035 	nodelock = &rbtdb->node_locks[bucket];
2036 
2037 #define KEEP_NODE(n, r) \
2038 	((n)->data != NULL || (n)->down != NULL || (n) == (r)->origin_node)
2039 
2040 	/* Handle easy and typical case first. */
2041 	if (!node->dirty && KEEP_NODE(node, rbtdb)) {
2042 		dns_rbtnode_refdecrement(node, &nrefs);
2043 		INSIST((int)nrefs >= 0);
2044 		if (nrefs == 0) {
2045 			isc_refcount_decrement(&nodelock->references, &refs);
2046 			INSIST((int)refs >= 0);
2047 		}
2048 		return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
2049 	}
2050 
2051 	/* Upgrade the lock? */
2052 	if (nlock == isc_rwlocktype_read) {
2053 		NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
2054 		NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
2055 	}
2056 
2057 	dns_rbtnode_refdecrement(node, &nrefs);
2058 	INSIST((int)nrefs >= 0);
2059 	if (nrefs > 0) {
2060 		/* Restore the lock? */
2061 		if (nlock == isc_rwlocktype_read)
2062 			NODE_WEAKDOWNGRADE(&nodelock->lock);
2063 		return (ISC_FALSE);
2064 	}
2065 
2066 	if (node->dirty) {
2067 		if (IS_CACHE(rbtdb))
2068 			clean_cache_node(rbtdb, node);
2069 		else {
2070 			if (least_serial == 0) {
2071 				/*
2072 				 * Caller doesn't know the least serial.
2073 				 * Get it.
2074 				 */
2075 				RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2076 				least_serial = rbtdb->least_serial;
2077 				RBTDB_UNLOCK(&rbtdb->lock,
2078 					     isc_rwlocktype_read);
2079 			}
2080 			clean_zone_node(rbtdb, node, least_serial);
2081 		}
2082 	}
2083 
2084 	/*
2085 	 * Attempt to switch to a write lock on the tree.  If this fails,
2086 	 * we will add this node to a linked list of nodes in this locking
2087 	 * bucket which we will free later.
2088 	 */
2089 	if (tlock != isc_rwlocktype_write) {
2090 		/*
2091 		 * Locking hierarchy notwithstanding, we don't need to free
2092 		 * the node lock before acquiring the tree write lock because
2093 		 * we only do a trylock.
2094 		 */
2095 		if (tlock == isc_rwlocktype_read)
2096 			result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
2097 		else
2098 			result = isc_rwlock_trylock(&rbtdb->tree_lock,
2099 						    isc_rwlocktype_write);
2100 		RUNTIME_CHECK(result == ISC_R_SUCCESS ||
2101 			      result == ISC_R_LOCKBUSY);
2102 
2103 		write_locked = ISC_TF(result == ISC_R_SUCCESS);
2104 	} else
2105 		write_locked = ISC_TRUE;
2106 
2107 	isc_refcount_decrement(&nodelock->references, &refs);
2108 	INSIST((int)refs >= 0);
2109 
2110 	if (KEEP_NODE(node, rbtdb))
2111 		goto restore_locks;
2112 
2113 #undef KEEP_NODE
2114 
2115 	if (write_locked) {
2116 		/*
2117 		 * We can now delete the node.
2118 		 */
2119 
2120 		/*
2121 		 * If this node is the only one in the level it's in, deleting
2122 		 * this node may recursively make its parent the only node in
2123 		 * the parent level; if so, and if no one is currently using
2124 		 * the parent node, this is almost the only opportunity to
2125 		 * clean it up.  But the recursive cleanup is not that trivial
2126 		 * since the child and parent may be in different lock buckets,
2127 		 * which would cause a lock order reversal problem.  To avoid
2128 		 * the trouble, we'll dispatch a separate event for batch
2129 		 * cleaning.  We need to check whether we're deleting the node
2130 		 * as a result of pruning to avoid infinite dispatching.
2131 		 * Note: pruning happens only when a task has been set for the
2132 		 * rbtdb.  If the user of the rbtdb chooses not to set a task,
2133 		 * it's their responsibility to purge stale leaves (e.g. by
2134 		 * periodic walk-through).
2135 		 */
2136 		if (!pruning && node->parent != NULL &&
2137 		    node->parent->down == node && node->left == NULL &&
2138 		    node->right == NULL && rbtdb->task != NULL) {
2139 			isc_event_t *ev;
2140 			dns_db_t *db;
2141 
2142 			ev = isc_event_allocate(rbtdb->common.mctx, NULL,
2143 						DNS_EVENT_RBTPRUNE,
2144 						prune_tree, node,
2145 						sizeof(isc_event_t));
2146 			if (ev != NULL) {
2147 				new_reference(rbtdb, node);
2148 				db = NULL;
2149 				attach((dns_db_t *)rbtdb, &db);
2150 				ev->ev_sender = db;
2151 				isc_task_send(rbtdb->task, &ev);
2152 				no_reference = ISC_FALSE;
2153 			} else {
2154 				/*
2155 				 * XXX: this is a weird situation.  We could
2156 				 * ignore this error case, but then the stale
2157 				 * node will unlikely be purged except via a
2158 				 * rare condition such as manual cleanup.  So
2159 				 * we queue it in the deadnodes list, hoping
2160 				 * the memory shortage is temporary and the node
2161 				 * will be deleted later.
2162 				 */
2163 				isc_log_write(dns_lctx,
2164 					      DNS_LOGCATEGORY_DATABASE,
2165 					      DNS_LOGMODULE_CACHE,
2166 					      ISC_LOG_INFO,
2167 					      "decrement_reference: failed to "
2168 					      "allocate pruning event");
2169 				INSIST(node->data == NULL);
2170 				INSIST(!ISC_LINK_LINKED(node, deadlink));
2171 				ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
2172 						deadlink);
2173 			}
2174 		} else {
2175 			if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
2176 				char printname[DNS_NAME_FORMATSIZE];
2177 
2178 				isc_log_write(dns_lctx,
2179 					      DNS_LOGCATEGORY_DATABASE,
2180 					      DNS_LOGMODULE_CACHE,
2181 					      ISC_LOG_DEBUG(1),
2182 					      "decrement_reference: "
2183 					      "delete from rbt: %p %s",
2184 					      node,
2185 					      dns_rbt_formatnodename(node,
2186 							printname,
2187 							sizeof(printname)));
2188 			}
2189 
2190 			delete_node(rbtdb, node);
2191 		}
2192 	} else {
2193 		INSIST(node->data == NULL);
2194 		INSIST(!ISC_LINK_LINKED(node, deadlink));
2195 		ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
2196 	}
2197 
2198  restore_locks:
2199 	/* Restore the lock? */
2200 	if (nlock == isc_rwlocktype_read)
2201 		NODE_WEAKDOWNGRADE(&nodelock->lock);
2202 
2203 	/*
2204 	 * Relock a read lock, or unlock the write lock if no lock was held.
2205 	 */
2206 	if (tlock == isc_rwlocktype_none)
2207 		if (write_locked)
2208 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2209 
2210 	if (tlock == isc_rwlocktype_read)
2211 		if (write_locked)
2212 			isc_rwlock_downgrade(&rbtdb->tree_lock);
2213 
2214 	return (no_reference);
2215 }
2216 
2217 /*
2218  * Prune the tree by recursively cleaning-up single leaves.  In the worst
2219  * case, the number of iteration is the number of tree levels, which is at
2220  * most the maximum number of domain name labels, i.e, 127.  In practice, this
2221  * should be much smaller (only a few times), and even the worst case would be
2222  * acceptable for a single event.
2223  */
2224 static void
prune_tree(isc_task_t * task,isc_event_t * event)2225 prune_tree(isc_task_t *task, isc_event_t *event) {
2226 	dns_rbtdb_t *rbtdb = event->ev_sender;
2227 	dns_rbtnode_t *node = event->ev_arg;
2228 	dns_rbtnode_t *parent;
2229 	unsigned int locknum;
2230 
2231 	UNUSED(task);
2232 
2233 	isc_event_free(&event);
2234 
2235 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2236 	locknum = node->locknum;
2237 	NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
2238 	do {
2239 		parent = node->parent;
2240 		decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
2241 				    isc_rwlocktype_write, ISC_TRUE);
2242 
2243 		if (parent != NULL && parent->down == NULL) {
2244 			/*
2245 			 * node was the only down child of the parent and has
2246 			 * just been removed.  We'll then need to examine the
2247 			 * parent.  Keep the lock if possible; otherwise,
2248 			 * release the old lock and acquire one for the parent.
2249 			 */
2250 			if (parent->locknum != locknum) {
2251 				NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2252 					    isc_rwlocktype_write);
2253 				locknum = parent->locknum;
2254 				NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2255 					  isc_rwlocktype_write);
2256 			}
2257 
2258 			/*
2259 			 * We need to gain a reference to the node before
2260 			 * decrementing it in the next iteration.  In addition,
2261 			 * if the node is in the dead-nodes list, extract it
2262 			 * from the list beforehand as we do in
2263 			 * reactivate_node().
2264 			 */
2265 			if (ISC_LINK_LINKED(parent, deadlink))
2266 				ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
2267 						parent, deadlink);
2268 			new_reference(rbtdb, parent);
2269 		} else
2270 			parent = NULL;
2271 
2272 		node = parent;
2273 	} while (node != NULL);
2274 	NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
2275 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2276 
2277 	detach((dns_db_t **)(void *)&rbtdb);
2278 }
2279 
2280 static inline void
make_least_version(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2281 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
2282 		   rbtdb_changedlist_t *cleanup_list)
2283 {
2284 	/*
2285 	 * Caller must be holding the database lock.
2286 	 */
2287 
2288 	rbtdb->least_serial = version->serial;
2289 	*cleanup_list = version->changed_list;
2290 	ISC_LIST_INIT(version->changed_list);
2291 }
2292 
2293 static inline void
cleanup_nondirty(rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2294 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
2295 	rbtdb_changed_t *changed, *next_changed;
2296 
2297 	/*
2298 	 * If the changed record is dirty, then
2299 	 * an update created multiple versions of
2300 	 * a given rdataset.  We keep this list
2301 	 * until we're the least open version, at
2302 	 * which point it's safe to get rid of any
2303 	 * older versions.
2304 	 *
2305 	 * If the changed record isn't dirty, then
2306 	 * we don't need it anymore since we're
2307 	 * committing and not rolling back.
2308 	 *
2309 	 * The caller must be holding the database lock.
2310 	 */
2311 	for (changed = HEAD(version->changed_list);
2312 	     changed != NULL;
2313 	     changed = next_changed) {
2314 		next_changed = NEXT(changed, link);
2315 		if (!changed->dirty) {
2316 			UNLINK(version->changed_list,
2317 			       changed, link);
2318 			APPEND(*cleanup_list,
2319 			       changed, link);
2320 		}
2321 	}
2322 }
2323 
2324 static void
iszonesecure(dns_db_t * db,rbtdb_version_t * version,dns_dbnode_t * origin)2325 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
2326 	dns_rdataset_t keyset;
2327 	dns_rdataset_t nsecset, signsecset;
2328 	isc_boolean_t haszonekey = ISC_FALSE;
2329 	isc_boolean_t hasnsec = ISC_FALSE;
2330 	isc_result_t result;
2331 
2332 	dns_rdataset_init(&keyset);
2333 	result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2334 				     0, 0, &keyset, NULL);
2335 	if (result == ISC_R_SUCCESS) {
2336 		result = dns_rdataset_first(&keyset);
2337 		while (result == ISC_R_SUCCESS) {
2338 			dns_rdata_t keyrdata = DNS_RDATA_INIT;
2339 			dns_rdataset_current(&keyset, &keyrdata);
2340 			if (dns_zonekey_iszonekey(&keyrdata)) {
2341 				haszonekey = ISC_TRUE;
2342 				break;
2343 			}
2344 			result = dns_rdataset_next(&keyset);
2345 		}
2346 		dns_rdataset_disassociate(&keyset);
2347 	}
2348 	if (!haszonekey) {
2349 		version->secure = dns_db_insecure;
2350 		version->havensec3 = ISC_FALSE;
2351 		return;
2352 	}
2353 
2354 	dns_rdataset_init(&nsecset);
2355 	dns_rdataset_init(&signsecset);
2356 	result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2357 				     0, 0, &nsecset, &signsecset);
2358 	if (result == ISC_R_SUCCESS) {
2359 		if (dns_rdataset_isassociated(&signsecset)) {
2360 			hasnsec = ISC_TRUE;
2361 			dns_rdataset_disassociate(&signsecset);
2362 		}
2363 		dns_rdataset_disassociate(&nsecset);
2364 	}
2365 
2366 	setnsec3parameters(db, version);
2367 
2368 	/*
2369 	 * Do we have a valid NSEC/NSEC3 chain?
2370 	 */
2371 	if (version->havensec3 || hasnsec)
2372 		version->secure = dns_db_secure;
2373 	else
2374 		version->secure = dns_db_insecure;
2375 }
2376 
2377 /*%<
2378  * Walk the origin node looking for NSEC3PARAM records.
2379  * Cache the nsec3 parameters.
2380  */
2381 static void
setnsec3parameters(dns_db_t * db,rbtdb_version_t * version)2382 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2383 	dns_rbtnode_t *node;
2384 	dns_rdata_nsec3param_t nsec3param;
2385 	dns_rdata_t rdata = DNS_RDATA_INIT;
2386 	isc_region_t region;
2387 	isc_result_t result;
2388 	rdatasetheader_t *header, *header_next;
2389 	unsigned char *raw;             /* RDATASLAB */
2390 	unsigned int count, length;
2391 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2392 
2393 	version->havensec3 = ISC_FALSE;
2394 	node = rbtdb->origin_node;
2395 	NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2396 		  isc_rwlocktype_read);
2397 	for (header = node->data;
2398 	     header != NULL;
2399 	     header = header_next) {
2400 		header_next = header->next;
2401 		do {
2402 			if (header->serial <= version->serial &&
2403 			    !IGNORE(header)) {
2404 				if (NONEXISTENT(header))
2405 					header = NULL;
2406 				break;
2407 			} else
2408 				header = header->down;
2409 		} while (header != NULL);
2410 
2411 		if (header != NULL &&
2412 		    (header->type == dns_rdatatype_nsec3param)) {
2413 			/*
2414 			 * Find A NSEC3PARAM with a supported algorithm.
2415 			 */
2416 			raw = (unsigned char *)header + sizeof(*header);
2417 			count = raw[0] * 256 + raw[1]; /* count */
2418 #if DNS_RDATASET_FIXED
2419 			raw += count * 4 + 2;
2420 #else
2421 			raw += 2;
2422 #endif
2423 			while (count-- > 0U) {
2424 				length = raw[0] * 256 + raw[1];
2425 #if DNS_RDATASET_FIXED
2426 				raw += 4;
2427 #else
2428 				raw += 2;
2429 #endif
2430 				region.base = raw;
2431 				region.length = length;
2432 				raw += length;
2433 				dns_rdata_fromregion(&rdata,
2434 						     rbtdb->common.rdclass,
2435 						     dns_rdatatype_nsec3param,
2436 						     &region);
2437 				result = dns_rdata_tostruct(&rdata,
2438 							    &nsec3param,
2439 							    NULL);
2440 				INSIST(result == ISC_R_SUCCESS);
2441 				dns_rdata_reset(&rdata);
2442 
2443 				if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2444 				    !dns_nsec3_supportedhash(nsec3param.hash))
2445 					continue;
2446 
2447 				if (nsec3param.flags != 0)
2448 					continue;
2449 
2450 				memmove(version->salt, nsec3param.salt,
2451 					nsec3param.salt_length);
2452 				version->hash = nsec3param.hash;
2453 				version->salt_length = nsec3param.salt_length;
2454 				version->iterations = nsec3param.iterations;
2455 				version->flags = nsec3param.flags;
2456 				version->havensec3 = ISC_TRUE;
2457 				/*
2458 				 * Look for a better algorithm than the
2459 				 * unknown test algorithm.
2460 				 */
2461 				if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2462 					goto unlock;
2463 			}
2464 		}
2465 	}
2466  unlock:
2467 	NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2468 		    isc_rwlocktype_read);
2469 }
2470 
2471 static void
cleanup_dead_nodes_callback(isc_task_t * task,isc_event_t * event)2472 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2473 	dns_rbtdb_t *rbtdb = event->ev_arg;
2474 	isc_boolean_t again = ISC_FALSE;
2475 	unsigned int locknum;
2476 	unsigned int refs;
2477 
2478 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2479 	for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2480 		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2481 			  isc_rwlocktype_write);
2482 		cleanup_dead_nodes(rbtdb, locknum);
2483 		if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2484 			again = ISC_TRUE;
2485 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2486 			    isc_rwlocktype_write);
2487 	}
2488 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2489 	if (again)
2490 		isc_task_send(task, &event);
2491 	else {
2492 		isc_event_free(&event);
2493 		isc_refcount_decrement(&rbtdb->references, &refs);
2494 		if (refs == 0)
2495 			maybe_free_rbtdb(rbtdb);
2496 	}
2497 }
2498 
2499 static void
closeversion(dns_db_t * db,dns_dbversion_t ** versionp,isc_boolean_t commit)2500 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2501 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2502 	rbtdb_version_t *version, *cleanup_version, *least_greater;
2503 	isc_boolean_t rollback = ISC_FALSE;
2504 	rbtdb_changedlist_t cleanup_list;
2505 	rdatasetheaderlist_t resigned_list;
2506 	rbtdb_changed_t *changed, *next_changed;
2507 	rbtdb_serial_t serial, least_serial;
2508 	dns_rbtnode_t *rbtnode;
2509 	unsigned int refs;
2510 	rdatasetheader_t *header;
2511 
2512 	REQUIRE(VALID_RBTDB(rbtdb));
2513 	version = (rbtdb_version_t *)*versionp;
2514 	INSIST(version->rbtdb == rbtdb);
2515 
2516 	cleanup_version = NULL;
2517 	ISC_LIST_INIT(cleanup_list);
2518 	ISC_LIST_INIT(resigned_list);
2519 
2520 	isc_refcount_decrement(&version->references, &refs);
2521 	if (refs > 0) {         /* typical and easy case first */
2522 		if (commit) {
2523 			RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2524 			INSIST(!version->writer);
2525 			RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2526 		}
2527 		goto end;
2528 	}
2529 
2530 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2531 	serial = version->serial;
2532 	if (version->writer) {
2533 		if (commit) {
2534 			unsigned cur_ref;
2535 			rbtdb_version_t *cur_version;
2536 
2537 			INSIST(version->commit_ok);
2538 			INSIST(version == rbtdb->future_version);
2539 			/*
2540 			 * The current version is going to be replaced.
2541 			 * Release the (likely last) reference to it from the
2542 			 * DB itself and unlink it from the open list.
2543 			 */
2544 			cur_version = rbtdb->current_version;
2545 			isc_refcount_decrement(&cur_version->references,
2546 					       &cur_ref);
2547 			if (cur_ref == 0) {
2548 				if (cur_version->serial == rbtdb->least_serial)
2549 					INSIST(EMPTY(cur_version->changed_list));
2550 				UNLINK(rbtdb->open_versions,
2551 				       cur_version, link);
2552 			}
2553 			if (EMPTY(rbtdb->open_versions)) {
2554 				/*
2555 				 * We're going to become the least open
2556 				 * version.
2557 				 */
2558 				make_least_version(rbtdb, version,
2559 						   &cleanup_list);
2560 			} else {
2561 				/*
2562 				 * Some other open version is the
2563 				 * least version.  We can't cleanup
2564 				 * records that were changed in this
2565 				 * version because the older versions
2566 				 * may still be in use by an open
2567 				 * version.
2568 				 *
2569 				 * We can, however, discard the
2570 				 * changed records for things that
2571 				 * we've added that didn't exist in
2572 				 * prior versions.
2573 				 */
2574 				cleanup_nondirty(version, &cleanup_list);
2575 			}
2576 			/*
2577 			 * If the (soon to be former) current version
2578 			 * isn't being used by anyone, we can clean
2579 			 * it up.
2580 			 */
2581 			if (cur_ref == 0) {
2582 				cleanup_version = cur_version;
2583 				APPENDLIST(version->changed_list,
2584 					   cleanup_version->changed_list,
2585 					   link);
2586 			}
2587 			/*
2588 			 * Update the zone's secure status.
2589 			 */
2590 			if (!IS_CACHE(rbtdb))
2591 				iszonesecure(db, version, rbtdb->origin_node);
2592 			/*
2593 			 * Become the current version.
2594 			 */
2595 			version->writer = ISC_FALSE;
2596 			rbtdb->current_version = version;
2597 			rbtdb->current_serial = version->serial;
2598 			rbtdb->future_version = NULL;
2599 
2600 			/*
2601 			 * Keep the current version in the open list, and
2602 			 * gain a reference for the DB itself (see the DB
2603 			 * creation function below).  This must be the only
2604 			 * case where we need to increment the counter from
2605 			 * zero and need to use isc_refcount_increment0().
2606 			 */
2607 			isc_refcount_increment0(&version->references,
2608 						&cur_ref);
2609 			INSIST(cur_ref == 1);
2610 			PREPEND(rbtdb->open_versions,
2611 				rbtdb->current_version, link);
2612 			resigned_list = version->resigned_list;
2613 			ISC_LIST_INIT(version->resigned_list);
2614 		} else {
2615 			/*
2616 			 * We're rolling back this transaction.
2617 			 */
2618 			cleanup_list = version->changed_list;
2619 			ISC_LIST_INIT(version->changed_list);
2620 			resigned_list = version->resigned_list;
2621 			ISC_LIST_INIT(version->resigned_list);
2622 			rollback = ISC_TRUE;
2623 			cleanup_version = version;
2624 			rbtdb->future_version = NULL;
2625 		}
2626 	} else {
2627 		if (version != rbtdb->current_version) {
2628 			/*
2629 			 * There are no external or internal references
2630 			 * to this version and it can be cleaned up.
2631 			 */
2632 			cleanup_version = version;
2633 
2634 			/*
2635 			 * Find the version with the least serial
2636 			 * number greater than ours.
2637 			 */
2638 			least_greater = PREV(version, link);
2639 			if (least_greater == NULL)
2640 				least_greater = rbtdb->current_version;
2641 
2642 			INSIST(version->serial < least_greater->serial);
2643 			/*
2644 			 * Is this the least open version?
2645 			 */
2646 			if (version->serial == rbtdb->least_serial) {
2647 				/*
2648 				 * Yes.  Install the new least open
2649 				 * version.
2650 				 */
2651 				make_least_version(rbtdb,
2652 						   least_greater,
2653 						   &cleanup_list);
2654 			} else {
2655 				/*
2656 				 * Add any unexecuted cleanups to
2657 				 * those of the least greater version.
2658 				 */
2659 				APPENDLIST(least_greater->changed_list,
2660 					   version->changed_list,
2661 					   link);
2662 			}
2663 		} else if (version->serial == rbtdb->least_serial)
2664 			INSIST(EMPTY(version->changed_list));
2665 		UNLINK(rbtdb->open_versions, version, link);
2666 	}
2667 	least_serial = rbtdb->least_serial;
2668 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2669 
2670 	if (cleanup_version != NULL) {
2671 		INSIST(EMPTY(cleanup_version->changed_list));
2672 		isc_mem_put(rbtdb->common.mctx, cleanup_version,
2673 			    sizeof(*cleanup_version));
2674 	}
2675 
2676 	/*
2677 	 * Commit/rollback re-signed headers.
2678 	 */
2679 	for (header = HEAD(resigned_list);
2680 	     header != NULL;
2681 	     header = HEAD(resigned_list)) {
2682 		nodelock_t *lock;
2683 
2684 		ISC_LIST_UNLINK(resigned_list, header, link);
2685 
2686 		lock = &rbtdb->node_locks[header->node->locknum].lock;
2687 		NODE_LOCK(lock, isc_rwlocktype_write);
2688 		if (rollback && !IGNORE(header)) {
2689 			isc_result_t result;
2690 			result = resign_insert(rbtdb, header->node->locknum,
2691 					       header);
2692 			if (result != ISC_R_SUCCESS)
2693 				isc_log_write(dns_lctx,
2694 					      DNS_LOGCATEGORY_DATABASE,
2695 					      DNS_LOGMODULE_ZONE, ISC_LOG_ERROR,
2696 					      "Unable to reinsert header to "
2697 					      "re-signing heap: %s\n",
2698 				dns_result_totext(result));
2699 		}
2700 		decrement_reference(rbtdb, header->node, least_serial,
2701 				    isc_rwlocktype_write, isc_rwlocktype_none,
2702 				    ISC_FALSE);
2703 		NODE_UNLOCK(lock, isc_rwlocktype_write);
2704 	}
2705 
2706 	if (!EMPTY(cleanup_list)) {
2707 		isc_event_t *event = NULL;
2708 		isc_rwlocktype_t tlock = isc_rwlocktype_none;
2709 
2710 		if (rbtdb->task != NULL)
2711 			event = isc_event_allocate(rbtdb->common.mctx, NULL,
2712 						   DNS_EVENT_RBTDEADNODES,
2713 						   cleanup_dead_nodes_callback,
2714 						   rbtdb, sizeof(isc_event_t));
2715 		if (event == NULL) {
2716 			/*
2717 			 * We acquire a tree write lock here in order to make
2718 			 * sure that stale nodes will be removed in
2719 			 * decrement_reference().  If we didn't have the lock,
2720 			 * those nodes could miss the chance to be removed
2721 			 * until the server stops.  The write lock is
2722 			 * expensive, but this event should be rare enough
2723 			 * to justify the cost.
2724 			 */
2725 			RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2726 			tlock = isc_rwlocktype_write;
2727 		}
2728 
2729 		for (changed = HEAD(cleanup_list);
2730 		     changed != NULL;
2731 		     changed = next_changed) {
2732 			nodelock_t *lock;
2733 
2734 			next_changed = NEXT(changed, link);
2735 			rbtnode = changed->node;
2736 			lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2737 
2738 			NODE_LOCK(lock, isc_rwlocktype_write);
2739 			/*
2740 			 * This is a good opportunity to purge any dead nodes,
2741 			 * so use it.
2742 			 */
2743 			if (event == NULL)
2744 				cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2745 
2746 			if (rollback)
2747 				rollback_node(rbtnode, serial);
2748 			decrement_reference(rbtdb, rbtnode, least_serial,
2749 					    isc_rwlocktype_write, tlock,
2750 					    ISC_FALSE);
2751 
2752 			NODE_UNLOCK(lock, isc_rwlocktype_write);
2753 
2754 			isc_mem_put(rbtdb->common.mctx, changed,
2755 				    sizeof(*changed));
2756 		}
2757 		if (event != NULL) {
2758 			isc_refcount_increment(&rbtdb->references, NULL);
2759 			isc_task_send(rbtdb->task, &event);
2760 		} else
2761 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2762 	}
2763 
2764  end:
2765 	*versionp = NULL;
2766 }
2767 
2768 /*
2769  * Add the necessary magic for the wildcard name 'name'
2770  * to be found in 'rbtdb'.
2771  *
2772  * In order for wildcard matching to work correctly in
2773  * zone_find(), we must ensure that a node for the wildcarding
2774  * level exists in the database, and has its 'find_callback'
2775  * and 'wild' bits set.
2776  *
2777  * E.g. if the wildcard name is "*.sub.example." then we
2778  * must ensure that "sub.example." exists and is marked as
2779  * a wildcard level.
2780  */
2781 static isc_result_t
add_wildcard_magic(dns_rbtdb_t * rbtdb,dns_name_t * name)2782 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2783 	isc_result_t result;
2784 	dns_name_t foundname;
2785 	dns_offsets_t offsets;
2786 	unsigned int n;
2787 	dns_rbtnode_t *node = NULL;
2788 
2789 	dns_name_init(&foundname, offsets);
2790 	n = dns_name_countlabels(name);
2791 	INSIST(n >= 2);
2792 	n--;
2793 	dns_name_getlabelsequence(name, 1, n, &foundname);
2794 	result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2795 	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2796 		return (result);
2797 	if (result == ISC_R_SUCCESS)
2798 		node->nsec = DNS_RBT_NSEC_NORMAL;
2799 	node->find_callback = 1;
2800 	node->wild = 1;
2801 	return (ISC_R_SUCCESS);
2802 }
2803 
2804 static isc_result_t
add_empty_wildcards(dns_rbtdb_t * rbtdb,dns_name_t * name)2805 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2806 	isc_result_t result;
2807 	dns_name_t foundname;
2808 	dns_offsets_t offsets;
2809 	unsigned int n, l, i;
2810 
2811 	dns_name_init(&foundname, offsets);
2812 	n = dns_name_countlabels(name);
2813 	l = dns_name_countlabels(&rbtdb->common.origin);
2814 	i = l + 1;
2815 	while (i < n) {
2816 		dns_rbtnode_t *node = NULL;     /* dummy */
2817 		dns_name_getlabelsequence(name, n - i, i, &foundname);
2818 		if (dns_name_iswildcard(&foundname)) {
2819 			result = add_wildcard_magic(rbtdb, &foundname);
2820 			if (result != ISC_R_SUCCESS)
2821 				return (result);
2822 			result = dns_rbt_addnode(rbtdb->tree, &foundname,
2823 						 &node);
2824 			if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2825 				return (result);
2826 			if (result == ISC_R_SUCCESS)
2827 				node->nsec = DNS_RBT_NSEC_NORMAL;
2828 		}
2829 		i++;
2830 	}
2831 	return (ISC_R_SUCCESS);
2832 }
2833 
2834 static isc_result_t
findnodeintree(dns_rbtdb_t * rbtdb,dns_rbt_t * tree,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2835 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2836 	       isc_boolean_t create, dns_dbnode_t **nodep)
2837 {
2838 	dns_rbtnode_t *node = NULL;
2839 	dns_name_t nodename;
2840 	isc_result_t result;
2841 	isc_rwlocktype_t locktype = isc_rwlocktype_read;
2842 
2843 	INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2844 
2845 	dns_name_init(&nodename, NULL);
2846 	RWLOCK(&rbtdb->tree_lock, locktype);
2847 	result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2848 				  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2849 	if (result != ISC_R_SUCCESS) {
2850 		RWUNLOCK(&rbtdb->tree_lock, locktype);
2851 		if (!create) {
2852 			if (result == DNS_R_PARTIALMATCH)
2853 				result = ISC_R_NOTFOUND;
2854 			return (result);
2855 		}
2856 		/*
2857 		 * It would be nice to try to upgrade the lock instead of
2858 		 * unlocking then relocking.
2859 		 */
2860 		locktype = isc_rwlocktype_write;
2861 		RWLOCK(&rbtdb->tree_lock, locktype);
2862 		node = NULL;
2863 		result = dns_rbt_addnode(tree, name, &node);
2864 		if (result == ISC_R_SUCCESS) {
2865 			dns_rbt_namefromnode(node, &nodename);
2866 #ifdef DNS_RBT_USEHASH
2867 			node->locknum = node->hashval % rbtdb->node_lock_count;
2868 #else
2869 			node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2870 				rbtdb->node_lock_count;
2871 #endif
2872 			if (tree == rbtdb->tree) {
2873 				add_empty_wildcards(rbtdb, name);
2874 
2875 				if (dns_name_iswildcard(name)) {
2876 					result = add_wildcard_magic(rbtdb, name);
2877 					if (result != ISC_R_SUCCESS) {
2878 						RWUNLOCK(&rbtdb->tree_lock, locktype);
2879 						return (result);
2880 					}
2881 				}
2882 			}
2883 			if (tree == rbtdb->nsec3)
2884 				node->nsec = DNS_RBT_NSEC_NSEC3;
2885 		} else if (result != ISC_R_EXISTS) {
2886 			RWUNLOCK(&rbtdb->tree_lock, locktype);
2887 			return (result);
2888 		}
2889 	}
2890 
2891 	if (tree == rbtdb->nsec3)
2892 		INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2893 
2894 	reactivate_node(rbtdb, node, locktype);
2895 
2896 	/*
2897 	 * Always try to add the policy zone data, because this node might
2898 	 * already have been implicitly created by the previous addition of
2899 	 * a longer domain.  A common example is adding *.example.com
2900 	 * (implicitly creating example.com) followed by explicitly adding
2901 	 * example.com.
2902 	 */
2903 	if (create && rbtdb->rpzs != NULL && tree == rbtdb->tree) {
2904 		dns_fixedname_t fnamef;
2905 		dns_name_t *fname;
2906 
2907 		dns_fixedname_init(&fnamef);
2908 		fname = dns_fixedname_name(&fnamef);
2909 		dns_rbt_fullnamefromnode(node, fname);
2910 		result = dns_rpz_add(rbtdb->rpzs, rbtdb->rpz_num, fname);
2911 		if (result == ISC_R_SUCCESS)
2912 			node->rpz = 1;
2913 		if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
2914 			/*
2915 			 * It is too late to give up, so merely complain.
2916 			 */
2917 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
2918 				      DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
2919 				      "dns_rpz_add(): %s",
2920 				      isc_result_totext(result));
2921 		}
2922 	}
2923 
2924 	RWUNLOCK(&rbtdb->tree_lock, locktype);
2925 
2926 	*nodep = (dns_dbnode_t *)node;
2927 
2928 	return (ISC_R_SUCCESS);
2929 }
2930 
2931 static isc_result_t
findnode(dns_db_t * db,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2932 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2933 	 dns_dbnode_t **nodep)
2934 {
2935 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2936 
2937 	REQUIRE(VALID_RBTDB(rbtdb));
2938 
2939 	return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2940 }
2941 
2942 static isc_result_t
findnsec3node(dns_db_t * db,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2943 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2944 	      dns_dbnode_t **nodep)
2945 {
2946 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2947 
2948 	REQUIRE(VALID_RBTDB(rbtdb));
2949 
2950 	return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2951 }
2952 
2953 static isc_result_t
zone_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)2954 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2955 	rbtdb_search_t *search = arg;
2956 	rdatasetheader_t *header, *header_next;
2957 	rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2958 	rdatasetheader_t *found;
2959 	isc_result_t result;
2960 	dns_rbtnode_t *onode;
2961 
2962 	/*
2963 	 * We only want to remember the topmost zone cut, since it's the one
2964 	 * that counts, so we'll just continue if we've already found a
2965 	 * zonecut.
2966 	 */
2967 	if (search->zonecut != NULL)
2968 		return (DNS_R_CONTINUE);
2969 
2970 	found = NULL;
2971 	result = DNS_R_CONTINUE;
2972 	onode = search->rbtdb->origin_node;
2973 
2974 	NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2975 		  isc_rwlocktype_read);
2976 
2977 	/*
2978 	 * Look for an NS or DNAME rdataset active in our version.
2979 	 */
2980 	ns_header = NULL;
2981 	dname_header = NULL;
2982 	sigdname_header = NULL;
2983 	for (header = node->data; header != NULL; header = header_next) {
2984 		header_next = header->next;
2985 		if (header->type == dns_rdatatype_ns ||
2986 		    header->type == dns_rdatatype_dname ||
2987 		    header->type == RBTDB_RDATATYPE_SIGDNAME) {
2988 			do {
2989 				if (header->serial <= search->serial &&
2990 				    !IGNORE(header)) {
2991 					/*
2992 					 * Is this a "this rdataset doesn't
2993 					 * exist" record?
2994 					 */
2995 					if (NONEXISTENT(header))
2996 						header = NULL;
2997 					break;
2998 				} else
2999 					header = header->down;
3000 			} while (header != NULL);
3001 			if (header != NULL) {
3002 				if (header->type == dns_rdatatype_dname)
3003 					dname_header = header;
3004 				else if (header->type ==
3005 					   RBTDB_RDATATYPE_SIGDNAME)
3006 					sigdname_header = header;
3007 				else if (node != onode ||
3008 					 IS_STUB(search->rbtdb)) {
3009 					/*
3010 					 * We've found an NS rdataset that
3011 					 * isn't at the origin node.  We check
3012 					 * that they're not at the origin node,
3013 					 * because otherwise we'd erroneously
3014 					 * treat the zone top as if it were
3015 					 * a delegation.
3016 					 */
3017 					ns_header = header;
3018 				}
3019 			}
3020 		}
3021 	}
3022 
3023 	/*
3024 	 * Did we find anything?
3025 	 */
3026 	if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
3027 	    ns_header != NULL) {
3028 		/*
3029 		 * Note that NS has precedence over DNAME if both exist
3030 		 * in a zone.  Otherwise DNAME take precedence over NS.
3031 		 */
3032 		found = ns_header;
3033 		search->zonecut_sigrdataset = NULL;
3034 	} else if (dname_header != NULL) {
3035 		found = dname_header;
3036 		search->zonecut_sigrdataset = sigdname_header;
3037 	} else if (ns_header != NULL) {
3038 		found = ns_header;
3039 		search->zonecut_sigrdataset = NULL;
3040 	}
3041 
3042 	if (found != NULL) {
3043 		/*
3044 		 * We increment the reference count on node to ensure that
3045 		 * search->zonecut_rdataset will still be valid later.
3046 		 */
3047 		new_reference(search->rbtdb, node);
3048 		search->zonecut = node;
3049 		search->zonecut_rdataset = found;
3050 		search->need_cleanup = ISC_TRUE;
3051 		/*
3052 		 * Since we've found a zonecut, anything beneath it is
3053 		 * glue and is not subject to wildcard matching, so we
3054 		 * may clear search->wild.
3055 		 */
3056 		search->wild = ISC_FALSE;
3057 		if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
3058 			/*
3059 			 * If the caller does not want to find glue, then
3060 			 * this is the best answer and the search should
3061 			 * stop now.
3062 			 */
3063 			result = DNS_R_PARTIALMATCH;
3064 		} else {
3065 			dns_name_t *zcname;
3066 
3067 			/*
3068 			 * The search will continue beneath the zone cut.
3069 			 * This may or may not be the best match.  In case it
3070 			 * is, we need to remember the node name.
3071 			 */
3072 			zcname = dns_fixedname_name(&search->zonecut_name);
3073 			RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
3074 				      ISC_R_SUCCESS);
3075 			search->copy_name = ISC_TRUE;
3076 		}
3077 	} else {
3078 		/*
3079 		 * There is no zonecut at this node which is active in this
3080 		 * version.
3081 		 *
3082 		 * If this is a "wild" node and the caller hasn't disabled
3083 		 * wildcard matching, remember that we've seen a wild node
3084 		 * in case we need to go searching for wildcard matches
3085 		 * later on.
3086 		 */
3087 		if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
3088 			search->wild = ISC_TRUE;
3089 	}
3090 
3091 	NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3092 		    isc_rwlocktype_read);
3093 
3094 	return (result);
3095 }
3096 
3097 static inline void
bind_rdataset(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rdatasetheader_t * header,isc_stdtime_t now,dns_rdataset_t * rdataset)3098 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
3099 	      rdatasetheader_t *header, isc_stdtime_t now,
3100 	      dns_rdataset_t *rdataset)
3101 {
3102 	unsigned char *raw;     /* RDATASLAB */
3103 
3104 	/*
3105 	 * Caller must be holding the node reader lock.
3106 	 * XXXJT: technically, we need a writer lock, since we'll increment
3107 	 * the header count below.  However, since the actual counter value
3108 	 * doesn't matter, we prioritize performance here.  (We may want to
3109 	 * use atomic increment when available).
3110 	 */
3111 
3112 	if (rdataset == NULL)
3113 		return;
3114 
3115 	new_reference(rbtdb, node);
3116 
3117 	INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
3118 
3119 	rdataset->methods = &rdataset_methods;
3120 	rdataset->rdclass = rbtdb->common.rdclass;
3121 	rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
3122 	rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
3123 	rdataset->ttl = header->rdh_ttl - now;
3124 	rdataset->trust = header->trust;
3125 	if (NEGATIVE(header))
3126 		rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
3127 	if (NXDOMAIN(header))
3128 		rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
3129 	if (OPTOUT(header))
3130 		rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
3131 	if (PREFETCH(header))
3132 		rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
3133 	rdataset->private1 = rbtdb;
3134 	rdataset->private2 = node;
3135 	raw = (unsigned char *)header + sizeof(*header);
3136 	rdataset->private3 = raw;
3137 	rdataset->count = header->count++;
3138 	if (rdataset->count == ISC_UINT32_MAX)
3139 		rdataset->count = 0;
3140 
3141 	/*
3142 	 * Reset iterator state.
3143 	 */
3144 	rdataset->privateuint4 = 0;
3145 	rdataset->private5 = NULL;
3146 
3147 	/*
3148 	 * Add noqname proof.
3149 	 */
3150 	rdataset->private6 = header->noqname;
3151 	if (rdataset->private6 != NULL)
3152 		rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
3153 	rdataset->private7 = header->closest;
3154 	if (rdataset->private7 != NULL)
3155 		rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
3156 
3157 	/*
3158 	 * Copy out re-signing information.
3159 	 */
3160 	if (RESIGN(header)) {
3161 		rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
3162 		rdataset->resign = header->resign;
3163 	} else
3164 		rdataset->resign = 0;
3165 }
3166 
3167 static inline isc_result_t
setup_delegation(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)3168 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
3169 		 dns_name_t *foundname, dns_rdataset_t *rdataset,
3170 		 dns_rdataset_t *sigrdataset)
3171 {
3172 	isc_result_t result;
3173 	dns_name_t *zcname;
3174 	rbtdb_rdatatype_t type;
3175 	dns_rbtnode_t *node;
3176 
3177 	/*
3178 	 * The caller MUST NOT be holding any node locks.
3179 	 */
3180 
3181 	node = search->zonecut;
3182 	type = search->zonecut_rdataset->type;
3183 
3184 	/*
3185 	 * If we have to set foundname, we do it before anything else.
3186 	 * If we were to set foundname after we had set nodep or bound the
3187 	 * rdataset, then we'd have to undo that work if dns_name_copy()
3188 	 * failed.  By setting foundname first, there's nothing to undo if
3189 	 * we have trouble.
3190 	 */
3191 	if (foundname != NULL && search->copy_name) {
3192 		zcname = dns_fixedname_name(&search->zonecut_name);
3193 		result = dns_name_copy(zcname, foundname, NULL);
3194 		if (result != ISC_R_SUCCESS)
3195 			return (result);
3196 	}
3197 	if (nodep != NULL) {
3198 		/*
3199 		 * Note that we don't have to increment the node's reference
3200 		 * count here because we're going to use the reference we
3201 		 * already have in the search block.
3202 		 */
3203 		*nodep = node;
3204 		search->need_cleanup = ISC_FALSE;
3205 	}
3206 	if (rdataset != NULL) {
3207 		NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3208 			  isc_rwlocktype_read);
3209 		bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
3210 			      search->now, rdataset);
3211 		if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
3212 			bind_rdataset(search->rbtdb, node,
3213 				      search->zonecut_sigrdataset,
3214 				      search->now, sigrdataset);
3215 		NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3216 			    isc_rwlocktype_read);
3217 	}
3218 
3219 	if (type == dns_rdatatype_dname)
3220 		return (DNS_R_DNAME);
3221 	return (DNS_R_DELEGATION);
3222 }
3223 
3224 static inline isc_boolean_t
valid_glue(rbtdb_search_t * search,dns_name_t * name,rbtdb_rdatatype_t type,dns_rbtnode_t * node)3225 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
3226 	   dns_rbtnode_t *node)
3227 {
3228 	unsigned char *raw;     /* RDATASLAB */
3229 	unsigned int count, size;
3230 	dns_name_t ns_name;
3231 	isc_boolean_t valid = ISC_FALSE;
3232 	dns_offsets_t offsets;
3233 	isc_region_t region;
3234 	rdatasetheader_t *header;
3235 
3236 	/*
3237 	 * No additional locking is required.
3238 	 */
3239 
3240 	/*
3241 	 * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
3242 	 * if it occurs at a zone cut, but is not valid below it.
3243 	 */
3244 	if (type == dns_rdatatype_ns) {
3245 		if (node != search->zonecut) {
3246 			return (ISC_FALSE);
3247 		}
3248 	} else if (type != dns_rdatatype_a &&
3249 		   type != dns_rdatatype_aaaa &&
3250 		   type != dns_rdatatype_a6) {
3251 		return (ISC_FALSE);
3252 	}
3253 
3254 	header = search->zonecut_rdataset;
3255 	raw = (unsigned char *)header + sizeof(*header);
3256 	count = raw[0] * 256 + raw[1];
3257 #if DNS_RDATASET_FIXED
3258 	raw += 2 + (4 * count);
3259 #else
3260 	raw += 2;
3261 #endif
3262 
3263 	while (count > 0) {
3264 		count--;
3265 		size = raw[0] * 256 + raw[1];
3266 #if DNS_RDATASET_FIXED
3267 		raw += 4;
3268 #else
3269 		raw += 2;
3270 #endif
3271 		region.base = raw;
3272 		region.length = size;
3273 		raw += size;
3274 		/*
3275 		 * XXX Until we have rdata structures, we have no choice but
3276 		 * to directly access the rdata format.
3277 		 */
3278 		dns_name_init(&ns_name, offsets);
3279 		dns_name_fromregion(&ns_name, &region);
3280 		if (dns_name_compare(&ns_name, name) == 0) {
3281 			valid = ISC_TRUE;
3282 			break;
3283 		}
3284 	}
3285 
3286 	return (valid);
3287 }
3288 
3289 static inline isc_boolean_t
activeempty(rbtdb_search_t * search,dns_rbtnodechain_t * chain,dns_name_t * name)3290 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
3291 	    dns_name_t *name)
3292 {
3293 	dns_fixedname_t fnext;
3294 	dns_fixedname_t forigin;
3295 	dns_name_t *next;
3296 	dns_name_t *origin;
3297 	dns_name_t prefix;
3298 	dns_rbtdb_t *rbtdb;
3299 	dns_rbtnode_t *node;
3300 	isc_result_t result;
3301 	isc_boolean_t answer = ISC_FALSE;
3302 	rdatasetheader_t *header;
3303 
3304 	rbtdb = search->rbtdb;
3305 
3306 	dns_name_init(&prefix, NULL);
3307 	dns_fixedname_init(&fnext);
3308 	next = dns_fixedname_name(&fnext);
3309 	dns_fixedname_init(&forigin);
3310 	origin = dns_fixedname_name(&forigin);
3311 
3312 	result = dns_rbtnodechain_next(chain, NULL, NULL);
3313 	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3314 		node = NULL;
3315 		result = dns_rbtnodechain_current(chain, &prefix,
3316 						  origin, &node);
3317 		if (result != ISC_R_SUCCESS)
3318 			break;
3319 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3320 			  isc_rwlocktype_read);
3321 		for (header = node->data;
3322 		     header != NULL;
3323 		     header = header->next) {
3324 			if (header->serial <= search->serial &&
3325 			    !IGNORE(header) && EXISTS(header))
3326 				break;
3327 		}
3328 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3329 			    isc_rwlocktype_read);
3330 		if (header != NULL)
3331 			break;
3332 		result = dns_rbtnodechain_next(chain, NULL, NULL);
3333 	}
3334 	if (result == ISC_R_SUCCESS)
3335 		result = dns_name_concatenate(&prefix, origin, next, NULL);
3336 	if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
3337 		answer = ISC_TRUE;
3338 	return (answer);
3339 }
3340 
3341 static inline isc_boolean_t
activeemtpynode(rbtdb_search_t * search,dns_name_t * qname,dns_name_t * wname)3342 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3343 	dns_fixedname_t fnext;
3344 	dns_fixedname_t forigin;
3345 	dns_fixedname_t fprev;
3346 	dns_name_t *next;
3347 	dns_name_t *origin;
3348 	dns_name_t *prev;
3349 	dns_name_t name;
3350 	dns_name_t rname;
3351 	dns_name_t tname;
3352 	dns_rbtdb_t *rbtdb;
3353 	dns_rbtnode_t *node;
3354 	dns_rbtnodechain_t chain;
3355 	isc_boolean_t check_next = ISC_TRUE;
3356 	isc_boolean_t check_prev = ISC_TRUE;
3357 	isc_boolean_t answer = ISC_FALSE;
3358 	isc_result_t result;
3359 	rdatasetheader_t *header;
3360 	unsigned int n;
3361 
3362 	rbtdb = search->rbtdb;
3363 
3364 	dns_name_init(&name, NULL);
3365 	dns_name_init(&tname, NULL);
3366 	dns_name_init(&rname, NULL);
3367 	dns_fixedname_init(&fnext);
3368 	next = dns_fixedname_name(&fnext);
3369 	dns_fixedname_init(&fprev);
3370 	prev = dns_fixedname_name(&fprev);
3371 	dns_fixedname_init(&forigin);
3372 	origin = dns_fixedname_name(&forigin);
3373 
3374 	/*
3375 	 * Find if qname is at or below a empty node.
3376 	 * Use our own copy of the chain.
3377 	 */
3378 
3379 	chain = search->chain;
3380 	do {
3381 		node = NULL;
3382 		result = dns_rbtnodechain_current(&chain, &name,
3383 						  origin, &node);
3384 		if (result != ISC_R_SUCCESS)
3385 			break;
3386 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3387 			  isc_rwlocktype_read);
3388 		for (header = node->data;
3389 		     header != NULL;
3390 		     header = header->next) {
3391 			if (header->serial <= search->serial &&
3392 			    !IGNORE(header) && EXISTS(header))
3393 				break;
3394 		}
3395 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3396 			    isc_rwlocktype_read);
3397 		if (header != NULL)
3398 			break;
3399 		result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3400 	} while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3401 	if (result == ISC_R_SUCCESS)
3402 		result = dns_name_concatenate(&name, origin, prev, NULL);
3403 	if (result != ISC_R_SUCCESS)
3404 		check_prev = ISC_FALSE;
3405 
3406 	result = dns_rbtnodechain_next(&chain, NULL, NULL);
3407 	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3408 		node = NULL;
3409 		result = dns_rbtnodechain_current(&chain, &name,
3410 						  origin, &node);
3411 		if (result != ISC_R_SUCCESS)
3412 			break;
3413 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3414 			  isc_rwlocktype_read);
3415 		for (header = node->data;
3416 		     header != NULL;
3417 		     header = header->next) {
3418 			if (header->serial <= search->serial &&
3419 			    !IGNORE(header) && EXISTS(header))
3420 				break;
3421 		}
3422 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3423 			    isc_rwlocktype_read);
3424 		if (header != NULL)
3425 			break;
3426 		result = dns_rbtnodechain_next(&chain, NULL, NULL);
3427 	}
3428 	if (result == ISC_R_SUCCESS)
3429 		result = dns_name_concatenate(&name, origin, next, NULL);
3430 	if (result != ISC_R_SUCCESS)
3431 		check_next = ISC_FALSE;
3432 
3433 	dns_name_clone(qname, &rname);
3434 
3435 	/*
3436 	 * Remove the wildcard label to find the terminal name.
3437 	 */
3438 	n = dns_name_countlabels(wname);
3439 	dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3440 
3441 	do {
3442 		if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3443 		    (check_next && dns_name_issubdomain(next, &rname))) {
3444 			answer = ISC_TRUE;
3445 			break;
3446 		}
3447 		/*
3448 		 * Remove the left hand label.
3449 		 */
3450 		n = dns_name_countlabels(&rname);
3451 		dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3452 	} while (!dns_name_equal(&rname, &tname));
3453 	return (answer);
3454 }
3455 
3456 static inline isc_result_t
find_wildcard(rbtdb_search_t * search,dns_rbtnode_t ** nodep,dns_name_t * qname)3457 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3458 	      dns_name_t *qname)
3459 {
3460 	unsigned int i, j;
3461 	dns_rbtnode_t *node, *level_node, *wnode;
3462 	rdatasetheader_t *header;
3463 	isc_result_t result = ISC_R_NOTFOUND;
3464 	dns_name_t name;
3465 	dns_name_t *wname;
3466 	dns_fixedname_t fwname;
3467 	dns_rbtdb_t *rbtdb;
3468 	isc_boolean_t done, wild, active;
3469 	dns_rbtnodechain_t wchain;
3470 
3471 	/*
3472 	 * Caller must be holding the tree lock and MUST NOT be holding
3473 	 * any node locks.
3474 	 */
3475 
3476 	/*
3477 	 * Examine each ancestor level.  If the level's wild bit
3478 	 * is set, then construct the corresponding wildcard name and
3479 	 * search for it.  If the wildcard node exists, and is active in
3480 	 * this version, we're done.  If not, then we next check to see
3481 	 * if the ancestor is active in this version.  If so, then there
3482 	 * can be no possible wildcard match and again we're done.  If not,
3483 	 * continue the search.
3484 	 */
3485 
3486 	rbtdb = search->rbtdb;
3487 	i = search->chain.level_matches;
3488 	done = ISC_FALSE;
3489 	node = *nodep;
3490 	do {
3491 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3492 			  isc_rwlocktype_read);
3493 
3494 		/*
3495 		 * First we try to figure out if this node is active in
3496 		 * the search's version.  We do this now, even though we
3497 		 * may not need the information, because it simplifies the
3498 		 * locking and code flow.
3499 		 */
3500 		for (header = node->data;
3501 		     header != NULL;
3502 		     header = header->next) {
3503 			if (header->serial <= search->serial &&
3504 			    !IGNORE(header) && EXISTS(header))
3505 				break;
3506 		}
3507 		if (header != NULL)
3508 			active = ISC_TRUE;
3509 		else
3510 			active = ISC_FALSE;
3511 
3512 		if (node->wild)
3513 			wild = ISC_TRUE;
3514 		else
3515 			wild = ISC_FALSE;
3516 
3517 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3518 			    isc_rwlocktype_read);
3519 
3520 		if (wild) {
3521 			/*
3522 			 * Construct the wildcard name for this level.
3523 			 */
3524 			dns_name_init(&name, NULL);
3525 			dns_rbt_namefromnode(node, &name);
3526 			dns_fixedname_init(&fwname);
3527 			wname = dns_fixedname_name(&fwname);
3528 			result = dns_name_concatenate(dns_wildcardname, &name,
3529 						      wname, NULL);
3530 			j = i;
3531 			while (result == ISC_R_SUCCESS && j != 0) {
3532 				j--;
3533 				level_node = search->chain.levels[j];
3534 				dns_name_init(&name, NULL);
3535 				dns_rbt_namefromnode(level_node, &name);
3536 				result = dns_name_concatenate(wname,
3537 							      &name,
3538 							      wname,
3539 							      NULL);
3540 			}
3541 			if (result != ISC_R_SUCCESS)
3542 				break;
3543 
3544 			wnode = NULL;
3545 			dns_rbtnodechain_init(&wchain, NULL);
3546 			result = dns_rbt_findnode(rbtdb->tree, wname,
3547 						  NULL, &wnode, &wchain,
3548 						  DNS_RBTFIND_EMPTYDATA,
3549 						  NULL, NULL);
3550 			if (result == ISC_R_SUCCESS) {
3551 				nodelock_t *lock;
3552 
3553 				/*
3554 				 * We have found the wildcard node.  If it
3555 				 * is active in the search's version, we're
3556 				 * done.
3557 				 */
3558 				lock = &rbtdb->node_locks[wnode->locknum].lock;
3559 				NODE_LOCK(lock, isc_rwlocktype_read);
3560 				for (header = wnode->data;
3561 				     header != NULL;
3562 				     header = header->next) {
3563 					if (header->serial <= search->serial &&
3564 					    !IGNORE(header) && EXISTS(header))
3565 						break;
3566 				}
3567 				NODE_UNLOCK(lock, isc_rwlocktype_read);
3568 				if (header != NULL ||
3569 				    activeempty(search, &wchain, wname)) {
3570 					if (activeemtpynode(search, qname,
3571 							    wname)) {
3572 						return (ISC_R_NOTFOUND);
3573 					}
3574 					/*
3575 					 * The wildcard node is active!
3576 					 *
3577 					 * Note: result is still ISC_R_SUCCESS
3578 					 * so we don't have to set it.
3579 					 */
3580 					*nodep = wnode;
3581 					break;
3582 				}
3583 			} else if (result != ISC_R_NOTFOUND &&
3584 				   result != DNS_R_PARTIALMATCH) {
3585 				/*
3586 				 * An error has occurred.  Bail out.
3587 				 */
3588 				break;
3589 			}
3590 		}
3591 
3592 		if (active) {
3593 			/*
3594 			 * The level node is active.  Any wildcarding
3595 			 * present at higher levels has no
3596 			 * effect and we're done.
3597 			 */
3598 			result = ISC_R_NOTFOUND;
3599 			break;
3600 		}
3601 
3602 		if (i > 0) {
3603 			i--;
3604 			node = search->chain.levels[i];
3605 		} else
3606 			done = ISC_TRUE;
3607 	} while (!done);
3608 
3609 	return (result);
3610 }
3611 
3612 static isc_boolean_t
matchparams(rdatasetheader_t * header,rbtdb_search_t * search)3613 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3614 {
3615 	dns_rdata_t rdata = DNS_RDATA_INIT;
3616 	dns_rdata_nsec3_t nsec3;
3617 	unsigned char *raw;                     /* RDATASLAB */
3618 	unsigned int rdlen, count;
3619 	isc_region_t region;
3620 	isc_result_t result;
3621 
3622 	REQUIRE(header->type == dns_rdatatype_nsec3);
3623 
3624 	raw = (unsigned char *)header + sizeof(*header);
3625 	count = raw[0] * 256 + raw[1]; /* count */
3626 #if DNS_RDATASET_FIXED
3627 	raw += count * 4 + 2;
3628 #else
3629 	raw += 2;
3630 #endif
3631 	while (count-- > 0) {
3632 		rdlen = raw[0] * 256 + raw[1];
3633 #if DNS_RDATASET_FIXED
3634 		raw += 4;
3635 #else
3636 		raw += 2;
3637 #endif
3638 		region.base = raw;
3639 		region.length = rdlen;
3640 		dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3641 				     dns_rdatatype_nsec3, &region);
3642 		raw += rdlen;
3643 		result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3644 		INSIST(result == ISC_R_SUCCESS);
3645 		if (nsec3.hash == search->rbtversion->hash &&
3646 		    nsec3.iterations == search->rbtversion->iterations &&
3647 		    nsec3.salt_length == search->rbtversion->salt_length &&
3648 		    memcmp(nsec3.salt, search->rbtversion->salt,
3649 			   nsec3.salt_length) == 0)
3650 			return (ISC_TRUE);
3651 		dns_rdata_reset(&rdata);
3652 	}
3653 	return (ISC_FALSE);
3654 }
3655 
3656 /*
3657  * Find node of the NSEC/NSEC3 record that is 'name'.
3658  */
3659 static inline isc_result_t
previous_closest_nsec(dns_rdatatype_t type,rbtdb_search_t * search,dns_name_t * name,dns_name_t * origin,dns_rbtnode_t ** nodep,dns_rbtnodechain_t * nsecchain,isc_boolean_t * firstp)3660 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3661 		    dns_name_t *name, dns_name_t *origin,
3662 		    dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3663 		    isc_boolean_t *firstp)
3664 {
3665 	dns_fixedname_t ftarget;
3666 	dns_name_t *target;
3667 	dns_rbtnode_t *nsecnode;
3668 	isc_result_t result;
3669 
3670 	REQUIRE(nodep != NULL && *nodep == NULL);
3671 
3672 	if (type == dns_rdatatype_nsec3) {
3673 		result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3674 		if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3675 			return (result);
3676 		result = dns_rbtnodechain_current(&search->chain, name, origin,
3677 						  nodep);
3678 		return (result);
3679 	}
3680 
3681 	dns_fixedname_init(&ftarget);
3682 	target = dns_fixedname_name(&ftarget);
3683 
3684 	for (;;) {
3685 		if (*firstp) {
3686 			/*
3687 			 * Construct the name of the second node to check.
3688 			 * It is the first node sought in the NSEC tree.
3689 			 */
3690 			*firstp = ISC_FALSE;
3691 			dns_rbtnodechain_init(nsecchain, NULL);
3692 			result = dns_name_concatenate(name, origin,
3693 						      target, NULL);
3694 			if (result != ISC_R_SUCCESS)
3695 				return (result);
3696 			nsecnode = NULL;
3697 			result = dns_rbt_findnode(search->rbtdb->nsec,
3698 						  target, NULL,
3699 						  &nsecnode, nsecchain,
3700 						  DNS_RBTFIND_NOOPTIONS,
3701 						  NULL, NULL);
3702 			if (result == ISC_R_SUCCESS) {
3703 				/*
3704 				 * Since this was the first loop, finding the
3705 				 * name in the NSEC tree implies that the first
3706 				 * node checked in the main tree had an
3707 				 * unacceptable NSEC record.
3708 				 * Try the previous node in the NSEC tree.
3709 				 */
3710 				result = dns_rbtnodechain_prev(nsecchain,
3711 							       name, origin);
3712 				if (result == DNS_R_NEWORIGIN)
3713 					result = ISC_R_SUCCESS;
3714 			} else if (result == ISC_R_NOTFOUND ||
3715 				   result == DNS_R_PARTIALMATCH) {
3716 				result = dns_rbtnodechain_current(nsecchain,
3717 							name, origin, NULL);
3718 				if (result == ISC_R_NOTFOUND)
3719 					result = ISC_R_NOMORE;
3720 			}
3721 		} else {
3722 			/*
3723 			 * This is a second or later trip through the auxiliary
3724 			 * tree for the name of a third or earlier NSEC node in
3725 			 * the main tree.  Previous trips through the NSEC tree
3726 			 * must have found nodes in the main tree with NSEC
3727 			 * records.  Perhaps they lacked signature records.
3728 			 */
3729 			result = dns_rbtnodechain_prev(nsecchain, name, origin);
3730 			if (result == DNS_R_NEWORIGIN)
3731 				result = ISC_R_SUCCESS;
3732 		}
3733 		if (result != ISC_R_SUCCESS)
3734 			return (result);
3735 
3736 		/*
3737 		 * Construct the name to seek in the main tree.
3738 		 */
3739 		result = dns_name_concatenate(name, origin, target, NULL);
3740 		if (result != ISC_R_SUCCESS)
3741 			return (result);
3742 
3743 		*nodep = NULL;
3744 		result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3745 					  nodep, &search->chain,
3746 					  DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3747 		if (result == ISC_R_SUCCESS)
3748 			return (result);
3749 
3750 		/*
3751 		 * There should always be a node in the main tree with the
3752 		 * same name as the node in the auxiliary NSEC tree, except for
3753 		 * nodes in the auxiliary tree that are awaiting deletion.
3754 		 */
3755 		if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3756 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3757 				      DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3758 				      "previous_closest_nsec(): %s",
3759 				      isc_result_totext(result));
3760 			return (DNS_R_BADDB);
3761 		}
3762 	}
3763 }
3764 
3765 /*
3766  * Find the NSEC/NSEC3 which is or before the current point on the
3767  * search chain.  For NSEC3 records only NSEC3 records that match the
3768  * current NSEC3PARAM record are considered.
3769  */
3770 static inline isc_result_t
find_closest_nsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset,dns_rbt_t * tree,dns_db_secure_t secure)3771 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3772 		  dns_name_t *foundname, dns_rdataset_t *rdataset,
3773 		  dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3774 		  dns_db_secure_t secure)
3775 {
3776 	dns_rbtnode_t *node, *prevnode;
3777 	rdatasetheader_t *header, *header_next, *found, *foundsig;
3778 	dns_rbtnodechain_t nsecchain;
3779 	isc_boolean_t empty_node;
3780 	isc_result_t result;
3781 	dns_fixedname_t fname, forigin;
3782 	dns_name_t *name, *origin;
3783 	dns_rdatatype_t type;
3784 	rbtdb_rdatatype_t sigtype;
3785 	isc_boolean_t wraps;
3786 	isc_boolean_t first = ISC_TRUE;
3787 	isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3788 
3789 	if (tree == search->rbtdb->nsec3) {
3790 		type = dns_rdatatype_nsec3;
3791 		sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3792 		wraps = ISC_TRUE;
3793 	} else {
3794 		type = dns_rdatatype_nsec;
3795 		sigtype = RBTDB_RDATATYPE_SIGNSEC;
3796 		wraps = ISC_FALSE;
3797 	}
3798 
3799 	/*
3800 	 * Use the auxiliary tree only starting with the second node in the
3801 	 * hope that the original node will be right much of the time.
3802 	 */
3803 	dns_fixedname_init(&fname);
3804 	name = dns_fixedname_name(&fname);
3805 	dns_fixedname_init(&forigin);
3806 	origin = dns_fixedname_name(&forigin);
3807  again:
3808 	node = NULL;
3809 	prevnode = NULL;
3810 	result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3811 	if (result != ISC_R_SUCCESS)
3812 		return (result);
3813 	do {
3814 		NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3815 			  isc_rwlocktype_read);
3816 		found = NULL;
3817 		foundsig = NULL;
3818 		empty_node = ISC_TRUE;
3819 		for (header = node->data;
3820 		     header != NULL;
3821 		     header = header_next) {
3822 			header_next = header->next;
3823 			/*
3824 			 * Look for an active, extant NSEC or RRSIG NSEC.
3825 			 */
3826 			do {
3827 				if (header->serial <= search->serial &&
3828 				    !IGNORE(header)) {
3829 					/*
3830 					 * Is this a "this rdataset doesn't
3831 					 * exist" record?
3832 					 */
3833 					if (NONEXISTENT(header))
3834 						header = NULL;
3835 					break;
3836 				} else
3837 					header = header->down;
3838 			} while (header != NULL);
3839 			if (header != NULL) {
3840 				/*
3841 				 * We now know that there is at least one
3842 				 * active rdataset at this node.
3843 				 */
3844 				empty_node = ISC_FALSE;
3845 				if (header->type == type) {
3846 					found = header;
3847 					if (foundsig != NULL)
3848 						break;
3849 				} else if (header->type == sigtype) {
3850 					foundsig = header;
3851 					if (found != NULL)
3852 						break;
3853 				}
3854 			}
3855 		}
3856 		if (!empty_node) {
3857 			if (found != NULL && search->rbtversion->havensec3 &&
3858 			    found->type == dns_rdatatype_nsec3 &&
3859 			    !matchparams(found, search)) {
3860 				empty_node = ISC_TRUE;
3861 				found = NULL;
3862 				foundsig = NULL;
3863 				result = previous_closest_nsec(type, search,
3864 							       name, origin,
3865 							       &prevnode, NULL,
3866 							       NULL);
3867 			} else if (found != NULL &&
3868 				   (foundsig != NULL || !need_sig)) {
3869 				/*
3870 				 * We've found the right NSEC/NSEC3 record.
3871 				 *
3872 				 * Note: for this to really be the right
3873 				 * NSEC record, it's essential that the NSEC
3874 				 * records of any nodes obscured by a zone
3875 				 * cut have been removed; we assume this is
3876 				 * the case.
3877 				 */
3878 				result = dns_name_concatenate(name, origin,
3879 							      foundname, NULL);
3880 				if (result == ISC_R_SUCCESS) {
3881 					if (nodep != NULL) {
3882 						new_reference(search->rbtdb,
3883 							      node);
3884 						*nodep = node;
3885 					}
3886 					bind_rdataset(search->rbtdb, node,
3887 						      found, search->now,
3888 						      rdataset);
3889 					if (foundsig != NULL)
3890 						bind_rdataset(search->rbtdb,
3891 							      node,
3892 							      foundsig,
3893 							      search->now,
3894 							      sigrdataset);
3895 				}
3896 			} else if (found == NULL && foundsig == NULL) {
3897 				/*
3898 				 * This node is active, but has no NSEC or
3899 				 * RRSIG NSEC.  That means it's glue or
3900 				 * other obscured zone data that isn't
3901 				 * relevant for our search.  Treat the
3902 				 * node as if it were empty and keep looking.
3903 				 */
3904 				empty_node = ISC_TRUE;
3905 				result = previous_closest_nsec(type, search,
3906 							       name, origin,
3907 							       &prevnode,
3908 							       &nsecchain,
3909 							       &first);
3910 			} else {
3911 				/*
3912 				 * We found an active node, but either the
3913 				 * NSEC or the RRSIG NSEC is missing.  This
3914 				 * shouldn't happen.
3915 				 */
3916 				result = DNS_R_BADDB;
3917 			}
3918 		} else {
3919 			/*
3920 			 * This node isn't active.  We've got to keep
3921 			 * looking.
3922 			 */
3923 			result = previous_closest_nsec(type, search,
3924 						       name, origin, &prevnode,
3925 						       &nsecchain, &first);
3926 		}
3927 		NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3928 			    isc_rwlocktype_read);
3929 		node = prevnode;
3930 		prevnode = NULL;
3931 	} while (empty_node && result == ISC_R_SUCCESS);
3932 
3933 	if (!first)
3934 		dns_rbtnodechain_invalidate(&nsecchain);
3935 
3936 	if (result == ISC_R_NOMORE && wraps) {
3937 		result = dns_rbtnodechain_last(&search->chain, tree,
3938 					       NULL, NULL);
3939 		if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3940 			wraps = ISC_FALSE;
3941 			goto again;
3942 		}
3943 	}
3944 
3945 	/*
3946 	 * If the result is ISC_R_NOMORE, then we got to the beginning of
3947 	 * the database and didn't find a NSEC record.  This shouldn't
3948 	 * happen.
3949 	 */
3950 	if (result == ISC_R_NOMORE)
3951 		result = DNS_R_BADDB;
3952 
3953 	return (result);
3954 }
3955 
3956 static isc_result_t
zone_find(dns_db_t * db,dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)3957 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3958 	  dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3959 	  dns_dbnode_t **nodep, dns_name_t *foundname,
3960 	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3961 {
3962 	dns_rbtnode_t *node = NULL;
3963 	isc_result_t result;
3964 	rbtdb_search_t search;
3965 	isc_boolean_t cname_ok = ISC_TRUE;
3966 	isc_boolean_t close_version = ISC_FALSE;
3967 	isc_boolean_t maybe_zonecut = ISC_FALSE;
3968 	isc_boolean_t at_zonecut = ISC_FALSE;
3969 	isc_boolean_t wild;
3970 	isc_boolean_t empty_node;
3971 	rdatasetheader_t *header, *header_next, *found, *nsecheader;
3972 	rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3973 	rbtdb_rdatatype_t sigtype;
3974 	isc_boolean_t active;
3975 	dns_rbtnodechain_t chain;
3976 	nodelock_t *lock;
3977 	dns_rbt_t *tree;
3978 
3979 	search.rbtdb = (dns_rbtdb_t *)db;
3980 
3981 	REQUIRE(VALID_RBTDB(search.rbtdb));
3982 	INSIST(version == NULL ||
3983 	       ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3984 
3985 	/*
3986 	 * We don't care about 'now'.
3987 	 */
3988 	UNUSED(now);
3989 
3990 	/*
3991 	 * If the caller didn't supply a version, attach to the current
3992 	 * version.
3993 	 */
3994 	if (version == NULL) {
3995 		currentversion(db, &version);
3996 		close_version = ISC_TRUE;
3997 	}
3998 
3999 	search.rbtversion = version;
4000 	search.serial = search.rbtversion->serial;
4001 	search.options = options;
4002 	search.copy_name = ISC_FALSE;
4003 	search.need_cleanup = ISC_FALSE;
4004 	search.wild = ISC_FALSE;
4005 	search.zonecut = NULL;
4006 	dns_fixedname_init(&search.zonecut_name);
4007 	dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4008 	search.now = 0;
4009 
4010 	/*
4011 	 * 'wild' will be true iff. we've matched a wildcard.
4012 	 */
4013 	wild = ISC_FALSE;
4014 
4015 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4016 
4017 	/*
4018 	 * Search down from the root of the tree.  If, while going down, we
4019 	 * encounter a callback node, zone_zonecut_callback() will search the
4020 	 * rdatasets at the zone cut for active DNAME or NS rdatasets.
4021 	 */
4022 	tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
4023 							 search.rbtdb->tree;
4024 	result = dns_rbt_findnode(tree, name, foundname, &node,
4025 				  &search.chain, DNS_RBTFIND_EMPTYDATA,
4026 				  zone_zonecut_callback, &search);
4027 
4028 	if (result == DNS_R_PARTIALMATCH) {
4029 	partial_match:
4030 		if (search.zonecut != NULL) {
4031 		    result = setup_delegation(&search, nodep, foundname,
4032 					      rdataset, sigrdataset);
4033 		    goto tree_exit;
4034 		}
4035 
4036 		if (search.wild) {
4037 			/*
4038 			 * At least one of the levels in the search chain
4039 			 * potentially has a wildcard.  For each such level,
4040 			 * we must see if there's a matching wildcard active
4041 			 * in the current version.
4042 			 */
4043 			result = find_wildcard(&search, &node, name);
4044 			if (result == ISC_R_SUCCESS) {
4045 				result = dns_name_copy(name, foundname, NULL);
4046 				if (result != ISC_R_SUCCESS)
4047 					goto tree_exit;
4048 				wild = ISC_TRUE;
4049 				goto found;
4050 			}
4051 			else if (result != ISC_R_NOTFOUND)
4052 				goto tree_exit;
4053 		}
4054 
4055 		chain = search.chain;
4056 		active = activeempty(&search, &chain, name);
4057 
4058 		/*
4059 		 * If we're here, then the name does not exist, is not
4060 		 * beneath a zonecut, and there's no matching wildcard.
4061 		 */
4062 		if ((search.rbtversion->secure == dns_db_secure &&
4063 		     !search.rbtversion->havensec3) ||
4064 		    (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
4065 		    (search.options & DNS_DBFIND_FORCENSEC3) != 0)
4066 		{
4067 			result = find_closest_nsec(&search, nodep, foundname,
4068 						   rdataset, sigrdataset, tree,
4069 						   search.rbtversion->secure);
4070 			if (result == ISC_R_SUCCESS)
4071 				result = active ? DNS_R_EMPTYNAME :
4072 						  DNS_R_NXDOMAIN;
4073 		} else
4074 			result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
4075 		goto tree_exit;
4076 	} else if (result != ISC_R_SUCCESS)
4077 		goto tree_exit;
4078 
4079  found:
4080 	/*
4081 	 * We have found a node whose name is the desired name, or we
4082 	 * have matched a wildcard.
4083 	 */
4084 
4085 	if (search.zonecut != NULL) {
4086 		/*
4087 		 * If we're beneath a zone cut, we don't want to look for
4088 		 * CNAMEs because they're not legitimate zone glue.
4089 		 */
4090 		cname_ok = ISC_FALSE;
4091 	} else {
4092 		/*
4093 		 * The node may be a zone cut itself.  If it might be one,
4094 		 * make sure we check for it later.
4095 		 *
4096 		 * DS records live above the zone cut in ordinary zone so
4097 		 * we want to ignore any referral.
4098 		 *
4099 		 * Stub zones don't have anything "above" the delgation so
4100 		 * we always return a referral.
4101 		 */
4102 		if (node->find_callback &&
4103 		    ((node != search.rbtdb->origin_node &&
4104 		      !dns_rdatatype_atparent(type)) ||
4105 		     IS_STUB(search.rbtdb)))
4106 			maybe_zonecut = ISC_TRUE;
4107 	}
4108 
4109 	/*
4110 	 * Certain DNSSEC types are not subject to CNAME matching
4111 	 * (RFC4035, section 2.5 and RFC3007).
4112 	 *
4113 	 * We don't check for RRSIG, because we don't store RRSIG records
4114 	 * directly.
4115 	 */
4116 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4117 		cname_ok = ISC_FALSE;
4118 
4119 	/*
4120 	 * We now go looking for rdata...
4121 	 */
4122 
4123 	lock = &search.rbtdb->node_locks[node->locknum].lock;
4124 	NODE_LOCK(lock, isc_rwlocktype_read);
4125 
4126 	found = NULL;
4127 	foundsig = NULL;
4128 	sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4129 	nsecheader = NULL;
4130 	nsecsig = NULL;
4131 	cnamesig = NULL;
4132 	empty_node = ISC_TRUE;
4133 	for (header = node->data; header != NULL; header = header_next) {
4134 		header_next = header->next;
4135 		/*
4136 		 * Look for an active, extant rdataset.
4137 		 */
4138 		do {
4139 			if (header->serial <= search.serial &&
4140 			    !IGNORE(header)) {
4141 				/*
4142 				 * Is this a "this rdataset doesn't
4143 				 * exist" record?
4144 				 */
4145 				if (NONEXISTENT(header))
4146 					header = NULL;
4147 				break;
4148 			} else
4149 				header = header->down;
4150 		} while (header != NULL);
4151 		if (header != NULL) {
4152 			/*
4153 			 * We now know that there is at least one active
4154 			 * rdataset at this node.
4155 			 */
4156 			empty_node = ISC_FALSE;
4157 
4158 			/*
4159 			 * Do special zone cut handling, if requested.
4160 			 */
4161 			if (maybe_zonecut &&
4162 			    header->type == dns_rdatatype_ns) {
4163 				/*
4164 				 * We increment the reference count on node to
4165 				 * ensure that search->zonecut_rdataset will
4166 				 * still be valid later.
4167 				 */
4168 				new_reference(search.rbtdb, node);
4169 				search.zonecut = node;
4170 				search.zonecut_rdataset = header;
4171 				search.zonecut_sigrdataset = NULL;
4172 				search.need_cleanup = ISC_TRUE;
4173 				maybe_zonecut = ISC_FALSE;
4174 				at_zonecut = ISC_TRUE;
4175 				/*
4176 				 * It is not clear if KEY should still be
4177 				 * allowed at the parent side of the zone
4178 				 * cut or not.  It is needed for RFC3007
4179 				 * validated updates.
4180 				 */
4181 				if ((search.options & DNS_DBFIND_GLUEOK) == 0
4182 				    && type != dns_rdatatype_nsec
4183 				    && type != dns_rdatatype_key) {
4184 					/*
4185 					 * Glue is not OK, but any answer we
4186 					 * could return would be glue.  Return
4187 					 * the delegation.
4188 					 */
4189 					found = NULL;
4190 					break;
4191 				}
4192 				if (found != NULL && foundsig != NULL)
4193 					break;
4194 			}
4195 
4196 
4197 			/*
4198 			 * If the NSEC3 record doesn't match the chain
4199 			 * we are using behave as if it isn't here.
4200 			 */
4201 			if (header->type == dns_rdatatype_nsec3 &&
4202 			   !matchparams(header, &search)) {
4203 				NODE_UNLOCK(lock, isc_rwlocktype_read);
4204 				goto partial_match;
4205 			}
4206 			/*
4207 			 * If we found a type we were looking for,
4208 			 * remember it.
4209 			 */
4210 			if (header->type == type ||
4211 			    type == dns_rdatatype_any ||
4212 			    (header->type == dns_rdatatype_cname &&
4213 			     cname_ok)) {
4214 				/*
4215 				 * We've found the answer!
4216 				 */
4217 				found = header;
4218 				if (header->type == dns_rdatatype_cname &&
4219 				    cname_ok) {
4220 					/*
4221 					 * We may be finding a CNAME instead
4222 					 * of the desired type.
4223 					 *
4224 					 * If we've already got the CNAME RRSIG,
4225 					 * use it, otherwise change sigtype
4226 					 * so that we find it.
4227 					 */
4228 					if (cnamesig != NULL)
4229 						foundsig = cnamesig;
4230 					else
4231 						sigtype =
4232 						    RBTDB_RDATATYPE_SIGCNAME;
4233 				}
4234 				/*
4235 				 * If we've got all we need, end the search.
4236 				 */
4237 				if (!maybe_zonecut && foundsig != NULL)
4238 					break;
4239 			} else if (header->type == sigtype) {
4240 				/*
4241 				 * We've found the RRSIG rdataset for our
4242 				 * target type.  Remember it.
4243 				 */
4244 				foundsig = header;
4245 				/*
4246 				 * If we've got all we need, end the search.
4247 				 */
4248 				if (!maybe_zonecut && found != NULL)
4249 					break;
4250 			} else if (header->type == dns_rdatatype_nsec &&
4251 				   !search.rbtversion->havensec3) {
4252 				/*
4253 				 * Remember a NSEC rdataset even if we're
4254 				 * not specifically looking for it, because
4255 				 * we might need it later.
4256 				 */
4257 				nsecheader = header;
4258 			} else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
4259 				   !search.rbtversion->havensec3) {
4260 				/*
4261 				 * If we need the NSEC rdataset, we'll also
4262 				 * need its signature.
4263 				 */
4264 				nsecsig = header;
4265 			} else if (cname_ok &&
4266 				   header->type == RBTDB_RDATATYPE_SIGCNAME) {
4267 				/*
4268 				 * If we get a CNAME match, we'll also need
4269 				 * its signature.
4270 				 */
4271 				cnamesig = header;
4272 			}
4273 		}
4274 	}
4275 
4276 	if (empty_node) {
4277 		/*
4278 		 * We have an exact match for the name, but there are no
4279 		 * active rdatasets in the desired version.  That means that
4280 		 * this node doesn't exist in the desired version, and that
4281 		 * we really have a partial match.
4282 		 */
4283 		if (!wild) {
4284 			NODE_UNLOCK(lock, isc_rwlocktype_read);
4285 			goto partial_match;
4286 		}
4287 	}
4288 
4289 	/*
4290 	 * If we didn't find what we were looking for...
4291 	 */
4292 	if (found == NULL) {
4293 		if (search.zonecut != NULL) {
4294 			/*
4295 			 * We were trying to find glue at a node beneath a
4296 			 * zone cut, but didn't.
4297 			 *
4298 			 * Return the delegation.
4299 			 */
4300 			NODE_UNLOCK(lock, isc_rwlocktype_read);
4301 			result = setup_delegation(&search, nodep, foundname,
4302 						  rdataset, sigrdataset);
4303 			goto tree_exit;
4304 		}
4305 		/*
4306 		 * The desired type doesn't exist.
4307 		 */
4308 		result = DNS_R_NXRRSET;
4309 		if (search.rbtversion->secure == dns_db_secure &&
4310 		    !search.rbtversion->havensec3 &&
4311 		    (nsecheader == NULL || nsecsig == NULL)) {
4312 			/*
4313 			 * The zone is secure but there's no NSEC,
4314 			 * or the NSEC has no signature!
4315 			 */
4316 			if (!wild) {
4317 				result = DNS_R_BADDB;
4318 				goto node_exit;
4319 			}
4320 
4321 			NODE_UNLOCK(lock, isc_rwlocktype_read);
4322 			result = find_closest_nsec(&search, nodep, foundname,
4323 						   rdataset, sigrdataset,
4324 						   search.rbtdb->tree,
4325 						   search.rbtversion->secure);
4326 			if (result == ISC_R_SUCCESS)
4327 				result = DNS_R_EMPTYWILD;
4328 			goto tree_exit;
4329 		}
4330 		if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
4331 		    nsecheader == NULL)
4332 		{
4333 			/*
4334 			 * There's no NSEC record, and we were told
4335 			 * to find one.
4336 			 */
4337 			result = DNS_R_BADDB;
4338 			goto node_exit;
4339 		}
4340 		if (nodep != NULL) {
4341 			new_reference(search.rbtdb, node);
4342 			*nodep = node;
4343 		}
4344 		if ((search.rbtversion->secure == dns_db_secure &&
4345 		     !search.rbtversion->havensec3) ||
4346 		    (search.options & DNS_DBFIND_FORCENSEC) != 0)
4347 		{
4348 			bind_rdataset(search.rbtdb, node, nsecheader,
4349 				      0, rdataset);
4350 			if (nsecsig != NULL)
4351 				bind_rdataset(search.rbtdb, node,
4352 					      nsecsig, 0, sigrdataset);
4353 		}
4354 		if (wild)
4355 			foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4356 		goto node_exit;
4357 	}
4358 
4359 	/*
4360 	 * We found what we were looking for, or we found a CNAME.
4361 	 */
4362 
4363 	if (type != found->type &&
4364 	    type != dns_rdatatype_any &&
4365 	    found->type == dns_rdatatype_cname) {
4366 		/*
4367 		 * We weren't doing an ANY query and we found a CNAME instead
4368 		 * of the type we were looking for, so we need to indicate
4369 		 * that result to the caller.
4370 		 */
4371 		result = DNS_R_CNAME;
4372 	} else if (search.zonecut != NULL) {
4373 		/*
4374 		 * If we're beneath a zone cut, we must indicate that the
4375 		 * result is glue, unless we're actually at the zone cut
4376 		 * and the type is NSEC or KEY.
4377 		 */
4378 		if (search.zonecut == node) {
4379 			/*
4380 			 * It is not clear if KEY should still be
4381 			 * allowed at the parent side of the zone
4382 			 * cut or not.  It is needed for RFC3007
4383 			 * validated updates.
4384 			 */
4385 			if (type == dns_rdatatype_nsec ||
4386 			    type == dns_rdatatype_nsec3 ||
4387 			    type == dns_rdatatype_key)
4388 				result = ISC_R_SUCCESS;
4389 			else if (type == dns_rdatatype_any)
4390 				result = DNS_R_ZONECUT;
4391 			else
4392 				result = DNS_R_GLUE;
4393 		} else
4394 			result = DNS_R_GLUE;
4395 		/*
4396 		 * We might have found data that isn't glue, but was occluded
4397 		 * by a dynamic update.  If the caller cares about this, they
4398 		 * will have told us to validate glue.
4399 		 *
4400 		 * XXX We should cache the glue validity state!
4401 		 */
4402 		if (result == DNS_R_GLUE &&
4403 		    (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4404 		    !valid_glue(&search, foundname, type, node)) {
4405 			NODE_UNLOCK(lock, isc_rwlocktype_read);
4406 			result = setup_delegation(&search, nodep, foundname,
4407 						  rdataset, sigrdataset);
4408 		    goto tree_exit;
4409 		}
4410 	} else {
4411 		/*
4412 		 * An ordinary successful query!
4413 		 */
4414 		result = ISC_R_SUCCESS;
4415 	}
4416 
4417 	if (nodep != NULL) {
4418 		if (!at_zonecut)
4419 			new_reference(search.rbtdb, node);
4420 		else
4421 			search.need_cleanup = ISC_FALSE;
4422 		*nodep = node;
4423 	}
4424 
4425 	if (type != dns_rdatatype_any) {
4426 		bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4427 		if (foundsig != NULL)
4428 			bind_rdataset(search.rbtdb, node, foundsig, 0,
4429 				      sigrdataset);
4430 	}
4431 
4432 	if (wild)
4433 		foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4434 
4435  node_exit:
4436 	NODE_UNLOCK(lock, isc_rwlocktype_read);
4437 
4438  tree_exit:
4439 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4440 
4441 	/*
4442 	 * If we found a zonecut but aren't going to use it, we have to
4443 	 * let go of it.
4444 	 */
4445 	if (search.need_cleanup) {
4446 		node = search.zonecut;
4447 		INSIST(node != NULL);
4448 		lock = &(search.rbtdb->node_locks[node->locknum].lock);
4449 
4450 		NODE_LOCK(lock, isc_rwlocktype_read);
4451 		decrement_reference(search.rbtdb, node, 0,
4452 				    isc_rwlocktype_read, isc_rwlocktype_none,
4453 				    ISC_FALSE);
4454 		NODE_UNLOCK(lock, isc_rwlocktype_read);
4455 	}
4456 
4457 	if (close_version)
4458 		closeversion(db, &version, ISC_FALSE);
4459 
4460 	dns_rbtnodechain_reset(&search.chain);
4461 
4462 	return (result);
4463 }
4464 
4465 static isc_result_t
zone_findzonecut(dns_db_t * db,dns_name_t * name,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4466 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4467 		 isc_stdtime_t now, dns_dbnode_t **nodep,
4468 		 dns_name_t *foundname,
4469 		 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4470 {
4471 	UNUSED(db);
4472 	UNUSED(name);
4473 	UNUSED(options);
4474 	UNUSED(now);
4475 	UNUSED(nodep);
4476 	UNUSED(foundname);
4477 	UNUSED(rdataset);
4478 	UNUSED(sigrdataset);
4479 
4480 	FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4481 
4482 	/* NOTREACHED */
4483 	return (ISC_R_NOTIMPLEMENTED);
4484 }
4485 
4486 static isc_result_t
cache_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)4487 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4488 	rbtdb_search_t *search = arg;
4489 	rdatasetheader_t *header, *header_prev, *header_next;
4490 	rdatasetheader_t *dname_header, *sigdname_header;
4491 	isc_result_t result;
4492 	nodelock_t *lock;
4493 	isc_rwlocktype_t locktype;
4494 
4495 	/* XXX comment */
4496 
4497 	REQUIRE(search->zonecut == NULL);
4498 
4499 	/*
4500 	 * Keep compiler silent.
4501 	 */
4502 	UNUSED(name);
4503 
4504 	lock = &(search->rbtdb->node_locks[node->locknum].lock);
4505 	locktype = isc_rwlocktype_read;
4506 	NODE_LOCK(lock, locktype);
4507 
4508 	/*
4509 	 * Look for a DNAME or RRSIG DNAME rdataset.
4510 	 */
4511 	dname_header = NULL;
4512 	sigdname_header = NULL;
4513 	header_prev = NULL;
4514 	for (header = node->data; header != NULL; header = header_next) {
4515 		header_next = header->next;
4516 		if (header->rdh_ttl <  search->now) {
4517 			/*
4518 			 * This rdataset is stale.  If no one else is
4519 			 * using the node, we can clean it up right
4520 			 * now, otherwise we mark it as stale, and
4521 			 * the node as dirty, so it will get cleaned
4522 			 * up later.
4523 			 */
4524 			if ((header->rdh_ttl <  search->now - RBTDB_VIRTUAL) &&
4525 			    (locktype == isc_rwlocktype_write ||
4526 			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4527 				/*
4528 				 * We update the node's status only when we
4529 				 * can get write access; otherwise, we leave
4530 				 * others to this work.  Periodical cleaning
4531 				 * will eventually take the job as the last
4532 				 * resort.
4533 				 * We won't downgrade the lock, since other
4534 				 * rdatasets are probably stale, too.
4535 				 */
4536 				locktype = isc_rwlocktype_write;
4537 
4538 				if (dns_rbtnode_refcurrent(node) == 0) {
4539 					isc_mem_t *mctx;
4540 
4541 					/*
4542 					 * header->down can be non-NULL if the
4543 					 * refcount has just decremented to 0
4544 					 * but decrement_reference() has not
4545 					 * performed clean_cache_node(), in
4546 					 * which case we need to purge the
4547 					 * stale headers first.
4548 					 */
4549 					mctx = search->rbtdb->common.mctx;
4550 					clean_stale_headers(search->rbtdb,
4551 							    mctx,
4552 							    header);
4553 					if (header_prev != NULL)
4554 						header_prev->next =
4555 							header->next;
4556 					else
4557 						node->data = header->next;
4558 					free_rdataset(search->rbtdb, mctx,
4559 						      header);
4560 				} else {
4561 					mark_stale_header(search->rbtdb,
4562 							  header);
4563 					header_prev = header;
4564 				}
4565 			} else
4566 				header_prev = header;
4567 		} else if (header->type == dns_rdatatype_dname &&
4568 			   EXISTS(header)) {
4569 			dname_header = header;
4570 			header_prev = header;
4571 		} else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4572 			 EXISTS(header)) {
4573 			sigdname_header = header;
4574 			header_prev = header;
4575 		} else
4576 			header_prev = header;
4577 	}
4578 
4579 	if (dname_header != NULL &&
4580 	    (!DNS_TRUST_PENDING(dname_header->trust) ||
4581 	     (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4582 		/*
4583 		 * We increment the reference count on node to ensure that
4584 		 * search->zonecut_rdataset will still be valid later.
4585 		 */
4586 		new_reference(search->rbtdb, node);
4587 		INSIST(!ISC_LINK_LINKED(node, deadlink));
4588 		search->zonecut = node;
4589 		search->zonecut_rdataset = dname_header;
4590 		search->zonecut_sigrdataset = sigdname_header;
4591 		search->need_cleanup = ISC_TRUE;
4592 		result = DNS_R_PARTIALMATCH;
4593 	} else
4594 		result = DNS_R_CONTINUE;
4595 
4596 	NODE_UNLOCK(lock, locktype);
4597 
4598 	return (result);
4599 }
4600 
4601 static inline isc_result_t
find_deepest_zonecut(rbtdb_search_t * search,dns_rbtnode_t * node,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4602 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4603 		     dns_dbnode_t **nodep, dns_name_t *foundname,
4604 		     dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4605 {
4606 	unsigned int i;
4607 	dns_rbtnode_t *level_node;
4608 	rdatasetheader_t *header, *header_prev, *header_next;
4609 	rdatasetheader_t *found, *foundsig;
4610 	isc_result_t result = ISC_R_NOTFOUND;
4611 	dns_name_t name;
4612 	dns_rbtdb_t *rbtdb;
4613 	isc_boolean_t done;
4614 	nodelock_t *lock;
4615 	isc_rwlocktype_t locktype;
4616 
4617 	/*
4618 	 * Caller must be holding the tree lock.
4619 	 */
4620 
4621 	rbtdb = search->rbtdb;
4622 	i = search->chain.level_matches;
4623 	done = ISC_FALSE;
4624 	do {
4625 		locktype = isc_rwlocktype_read;
4626 		lock = &rbtdb->node_locks[node->locknum].lock;
4627 		NODE_LOCK(lock, locktype);
4628 
4629 		/*
4630 		 * Look for NS and RRSIG NS rdatasets.
4631 		 */
4632 		found = NULL;
4633 		foundsig = NULL;
4634 		header_prev = NULL;
4635 		for (header = node->data;
4636 		     header != NULL;
4637 		     header = header_next) {
4638 			header_next = header->next;
4639 			if (header->rdh_ttl <  search->now) {
4640 				/*
4641 				 * This rdataset is stale.  If no one else is
4642 				 * using the node, we can clean it up right
4643 				 * now, otherwise we mark it as stale, and
4644 				 * the node as dirty, so it will get cleaned
4645 				 * up later.
4646 				 */
4647 				if ((header->rdh_ttl <  search->now -
4648 						    RBTDB_VIRTUAL) &&
4649 				    (locktype == isc_rwlocktype_write ||
4650 				     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4651 					/*
4652 					 * We update the node's status only
4653 					 * when we can get write access.
4654 					 */
4655 					locktype = isc_rwlocktype_write;
4656 
4657 					if (dns_rbtnode_refcurrent(node)
4658 					    == 0) {
4659 						isc_mem_t *m;
4660 
4661 						m = search->rbtdb->common.mctx;
4662 						clean_stale_headers(
4663 							search->rbtdb,
4664 							m, header);
4665 						if (header_prev != NULL)
4666 							header_prev->next =
4667 								header->next;
4668 						else
4669 							node->data =
4670 								header->next;
4671 						free_rdataset(rbtdb, m,
4672 							      header);
4673 					} else {
4674 						mark_stale_header(rbtdb,
4675 								  header);
4676 						header_prev = header;
4677 					}
4678 				} else
4679 					header_prev = header;
4680 			} else if (EXISTS(header)) {
4681 				/*
4682 				 * We've found an extant rdataset.  See if
4683 				 * we're interested in it.
4684 				 */
4685 				if (header->type == dns_rdatatype_ns) {
4686 					found = header;
4687 					if (foundsig != NULL)
4688 						break;
4689 				} else if (header->type ==
4690 					   RBTDB_RDATATYPE_SIGNS) {
4691 					foundsig = header;
4692 					if (found != NULL)
4693 						break;
4694 				}
4695 				header_prev = header;
4696 			} else
4697 				header_prev = header;
4698 		}
4699 
4700 		if (found != NULL) {
4701 			/*
4702 			 * If we have to set foundname, we do it before
4703 			 * anything else.  If we were to set foundname after
4704 			 * we had set nodep or bound the rdataset, then we'd
4705 			 * have to undo that work if dns_name_concatenate()
4706 			 * failed.  By setting foundname first, there's
4707 			 * nothing to undo if we have trouble.
4708 			 */
4709 			if (foundname != NULL) {
4710 				dns_name_init(&name, NULL);
4711 				dns_rbt_namefromnode(node, &name);
4712 				result = dns_name_copy(&name, foundname, NULL);
4713 				while (result == ISC_R_SUCCESS && i > 0) {
4714 					i--;
4715 					level_node = search->chain.levels[i];
4716 					dns_name_init(&name, NULL);
4717 					dns_rbt_namefromnode(level_node,
4718 							     &name);
4719 					result =
4720 						dns_name_concatenate(foundname,
4721 								     &name,
4722 								     foundname,
4723 								     NULL);
4724 				}
4725 				if (result != ISC_R_SUCCESS) {
4726 					*nodep = NULL;
4727 					goto node_exit;
4728 				}
4729 			}
4730 			result = DNS_R_DELEGATION;
4731 			if (nodep != NULL) {
4732 				new_reference(search->rbtdb, node);
4733 				*nodep = node;
4734 			}
4735 			bind_rdataset(search->rbtdb, node, found, search->now,
4736 				      rdataset);
4737 			if (foundsig != NULL)
4738 				bind_rdataset(search->rbtdb, node, foundsig,
4739 					      search->now, sigrdataset);
4740 			if (need_headerupdate(found, search->now) ||
4741 			    (foundsig != NULL &&
4742 			     need_headerupdate(foundsig, search->now))) {
4743 				if (locktype != isc_rwlocktype_write) {
4744 					NODE_UNLOCK(lock, locktype);
4745 					NODE_LOCK(lock, isc_rwlocktype_write);
4746 					locktype = isc_rwlocktype_write;
4747 					POST(locktype);
4748 				}
4749 				if (need_headerupdate(found, search->now))
4750 					update_header(search->rbtdb, found,
4751 						      search->now);
4752 				if (foundsig != NULL &&
4753 				    need_headerupdate(foundsig, search->now)) {
4754 					update_header(search->rbtdb, foundsig,
4755 						      search->now);
4756 				}
4757 			}
4758 		}
4759 
4760 	node_exit:
4761 		NODE_UNLOCK(lock, locktype);
4762 
4763 		if (found == NULL && i > 0) {
4764 			i--;
4765 			node = search->chain.levels[i];
4766 		} else
4767 			done = ISC_TRUE;
4768 
4769 	} while (!done);
4770 
4771 	return (result);
4772 }
4773 
4774 static isc_result_t
find_coveringnsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,isc_stdtime_t now,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4775 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4776 		  isc_stdtime_t now, dns_name_t *foundname,
4777 		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4778 {
4779 	dns_rbtnode_t *node;
4780 	rdatasetheader_t *header, *header_next, *header_prev;
4781 	rdatasetheader_t *found, *foundsig;
4782 	isc_boolean_t empty_node;
4783 	isc_result_t result;
4784 	dns_fixedname_t fname, forigin;
4785 	dns_name_t *name, *origin;
4786 	rbtdb_rdatatype_t matchtype, sigmatchtype;
4787 	nodelock_t *lock;
4788 	isc_rwlocktype_t locktype;
4789 
4790 	matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4791 	sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4792 					     dns_rdatatype_nsec);
4793 
4794 	do {
4795 		node = NULL;
4796 		dns_fixedname_init(&fname);
4797 		name = dns_fixedname_name(&fname);
4798 		dns_fixedname_init(&forigin);
4799 		origin = dns_fixedname_name(&forigin);
4800 		result = dns_rbtnodechain_current(&search->chain, name,
4801 						  origin, &node);
4802 		if (result != ISC_R_SUCCESS)
4803 			return (result);
4804 		locktype = isc_rwlocktype_read;
4805 		lock = &(search->rbtdb->node_locks[node->locknum].lock);
4806 		NODE_LOCK(lock, locktype);
4807 		found = NULL;
4808 		foundsig = NULL;
4809 		empty_node = ISC_TRUE;
4810 		header_prev = NULL;
4811 		for (header = node->data;
4812 		     header != NULL;
4813 		     header = header_next) {
4814 			header_next = header->next;
4815 			if (header->rdh_ttl <  now) {
4816 				/*
4817 				 * This rdataset is stale.  If no one else is
4818 				 * using the node, we can clean it up right
4819 				 * now, otherwise we mark it as stale, and the
4820 				 * node as dirty, so it will get cleaned up
4821 				 * later.
4822 				 */
4823 				if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
4824 				    (locktype == isc_rwlocktype_write ||
4825 				     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4826 					/*
4827 					 * We update the node's status only
4828 					 * when we can get write access.
4829 					 */
4830 					locktype = isc_rwlocktype_write;
4831 
4832 					if (dns_rbtnode_refcurrent(node)
4833 					    == 0) {
4834 						isc_mem_t *m;
4835 
4836 						m = search->rbtdb->common.mctx;
4837 						clean_stale_headers(
4838 							search->rbtdb,
4839 							m, header);
4840 						if (header_prev != NULL)
4841 							header_prev->next =
4842 								header->next;
4843 						else
4844 							node->data = header->next;
4845 						free_rdataset(search->rbtdb, m,
4846 							      header);
4847 					} else {
4848 						mark_stale_header(search->rbtdb,
4849 								  header);
4850 						header_prev = header;
4851 					}
4852 				} else
4853 					header_prev = header;
4854 				continue;
4855 			}
4856 			if (NONEXISTENT(header) ||
4857 			    RBTDB_RDATATYPE_BASE(header->type) == 0) {
4858 				header_prev = header;
4859 				continue;
4860 			}
4861 			empty_node = ISC_FALSE;
4862 			if (header->type == matchtype)
4863 				found = header;
4864 			else if (header->type == sigmatchtype)
4865 				foundsig = header;
4866 			header_prev = header;
4867 		}
4868 		if (found != NULL) {
4869 			result = dns_name_concatenate(name, origin,
4870 						      foundname, NULL);
4871 			if (result != ISC_R_SUCCESS)
4872 				goto unlock_node;
4873 			bind_rdataset(search->rbtdb, node, found,
4874 				      now, rdataset);
4875 			if (foundsig != NULL)
4876 				bind_rdataset(search->rbtdb, node, foundsig,
4877 					      now, sigrdataset);
4878 			new_reference(search->rbtdb, node);
4879 			*nodep = node;
4880 			result = DNS_R_COVERINGNSEC;
4881 		} else if (!empty_node) {
4882 			result = ISC_R_NOTFOUND;
4883 		} else
4884 			result = dns_rbtnodechain_prev(&search->chain, NULL,
4885 						       NULL);
4886  unlock_node:
4887 		NODE_UNLOCK(lock, locktype);
4888 	} while (empty_node && result == ISC_R_SUCCESS);
4889 	return (result);
4890 }
4891 
4892 /*
4893  * Connect this RBTDB to the response policy zone summary data for the view.
4894  */
4895 static void
rpz_attach(dns_db_t * db,dns_rpz_zones_t * rpzs,dns_rpz_num_t rpz_num)4896 rpz_attach(dns_db_t *db, dns_rpz_zones_t *rpzs, dns_rpz_num_t rpz_num) {
4897 	dns_rbtdb_t * rbtdb;
4898 
4899 	rbtdb = (dns_rbtdb_t *)db;
4900 	REQUIRE(VALID_RBTDB(rbtdb));
4901 
4902 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4903 	REQUIRE(rbtdb->rpzs == NULL && rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
4904 	dns_rpz_attach_rpzs(rpzs, &rbtdb->rpzs);
4905 	rbtdb->rpz_num = rpz_num;
4906 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4907 }
4908 
4909 /*
4910  * Enable this RBTDB as a response policy zone.
4911  */
4912 static isc_result_t
rpz_ready(dns_db_t * db)4913 rpz_ready(dns_db_t *db) {
4914 	dns_rbtdb_t * rbtdb;
4915 	isc_result_t result;
4916 
4917 	rbtdb = (dns_rbtdb_t *)db;
4918 	REQUIRE(VALID_RBTDB(rbtdb));
4919 
4920 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4921 	if (rbtdb->rpzs == NULL) {
4922 		INSIST(rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
4923 		result = ISC_R_SUCCESS;
4924 	} else {
4925 		result = dns_rpz_ready(rbtdb->rpzs, &rbtdb->load_rpzs,
4926 				       rbtdb->rpz_num);
4927 	}
4928 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4929 	return (result);
4930 }
4931 
4932 static isc_result_t
cache_find(dns_db_t * db,dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4933 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4934 	   dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4935 	   dns_dbnode_t **nodep, dns_name_t *foundname,
4936 	   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4937 {
4938 	dns_rbtnode_t *node = NULL;
4939 	isc_result_t result;
4940 	rbtdb_search_t search;
4941 	isc_boolean_t cname_ok = ISC_TRUE;
4942 	isc_boolean_t empty_node;
4943 	nodelock_t *lock;
4944 	isc_rwlocktype_t locktype;
4945 	rdatasetheader_t *header, *header_prev, *header_next;
4946 	rdatasetheader_t *found, *nsheader;
4947 	rdatasetheader_t *foundsig, *nssig, *cnamesig;
4948 	rdatasetheader_t *update, *updatesig;
4949 	rbtdb_rdatatype_t sigtype, negtype;
4950 
4951 	UNUSED(version);
4952 
4953 	search.rbtdb = (dns_rbtdb_t *)db;
4954 
4955 	REQUIRE(VALID_RBTDB(search.rbtdb));
4956 	REQUIRE(version == NULL);
4957 
4958 	if (now == 0)
4959 		isc_stdtime_get(&now);
4960 
4961 	search.rbtversion = NULL;
4962 	search.serial = 1;
4963 	search.options = options;
4964 	search.copy_name = ISC_FALSE;
4965 	search.need_cleanup = ISC_FALSE;
4966 	search.wild = ISC_FALSE;
4967 	search.zonecut = NULL;
4968 	dns_fixedname_init(&search.zonecut_name);
4969 	dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4970 	search.now = now;
4971 	update = NULL;
4972 	updatesig = NULL;
4973 
4974 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4975 
4976 	/*
4977 	 * Search down from the root of the tree.  If, while going down, we
4978 	 * encounter a callback node, cache_zonecut_callback() will search the
4979 	 * rdatasets at the zone cut for a DNAME rdataset.
4980 	 */
4981 	result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4982 				  &search.chain, DNS_RBTFIND_EMPTYDATA,
4983 				  cache_zonecut_callback, &search);
4984 
4985 	if (result == DNS_R_PARTIALMATCH) {
4986 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4987 			result = find_coveringnsec(&search, nodep, now,
4988 						   foundname, rdataset,
4989 						   sigrdataset);
4990 			if (result == DNS_R_COVERINGNSEC)
4991 				goto tree_exit;
4992 		}
4993 		if (search.zonecut != NULL) {
4994 		    result = setup_delegation(&search, nodep, foundname,
4995 					      rdataset, sigrdataset);
4996 		    goto tree_exit;
4997 		} else {
4998 		find_ns:
4999 			result = find_deepest_zonecut(&search, node, nodep,
5000 						      foundname, rdataset,
5001 						      sigrdataset);
5002 			goto tree_exit;
5003 		}
5004 	} else if (result != ISC_R_SUCCESS)
5005 		goto tree_exit;
5006 
5007 	/*
5008 	 * Certain DNSSEC types are not subject to CNAME matching
5009 	 * (RFC4035, section 2.5 and RFC3007).
5010 	 *
5011 	 * We don't check for RRSIG, because we don't store RRSIG records
5012 	 * directly.
5013 	 */
5014 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
5015 		cname_ok = ISC_FALSE;
5016 
5017 	/*
5018 	 * We now go looking for rdata...
5019 	 */
5020 
5021 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
5022 	locktype = isc_rwlocktype_read;
5023 	NODE_LOCK(lock, locktype);
5024 
5025 	found = NULL;
5026 	foundsig = NULL;
5027 	sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5028 	negtype = RBTDB_RDATATYPE_VALUE(0, type);
5029 	nsheader = NULL;
5030 	nssig = NULL;
5031 	cnamesig = NULL;
5032 	empty_node = ISC_TRUE;
5033 	header_prev = NULL;
5034 	for (header = node->data; header != NULL; header = header_next) {
5035 		header_next = header->next;
5036 		if (header->rdh_ttl <  now) {
5037 			/*
5038 			 * This rdataset is stale.  If no one else is using the
5039 			 * node, we can clean it up right now, otherwise we
5040 			 * mark it as stale, and the node as dirty, so it will
5041 			 * get cleaned up later.
5042 			 */
5043 			if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
5044 			    (locktype == isc_rwlocktype_write ||
5045 			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5046 				/*
5047 				 * We update the node's status only when we
5048 				 * can get write access.
5049 				 */
5050 				locktype = isc_rwlocktype_write;
5051 
5052 				if (dns_rbtnode_refcurrent(node) == 0) {
5053 					isc_mem_t *mctx;
5054 
5055 					mctx = search.rbtdb->common.mctx;
5056 					clean_stale_headers(search.rbtdb, mctx,
5057 							    header);
5058 					if (header_prev != NULL)
5059 						header_prev->next =
5060 							header->next;
5061 					else
5062 						node->data = header->next;
5063 					free_rdataset(search.rbtdb, mctx,
5064 						      header);
5065 				} else {
5066 					mark_stale_header(search.rbtdb, header);
5067 					header_prev = header;
5068 				}
5069 			} else
5070 				header_prev = header;
5071 		} else if (EXISTS(header)) {
5072 			/*
5073 			 * We now know that there is at least one active
5074 			 * non-stale rdataset at this node.
5075 			 */
5076 			empty_node = ISC_FALSE;
5077 
5078 			/*
5079 			 * If we found a type we were looking for, remember
5080 			 * it.
5081 			 */
5082 			if (header->type == type ||
5083 			    (type == dns_rdatatype_any &&
5084 			     RBTDB_RDATATYPE_BASE(header->type) != 0) ||
5085 			    (cname_ok && header->type ==
5086 			     dns_rdatatype_cname)) {
5087 				/*
5088 				 * We've found the answer.
5089 				 */
5090 				found = header;
5091 				if (header->type == dns_rdatatype_cname &&
5092 				    cname_ok &&
5093 				    cnamesig != NULL) {
5094 					/*
5095 					 * If we've already got the
5096 					 * CNAME RRSIG, use it.
5097 					 */
5098 					foundsig = cnamesig;
5099 				}
5100 			} else if (header->type == sigtype) {
5101 				/*
5102 				 * We've found the RRSIG rdataset for our
5103 				 * target type.  Remember it.
5104 				 */
5105 				foundsig = header;
5106 			} else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5107 				   header->type == negtype) {
5108 				/*
5109 				 * We've found a negative cache entry.
5110 				 */
5111 				found = header;
5112 			} else if (header->type == dns_rdatatype_ns) {
5113 				/*
5114 				 * Remember a NS rdataset even if we're
5115 				 * not specifically looking for it, because
5116 				 * we might need it later.
5117 				 */
5118 				nsheader = header;
5119 			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5120 				/*
5121 				 * If we need the NS rdataset, we'll also
5122 				 * need its signature.
5123 				 */
5124 				nssig = header;
5125 			} else if (cname_ok &&
5126 				   header->type == RBTDB_RDATATYPE_SIGCNAME) {
5127 				/*
5128 				 * If we get a CNAME match, we'll also need
5129 				 * its signature.
5130 				 */
5131 				cnamesig = header;
5132 			}
5133 			header_prev = header;
5134 		} else
5135 			header_prev = header;
5136 	}
5137 
5138 	if (empty_node) {
5139 		/*
5140 		 * We have an exact match for the name, but there are no
5141 		 * extant rdatasets.  That means that this node doesn't
5142 		 * meaningfully exist, and that we really have a partial match.
5143 		 */
5144 		NODE_UNLOCK(lock, locktype);
5145 		goto find_ns;
5146 	}
5147 
5148 	/*
5149 	 * If we didn't find what we were looking for...
5150 	 */
5151 	if (found == NULL ||
5152 	    (DNS_TRUST_ADDITIONAL(found->trust) &&
5153 	     ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
5154 	    (found->trust == dns_trust_glue &&
5155 	     ((options & DNS_DBFIND_GLUEOK) == 0)) ||
5156 	    (DNS_TRUST_PENDING(found->trust) &&
5157 	     ((options & DNS_DBFIND_PENDINGOK) == 0))) {
5158 		/*
5159 		 * If there is an NS rdataset at this node, then this is the
5160 		 * deepest zone cut.
5161 		 */
5162 		if (nsheader != NULL) {
5163 			if (nodep != NULL) {
5164 				new_reference(search.rbtdb, node);
5165 				INSIST(!ISC_LINK_LINKED(node, deadlink));
5166 				*nodep = node;
5167 			}
5168 			bind_rdataset(search.rbtdb, node, nsheader, search.now,
5169 				      rdataset);
5170 			if (need_headerupdate(nsheader, search.now))
5171 				update = nsheader;
5172 			if (nssig != NULL) {
5173 				bind_rdataset(search.rbtdb, node, nssig,
5174 					      search.now, sigrdataset);
5175 				if (need_headerupdate(nssig, search.now))
5176 					updatesig = nssig;
5177 			}
5178 			result = DNS_R_DELEGATION;
5179 			goto node_exit;
5180 		}
5181 
5182 		/*
5183 		 * Go find the deepest zone cut.
5184 		 */
5185 		NODE_UNLOCK(lock, locktype);
5186 		goto find_ns;
5187 	}
5188 
5189 	/*
5190 	 * We found what we were looking for, or we found a CNAME.
5191 	 */
5192 
5193 	if (nodep != NULL) {
5194 		new_reference(search.rbtdb, node);
5195 		INSIST(!ISC_LINK_LINKED(node, deadlink));
5196 		*nodep = node;
5197 	}
5198 
5199 	if (NEGATIVE(found)) {
5200 		/*
5201 		 * We found a negative cache entry.
5202 		 */
5203 		if (NXDOMAIN(found))
5204 			result = DNS_R_NCACHENXDOMAIN;
5205 		else
5206 			result = DNS_R_NCACHENXRRSET;
5207 	} else if (type != found->type &&
5208 		   type != dns_rdatatype_any &&
5209 		   found->type == dns_rdatatype_cname) {
5210 		/*
5211 		 * We weren't doing an ANY query and we found a CNAME instead
5212 		 * of the type we were looking for, so we need to indicate
5213 		 * that result to the caller.
5214 		 */
5215 		result = DNS_R_CNAME;
5216 	} else {
5217 		/*
5218 		 * An ordinary successful query!
5219 		 */
5220 		result = ISC_R_SUCCESS;
5221 	}
5222 
5223 	if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5224 	    result == DNS_R_NCACHENXRRSET) {
5225 		bind_rdataset(search.rbtdb, node, found, search.now,
5226 			      rdataset);
5227 		if (need_headerupdate(found, search.now))
5228 			update = found;
5229 		if (!NEGATIVE(found) && foundsig != NULL) {
5230 			bind_rdataset(search.rbtdb, node, foundsig, search.now,
5231 				      sigrdataset);
5232 			if (need_headerupdate(foundsig, search.now))
5233 				updatesig = foundsig;
5234 		}
5235 	}
5236 
5237  node_exit:
5238 	if ((update != NULL || updatesig != NULL) &&
5239 	    locktype != isc_rwlocktype_write) {
5240 		NODE_UNLOCK(lock, locktype);
5241 		NODE_LOCK(lock, isc_rwlocktype_write);
5242 		locktype = isc_rwlocktype_write;
5243 		POST(locktype);
5244 	}
5245 	if (update != NULL && need_headerupdate(update, search.now))
5246 		update_header(search.rbtdb, update, search.now);
5247 	if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5248 		update_header(search.rbtdb, updatesig, search.now);
5249 
5250 	NODE_UNLOCK(lock, locktype);
5251 
5252  tree_exit:
5253 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5254 
5255 	/*
5256 	 * If we found a zonecut but aren't going to use it, we have to
5257 	 * let go of it.
5258 	 */
5259 	if (search.need_cleanup) {
5260 		node = search.zonecut;
5261 		INSIST(node != NULL);
5262 		lock = &(search.rbtdb->node_locks[node->locknum].lock);
5263 
5264 		NODE_LOCK(lock, isc_rwlocktype_read);
5265 		decrement_reference(search.rbtdb, node, 0,
5266 				    isc_rwlocktype_read, isc_rwlocktype_none,
5267 				    ISC_FALSE);
5268 		NODE_UNLOCK(lock, isc_rwlocktype_read);
5269 	}
5270 
5271 	dns_rbtnodechain_reset(&search.chain);
5272 
5273 	update_cachestats(search.rbtdb, result);
5274 	return (result);
5275 }
5276 
5277 static isc_result_t
cache_findzonecut(dns_db_t * db,dns_name_t * name,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5278 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5279 		  isc_stdtime_t now, dns_dbnode_t **nodep,
5280 		  dns_name_t *foundname,
5281 		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5282 {
5283 	dns_rbtnode_t *node = NULL;
5284 	nodelock_t *lock;
5285 	isc_result_t result;
5286 	rbtdb_search_t search;
5287 	rdatasetheader_t *header, *header_prev, *header_next;
5288 	rdatasetheader_t *found, *foundsig;
5289 	unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5290 	isc_rwlocktype_t locktype;
5291 
5292 	search.rbtdb = (dns_rbtdb_t *)db;
5293 
5294 	REQUIRE(VALID_RBTDB(search.rbtdb));
5295 
5296 	if (now == 0)
5297 		isc_stdtime_get(&now);
5298 
5299 	search.rbtversion = NULL;
5300 	search.serial = 1;
5301 	search.options = options;
5302 	search.copy_name = ISC_FALSE;
5303 	search.need_cleanup = ISC_FALSE;
5304 	search.wild = ISC_FALSE;
5305 	search.zonecut = NULL;
5306 	dns_fixedname_init(&search.zonecut_name);
5307 	dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5308 	search.now = now;
5309 
5310 	if ((options & DNS_DBFIND_NOEXACT) != 0)
5311 		rbtoptions |= DNS_RBTFIND_NOEXACT;
5312 
5313 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5314 
5315 	/*
5316 	 * Search down from the root of the tree.
5317 	 */
5318 	result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5319 				  &search.chain, rbtoptions, NULL, &search);
5320 
5321 	if (result == DNS_R_PARTIALMATCH) {
5322 	find_ns:
5323 		result = find_deepest_zonecut(&search, node, nodep, foundname,
5324 					      rdataset, sigrdataset);
5325 		goto tree_exit;
5326 	} else if (result != ISC_R_SUCCESS)
5327 		goto tree_exit;
5328 
5329 	/*
5330 	 * We now go looking for an NS rdataset at the node.
5331 	 */
5332 
5333 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
5334 	locktype = isc_rwlocktype_read;
5335 	NODE_LOCK(lock, locktype);
5336 
5337 	found = NULL;
5338 	foundsig = NULL;
5339 	header_prev = NULL;
5340 	for (header = node->data; header != NULL; header = header_next) {
5341 		header_next = header->next;
5342 		if (header->rdh_ttl <  now) {
5343 			/*
5344 			 * This rdataset is stale.  If no one else is using the
5345 			 * node, we can clean it up right now, otherwise we
5346 			 * mark it as stale, and the node as dirty, so it will
5347 			 * get cleaned up later.
5348 			 */
5349 			if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5350 			    (locktype == isc_rwlocktype_write ||
5351 			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5352 				/*
5353 				 * We update the node's status only when we
5354 				 * can get write access.
5355 				 */
5356 				locktype = isc_rwlocktype_write;
5357 
5358 				if (dns_rbtnode_refcurrent(node) == 0) {
5359 					isc_mem_t *mctx;
5360 
5361 					mctx = search.rbtdb->common.mctx;
5362 					clean_stale_headers(search.rbtdb, mctx,
5363 							    header);
5364 					if (header_prev != NULL)
5365 						header_prev->next =
5366 							header->next;
5367 					else
5368 						node->data = header->next;
5369 					free_rdataset(search.rbtdb, mctx,
5370 						      header);
5371 				} else {
5372 					mark_stale_header(search.rbtdb, header);
5373 					header_prev = header;
5374 				}
5375 			} else
5376 				header_prev = header;
5377 		} else if (EXISTS(header)) {
5378 			/*
5379 			 * If we found a type we were looking for, remember
5380 			 * it.
5381 			 */
5382 			if (header->type == dns_rdatatype_ns) {
5383 				/*
5384 				 * Remember a NS rdataset even if we're
5385 				 * not specifically looking for it, because
5386 				 * we might need it later.
5387 				 */
5388 				found = header;
5389 			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5390 				/*
5391 				 * If we need the NS rdataset, we'll also
5392 				 * need its signature.
5393 				 */
5394 				foundsig = header;
5395 			}
5396 			header_prev = header;
5397 		} else
5398 			header_prev = header;
5399 	}
5400 
5401 	if (found == NULL) {
5402 		/*
5403 		 * No NS records here.
5404 		 */
5405 		NODE_UNLOCK(lock, locktype);
5406 		goto find_ns;
5407 	}
5408 
5409 	if (nodep != NULL) {
5410 		new_reference(search.rbtdb, node);
5411 		INSIST(!ISC_LINK_LINKED(node, deadlink));
5412 		*nodep = node;
5413 	}
5414 
5415 	bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5416 	if (foundsig != NULL)
5417 		bind_rdataset(search.rbtdb, node, foundsig, search.now,
5418 			      sigrdataset);
5419 
5420 	if (need_headerupdate(found, search.now) ||
5421 	    (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
5422 		if (locktype != isc_rwlocktype_write) {
5423 			NODE_UNLOCK(lock, locktype);
5424 			NODE_LOCK(lock, isc_rwlocktype_write);
5425 			locktype = isc_rwlocktype_write;
5426 			POST(locktype);
5427 		}
5428 		if (need_headerupdate(found, search.now))
5429 			update_header(search.rbtdb, found, search.now);
5430 		if (foundsig != NULL &&
5431 		    need_headerupdate(foundsig, search.now)) {
5432 			update_header(search.rbtdb, foundsig, search.now);
5433 		}
5434 	}
5435 
5436 	NODE_UNLOCK(lock, locktype);
5437 
5438  tree_exit:
5439 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5440 
5441 	INSIST(!search.need_cleanup);
5442 
5443 	dns_rbtnodechain_reset(&search.chain);
5444 
5445 	if (result == DNS_R_DELEGATION)
5446 		result = ISC_R_SUCCESS;
5447 
5448 	return (result);
5449 }
5450 
5451 static void
attachnode(dns_db_t * db,dns_dbnode_t * source,dns_dbnode_t ** targetp)5452 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5453 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5454 	dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5455 	unsigned int refs;
5456 
5457 	REQUIRE(VALID_RBTDB(rbtdb));
5458 	REQUIRE(targetp != NULL && *targetp == NULL);
5459 
5460 	NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5461 	dns_rbtnode_refincrement(node, &refs);
5462 	INSIST(refs != 0);
5463 	NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5464 
5465 	*targetp = source;
5466 }
5467 
5468 static void
detachnode(dns_db_t * db,dns_dbnode_t ** targetp)5469 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5470 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5471 	dns_rbtnode_t *node;
5472 	isc_boolean_t want_free = ISC_FALSE;
5473 	isc_boolean_t inactive = ISC_FALSE;
5474 	rbtdb_nodelock_t *nodelock;
5475 
5476 	REQUIRE(VALID_RBTDB(rbtdb));
5477 	REQUIRE(targetp != NULL && *targetp != NULL);
5478 
5479 	node = (dns_rbtnode_t *)(*targetp);
5480 	nodelock = &rbtdb->node_locks[node->locknum];
5481 
5482 	NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5483 
5484 	if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5485 				isc_rwlocktype_none, ISC_FALSE)) {
5486 		if (isc_refcount_current(&nodelock->references) == 0 &&
5487 		    nodelock->exiting) {
5488 			inactive = ISC_TRUE;
5489 		}
5490 	}
5491 
5492 	NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5493 
5494 	*targetp = NULL;
5495 
5496 	if (inactive) {
5497 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5498 		rbtdb->active--;
5499 		if (rbtdb->active == 0)
5500 			want_free = ISC_TRUE;
5501 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5502 		if (want_free) {
5503 			char buf[DNS_NAME_FORMATSIZE];
5504 			if (dns_name_dynamic(&rbtdb->common.origin))
5505 				dns_name_format(&rbtdb->common.origin, buf,
5506 						sizeof(buf));
5507 			else
5508 				strcpy(buf, "<UNKNOWN>");
5509 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5510 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5511 				      "calling free_rbtdb(%s)", buf);
5512 			free_rbtdb(rbtdb, ISC_TRUE, NULL);
5513 		}
5514 	}
5515 }
5516 
5517 static isc_result_t
expirenode(dns_db_t * db,dns_dbnode_t * node,isc_stdtime_t now)5518 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5519 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5520 	dns_rbtnode_t *rbtnode = node;
5521 	rdatasetheader_t *header;
5522 	isc_boolean_t force_expire = ISC_FALSE;
5523 	/*
5524 	 * These are the category and module used by the cache cleaner.
5525 	 */
5526 	isc_boolean_t log = ISC_FALSE;
5527 	isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5528 	isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5529 	int level = ISC_LOG_DEBUG(2);
5530 	char printname[DNS_NAME_FORMATSIZE];
5531 
5532 	REQUIRE(VALID_RBTDB(rbtdb));
5533 
5534 	/*
5535 	 * Caller must hold a tree lock.
5536 	 */
5537 
5538 	if (now == 0)
5539 		isc_stdtime_get(&now);
5540 
5541 	if (isc_mem_isovermem(rbtdb->common.mctx)) {
5542 		isc_uint32_t val;
5543 
5544 		isc_random_get(&val);
5545 		/*
5546 		 * XXXDCL Could stand to have a better policy, like LRU.
5547 		 */
5548 		force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5549 
5550 		/*
5551 		 * Note that 'log' can be true IFF overmem is also true.
5552 		 * overmem can currently only be true for cache
5553 		 * databases -- hence all of the "overmem cache" log strings.
5554 		 */
5555 		log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5556 		if (log)
5557 			isc_log_write(dns_lctx, category, module, level,
5558 				      "overmem cache: %s %s",
5559 				      force_expire ? "FORCE" : "check",
5560 				      dns_rbt_formatnodename(rbtnode,
5561 							   printname,
5562 							   sizeof(printname)));
5563 	}
5564 
5565 	/*
5566 	 * We may not need write access, but this code path is not performance
5567 	 * sensitive, so it should be okay to always lock as a writer.
5568 	 */
5569 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5570 		  isc_rwlocktype_write);
5571 
5572 	for (header = rbtnode->data; header != NULL; header = header->next)
5573 		if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5574 			/*
5575 			 * We don't check if refcurrent(rbtnode) == 0 and try
5576 			 * to free like we do in cache_find(), because
5577 			 * refcurrent(rbtnode) must be non-zero.  This is so
5578 			 * because 'node' is an argument to the function.
5579 			 */
5580 			mark_stale_header(rbtdb, header);
5581 			if (log)
5582 				isc_log_write(dns_lctx, category, module,
5583 					      level, "overmem cache: stale %s",
5584 					      printname);
5585 		} else if (force_expire) {
5586 			if (! RETAIN(header)) {
5587 				set_ttl(rbtdb, header, 0);
5588 				mark_stale_header(rbtdb, header);
5589 			} else if (log) {
5590 				isc_log_write(dns_lctx, category, module,
5591 					      level, "overmem cache: "
5592 					      "reprieve by RETAIN() %s",
5593 					      printname);
5594 			}
5595 		} else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5596 			isc_log_write(dns_lctx, category, module, level,
5597 				      "overmem cache: saved %s", printname);
5598 
5599 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5600 		    isc_rwlocktype_write);
5601 
5602 	return (ISC_R_SUCCESS);
5603 }
5604 
5605 static void
overmem(dns_db_t * db,isc_boolean_t overmem)5606 overmem(dns_db_t *db, isc_boolean_t overmem) {
5607 	/* This is an empty callback.  See adb.c:water() */
5608 
5609 	UNUSED(db);
5610 	UNUSED(overmem);
5611 
5612 	return;
5613 }
5614 
5615 static void
printnode(dns_db_t * db,dns_dbnode_t * node,FILE * out)5616 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5617 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5618 	dns_rbtnode_t *rbtnode = node;
5619 	isc_boolean_t first;
5620 
5621 	REQUIRE(VALID_RBTDB(rbtdb));
5622 
5623 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5624 		  isc_rwlocktype_read);
5625 
5626 	fprintf(out, "node %p, %u references, locknum = %u\n",
5627 		rbtnode, dns_rbtnode_refcurrent(rbtnode),
5628 		rbtnode->locknum);
5629 	if (rbtnode->data != NULL) {
5630 		rdatasetheader_t *current, *top_next;
5631 
5632 		for (current = rbtnode->data; current != NULL;
5633 		     current = top_next) {
5634 			top_next = current->next;
5635 			first = ISC_TRUE;
5636 			fprintf(out, "\ttype %u", current->type);
5637 			do {
5638 				if (!first)
5639 					fprintf(out, "\t");
5640 				first = ISC_FALSE;
5641 				fprintf(out,
5642 					"\tserial = %lu, ttl = %u, "
5643 					"trust = %u, attributes = %u, "
5644 					"resign = %u\n",
5645 					(unsigned long)current->serial,
5646 					current->rdh_ttl,
5647 					current->trust,
5648 					current->attributes,
5649 					current->resign);
5650 				current = current->down;
5651 			} while (current != NULL);
5652 		}
5653 	} else
5654 		fprintf(out, "(empty)\n");
5655 
5656 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5657 		    isc_rwlocktype_read);
5658 }
5659 
5660 static isc_result_t
createiterator(dns_db_t * db,unsigned int options,dns_dbiterator_t ** iteratorp)5661 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5662 {
5663 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5664 	rbtdb_dbiterator_t *rbtdbiter;
5665 
5666 	REQUIRE(VALID_RBTDB(rbtdb));
5667 
5668 	rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5669 	if (rbtdbiter == NULL)
5670 		return (ISC_R_NOMEMORY);
5671 
5672 	rbtdbiter->common.methods = &dbiterator_methods;
5673 	rbtdbiter->common.db = NULL;
5674 	dns_db_attach(db, &rbtdbiter->common.db);
5675 	rbtdbiter->common.relative_names =
5676 			ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5677 	rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5678 	rbtdbiter->common.cleaning = ISC_FALSE;
5679 	rbtdbiter->paused = ISC_TRUE;
5680 	rbtdbiter->tree_locked = isc_rwlocktype_none;
5681 	rbtdbiter->result = ISC_R_SUCCESS;
5682 	dns_fixedname_init(&rbtdbiter->name);
5683 	dns_fixedname_init(&rbtdbiter->origin);
5684 	rbtdbiter->node = NULL;
5685 	rbtdbiter->delete = 0;
5686 	rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5687 	rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5688 	memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5689 	dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5690 	dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5691 	if (rbtdbiter->nsec3only)
5692 		rbtdbiter->current = &rbtdbiter->nsec3chain;
5693 	else
5694 		rbtdbiter->current = &rbtdbiter->chain;
5695 
5696 	*iteratorp = (dns_dbiterator_t *)rbtdbiter;
5697 
5698 	return (ISC_R_SUCCESS);
5699 }
5700 
5701 static isc_result_t
zone_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5702 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5703 		  dns_rdatatype_t type, dns_rdatatype_t covers,
5704 		  isc_stdtime_t now, dns_rdataset_t *rdataset,
5705 		  dns_rdataset_t *sigrdataset)
5706 {
5707 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5708 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5709 	rdatasetheader_t *header, *header_next, *found, *foundsig;
5710 	rbtdb_serial_t serial;
5711 	rbtdb_version_t *rbtversion = version;
5712 	isc_boolean_t close_version = ISC_FALSE;
5713 	rbtdb_rdatatype_t matchtype, sigmatchtype;
5714 
5715 	REQUIRE(VALID_RBTDB(rbtdb));
5716 	REQUIRE(type != dns_rdatatype_any);
5717 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5718 
5719 	if (rbtversion == NULL) {
5720 		currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5721 		close_version = ISC_TRUE;
5722 	}
5723 	serial = rbtversion->serial;
5724 	now = 0;
5725 
5726 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5727 		  isc_rwlocktype_read);
5728 
5729 	found = NULL;
5730 	foundsig = NULL;
5731 	matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5732 	if (covers == 0)
5733 		sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5734 	else
5735 		sigmatchtype = 0;
5736 
5737 	for (header = rbtnode->data; header != NULL; header = header_next) {
5738 		header_next = header->next;
5739 		do {
5740 			if (header->serial <= serial &&
5741 			    !IGNORE(header)) {
5742 				/*
5743 				 * Is this a "this rdataset doesn't
5744 				 * exist" record?
5745 				 */
5746 				if (NONEXISTENT(header))
5747 					header = NULL;
5748 				break;
5749 			} else
5750 				header = header->down;
5751 		} while (header != NULL);
5752 		if (header != NULL) {
5753 			/*
5754 			 * We have an active, extant rdataset.  If it's a
5755 			 * type we're looking for, remember it.
5756 			 */
5757 			if (header->type == matchtype) {
5758 				found = header;
5759 				if (foundsig != NULL)
5760 					break;
5761 			} else if (header->type == sigmatchtype) {
5762 				foundsig = header;
5763 				if (found != NULL)
5764 					break;
5765 			}
5766 		}
5767 	}
5768 	if (found != NULL) {
5769 		bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5770 		if (foundsig != NULL)
5771 			bind_rdataset(rbtdb, rbtnode, foundsig, now,
5772 				      sigrdataset);
5773 	}
5774 
5775 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5776 		    isc_rwlocktype_read);
5777 
5778 	if (close_version)
5779 		closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5780 			     ISC_FALSE);
5781 
5782 	if (found == NULL)
5783 		return (ISC_R_NOTFOUND);
5784 
5785 	return (ISC_R_SUCCESS);
5786 }
5787 
5788 static isc_result_t
cache_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5789 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5790 		   dns_rdatatype_t type, dns_rdatatype_t covers,
5791 		   isc_stdtime_t now, dns_rdataset_t *rdataset,
5792 		   dns_rdataset_t *sigrdataset)
5793 {
5794 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5795 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5796 	rdatasetheader_t *header, *header_next, *found, *foundsig;
5797 	rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5798 	isc_result_t result;
5799 	nodelock_t *lock;
5800 	isc_rwlocktype_t locktype;
5801 
5802 	REQUIRE(VALID_RBTDB(rbtdb));
5803 	REQUIRE(type != dns_rdatatype_any);
5804 
5805 	UNUSED(version);
5806 
5807 	result = ISC_R_SUCCESS;
5808 
5809 	if (now == 0)
5810 		isc_stdtime_get(&now);
5811 
5812 	lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5813 	locktype = isc_rwlocktype_read;
5814 	NODE_LOCK(lock, locktype);
5815 
5816 	found = NULL;
5817 	foundsig = NULL;
5818 	matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5819 	negtype = RBTDB_RDATATYPE_VALUE(0, type);
5820 	if (covers == 0)
5821 		sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5822 	else
5823 		sigmatchtype = 0;
5824 
5825 	for (header = rbtnode->data; header != NULL; header = header_next) {
5826 		header_next = header->next;
5827 		if (header->rdh_ttl < now) {
5828 			if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5829 			    (locktype == isc_rwlocktype_write ||
5830 			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5831 				/*
5832 				 * We update the node's status only when we
5833 				 * can get write access.
5834 				 */
5835 				locktype = isc_rwlocktype_write;
5836 
5837 				/*
5838 				 * We don't check if refcurrent(rbtnode) == 0
5839 				 * and try to free like we do in cache_find(),
5840 				 * because refcurrent(rbtnode) must be
5841 				 * non-zero.  This is so because 'node' is an
5842 				 * argument to the function.
5843 				 */
5844 				mark_stale_header(rbtdb, header);
5845 			}
5846 		} else if (EXISTS(header)) {
5847 			if (header->type == matchtype)
5848 				found = header;
5849 			else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5850 				 header->type == negtype)
5851 				found = header;
5852 			else if (header->type == sigmatchtype)
5853 				foundsig = header;
5854 		}
5855 	}
5856 	if (found != NULL) {
5857 		bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5858 		if (!NEGATIVE(found) && foundsig != NULL)
5859 			bind_rdataset(rbtdb, rbtnode, foundsig, now,
5860 				      sigrdataset);
5861 	}
5862 
5863 	NODE_UNLOCK(lock, locktype);
5864 
5865 	if (found == NULL)
5866 		return (ISC_R_NOTFOUND);
5867 
5868 	if (NEGATIVE(found)) {
5869 		/*
5870 		 * We found a negative cache entry.
5871 		 */
5872 		if (NXDOMAIN(found))
5873 			result = DNS_R_NCACHENXDOMAIN;
5874 		else
5875 			result = DNS_R_NCACHENXRRSET;
5876 	}
5877 
5878 	update_cachestats(rbtdb, result);
5879 
5880 	return (result);
5881 }
5882 
5883 static isc_result_t
allrdatasets(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,isc_stdtime_t now,dns_rdatasetiter_t ** iteratorp)5884 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5885 	     isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5886 {
5887 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5888 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5889 	rbtdb_version_t *rbtversion = version;
5890 	rbtdb_rdatasetiter_t *iterator;
5891 	unsigned int refs;
5892 
5893 	REQUIRE(VALID_RBTDB(rbtdb));
5894 
5895 	iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5896 	if (iterator == NULL)
5897 		return (ISC_R_NOMEMORY);
5898 
5899 	if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5900 		now = 0;
5901 		if (rbtversion == NULL)
5902 			currentversion(db,
5903 				 (dns_dbversion_t **) (void *)(&rbtversion));
5904 		else {
5905 			INSIST(rbtversion->rbtdb == rbtdb);
5906 
5907 			isc_refcount_increment(&rbtversion->references,
5908 					       &refs);
5909 			INSIST(refs > 1);
5910 		}
5911 	} else {
5912 		if (now == 0)
5913 			isc_stdtime_get(&now);
5914 		rbtversion = NULL;
5915 	}
5916 
5917 	iterator->common.magic = DNS_RDATASETITER_MAGIC;
5918 	iterator->common.methods = &rdatasetiter_methods;
5919 	iterator->common.db = db;
5920 	iterator->common.node = node;
5921 	iterator->common.version = (dns_dbversion_t *)rbtversion;
5922 	iterator->common.now = now;
5923 
5924 	NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5925 
5926 	dns_rbtnode_refincrement(rbtnode, &refs);
5927 	INSIST(refs != 0);
5928 
5929 	iterator->current = NULL;
5930 
5931 	NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5932 
5933 	*iteratorp = (dns_rdatasetiter_t *)iterator;
5934 
5935 	return (ISC_R_SUCCESS);
5936 }
5937 
5938 static isc_boolean_t
cname_and_other_data(dns_rbtnode_t * node,rbtdb_serial_t serial)5939 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5940 	rdatasetheader_t *header, *header_next;
5941 	isc_boolean_t cname, other_data;
5942 	dns_rdatatype_t rdtype;
5943 
5944 	/*
5945 	 * The caller must hold the node lock.
5946 	 */
5947 
5948 	/*
5949 	 * Look for CNAME and "other data" rdatasets active in our version.
5950 	 */
5951 	cname = ISC_FALSE;
5952 	other_data = ISC_FALSE;
5953 	for (header = node->data; header != NULL; header = header_next) {
5954 		header_next = header->next;
5955 		if (header->type == dns_rdatatype_cname) {
5956 			/*
5957 			 * Look for an active extant CNAME.
5958 			 */
5959 			do {
5960 				if (header->serial <= serial &&
5961 				    !IGNORE(header)) {
5962 					/*
5963 					 * Is this a "this rdataset doesn't
5964 					 * exist" record?
5965 					 */
5966 					if (NONEXISTENT(header))
5967 						header = NULL;
5968 					break;
5969 				} else
5970 					header = header->down;
5971 			} while (header != NULL);
5972 			if (header != NULL)
5973 				cname = ISC_TRUE;
5974 		} else {
5975 			/*
5976 			 * Look for active extant "other data".
5977 			 *
5978 			 * "Other data" is any rdataset whose type is not
5979 			 * KEY, NSEC, SIG or RRSIG.
5980 			 */
5981 			rdtype = RBTDB_RDATATYPE_BASE(header->type);
5982 			if (rdtype != dns_rdatatype_key &&
5983 			    rdtype != dns_rdatatype_sig &&
5984 			    rdtype != dns_rdatatype_nsec &&
5985 			    rdtype != dns_rdatatype_rrsig) {
5986 				/*
5987 				 * Is it active and extant?
5988 				 */
5989 				do {
5990 					if (header->serial <= serial &&
5991 					    !IGNORE(header)) {
5992 						/*
5993 						 * Is this a "this rdataset
5994 						 * doesn't exist" record?
5995 						 */
5996 						if (NONEXISTENT(header))
5997 							header = NULL;
5998 						break;
5999 					} else
6000 						header = header->down;
6001 				} while (header != NULL);
6002 				if (header != NULL)
6003 					other_data = ISC_TRUE;
6004 			}
6005 		}
6006 	}
6007 
6008 	if (cname && other_data)
6009 		return (ISC_TRUE);
6010 
6011 	return (ISC_FALSE);
6012 }
6013 
6014 static isc_result_t
resign_insert(dns_rbtdb_t * rbtdb,int idx,rdatasetheader_t * newheader)6015 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
6016 	isc_result_t result;
6017 
6018 	INSIST(!IS_CACHE(rbtdb));
6019 	INSIST(newheader->heap_index == 0);
6020 	INSIST(!ISC_LINK_LINKED(newheader, link));
6021 
6022 	result = isc_heap_insert(rbtdb->heaps[idx], newheader);
6023 	return (result);
6024 }
6025 
6026 static void
resign_delete(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rdatasetheader_t * header)6027 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
6028 	      rdatasetheader_t *header)
6029 {
6030 	/*
6031 	 * Remove the old header from the heap
6032 	 */
6033 	if (header != NULL && header->heap_index != 0) {
6034 		isc_heap_delete(rbtdb->heaps[header->node->locknum],
6035 				header->heap_index);
6036 		header->heap_index = 0;
6037 		if (version != NULL) {
6038 			new_reference(rbtdb, header->node);
6039 			ISC_LIST_APPEND(version->resigned_list, header, link);
6040 		}
6041 	}
6042 }
6043 
6044 static isc_result_t
add32(dns_rbtdb_t * rbtdb,dns_rbtnode_t * rbtnode,rbtdb_version_t * rbtversion,rdatasetheader_t * newheader,unsigned int options,isc_boolean_t loading,dns_rdataset_t * addedrdataset,isc_stdtime_t now)6045 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
6046       rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
6047       dns_rdataset_t *addedrdataset, isc_stdtime_t now)
6048 {
6049 	rbtdb_changed_t *changed = NULL;
6050 	rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
6051 	unsigned char *merged;
6052 	isc_result_t result;
6053 	isc_boolean_t header_nx;
6054 	isc_boolean_t newheader_nx;
6055 	isc_boolean_t merge;
6056 	dns_rdatatype_t rdtype, covers;
6057 	rbtdb_rdatatype_t negtype, sigtype;
6058 	dns_trust_t trust;
6059 	int idx;
6060 
6061 	/*
6062 	 * Add an rdatasetheader_t to a node.
6063 	 */
6064 
6065 	/*
6066 	 * Caller must be holding the node lock.
6067 	 */
6068 
6069 	if ((options & DNS_DBADD_MERGE) != 0) {
6070 		REQUIRE(rbtversion != NULL);
6071 		merge = ISC_TRUE;
6072 	} else
6073 		merge = ISC_FALSE;
6074 
6075 	if ((options & DNS_DBADD_FORCE) != 0)
6076 		trust = dns_trust_ultimate;
6077 	else
6078 		trust = newheader->trust;
6079 
6080 	if (rbtversion != NULL && !loading) {
6081 		/*
6082 		 * We always add a changed record, even if no changes end up
6083 		 * being made to this node, because it's harmless and
6084 		 * simplifies the code.
6085 		 */
6086 		changed = add_changed(rbtdb, rbtversion, rbtnode);
6087 		if (changed == NULL) {
6088 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6089 			return (ISC_R_NOMEMORY);
6090 		}
6091 	}
6092 
6093 	newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
6094 	topheader_prev = NULL;
6095 	sigheader = NULL;
6096 	negtype = 0;
6097 	if (rbtversion == NULL && !newheader_nx) {
6098 		rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
6099 		covers = RBTDB_RDATATYPE_EXT(newheader->type);
6100 		sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
6101 		if (NEGATIVE(newheader)) {
6102 			/*
6103 			 * We're adding a negative cache entry.
6104 			 */
6105 			for (topheader = rbtnode->data;
6106 			     topheader != NULL;
6107 			     topheader = topheader->next) {
6108 				/*
6109 				 * If we're adding an negative cache entry
6110 				 * which covers all types (NXDOMAIN,
6111 				 * NODATA(QTYPE=ANY)).
6112 				 *
6113 				 * We make all other data stale so that the
6114 				 * only rdataset that can be found at this
6115 				 * node is the negative cache entry.
6116 				 *
6117 				 * Otherwise look for any RRSIGs of the
6118 				 * given type so they can be marked stale
6119 				 * later.
6120 				 */
6121 				if (covers == dns_rdatatype_any) {
6122 					set_ttl(rbtdb, topheader, 0);
6123 					mark_stale_header(rbtdb, topheader);
6124 				} else if (topheader->type == sigtype)
6125 					sigheader = topheader;
6126 			}
6127 			if (covers == dns_rdatatype_any)
6128 				goto find_header;
6129 			negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
6130 		} else {
6131 			/*
6132 			 * We're adding something that isn't a
6133 			 * negative cache entry.  Look for an extant
6134 			 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
6135 			 * cache entry.  If we're adding an RRSIG, also
6136 			 * check for an extant non-stale NODATA ncache
6137 			 * entry which covers the same type as the RRSIG.
6138 			 */
6139 			for (topheader = rbtnode->data;
6140 			     topheader != NULL;
6141 			     topheader = topheader->next) {
6142 				if ((topheader->type ==
6143 					RBTDB_RDATATYPE_NCACHEANY) ||
6144 					(newheader->type == sigtype &&
6145 					topheader->type ==
6146 					RBTDB_RDATATYPE_VALUE(0, covers))) {
6147 						break;
6148 					}
6149 			}
6150 			if (topheader != NULL && EXISTS(topheader) &&
6151 			    topheader->rdh_ttl >= now) {
6152 				/*
6153 				 * Found one.
6154 				 */
6155 				if (trust < topheader->trust) {
6156 					/*
6157 					 * The NXDOMAIN/NODATA(QTYPE=ANY)
6158 					 * is more trusted.
6159 					 */
6160 					free_rdataset(rbtdb,
6161 						      rbtdb->common.mctx,
6162 						      newheader);
6163 					if (addedrdataset != NULL)
6164 						bind_rdataset(rbtdb, rbtnode,
6165 							      topheader, now,
6166 							      addedrdataset);
6167 					return (DNS_R_UNCHANGED);
6168 				}
6169 				/*
6170 				 * The new rdataset is better.  Expire the
6171 				 * ncache entry.
6172 				 */
6173 				set_ttl(rbtdb, topheader, 0);
6174 				mark_stale_header(rbtdb, topheader);
6175 				topheader = NULL;
6176 				goto find_header;
6177 			}
6178 			negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6179 		}
6180 	}
6181 
6182 	for (topheader = rbtnode->data;
6183 	     topheader != NULL;
6184 	     topheader = topheader->next) {
6185 		if (topheader->type == newheader->type ||
6186 		    topheader->type == negtype)
6187 			break;
6188 		topheader_prev = topheader;
6189 	}
6190 
6191  find_header:
6192 	/*
6193 	 * If header isn't NULL, we've found the right type.  There may be
6194 	 * IGNORE rdatasets between the top of the chain and the first real
6195 	 * data.  We skip over them.
6196 	 */
6197 	header = topheader;
6198 	while (header != NULL && IGNORE(header))
6199 		header = header->down;
6200 	if (header != NULL) {
6201 		header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6202 
6203 		/*
6204 		 * Deleting an already non-existent rdataset has no effect.
6205 		 */
6206 		if (header_nx && newheader_nx) {
6207 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6208 			return (DNS_R_UNCHANGED);
6209 		}
6210 
6211 		/*
6212 		 * Trying to add an rdataset with lower trust to a cache DB
6213 		 * has no effect, provided that the cache data isn't stale.
6214 		 */
6215 		if (rbtversion == NULL && trust < header->trust &&
6216 		    (header->rdh_ttl >= now || header_nx)) {
6217 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6218 			if (addedrdataset != NULL)
6219 				bind_rdataset(rbtdb, rbtnode, header, now,
6220 					      addedrdataset);
6221 			return (DNS_R_UNCHANGED);
6222 		}
6223 
6224 		/*
6225 		 * Don't merge if a nonexistent rdataset is involved.
6226 		 */
6227 		if (merge && (header_nx || newheader_nx))
6228 			merge = ISC_FALSE;
6229 
6230 		/*
6231 		 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6232 		 * that is the union of 'newheader' and 'header'.
6233 		 */
6234 		if (merge) {
6235 			unsigned int flags = 0;
6236 			INSIST(rbtversion->serial >= header->serial);
6237 			merged = NULL;
6238 			result = ISC_R_SUCCESS;
6239 
6240 			if ((options & DNS_DBADD_EXACT) != 0)
6241 				flags |= DNS_RDATASLAB_EXACT;
6242 			if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6243 			     newheader->rdh_ttl != header->rdh_ttl)
6244 					result = DNS_R_NOTEXACT;
6245 			else if (newheader->rdh_ttl != header->rdh_ttl)
6246 				flags |= DNS_RDATASLAB_FORCE;
6247 			if (result == ISC_R_SUCCESS)
6248 				result = dns_rdataslab_merge(
6249 					     (unsigned char *)header,
6250 					     (unsigned char *)newheader,
6251 					     (unsigned int)(sizeof(*newheader)),
6252 					     rbtdb->common.mctx,
6253 					     rbtdb->common.rdclass,
6254 					     (dns_rdatatype_t)header->type,
6255 					     flags, &merged);
6256 			if (result == ISC_R_SUCCESS) {
6257 				/*
6258 				 * If 'header' has the same serial number as
6259 				 * we do, we could clean it up now if we knew
6260 				 * that our caller had no references to it.
6261 				 * We don't know this, however, so we leave it
6262 				 * alone.  It will get cleaned up when
6263 				 * clean_zone_node() runs.
6264 				 */
6265 				free_rdataset(rbtdb, rbtdb->common.mctx,
6266 					      newheader);
6267 				newheader = (rdatasetheader_t *)merged;
6268 				init_rdataset(rbtdb, newheader);
6269 				update_newheader(newheader, header);
6270 				if (loading && RESIGN(newheader) &&
6271 				    RESIGN(header) &&
6272 				    header->resign < newheader->resign)
6273 					newheader->resign = header->resign;
6274 			} else {
6275 				free_rdataset(rbtdb, rbtdb->common.mctx,
6276 					      newheader);
6277 				return (result);
6278 			}
6279 		}
6280 		/*
6281 		 * Don't replace existing NS, A and AAAA RRsets
6282 		 * in the cache if they are already exist.  This
6283 		 * prevents named being locked to old servers.
6284 		 * Don't lower trust of existing record if the
6285 		 * update is forced.
6286 		 */
6287 		if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6288 		    header->type == dns_rdatatype_ns &&
6289 		    !header_nx && !newheader_nx &&
6290 		    header->trust >= newheader->trust &&
6291 		    dns_rdataslab_equalx((unsigned char *)header,
6292 					 (unsigned char *)newheader,
6293 					 (unsigned int)(sizeof(*newheader)),
6294 					 rbtdb->common.rdclass,
6295 					 (dns_rdatatype_t)header->type)) {
6296 			/*
6297 			 * Honour the new ttl if it is less than the
6298 			 * older one.
6299 			 */
6300 			if (header->rdh_ttl > newheader->rdh_ttl)
6301 				set_ttl(rbtdb, header, newheader->rdh_ttl);
6302 			if (header->noqname == NULL &&
6303 			    newheader->noqname != NULL) {
6304 				header->noqname = newheader->noqname;
6305 				newheader->noqname = NULL;
6306 			}
6307 			if (header->closest == NULL &&
6308 			    newheader->closest != NULL) {
6309 				header->closest = newheader->closest;
6310 				newheader->closest = NULL;
6311 			}
6312 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6313 			if (addedrdataset != NULL)
6314 				bind_rdataset(rbtdb, rbtnode, header, now,
6315 					      addedrdataset);
6316 			return (ISC_R_SUCCESS);
6317 		}
6318 		/*
6319 		 * If we have will be replacing a NS RRset force its TTL
6320 		 * to be no more than the current NS RRset's TTL.  This
6321 		 * ensures the delegations that are withdrawn are honoured.
6322 		 */
6323 		if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6324 		    header->type == dns_rdatatype_ns &&
6325 		    !header_nx && !newheader_nx &&
6326 		    header->trust <= newheader->trust) {
6327 			if (newheader->rdh_ttl > header->rdh_ttl) {
6328 				newheader->rdh_ttl = header->rdh_ttl;
6329 			}
6330 		}
6331 		if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6332 		    (options & DNS_DBADD_PREFETCH) == 0 &&
6333 		    (header->type == dns_rdatatype_a ||
6334 		     header->type == dns_rdatatype_aaaa ||
6335 		     header->type == dns_rdatatype_ds ||
6336 		     header->type == RBTDB_RDATATYPE_SIGDDS) &&
6337 		    !header_nx && !newheader_nx &&
6338 		    header->trust >= newheader->trust &&
6339 		    dns_rdataslab_equal((unsigned char *)header,
6340 					(unsigned char *)newheader,
6341 					(unsigned int)(sizeof(*newheader)))) {
6342 			/*
6343 			 * Honour the new ttl if it is less than the
6344 			 * older one.
6345 			 */
6346 			if (header->rdh_ttl > newheader->rdh_ttl)
6347 				set_ttl(rbtdb, header, newheader->rdh_ttl);
6348 			if (header->noqname == NULL &&
6349 			    newheader->noqname != NULL) {
6350 				header->noqname = newheader->noqname;
6351 				newheader->noqname = NULL;
6352 			}
6353 			if (header->closest == NULL &&
6354 			    newheader->closest != NULL) {
6355 				header->closest = newheader->closest;
6356 				newheader->closest = NULL;
6357 			}
6358 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6359 			if (addedrdataset != NULL)
6360 				bind_rdataset(rbtdb, rbtnode, header, now,
6361 					      addedrdataset);
6362 			return (ISC_R_SUCCESS);
6363 		}
6364 		INSIST(rbtversion == NULL ||
6365 		       rbtversion->serial >= topheader->serial);
6366 		if (topheader_prev != NULL)
6367 			topheader_prev->next = newheader;
6368 		else
6369 			rbtnode->data = newheader;
6370 		newheader->next = topheader->next;
6371 		if (loading) {
6372 			/*
6373 			 * There are no other references to 'header' when
6374 			 * loading, so we MAY clean up 'header' now.
6375 			 * Since we don't generate changed records when
6376 			 * loading, we MUST clean up 'header' now.
6377 			 */
6378 			newheader->down = NULL;
6379 			free_rdataset(rbtdb, rbtdb->common.mctx, header);
6380 
6381 			idx = newheader->node->locknum;
6382 			if (IS_CACHE(rbtdb)) {
6383 				ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6384 						 newheader, link);
6385 				INSIST(rbtdb->heaps != NULL);
6386 				(void)isc_heap_insert(rbtdb->heaps[idx],
6387 						      newheader);
6388 			} else if (RESIGN(newheader)) {
6389 				result = resign_insert(rbtdb, idx, newheader);
6390 				if (result != ISC_R_SUCCESS)
6391 					return (result);
6392 			}
6393 		} else {
6394 			newheader->down = topheader;
6395 			topheader->next = newheader;
6396 			rbtnode->dirty = 1;
6397 			if (changed != NULL)
6398 				changed->dirty = ISC_TRUE;
6399 			if (rbtversion == NULL) {
6400 				set_ttl(rbtdb, header, 0);
6401 				mark_stale_header(rbtdb, header);
6402 				if (sigheader != NULL) {
6403 					set_ttl(rbtdb, sigheader, 0);
6404 					mark_stale_header(rbtdb, sigheader);
6405 				}
6406 			}
6407 			idx = newheader->node->locknum;
6408 			if (IS_CACHE(rbtdb)) {
6409 				ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6410 						 newheader, link);
6411 				/*
6412 				 * XXXMLG We don't check the return value
6413 				 * here.  If it fails, we will not do TTL
6414 				 * based expiry on this node.  However, we
6415 				 * will do it on the LRU side, so memory
6416 				 * will not leak... for long.
6417 				 */
6418 				INSIST(rbtdb->heaps != NULL);
6419 				(void)isc_heap_insert(rbtdb->heaps[idx],
6420 						      newheader);
6421 			} else if (RESIGN(newheader)) {
6422 				resign_delete(rbtdb, rbtversion, header);
6423 				result = resign_insert(rbtdb, idx, newheader);
6424 				if (result != ISC_R_SUCCESS)
6425 					return (result);
6426 			}
6427 		}
6428 	} else {
6429 		/*
6430 		 * No non-IGNORED rdatasets of the given type exist at
6431 		 * this node.
6432 		 */
6433 
6434 		/*
6435 		 * If we're trying to delete the type, don't bother.
6436 		 */
6437 		if (newheader_nx) {
6438 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6439 			return (DNS_R_UNCHANGED);
6440 		}
6441 
6442 		if (topheader != NULL) {
6443 			/*
6444 			 * We have an list of rdatasets of the given type,
6445 			 * but they're all marked IGNORE.  We simply insert
6446 			 * the new rdataset at the head of the list.
6447 			 *
6448 			 * Ignored rdatasets cannot occur during loading, so
6449 			 * we INSIST on it.
6450 			 */
6451 			INSIST(!loading);
6452 			INSIST(rbtversion == NULL ||
6453 			       rbtversion->serial >= topheader->serial);
6454 			if (topheader_prev != NULL)
6455 				topheader_prev->next = newheader;
6456 			else
6457 				rbtnode->data = newheader;
6458 			newheader->next = topheader->next;
6459 			newheader->down = topheader;
6460 			topheader->next = newheader;
6461 			rbtnode->dirty = 1;
6462 			if (changed != NULL)
6463 				changed->dirty = ISC_TRUE;
6464 		} else {
6465 			/*
6466 			 * No rdatasets of the given type exist at the node.
6467 			 */
6468 			newheader->next = rbtnode->data;
6469 			newheader->down = NULL;
6470 			rbtnode->data = newheader;
6471 		}
6472 		idx = newheader->node->locknum;
6473 		if (IS_CACHE(rbtdb)) {
6474 			ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6475 					 newheader, link);
6476 			isc_heap_insert(rbtdb->heaps[idx], newheader);
6477 		} else if (RESIGN(newheader)) {
6478 			resign_delete(rbtdb, rbtversion, header);
6479 			result = resign_insert(rbtdb, idx, newheader);
6480 			if (result != ISC_R_SUCCESS)
6481 				return (result);
6482 		}
6483 	}
6484 
6485 	/*
6486 	 * Check if the node now contains CNAME and other data.
6487 	 */
6488 	if (rbtversion != NULL &&
6489 	    cname_and_other_data(rbtnode, rbtversion->serial))
6490 		return (DNS_R_CNAMEANDOTHER);
6491 
6492 	if (addedrdataset != NULL)
6493 		bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6494 
6495 	return (ISC_R_SUCCESS);
6496 }
6497 
6498 static inline isc_boolean_t
delegating_type(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rbtdb_rdatatype_t type)6499 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6500 		rbtdb_rdatatype_t type)
6501 {
6502 	if (IS_CACHE(rbtdb)) {
6503 		if (type == dns_rdatatype_dname)
6504 			return (ISC_TRUE);
6505 		else
6506 			return (ISC_FALSE);
6507 	} else if (type == dns_rdatatype_dname ||
6508 		   (type == dns_rdatatype_ns &&
6509 		    (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6510 		return (ISC_TRUE);
6511 	return (ISC_FALSE);
6512 }
6513 
6514 static inline isc_result_t
addnoqname(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6515 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6516 	   dns_rdataset_t *rdataset)
6517 {
6518 	struct noqname *noqname;
6519 	isc_mem_t *mctx = rbtdb->common.mctx;
6520 	dns_name_t name;
6521 	dns_rdataset_t neg, negsig;
6522 	isc_result_t result;
6523 	isc_region_t r;
6524 
6525 	dns_name_init(&name, NULL);
6526 	dns_rdataset_init(&neg);
6527 	dns_rdataset_init(&negsig);
6528 
6529 	result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6530 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
6531 
6532 	noqname = isc_mem_get(mctx, sizeof(*noqname));
6533 	if (noqname == NULL) {
6534 		result = ISC_R_NOMEMORY;
6535 		goto cleanup;
6536 	}
6537 	dns_name_init(&noqname->name, NULL);
6538 	noqname->neg = NULL;
6539 	noqname->negsig = NULL;
6540 	noqname->type = neg.type;
6541 	result = dns_name_dup(&name, mctx, &noqname->name);
6542 	if (result != ISC_R_SUCCESS)
6543 		goto cleanup;
6544 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6545 	if (result != ISC_R_SUCCESS)
6546 		goto cleanup;
6547 	noqname->neg = r.base;
6548 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6549 	if (result != ISC_R_SUCCESS)
6550 		goto cleanup;
6551 	noqname->negsig = r.base;
6552 	dns_rdataset_disassociate(&neg);
6553 	dns_rdataset_disassociate(&negsig);
6554 	newheader->noqname = noqname;
6555 	return (ISC_R_SUCCESS);
6556 
6557 cleanup:
6558 	dns_rdataset_disassociate(&neg);
6559 	dns_rdataset_disassociate(&negsig);
6560 	if (noqname != NULL)
6561 		free_noqname(mctx, &noqname);
6562 	return(result);
6563 }
6564 
6565 static inline isc_result_t
addclosest(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6566 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6567 	   dns_rdataset_t *rdataset)
6568 {
6569 	struct noqname *closest;
6570 	isc_mem_t *mctx = rbtdb->common.mctx;
6571 	dns_name_t name;
6572 	dns_rdataset_t neg, negsig;
6573 	isc_result_t result;
6574 	isc_region_t r;
6575 
6576 	dns_name_init(&name, NULL);
6577 	dns_rdataset_init(&neg);
6578 	dns_rdataset_init(&negsig);
6579 
6580 	result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6581 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
6582 
6583 	closest = isc_mem_get(mctx, sizeof(*closest));
6584 	if (closest == NULL) {
6585 		result = ISC_R_NOMEMORY;
6586 		goto cleanup;
6587 	}
6588 	dns_name_init(&closest->name, NULL);
6589 	closest->neg = NULL;
6590 	closest->negsig = NULL;
6591 	closest->type = neg.type;
6592 	result = dns_name_dup(&name, mctx, &closest->name);
6593 	if (result != ISC_R_SUCCESS)
6594 		goto cleanup;
6595 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6596 	if (result != ISC_R_SUCCESS)
6597 		goto cleanup;
6598 	closest->neg = r.base;
6599 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6600 	if (result != ISC_R_SUCCESS)
6601 		goto cleanup;
6602 	closest->negsig = r.base;
6603 	dns_rdataset_disassociate(&neg);
6604 	dns_rdataset_disassociate(&negsig);
6605 	newheader->closest = closest;
6606 	return (ISC_R_SUCCESS);
6607 
6608  cleanup:
6609 	dns_rdataset_disassociate(&neg);
6610 	dns_rdataset_disassociate(&negsig);
6611 	if (closest != NULL)
6612 		free_noqname(mctx, &closest);
6613 	return(result);
6614 }
6615 
/*
 * Declared here, ahead of addrdataset(), which compares
 * rbtdb->common.methods against the address of this method table.
 */
6616 static dns_dbmethods_t zone_methods;
6617 
6618 static isc_result_t
addrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,isc_stdtime_t now,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * addedrdataset)6619 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6620 	    isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6621 	    dns_rdataset_t *addedrdataset)
6622 {
6623 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6624 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6625 	rbtdb_version_t *rbtversion = version;
6626 	isc_region_t region;
6627 	rdatasetheader_t *newheader;
6628 	rdatasetheader_t *header;
6629 	isc_result_t result;
6630 	isc_boolean_t delegating;
6631 	isc_boolean_t newnsec;
6632 	isc_boolean_t tree_locked = ISC_FALSE;
6633 	isc_boolean_t cache_is_overmem = ISC_FALSE;
6634 
6635 	REQUIRE(VALID_RBTDB(rbtdb));
6636 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6637 
6638 	if (rbtdb->common.methods == &zone_methods)
6639 		REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6640 			  (rdataset->type == dns_rdatatype_nsec3 ||
6641 			   rdataset->covers == dns_rdatatype_nsec3)) ||
6642 			 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6643 			   rdataset->type != dns_rdatatype_nsec3 &&
6644 			   rdataset->covers != dns_rdatatype_nsec3)));
6645 
6646 	if (rbtversion == NULL) {
6647 		if (now == 0)
6648 			isc_stdtime_get(&now);
6649 	} else
6650 		now = 0;
6651 
6652 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6653 					    &region, sizeof(rdatasetheader_t));
6654 	if (result != ISC_R_SUCCESS)
6655 		return (result);
6656 
6657 	newheader = (rdatasetheader_t *)region.base;
6658 	init_rdataset(rbtdb, newheader);
6659 	set_ttl(rbtdb, newheader, rdataset->ttl + now);
6660 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6661 						rdataset->covers);
6662 	newheader->attributes = 0;
6663 	newheader->noqname = NULL;
6664 	newheader->closest = NULL;
6665 	newheader->count = init_count++;
6666 	newheader->trust = rdataset->trust;
6667 	newheader->additional_auth = NULL;
6668 	newheader->additional_glue = NULL;
6669 	newheader->last_used = now;
6670 	newheader->node = rbtnode;
6671 	if (rbtversion != NULL) {
6672 		newheader->serial = rbtversion->serial;
6673 		now = 0;
6674 
6675 		if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6676 			newheader->attributes |= RDATASET_ATTR_RESIGN;
6677 			newheader->resign = rdataset->resign;
6678 		} else
6679 			newheader->resign = 0;
6680 	} else {
6681 		newheader->serial = 1;
6682 		newheader->resign = 0;
6683 		if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0)
6684 			newheader->attributes |= RDATASET_ATTR_PREFETCH;
6685 		if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6686 			newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6687 		if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6688 			newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6689 		if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6690 			newheader->attributes |= RDATASET_ATTR_OPTOUT;
6691 		if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6692 			result = addnoqname(rbtdb, newheader, rdataset);
6693 			if (result != ISC_R_SUCCESS) {
6694 				free_rdataset(rbtdb, rbtdb->common.mctx,
6695 					      newheader);
6696 				return (result);
6697 			}
6698 		}
6699 		if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6700 			result = addclosest(rbtdb, newheader, rdataset);
6701 			if (result != ISC_R_SUCCESS) {
6702 				free_rdataset(rbtdb, rbtdb->common.mctx,
6703 					      newheader);
6704 				return (result);
6705 			}
6706 		}
6707 	}
6708 
6709 	/*
6710 	 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6711 	 * just DNAME for the cache), then we need to set the callback bit
6712 	 * on the node.
6713 	 */
6714 	if (delegating_type(rbtdb, rbtnode, rdataset->type))
6715 		delegating = ISC_TRUE;
6716 	else
6717 		delegating = ISC_FALSE;
6718 
6719 	/*
6720 	 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6721 	 */
6722 	if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6723 	    rdataset->type == dns_rdatatype_nsec)
6724 		newnsec = ISC_TRUE;
6725 	else
6726 		newnsec = ISC_FALSE;
6727 
6728 	/*
6729 	 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6730 	 * or the DB is a cache in an overmem state, hold an exclusive lock on
6731 	 * the tree.  In the latter case the lock does not necessarily have to
6732 	 * be acquired but it will help purge stale entries more effectively.
6733 	 */
6734 	if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6735 		cache_is_overmem = ISC_TRUE;
6736 	if (delegating || newnsec || cache_is_overmem) {
6737 		tree_locked = ISC_TRUE;
6738 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6739 	}
6740 
6741 	if (cache_is_overmem)
6742 		overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6743 
6744 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6745 		  isc_rwlocktype_write);
6746 
6747 	if (rbtdb->rrsetstats != NULL) {
6748 		newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6749 		update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6750 	}
6751 
6752 	if (IS_CACHE(rbtdb)) {
6753 		if (tree_locked)
6754 			cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6755 
6756 		header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6757 		if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
6758 			expire_header(rbtdb, header, tree_locked,
6759 				      expire_ttl);
6760 
6761 		/*
6762 		 * If we've been holding a write lock on the tree just for
6763 		 * cleaning, we can release it now.  However, we still need the
6764 		 * node lock.
6765 		 */
6766 		if (tree_locked && !delegating && !newnsec) {
6767 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6768 			tree_locked = ISC_FALSE;
6769 		}
6770 	}
6771 
6772 	result = ISC_R_SUCCESS;
6773 	if (newnsec) {
6774 		dns_fixedname_t fname;
6775 		dns_name_t *name;
6776 		dns_rbtnode_t *nsecnode;
6777 
6778 		dns_fixedname_init(&fname);
6779 		name = dns_fixedname_name(&fname);
6780 		dns_rbt_fullnamefromnode(rbtnode, name);
6781 		nsecnode = NULL;
6782 		result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6783 		if (result == ISC_R_SUCCESS) {
6784 			nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6785 			rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6786 		} else if (result == ISC_R_EXISTS) {
6787 			rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6788 			result = ISC_R_SUCCESS;
6789 		}
6790 	}
6791 
6792 	if (result == ISC_R_SUCCESS)
6793 		result = add32(rbtdb, rbtnode, rbtversion, newheader, options,
6794 			       ISC_FALSE, addedrdataset, now);
6795 	if (result == ISC_R_SUCCESS && delegating)
6796 		rbtnode->find_callback = 1;
6797 
6798 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6799 		    isc_rwlocktype_write);
6800 
6801 	if (tree_locked)
6802 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6803 
6804 	/*
6805 	 * Update the zone's secure status.  If version is non-NULL
6806 	 * this is deferred until closeversion() is called.
6807 	 */
6808 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6809 		iszonesecure(db, version, rbtdb->origin_node);
6810 
6811 	return (result);
6812 }
6813 
6814 static isc_result_t
subtractrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * newrdataset)6815 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6816 		 dns_rdataset_t *rdataset, unsigned int options,
6817 		 dns_rdataset_t *newrdataset)
6818 {
6819 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6820 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6821 	rbtdb_version_t *rbtversion = version;
6822 	rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6823 	unsigned char *subresult;
6824 	isc_region_t region;
6825 	isc_result_t result;
6826 	rbtdb_changed_t *changed;
6827 
6828 	REQUIRE(VALID_RBTDB(rbtdb));
6829 	REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6830 
6831 	if (rbtdb->common.methods == &zone_methods)
6832 		REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6833 			  (rdataset->type == dns_rdatatype_nsec3 ||
6834 			   rdataset->covers == dns_rdatatype_nsec3)) ||
6835 			 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6836 			   rdataset->type != dns_rdatatype_nsec3 &&
6837 			   rdataset->covers != dns_rdatatype_nsec3)));
6838 
6839 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6840 					    &region,
6841 					    sizeof(rdatasetheader_t));
6842 	if (result != ISC_R_SUCCESS)
6843 		return (result);
6844 	newheader = (rdatasetheader_t *)region.base;
6845 	init_rdataset(rbtdb, newheader);
6846 	set_ttl(rbtdb, newheader, rdataset->ttl);
6847 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6848 						rdataset->covers);
6849 	newheader->attributes = 0;
6850 	newheader->serial = rbtversion->serial;
6851 	newheader->trust = 0;
6852 	newheader->noqname = NULL;
6853 	newheader->closest = NULL;
6854 	newheader->count = init_count++;
6855 	newheader->additional_auth = NULL;
6856 	newheader->additional_glue = NULL;
6857 	newheader->last_used = 0;
6858 	newheader->node = rbtnode;
6859 	if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6860 		newheader->attributes |= RDATASET_ATTR_RESIGN;
6861 		newheader->resign = rdataset->resign;
6862 	} else
6863 		newheader->resign = 0;
6864 
6865 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6866 		  isc_rwlocktype_write);
6867 
6868 	changed = add_changed(rbtdb, rbtversion, rbtnode);
6869 	if (changed == NULL) {
6870 		free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6871 		NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6872 			    isc_rwlocktype_write);
6873 		return (ISC_R_NOMEMORY);
6874 	}
6875 
6876 	topheader_prev = NULL;
6877 	for (topheader = rbtnode->data;
6878 	     topheader != NULL;
6879 	     topheader = topheader->next) {
6880 		if (topheader->type == newheader->type)
6881 			break;
6882 		topheader_prev = topheader;
6883 	}
6884 	/*
6885 	 * If header isn't NULL, we've found the right type.  There may be
6886 	 * IGNORE rdatasets between the top of the chain and the first real
6887 	 * data.  We skip over them.
6888 	 */
6889 	header = topheader;
6890 	while (header != NULL && IGNORE(header))
6891 		header = header->down;
6892 	if (header != NULL && EXISTS(header)) {
6893 		unsigned int flags = 0;
6894 		subresult = NULL;
6895 		result = ISC_R_SUCCESS;
6896 		if ((options & DNS_DBSUB_EXACT) != 0) {
6897 			flags |= DNS_RDATASLAB_EXACT;
6898 			if (newheader->rdh_ttl != header->rdh_ttl)
6899 				result = DNS_R_NOTEXACT;
6900 		}
6901 		if (result == ISC_R_SUCCESS)
6902 			result = dns_rdataslab_subtract(
6903 					(unsigned char *)header,
6904 					(unsigned char *)newheader,
6905 					(unsigned int)(sizeof(*newheader)),
6906 					rbtdb->common.mctx,
6907 					rbtdb->common.rdclass,
6908 					(dns_rdatatype_t)header->type,
6909 					flags, &subresult);
6910 		if (result == ISC_R_SUCCESS) {
6911 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6912 			newheader = (rdatasetheader_t *)subresult;
6913 			init_rdataset(rbtdb, newheader);
6914 			update_newheader(newheader, header);
6915 			/*
6916 			 * We have to set the serial since the rdataslab
6917 			 * subtraction routine copies the reserved portion of
6918 			 * header, not newheader.
6919 			 */
6920 			newheader->serial = rbtversion->serial;
6921 			/*
6922 			 * XXXJT: dns_rdataslab_subtract() copied the pointers
6923 			 * to additional info.  We need to clear these fields
6924 			 * to avoid having duplicated references.
6925 			 */
6926 			newheader->additional_auth = NULL;
6927 			newheader->additional_glue = NULL;
6928 		} else if (result == DNS_R_NXRRSET) {
6929 			/*
6930 			 * This subtraction would remove all of the rdata;
6931 			 * add a nonexistent header instead.
6932 			 */
6933 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6934 			newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6935 			if (newheader == NULL) {
6936 				result = ISC_R_NOMEMORY;
6937 				goto unlock;
6938 			}
6939 			init_rdataset(rbtdb, newheader);
6940 			set_ttl(rbtdb, newheader, 0);
6941 			newheader->type = topheader->type;
6942 			newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6943 			newheader->trust = 0;
6944 			newheader->serial = rbtversion->serial;
6945 			newheader->noqname = NULL;
6946 			newheader->closest = NULL;
6947 			newheader->count = 0;
6948 			newheader->additional_auth = NULL;
6949 			newheader->additional_glue = NULL;
6950 			newheader->node = rbtnode;
6951 			newheader->resign = 0;
6952 			newheader->last_used = 0;
6953 		} else {
6954 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6955 			goto unlock;
6956 		}
6957 
6958 		/*
6959 		 * If we're here, we want to link newheader in front of
6960 		 * topheader.
6961 		 */
6962 		INSIST(rbtversion->serial >= topheader->serial);
6963 		if (topheader_prev != NULL)
6964 			topheader_prev->next = newheader;
6965 		else
6966 			rbtnode->data = newheader;
6967 		newheader->next = topheader->next;
6968 		newheader->down = topheader;
6969 		topheader->next = newheader;
6970 		rbtnode->dirty = 1;
6971 		changed->dirty = ISC_TRUE;
6972 		resign_delete(rbtdb, rbtversion, header);
6973 	} else {
6974 		/*
6975 		 * The rdataset doesn't exist, so we don't need to do anything
6976 		 * to satisfy the deletion request.
6977 		 */
6978 		free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6979 		if ((options & DNS_DBSUB_EXACT) != 0)
6980 			result = DNS_R_NOTEXACT;
6981 		else
6982 			result = DNS_R_UNCHANGED;
6983 	}
6984 
6985 	if (result == ISC_R_SUCCESS && newrdataset != NULL)
6986 		bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6987 
6988  unlock:
6989 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6990 		    isc_rwlocktype_write);
6991 
6992 	/*
6993 	 * Update the zone's secure status.  If version is non-NULL
6994 	 * this is deferred until closeversion() is called.
6995 	 */
6996 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6997 		iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6998 
6999 	return (result);
7000 }
7001 
7002 static isc_result_t
deleterdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers)7003 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
7004 	       dns_rdatatype_t type, dns_rdatatype_t covers)
7005 {
7006 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7007 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
7008 	rbtdb_version_t *rbtversion = version;
7009 	isc_result_t result;
7010 	rdatasetheader_t *newheader;
7011 
7012 	REQUIRE(VALID_RBTDB(rbtdb));
7013 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7014 
7015 	if (type == dns_rdatatype_any)
7016 		return (ISC_R_NOTIMPLEMENTED);
7017 	if (type == dns_rdatatype_rrsig && covers == 0)
7018 		return (ISC_R_NOTIMPLEMENTED);
7019 
7020 	newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
7021 	if (newheader == NULL)
7022 		return (ISC_R_NOMEMORY);
7023 	init_rdataset(rbtdb, newheader);
7024 	set_ttl(rbtdb, newheader, 0);
7025 	newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
7026 	newheader->attributes = RDATASET_ATTR_NONEXISTENT;
7027 	newheader->trust = 0;
7028 	newheader->noqname = NULL;
7029 	newheader->closest = NULL;
7030 	newheader->additional_auth = NULL;
7031 	newheader->additional_glue = NULL;
7032 	if (rbtversion != NULL)
7033 		newheader->serial = rbtversion->serial;
7034 	else
7035 		newheader->serial = 0;
7036 	newheader->count = 0;
7037 	newheader->last_used = 0;
7038 	newheader->node = rbtnode;
7039 
7040 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7041 		  isc_rwlocktype_write);
7042 
7043 	result = add32(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
7044 		       ISC_FALSE, NULL, 0);
7045 
7046 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7047 		    isc_rwlocktype_write);
7048 
7049 	/*
7050 	 * Update the zone's secure status.  If version is non-NULL
7051 	 * this is deferred until closeversion() is called.
7052 	 */
7053 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
7054 		iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7055 
7056 	return (result);
7057 }
7058 
7059 /*
7060  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
7061  */
7062 static isc_result_t
loadnode(dns_rbtdb_t * rbtdb,dns_name_t * name,dns_rbtnode_t ** nodep,isc_boolean_t hasnsec)7063 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
7064 	 isc_boolean_t hasnsec)
7065 {
7066 	isc_result_t noderesult, nsecresult, tmpresult;
7067 	dns_rbtnode_t *nsecnode = NULL, *node = NULL;
7068 
7069 	noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
7070 	if (rbtdb->rpzs != NULL && noderesult == ISC_R_SUCCESS) {
7071 		noderesult = dns_rpz_add(rbtdb->load_rpzs, rbtdb->rpz_num,
7072 					 name);
7073 		if (noderesult == ISC_R_SUCCESS) {
7074 			node->rpz = 1;
7075 		} else  {
7076 			/*
7077 			 * Remove the node we just added above.
7078 			 */
7079 			tmpresult = dns_rbt_deletenode(rbtdb->tree, node,
7080 						       ISC_FALSE);
7081 			if (tmpresult != ISC_R_SUCCESS)
7082 				isc_log_write(dns_lctx,
7083 					      DNS_LOGCATEGORY_DATABASE,
7084 					      DNS_LOGMODULE_CACHE,
7085 					      ISC_LOG_WARNING,
7086 					      "loading_addrdataset: "
7087 					      "dns_rbt_deletenode: %s after "
7088 					      "dns_rbt_addnode(NSEC): %s",
7089 					      isc_result_totext(tmpresult),
7090 					      isc_result_totext(ISC_R_SUCCESS));
7091 		}
7092 	}
7093 	if (!hasnsec)
7094 		goto done;
7095 	if (noderesult == ISC_R_EXISTS) {
7096 		/*
7097 		 * Add a node to the auxiliary NSEC tree for an old node
7098 		 * just now getting an NSEC record.
7099 		 */
7100 		if (node->nsec == DNS_RBT_NSEC_HAS_NSEC)
7101 			goto done;
7102 	} else if (noderesult != ISC_R_SUCCESS)
7103 		goto done;
7104 
7105 	/*
7106 	 * Build the auxiliary tree for NSECs as we go.
7107 	 * This tree speeds searches for closest NSECs that would otherwise
7108 	 * need to examine many irrelevant nodes in large TLDs.
7109 	 *
7110 	 * Add nodes to the auxiliary tree after corresponding nodes have
7111 	 * been added to the main tree.
7112 	 */
7113 	nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
7114 	if (nsecresult == ISC_R_SUCCESS) {
7115 		nsecnode->nsec = DNS_RBT_NSEC_NSEC;
7116 		node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7117 		goto done;
7118 	}
7119 
7120 	if (nsecresult == ISC_R_EXISTS) {
7121 #if 1 /* 0 */
7122 		isc_log_write(dns_lctx,
7123 			      DNS_LOGCATEGORY_DATABASE,
7124 			      DNS_LOGMODULE_CACHE,
7125 			      ISC_LOG_WARNING,
7126 			      "addnode: NSEC node already exists");
7127 #endif
7128 		node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7129 		goto done;
7130 	}
7131 
7132 	if (noderesult == ISC_R_SUCCESS) {
7133 		unsigned int node_has_rpz;
7134 
7135 		/*
7136 		 * Remove the node we just added above.
7137 		 */
7138 		node_has_rpz = node->rpz;
7139 		tmpresult = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
7140 		if (tmpresult == ISC_R_SUCCESS) {
7141 			/*
7142 			 * Clean rpz entries added above.
7143 			 */
7144 			if (rbtdb->rpzs != NULL && node_has_rpz)
7145 				dns_rpz_delete(rbtdb->load_rpzs,
7146 					       rbtdb->rpz_num, name);
7147 		} else {
7148 			isc_log_write(dns_lctx,
7149 				      DNS_LOGCATEGORY_DATABASE,
7150 				      DNS_LOGMODULE_CACHE,
7151 				      ISC_LOG_WARNING,
7152 				      "loading_addrdataset: "
7153 				      "dns_rbt_deletenode: %s after "
7154 				      "dns_rbt_addnode(NSEC): %s",
7155 				      isc_result_totext(tmpresult),
7156 				      isc_result_totext(noderesult));
7157 		}
7158 	}
7159 
7160 	/*
7161 	 * Set the error condition to be returned.
7162 	 */
7163 	noderesult = nsecresult;
7164 
7165  done:
7166 	if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS)
7167 		*nodep = node;
7168 
7169 	return (noderesult);
7170 }
7171 
7172 static isc_result_t
loading_addrdataset(void * arg,dns_name_t * name,dns_rdataset_t * rdataset)7173 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
7174 	rbtdb_load_t *loadctx = arg;
7175 	dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7176 	dns_rbtnode_t *node;
7177 	isc_result_t result;
7178 	isc_region_t region;
7179 	rdatasetheader_t *newheader;
7180 
7181 	/*
7182 	 * This routine does no node locking.  See comments in
7183 	 * 'load' below for more information on loading and
7184 	 * locking.
7185 	 */
7186 
7187 
7188 	/*
7189 	 * SOA records are only allowed at top of zone.
7190 	 */
7191 	if (rdataset->type == dns_rdatatype_soa &&
7192 	    !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
7193 		return (DNS_R_NOTZONETOP);
7194 
7195 	if (rdataset->type != dns_rdatatype_nsec3 &&
7196 	    rdataset->covers != dns_rdatatype_nsec3)
7197 		add_empty_wildcards(rbtdb, name);
7198 
7199 	if (dns_name_iswildcard(name)) {
7200 		/*
7201 		 * NS record owners cannot legally be wild cards.
7202 		 */
7203 		if (rdataset->type == dns_rdatatype_ns)
7204 			return (DNS_R_INVALIDNS);
7205 		/*
7206 		 * NSEC3 record owners cannot legally be wild cards.
7207 		 */
7208 		if (rdataset->type == dns_rdatatype_nsec3)
7209 			return (DNS_R_INVALIDNSEC3);
7210 		result = add_wildcard_magic(rbtdb, name);
7211 		if (result != ISC_R_SUCCESS)
7212 			return (result);
7213 	}
7214 
7215 	node = NULL;
7216 	if (rdataset->type == dns_rdatatype_nsec3 ||
7217 	    rdataset->covers == dns_rdatatype_nsec3) {
7218 		result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
7219 		if (result == ISC_R_SUCCESS)
7220 			node->nsec = DNS_RBT_NSEC_NSEC3;
7221 	} else if (rdataset->type == dns_rdatatype_nsec) {
7222 		result = loadnode(rbtdb, name, &node, ISC_TRUE);
7223 	} else {
7224 		result = loadnode(rbtdb, name, &node, ISC_FALSE);
7225 	}
7226 	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
7227 		return (result);
7228 	if (result == ISC_R_SUCCESS) {
7229 		dns_name_t foundname;
7230 		dns_name_init(&foundname, NULL);
7231 		dns_rbt_namefromnode(node, &foundname);
7232 #ifdef DNS_RBT_USEHASH
7233 		node->locknum = node->hashval % rbtdb->node_lock_count;
7234 #else
7235 		node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
7236 			rbtdb->node_lock_count;
7237 #endif
7238 	}
7239 
7240 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
7241 					    &region,
7242 					    sizeof(rdatasetheader_t));
7243 	if (result != ISC_R_SUCCESS)
7244 		return (result);
7245 	newheader = (rdatasetheader_t *)region.base;
7246 	init_rdataset(rbtdb, newheader);
7247 	set_ttl(rbtdb, newheader,
7248 		rdataset->ttl + loadctx->now); /* XXX overflow check */
7249 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7250 						rdataset->covers);
7251 	newheader->attributes = 0;
7252 	newheader->trust = rdataset->trust;
7253 	newheader->serial = 1;
7254 	newheader->noqname = NULL;
7255 	newheader->closest = NULL;
7256 	newheader->count = init_count++;
7257 	newheader->additional_auth = NULL;
7258 	newheader->additional_glue = NULL;
7259 	newheader->last_used = 0;
7260 	newheader->node = node;
7261 	if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7262 		newheader->attributes |= RDATASET_ATTR_RESIGN;
7263 		newheader->resign = rdataset->resign;
7264 	} else
7265 		newheader->resign = 0;
7266 
7267 	result = add32(rbtdb, node, rbtdb->current_version, newheader,
7268 		       DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7269 	if (result == ISC_R_SUCCESS &&
7270 	    delegating_type(rbtdb, node, rdataset->type))
7271 		node->find_callback = 1;
7272 	else if (result == DNS_R_UNCHANGED)
7273 		result = ISC_R_SUCCESS;
7274 
7275 	return (result);
7276 }
7277 
7278 static isc_result_t
rbt_datafixer(dns_rbtnode_t * rbtnode,void * base,size_t filesize,void * arg,isc_uint64_t * crc)7279 rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize,
7280 	      void *arg, isc_uint64_t *crc)
7281 {
7282 	isc_result_t result;
7283 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *) arg;
7284 	rdatasetheader_t *header;
7285 	unsigned char *limit = ((unsigned char *) base) + filesize;
7286 	unsigned char *p;
7287 	size_t size;
7288 
7289 	REQUIRE(rbtnode != NULL);
7290 
7291 	for (header = rbtnode->data; header != NULL; header = header->next) {
7292 		p = (unsigned char *) header;
7293 
7294 		size = dns_rdataslab_size(p, sizeof(*header));
7295 		isc_crc64_update(crc, p, size);
7296 #ifdef DEBUG
7297 		hexdump("hashing header", p, sizeof(rdatasetheader_t));
7298 		hexdump("hashing slab", p + sizeof(rdatasetheader_t),
7299 			size - sizeof(rdatasetheader_t));
7300 #endif
7301 		header->serial = 1;
7302 		header->is_mmapped = 1;
7303 		header->node = rbtnode;
7304 		header->node_is_relative = 0;
7305 
7306 		if (rbtdb != NULL && RESIGN(header) && header->resign != 0) {
7307 			int idx = header->node->locknum;
7308 			result = isc_heap_insert(rbtdb->heaps[idx], header);
7309 			if (result != ISC_R_SUCCESS)
7310 				return (result);
7311 		}
7312 
7313 		if (header->next != NULL) {
7314 			size_t cooked = dns_rbt_serialize_align(size);
7315 			if ((uintptr_t)header->next !=
7316 				    (p - (unsigned char *)base) + cooked)
7317 				return (ISC_R_INVALIDFILE);
7318 			header->next = (rdatasetheader_t *)(p + cooked);
7319 			header->next_is_relative = 0;
7320 			if ((header->next < (rdatasetheader_t *) base) ||
7321 			    (header->next > (rdatasetheader_t *) limit))
7322 				return (ISC_R_INVALIDFILE);
7323 		}
7324 	}
7325 
7326 	return (ISC_R_SUCCESS);
7327 }
7328 
7329 /*
7330  * Load the RBT database from the image in 'f'
7331  */
7332 static isc_result_t
deserialize32(void * arg,FILE * f,off_t offset)7333 deserialize32(void *arg, FILE *f, off_t offset) {
7334 	isc_result_t result;
7335 	rbtdb_load_t *loadctx = arg;
7336 	dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7337 	rbtdb_file_header_t *header;
7338 	int fd;
7339 	off_t filesize = 0;
7340 	char *base;
7341 	dns_rbt_t *temporary_rbt = NULL;
7342 	int protect, flags;
7343 
7344 	REQUIRE(VALID_RBTDB(rbtdb));
7345 
7346 	/*
7347 	 * TODO CKB: since this is read-write (had to be to add nodes later)
7348 	 * we will need to lock the file or the nodes in it before modifying
7349 	 * the nodes in the file.
7350 	 */
7351 
7352 	/* Map in the whole file in one go */
7353 	fd = fileno(f);
7354 	isc_file_getsizefd(fd, &filesize);
7355 	protect = PROT_READ|PROT_WRITE;
7356 	flags = MAP_PRIVATE;
7357 #ifdef MAP_FILE
7358 	flags |= MAP_FILE;
7359 #endif
7360 
7361 	base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0);
7362 	if (base == NULL || base == MAP_FAILED)
7363 		return (ISC_R_FAILURE);
7364 
7365 	header = (rbtdb_file_header_t *)(base + offset);
7366 
7367 	rbtdb->mmap_location = base;
7368 	rbtdb->mmap_size = (size_t) filesize;
7369 	rbtdb->origin_node = NULL;
7370 
7371 	if (header->tree != 0) {
7372 		result = dns_rbt_deserialize_tree(base, filesize,
7373 						  (off_t) header->tree,
7374 						  rbtdb->common.mctx,
7375 						  delete_callback, rbtdb,
7376 						  rbt_datafixer, rbtdb,
7377 						  &rbtdb->origin_node,
7378 						  &temporary_rbt);
7379 		if (temporary_rbt != NULL) {
7380 			dns_rbt_destroy(&rbtdb->tree);
7381 			rbtdb->tree = temporary_rbt;
7382 			temporary_rbt = NULL;
7383 
7384 			rbtdb->origin_node =
7385 				(dns_rbtnode_t *)(header->tree + base + 1024);
7386 		}
7387 		if (result != ISC_R_SUCCESS)
7388 			return (result);
7389 	}
7390 
7391 	if (header->nsec != 0) {
7392 		result = dns_rbt_deserialize_tree(base, filesize,
7393 						  (off_t) header->nsec,
7394 						  rbtdb->common.mctx,
7395 						  delete_callback, rbtdb,
7396 						  rbt_datafixer, rbtdb,
7397 						  NULL, &temporary_rbt);
7398 		if (temporary_rbt != NULL) {
7399 			dns_rbt_destroy(&rbtdb->nsec);
7400 			rbtdb->nsec = temporary_rbt;
7401 			temporary_rbt = NULL;
7402 		}
7403 		if (result != ISC_R_SUCCESS)
7404 			return (result);
7405 	}
7406 
7407 	if (header->nsec3 != 0) {
7408 		result = dns_rbt_deserialize_tree(base, filesize,
7409 						  (off_t) header->nsec3,
7410 						  rbtdb->common.mctx,
7411 						  delete_callback, rbtdb,
7412 						  rbt_datafixer, rbtdb,
7413 						  NULL, &temporary_rbt);
7414 		if (temporary_rbt != NULL) {
7415 			dns_rbt_destroy(&rbtdb->nsec3);
7416 			rbtdb->nsec3 = temporary_rbt;
7417 			temporary_rbt = NULL;
7418 		}
7419 		if (result != ISC_R_SUCCESS)
7420 			return (result);
7421 	}
7422 
7423 	return (ISC_R_SUCCESS);
7424 }
7425 
7426 static isc_result_t
beginload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7427 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7428 	rbtdb_load_t *loadctx;
7429 	dns_rbtdb_t *rbtdb;
7430 	rbtdb = (dns_rbtdb_t *)db;
7431 
7432 	REQUIRE(DNS_CALLBACK_VALID(callbacks));
7433 	REQUIRE(VALID_RBTDB(rbtdb));
7434 
7435 	loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7436 	if (loadctx == NULL)
7437 		return (ISC_R_NOMEMORY);
7438 
7439 	loadctx->rbtdb = rbtdb;
7440 	if (IS_CACHE(rbtdb))
7441 		isc_stdtime_get(&loadctx->now);
7442 	else
7443 		loadctx->now = 0;
7444 
7445 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7446 
7447 	if (rbtdb->rpzs != NULL) {
7448 		isc_result_t result;
7449 
7450 		result = dns_rpz_beginload(&rbtdb->load_rpzs,
7451 					   rbtdb->rpzs, rbtdb->rpz_num);
7452 		if (result != ISC_R_SUCCESS) {
7453 			isc_mem_put(rbtdb->common.mctx, loadctx,
7454 				    sizeof(*loadctx));
7455 			return (result);
7456 		}
7457 	}
7458 
7459 	REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7460 		== 0);
7461 	rbtdb->attributes |= RBTDB_ATTR_LOADING;
7462 
7463 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7464 
7465 	callbacks->add = loading_addrdataset;
7466 	callbacks->add_private = loadctx;
7467 	callbacks->deserialize = deserialize32;
7468 	callbacks->deserialize_private = loadctx;
7469 
7470 	return (ISC_R_SUCCESS);
7471 }
7472 
7473 static isc_result_t
endload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7474 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7475 	rbtdb_load_t *loadctx;
7476 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7477 
7478 	REQUIRE(VALID_RBTDB(rbtdb));
7479 	REQUIRE(DNS_CALLBACK_VALID(callbacks));
7480 	loadctx = callbacks->add_private;
7481 	REQUIRE(loadctx != NULL);
7482 	REQUIRE(loadctx->rbtdb == rbtdb);
7483 
7484 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7485 
7486 	REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7487 	REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7488 
7489 	rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7490 	rbtdb->attributes |= RBTDB_ATTR_LOADED;
7491 
7492 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7493 
7494 	/*
7495 	 * If there's a KEY rdataset at the zone origin containing a
7496 	 * zone key, we consider the zone secure.
7497 	 */
7498 	if (! IS_CACHE(rbtdb) && rbtdb->origin_node != NULL)
7499 		iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7500 
7501 	callbacks->add = NULL;
7502 	callbacks->add_private = NULL;
7503 	callbacks->deserialize = NULL;
7504 	callbacks->deserialize_private = NULL;
7505 
7506 	isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7507 
7508 	return (ISC_R_SUCCESS);
7509 }
7510 
7511 /*
7512  * helper function to handle writing out the rdataset data pointed to
7513  * by the void *data pointer in the dns_rbtnode
7514  */
7515 static isc_result_t
rbt_datawriter(FILE * rbtfile,unsigned char * data,void * arg,isc_uint64_t * crc)7516 rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg,
7517 	       isc_uint64_t *crc)
7518 {
7519 	rbtdb_version_t *version = (rbtdb_version_t *) arg;
7520 	rbtdb_serial_t serial;
7521 	rdatasetheader_t newheader;
7522 	rdatasetheader_t *header = (rdatasetheader_t *) data, *next;
7523 	off_t where;
7524 	size_t cooked, size;
7525 	unsigned char *p;
7526 	isc_result_t result = ISC_R_SUCCESS;
7527 	char pad[sizeof(char *)];
7528 	uintptr_t off;
7529 
7530 	REQUIRE(rbtfile != NULL);
7531 	REQUIRE(data != NULL);
7532 	REQUIRE(version != NULL);
7533 
7534 	serial = version->serial;
7535 
7536 	for (; header != NULL; header = next) {
7537 		next = header->next;
7538 		do {
7539 			if (header->serial <= serial && !IGNORE(header)) {
7540 				if (NONEXISTENT(header))
7541 					header = NULL;
7542 				break;
7543 			} else
7544 				header = header->down;
7545 		} while (header != NULL);
7546 
7547 		if (header == NULL)
7548 			continue;
7549 
7550 		CHECK(isc_stdio_tell(rbtfile, &where));
7551 		size = dns_rdataslab_size((unsigned char *) header,
7552 					  sizeof(rdatasetheader_t));
7553 
7554 		p = (unsigned char *) header;
7555 		memmove(&newheader, p, sizeof(rdatasetheader_t));
7556 		newheader.down = NULL;
7557 		newheader.next = NULL;
7558 		off = where;
7559 		if ((off_t)off != where)
7560 			return (ISC_R_RANGE);
7561 		newheader.node = (dns_rbtnode_t *) off;
7562 		newheader.node_is_relative = 1;
7563 		newheader.serial = 1;
7564 
7565 		/*
7566 		 * Round size up to the next pointer sized offset so it
7567 		 * will be properly aligned when read back in.
7568 		 */
7569 		cooked = dns_rbt_serialize_align(size);
7570 		if (next != NULL) {
7571 			newheader.next = (rdatasetheader_t *) (off + cooked);
7572 			newheader.next_is_relative = 1;
7573 		}
7574 
7575 #ifdef DEBUG
7576 		hexdump("writing header", (unsigned char *) &newheader,
7577 			sizeof(rdatasetheader_t));
7578 		hexdump("writing slab", p + sizeof(rdatasetheader_t),
7579 			size - sizeof(rdatasetheader_t));
7580 #endif
7581 		isc_crc64_update(crc, (unsigned char *) &newheader,
7582 				 sizeof(rdatasetheader_t));
7583 		CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1,
7584 				      rbtfile, NULL));
7585 
7586 		isc_crc64_update(crc, p + sizeof(rdatasetheader_t),
7587 				 size - sizeof(rdatasetheader_t));
7588 		CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t),
7589 				      size - sizeof(rdatasetheader_t), 1,
7590 				      rbtfile, NULL));
7591 		/*
7592 		 * Pad to force alignment.
7593 		 */
7594 		if (size != (size_t) cooked) {
7595 			memset(pad, 0, sizeof(pad));
7596 			CHECK(isc_stdio_write(pad, cooked - size, 1,
7597 					      rbtfile, NULL));
7598 		}
7599 	}
7600 
7601  failure:
7602 	return (result);
7603 }
7604 
7605 /*
7606  * Write out a zeroed header as a placeholder.  Doing this ensures
7607  * that the file will not read while it is partially written, should
7608  * writing fail or be interrupted.
7609  */
7610 static isc_result_t
rbtdb_zero_header(FILE * rbtfile)7611 rbtdb_zero_header(FILE *rbtfile) {
7612 	char buffer[RBTDB_HEADER_LENGTH];
7613 	isc_result_t result;
7614 
7615 	memset(buffer, 0, RBTDB_HEADER_LENGTH);
7616 	result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL);
7617 	fflush(rbtfile);
7618 
7619 	return (result);
7620 }
7621 
7622 static isc_once_t once = ISC_ONCE_INIT;
7623 
7624 static void
init_file_version(void)7625 init_file_version(void) {
7626 	int n;
7627 
7628 	memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
7629 	n = snprintf(FILE_VERSION, sizeof(FILE_VERSION),
7630 		 "RBTDB Image %s %s", dns_major, dns_mapapi);
7631 	INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
7632 }
7633 
7634 /*
7635  * Write the file header out, recording the locations of the three
7636  * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
7637  * version information and any information stored in the rbtdb object
7638  * itself that should be stored here.
7639  */
7640 static isc_result_t
rbtdb_write_header(FILE * rbtfile,off_t tree_location,off_t nsec_location,off_t nsec3_location)7641 rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location,
7642 		   off_t nsec3_location)
7643 {
7644 	rbtdb_file_header_t header;
7645 	isc_result_t result;
7646 
7647 	RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
7648 
7649 	memset(&header, 0, sizeof(rbtdb_file_header_t));
7650 	memmove(header.version1, FILE_VERSION, sizeof(header.version1));
7651 	memmove(header.version2, FILE_VERSION, sizeof(header.version2));
7652 	header.ptrsize = (isc_uint32_t) sizeof(void *);
7653 	header.bigendian = (1 == htonl(1)) ? 1 : 0;
7654 	header.tree = (isc_uint64_t) tree_location;
7655 	header.nsec = (isc_uint64_t) nsec_location;
7656 	header.nsec3 = (isc_uint64_t) nsec3_location;
7657 	result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t),
7658 			      rbtfile, NULL);
7659 	fflush(rbtfile);
7660 
7661 	return (result);
7662 }
7663 
7664 static isc_result_t
serialize(dns_db_t * db,dns_dbversion_t * ver,FILE * rbtfile)7665 serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) {
7666 	rbtdb_version_t *version = (rbtdb_version_t *) ver;
7667 	dns_rbtdb_t *rbtdb;
7668 	isc_result_t result;
7669 	off_t tree_location, nsec_location, nsec3_location, header_location;
7670 
7671 	rbtdb = (dns_rbtdb_t *)db;
7672 
7673 	REQUIRE(VALID_RBTDB(rbtdb));
7674 	REQUIRE(rbtfile != NULL);
7675 
7676 	/* Ensure we're writing to a plain file */
7677 	CHECK(isc_file_isplainfilefd(fileno(rbtfile)));
7678 
7679 	/*
7680 	 * first, write out a zeroed header to store rbtdb information
7681 	 *
7682 	 * then for each of the three trees, store the current position
7683 	 * in the file and call dns_rbt_serialize_tree
7684 	 *
7685 	 * finally, write out the rbtdb header, storing the locations of the
7686 	 * rbtheaders
7687 	 *
7688 	 * NOTE: need to do something better with the return codes, &= will
7689 	 * not work.
7690 	 */
7691 	CHECK(isc_stdio_tell(rbtfile, &header_location));
7692 	CHECK(rbtdb_zero_header(rbtfile));
7693 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter,
7694 				     version, &tree_location));
7695 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter,
7696 				     version, &nsec_location));
7697 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter,
7698 				     version, &nsec3_location));
7699 
7700 	CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET));
7701 	CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location,
7702 				 nsec3_location));
7703  failure:
7704 	return (result);
7705 }
7706 
7707 static isc_result_t
dump(dns_db_t * db,dns_dbversion_t * version,const char * filename,dns_masterformat_t masterformat)7708 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7709      dns_masterformat_t masterformat)
7710 {
7711 	dns_rbtdb_t *rbtdb;
7712 	rbtdb_version_t *rbtversion = version;
7713 
7714 	rbtdb = (dns_rbtdb_t *)db;
7715 
7716 	REQUIRE(VALID_RBTDB(rbtdb));
7717 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7718 
7719 	return (dns_master_dump2(rbtdb->common.mctx, db, version,
7720 				 &dns_master_style_default,
7721 				 filename, masterformat));
7722 }
7723 
7724 static void
delete_callback(void * data,void * arg)7725 delete_callback(void *data, void *arg) {
7726 	dns_rbtdb_t *rbtdb = arg;
7727 	rdatasetheader_t *current, *next;
7728 	unsigned int locknum;
7729 
7730 	current = data;
7731 	locknum = current->node->locknum;
7732 	NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7733 	while (current != NULL) {
7734 		next = current->next;
7735 		free_rdataset(rbtdb, rbtdb->common.mctx, current);
7736 		current = next;
7737 	}
7738 	NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7739 }
7740 
7741 static isc_boolean_t
issecure(dns_db_t * db)7742 issecure(dns_db_t *db) {
7743 	dns_rbtdb_t *rbtdb;
7744 	isc_boolean_t secure;
7745 
7746 	rbtdb = (dns_rbtdb_t *)db;
7747 
7748 	REQUIRE(VALID_RBTDB(rbtdb));
7749 
7750 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7751 	secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7752 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7753 
7754 	return (secure);
7755 }
7756 
7757 static isc_boolean_t
isdnssec(dns_db_t * db)7758 isdnssec(dns_db_t *db) {
7759 	dns_rbtdb_t *rbtdb;
7760 	isc_boolean_t dnssec;
7761 
7762 	rbtdb = (dns_rbtdb_t *)db;
7763 
7764 	REQUIRE(VALID_RBTDB(rbtdb));
7765 
7766 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7767 	dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7768 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7769 
7770 	return (dnssec);
7771 }
7772 
7773 static unsigned int
nodecount(dns_db_t * db)7774 nodecount(dns_db_t *db) {
7775 	dns_rbtdb_t *rbtdb;
7776 	unsigned int count;
7777 
7778 	rbtdb = (dns_rbtdb_t *)db;
7779 
7780 	REQUIRE(VALID_RBTDB(rbtdb));
7781 
7782 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7783 	count = dns_rbt_nodecount(rbtdb->tree);
7784 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7785 
7786 	return (count);
7787 }
7788 
7789 static unsigned int
hashsize(dns_db_t * db)7790 hashsize(dns_db_t *db) {
7791 	dns_rbtdb_t *rbtdb;
7792 	unsigned int count;
7793 
7794 	rbtdb = (dns_rbtdb_t *)db;
7795 
7796 	REQUIRE(VALID_RBTDB(rbtdb));
7797 
7798 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7799 	count = dns_rbt_hashsize(rbtdb->tree);
7800 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7801 
7802 	return (count);
7803 }
7804 
7805 static void
settask(dns_db_t * db,isc_task_t * task)7806 settask(dns_db_t *db, isc_task_t *task) {
7807 	dns_rbtdb_t *rbtdb;
7808 
7809 	rbtdb = (dns_rbtdb_t *)db;
7810 
7811 	REQUIRE(VALID_RBTDB(rbtdb));
7812 
7813 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7814 	if (rbtdb->task != NULL)
7815 		isc_task_detach(&rbtdb->task);
7816 	if (task != NULL)
7817 		isc_task_attach(task, &rbtdb->task);
7818 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7819 }
7820 
7821 static isc_boolean_t
ispersistent(dns_db_t * db)7822 ispersistent(dns_db_t *db) {
7823 	UNUSED(db);
7824 	return (ISC_FALSE);
7825 }
7826 
7827 static isc_result_t
getoriginnode(dns_db_t * db,dns_dbnode_t ** nodep)7828 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7829 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7830 	dns_rbtnode_t *onode;
7831 	isc_result_t result = ISC_R_SUCCESS;
7832 
7833 	REQUIRE(VALID_RBTDB(rbtdb));
7834 	REQUIRE(nodep != NULL && *nodep == NULL);
7835 
7836 	/* Note that the access to origin_node doesn't require a DB lock */
7837 	onode = (dns_rbtnode_t *)rbtdb->origin_node;
7838 	if (onode != NULL) {
7839 		NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7840 		new_reference(rbtdb, onode);
7841 		NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7842 
7843 		*nodep = rbtdb->origin_node;
7844 	} else {
7845 		INSIST(IS_CACHE(rbtdb));
7846 		result = ISC_R_NOTFOUND;
7847 	}
7848 
7849 	return (result);
7850 }
7851 
7852 static isc_result_t
getnsec3parameters(dns_db_t * db,dns_dbversion_t * version,dns_hash_t * hash,isc_uint8_t * flags,isc_uint16_t * iterations,unsigned char * salt,size_t * salt_length)7853 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7854 		   isc_uint8_t *flags, isc_uint16_t *iterations,
7855 		   unsigned char *salt, size_t *salt_length)
7856 {
7857 	dns_rbtdb_t *rbtdb;
7858 	isc_result_t result = ISC_R_NOTFOUND;
7859 	rbtdb_version_t *rbtversion = version;
7860 
7861 	rbtdb = (dns_rbtdb_t *)db;
7862 
7863 	REQUIRE(VALID_RBTDB(rbtdb));
7864 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7865 
7866 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7867 
7868 	if (rbtversion == NULL)
7869 		rbtversion = rbtdb->current_version;
7870 
7871 	if (rbtversion->havensec3) {
7872 		if (hash != NULL)
7873 			*hash = rbtversion->hash;
7874 		if (salt != NULL && salt_length != NULL) {
7875 			REQUIRE(*salt_length >= rbtversion->salt_length);
7876 			memmove(salt, rbtversion->salt,
7877 				rbtversion->salt_length);
7878 		}
7879 		if (salt_length != NULL)
7880 			*salt_length = rbtversion->salt_length;
7881 		if (iterations != NULL)
7882 			*iterations = rbtversion->iterations;
7883 		if (flags != NULL)
7884 			*flags = rbtversion->flags;
7885 		result = ISC_R_SUCCESS;
7886 	}
7887 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7888 
7889 	return (result);
7890 }
7891 
7892 static isc_result_t
setsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,isc_stdtime_t resign)7893 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7894 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7895 	isc_stdtime_t oldresign;
7896 	isc_result_t result = ISC_R_SUCCESS;
7897 	rdatasetheader_t *header;
7898 
7899 	REQUIRE(VALID_RBTDB(rbtdb));
7900 	REQUIRE(!IS_CACHE(rbtdb));
7901 	REQUIRE(rdataset != NULL);
7902 
7903 	header = rdataset->private3;
7904 	header--;
7905 
7906 	NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7907 		  isc_rwlocktype_write);
7908 
7909 	oldresign = header->resign;
7910 	header->resign = resign;
7911 	if (header->heap_index != 0) {
7912 		INSIST(RESIGN(header));
7913 		if (resign == 0) {
7914 			isc_heap_delete(rbtdb->heaps[header->node->locknum],
7915 					header->heap_index);
7916 			header->heap_index = 0;
7917 		} else if (resign < oldresign)
7918 			isc_heap_increased(rbtdb->heaps[header->node->locknum],
7919 					   header->heap_index);
7920 		else if (resign > oldresign)
7921 			isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7922 					   header->heap_index);
7923 	} else if (resign && header->heap_index == 0) {
7924 		header->attributes |= RDATASET_ATTR_RESIGN;
7925 		result = resign_insert(rbtdb, header->node->locknum, header);
7926 	}
7927 	NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7928 		    isc_rwlocktype_write);
7929 	return (result);
7930 }
7931 
7932 static isc_result_t
getsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,dns_name_t * foundname)7933 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7934 	       dns_name_t *foundname)
7935 {
7936 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7937 	rdatasetheader_t *header = NULL, *this;
7938 	unsigned int i;
7939 	isc_result_t result = ISC_R_NOTFOUND;
7940 	unsigned int locknum;
7941 
7942 	REQUIRE(VALID_RBTDB(rbtdb));
7943 
7944 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7945 
7946 	for (i = 0; i < rbtdb->node_lock_count; i++) {
7947 		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7948 		this = isc_heap_element(rbtdb->heaps[i], 1);
7949 		if (this == NULL) {
7950 			NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7951 				    isc_rwlocktype_read);
7952 			continue;
7953 		}
7954 		if (header == NULL)
7955 			header = this;
7956 		else if (isc_serial_lt(this->resign, header->resign)) {
7957 			locknum = header->node->locknum;
7958 			NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7959 				    isc_rwlocktype_read);
7960 			header = this;
7961 		} else
7962 			NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7963 				    isc_rwlocktype_read);
7964 	}
7965 
7966 	if (header == NULL)
7967 		goto unlock;
7968 
7969 	bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7970 
7971 	if (foundname != NULL)
7972 		dns_rbt_fullnamefromnode(header->node, foundname);
7973 
7974 	NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7975 		    isc_rwlocktype_read);
7976 
7977 	result = ISC_R_SUCCESS;
7978 
7979  unlock:
7980 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7981 
7982 	return (result);
7983 }
7984 
7985 static void
resigned(dns_db_t * db,dns_rdataset_t * rdataset,dns_dbversion_t * version)7986 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7987 {
7988 	rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7989 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7990 	dns_rbtnode_t *node;
7991 	rdatasetheader_t *header;
7992 
7993 	REQUIRE(VALID_RBTDB(rbtdb));
7994 	REQUIRE(rdataset != NULL);
7995 	REQUIRE(rdataset->methods == &rdataset_methods);
7996 	REQUIRE(rbtdb->future_version == rbtversion);
7997 	REQUIRE(rbtversion != NULL);
7998 	REQUIRE(rbtversion->writer);
7999 	REQUIRE(rbtversion->rbtdb == rbtdb);
8000 
8001 	node = rdataset->private2;
8002 	INSIST(node != NULL);
8003 	header = rdataset->private3;
8004 	INSIST(header != NULL);
8005 	header--;
8006 
8007 	if (header->heap_index == 0)
8008 		return;
8009 
8010 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8011 	NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
8012 		  isc_rwlocktype_write);
8013 	/*
8014 	 * Delete from heap and save to re-signed list so that it can
8015 	 * be restored if we backout of this change.
8016 	 */
8017 	resign_delete(rbtdb, rbtversion, header);
8018 	NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
8019 		    isc_rwlocktype_write);
8020 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8021 }
8022 
8023 static isc_result_t
setcachestats(dns_db_t * db,isc_stats_t * stats)8024 setcachestats(dns_db_t *db, isc_stats_t *stats) {
8025 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8026 
8027 	REQUIRE(VALID_RBTDB(rbtdb));
8028 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8029 	REQUIRE(stats != NULL);
8030 
8031 	isc_stats_attach(stats, &rbtdb->cachestats);
8032 	return (ISC_R_SUCCESS);
8033 }
8034 
8035 static dns_stats_t *
getrrsetstats(dns_db_t * db)8036 getrrsetstats(dns_db_t *db) {
8037 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8038 
8039 	REQUIRE(VALID_RBTDB(rbtdb));
8040 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8041 
8042 	return (rbtdb->rrsetstats);
8043 }
8044 
/*
 * Method table installed in rbtdb->common.methods by the create function
 * for every database type other than dns_dbtype_cache (i.e. zone and stub
 * databases).
 *
 * The initializer is positional: entries must stay in the order of the
 * members of dns_dbmethods_t (declared outside this file -- check the
 * declaration before adding or moving an entry).  A NULL entry means
 * this table supplies no function for that slot.
 */
static dns_dbmethods_t zone_methods = {
	attach,
	detach,
	beginload,
	endload,
	serialize,
	dump,
	currentversion,
	newversion,
	attachversion,
	closeversion,
	findnode,
	zone_find,
	zone_findzonecut,
	attachnode,
	detachnode,
	expirenode,
	printnode,
	createiterator,
	zone_findrdataset,
	allrdatasets,
	addrdataset,
	subtractrdataset,
	deleterdataset,
	issecure,
	nodecount,
	ispersistent,
	overmem,
	settask,
	getoriginnode,
	NULL,			/* also NULL in cache_methods */
	getnsec3parameters,
	findnsec3node,
	setsigningtime,
	getsigningtime,
	resigned,
	isdnssec,
	NULL,			/* cache_methods has getrrsetstats here */
	rpz_attach,
	rpz_ready,
	NULL,			/* also NULL in cache_methods */
	NULL,			/* also NULL in cache_methods */
	NULL,			/* cache_methods has setcachestats here */
	hashsize
};
8090 
/*
 * Method table installed in rbtdb->common.methods by the create function
 * for dns_dbtype_cache databases.
 *
 * The initializer is positional: entries must stay in the order of the
 * members of dns_dbmethods_t (declared outside this file -- check the
 * declaration before adding or moving an entry).  A NULL entry means
 * this table supplies no function for that slot; the comments name the
 * function zone_methods has in the same position.
 */
static dns_dbmethods_t cache_methods = {
	attach,
	detach,
	beginload,
	endload,
	NULL,			/* zone_methods: serialize */
	dump,
	currentversion,
	newversion,
	attachversion,
	closeversion,
	findnode,
	cache_find,
	cache_findzonecut,
	attachnode,
	detachnode,
	expirenode,
	printnode,
	createiterator,
	cache_findrdataset,
	allrdatasets,
	addrdataset,
	subtractrdataset,
	deleterdataset,
	issecure,
	nodecount,
	ispersistent,
	overmem,
	settask,
	getoriginnode,
	NULL,			/* also NULL in zone_methods */
	NULL,			/* zone_methods: getnsec3parameters */
	NULL,			/* zone_methods: findnsec3node */
	NULL,			/* zone_methods: setsigningtime */
	NULL,			/* zone_methods: getsigningtime */
	NULL,			/* zone_methods: resigned */
	isdnssec,
	getrrsetstats,
	NULL,			/* zone_methods: rpz_attach */
	NULL,			/* zone_methods: rpz_ready */
	NULL,			/* also NULL in zone_methods */
	NULL,			/* also NULL in zone_methods */
	setcachestats,
	hashsize
};
8136 
8137 isc_result_t
8138 #ifdef DNS_RBTDB_VERSION64
dns_rbtdb64_create(isc_mem_t * mctx,dns_name_t * origin,dns_dbtype_t type,dns_rdataclass_t rdclass,unsigned int argc,char * argv[],void * driverarg,dns_db_t ** dbp)8139 dns_rbtdb64_create
8140 #else
8141 dns_rbtdb_create
8142 #endif
8143 		(isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
8144 		 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
8145 		 void *driverarg, dns_db_t **dbp)
8146 {
8147 	dns_rbtdb_t *rbtdb;
8148 	isc_result_t result;
8149 	int i;
8150 	dns_name_t name;
8151 	isc_boolean_t (*sooner)(void *, void *);
8152 	isc_mem_t *hmctx = mctx;
8153 
8154 	/* Keep the compiler happy. */
8155 	UNUSED(driverarg);
8156 
8157 	rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
8158 	if (rbtdb == NULL)
8159 		return (ISC_R_NOMEMORY);
8160 
8161 	/*
8162 	 * If argv[0] exists, it points to a memory context to use for heap
8163 	 */
8164 	if (argc != 0)
8165 		hmctx = (isc_mem_t *) argv[0];
8166 
8167 	memset(rbtdb, '\0', sizeof(*rbtdb));
8168 	dns_name_init(&rbtdb->common.origin, NULL);
8169 	rbtdb->common.attributes = 0;
8170 	if (type == dns_dbtype_cache) {
8171 		rbtdb->common.methods = &cache_methods;
8172 		rbtdb->common.attributes |= DNS_DBATTR_CACHE;
8173 	} else if (type == dns_dbtype_stub) {
8174 		rbtdb->common.methods = &zone_methods;
8175 		rbtdb->common.attributes |= DNS_DBATTR_STUB;
8176 	} else
8177 		rbtdb->common.methods = &zone_methods;
8178 	rbtdb->common.rdclass = rdclass;
8179 	rbtdb->common.mctx = NULL;
8180 
8181 	result = RBTDB_INITLOCK(&rbtdb->lock);
8182 	if (result != ISC_R_SUCCESS)
8183 		goto cleanup_rbtdb;
8184 
8185 	result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
8186 	if (result != ISC_R_SUCCESS)
8187 		goto cleanup_lock;
8188 
8189 	/*
8190 	 * Initialize node_lock_count in a generic way to support future
8191 	 * extension which allows the user to specify this value on creation.
8192 	 * Note that when specified for a cache DB it must be larger than 1
8193 	 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
8194 	 */
8195 	if (rbtdb->node_lock_count == 0) {
8196 		if (IS_CACHE(rbtdb))
8197 			rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
8198 		else
8199 			rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
8200 	} else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
8201 		result = ISC_R_RANGE;
8202 		goto cleanup_tree_lock;
8203 	}
8204 	INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
8205 	rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
8206 					sizeof(rbtdb_nodelock_t));
8207 	if (rbtdb->node_locks == NULL) {
8208 		result = ISC_R_NOMEMORY;
8209 		goto cleanup_tree_lock;
8210 	}
8211 
8212 	rbtdb->cachestats = NULL;
8213 	rbtdb->rrsetstats = NULL;
8214 	if (IS_CACHE(rbtdb)) {
8215 		result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
8216 		if (result != ISC_R_SUCCESS)
8217 			goto cleanup_node_locks;
8218 		rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
8219 					       sizeof(rdatasetheaderlist_t));
8220 		if (rbtdb->rdatasets == NULL) {
8221 			result = ISC_R_NOMEMORY;
8222 			goto cleanup_rrsetstats;
8223 		}
8224 		for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8225 			ISC_LIST_INIT(rbtdb->rdatasets[i]);
8226 	} else
8227 		rbtdb->rdatasets = NULL;
8228 
8229 	/*
8230 	 * Create the heaps.
8231 	 */
8232 	rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
8233 				   sizeof(isc_heap_t *));
8234 	if (rbtdb->heaps == NULL) {
8235 		result = ISC_R_NOMEMORY;
8236 		goto cleanup_rdatasets;
8237 	}
8238 	for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8239 		rbtdb->heaps[i] = NULL;
8240 	sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
8241 	for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8242 		result = isc_heap_create(hmctx, sooner, set_index, 0,
8243 					 &rbtdb->heaps[i]);
8244 		if (result != ISC_R_SUCCESS)
8245 			goto cleanup_heaps;
8246 	}
8247 
8248 	/*
8249 	 * Create deadnode lists.
8250 	 */
8251 	rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
8252 				       sizeof(rbtnodelist_t));
8253 	if (rbtdb->deadnodes == NULL) {
8254 		result = ISC_R_NOMEMORY;
8255 		goto cleanup_heaps;
8256 	}
8257 	for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8258 		ISC_LIST_INIT(rbtdb->deadnodes[i]);
8259 
8260 	rbtdb->active = rbtdb->node_lock_count;
8261 
8262 	for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
8263 		result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
8264 		if (result == ISC_R_SUCCESS) {
8265 			result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
8266 			if (result != ISC_R_SUCCESS)
8267 				NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
8268 		}
8269 		if (result != ISC_R_SUCCESS) {
8270 			while (i-- > 0) {
8271 				NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
8272 				isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
8273 				isc_refcount_destroy(&rbtdb->node_locks[i].references);
8274 			}
8275 			goto cleanup_deadnodes;
8276 		}
8277 		rbtdb->node_locks[i].exiting = ISC_FALSE;
8278 	}
8279 
8280 	/*
8281 	 * Attach to the mctx.  The database will persist so long as there
8282 	 * are references to it, and attaching to the mctx ensures that our
8283 	 * mctx won't disappear out from under us.
8284 	 */
8285 	isc_mem_attach(mctx, &rbtdb->common.mctx);
8286 	isc_mem_attach(hmctx, &rbtdb->hmctx);
8287 
8288 	/*
8289 	 * Must be initialized before free_rbtdb() is called.
8290 	 */
8291 	isc_ondestroy_init(&rbtdb->common.ondest);
8292 
8293 	/*
8294 	 * Make a copy of the origin name.
8295 	 */
8296 	result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
8297 	if (result != ISC_R_SUCCESS) {
8298 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8299 		return (result);
8300 	}
8301 
8302 	/*
8303 	 * Make the Red-Black Trees.
8304 	 */
8305 	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
8306 	if (result != ISC_R_SUCCESS) {
8307 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8308 		return (result);
8309 	}
8310 
8311 	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
8312 	if (result != ISC_R_SUCCESS) {
8313 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8314 		return (result);
8315 	}
8316 
8317 	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
8318 	if (result != ISC_R_SUCCESS) {
8319 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8320 		return (result);
8321 	}
8322 
8323 	/*
8324 	 * In order to set the node callback bit correctly in zone databases,
8325 	 * we need to know if the node has the origin name of the zone.
8326 	 * In loading_addrdataset() we could simply compare the new name
8327 	 * to the origin name, but this is expensive.  Also, we don't know the
8328 	 * node name in addrdataset(), so we need another way of knowing the
8329 	 * zone's top.
8330 	 *
8331 	 * We now explicitly create a node for the zone's origin, and then
8332 	 * we simply remember the node's address.  This is safe, because
8333 	 * the top-of-zone node can never be deleted, nor can its address
8334 	 * change.
8335 	 */
8336 	if (!IS_CACHE(rbtdb)) {
8337 		dns_rbtnode_t *nsec3node;
8338 
8339 		rbtdb->origin_node = NULL;
8340 		result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
8341 					 &rbtdb->origin_node);
8342 		if (result != ISC_R_SUCCESS) {
8343 			INSIST(result != ISC_R_EXISTS);
8344 			free_rbtdb(rbtdb, ISC_FALSE, NULL);
8345 			return (result);
8346 		}
8347 		rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
8348 		/*
8349 		 * We need to give the origin node the right locknum.
8350 		 */
8351 		dns_name_init(&name, NULL);
8352 		dns_rbt_namefromnode(rbtdb->origin_node, &name);
8353 #ifdef DNS_RBT_USEHASH
8354 		rbtdb->origin_node->locknum =
8355 			rbtdb->origin_node->hashval %
8356 			rbtdb->node_lock_count;
8357 #else
8358 		rbtdb->origin_node->locknum =
8359 			dns_name_hash(&name, ISC_TRUE) %
8360 			rbtdb->node_lock_count;
8361 #endif
8362 		/*
8363 		 * Add an apex node to the NSEC3 tree so that NSEC3 searches
8364 		 * return partial matches when there is only a single NSEC3
8365 		 * record in the tree.
8366 		 */
8367 		nsec3node = NULL;
8368 		result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
8369 					 &nsec3node);
8370 		if (result != ISC_R_SUCCESS) {
8371 			INSIST(result != ISC_R_EXISTS);
8372 			free_rbtdb(rbtdb, ISC_FALSE, NULL);
8373 			return (result);
8374 		}
8375 		nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
8376 		/*
8377 		 * We need to give the nsec3 origin node the right locknum.
8378 		 */
8379 		dns_name_init(&name, NULL);
8380 		dns_rbt_namefromnode(nsec3node, &name);
8381 #ifdef DNS_RBT_USEHASH
8382 		nsec3node->locknum = nsec3node->hashval %
8383 			rbtdb->node_lock_count;
8384 #else
8385 		nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
8386 			rbtdb->node_lock_count;
8387 #endif
8388 	}
8389 
8390 	/*
8391 	 * Misc. Initialization.
8392 	 */
8393 	result = isc_refcount_init(&rbtdb->references, 1);
8394 	if (result != ISC_R_SUCCESS) {
8395 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8396 		return (result);
8397 	}
8398 	rbtdb->attributes = 0;
8399 	rbtdb->task = NULL;
8400 	rbtdb->rpzs = NULL;
8401 	rbtdb->load_rpzs = NULL;
8402 	rbtdb->rpz_num = DNS_RPZ_INVALID_NUM;
8403 
8404 	/*
8405 	 * Version Initialization.
8406 	 */
8407 	rbtdb->current_serial = 1;
8408 	rbtdb->least_serial = 1;
8409 	rbtdb->next_serial = 2;
8410 	rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
8411 	if (rbtdb->current_version == NULL) {
8412 		isc_refcount_decrement(&rbtdb->references, NULL);
8413 		isc_refcount_destroy(&rbtdb->references);
8414 		free_rbtdb(rbtdb, ISC_FALSE, NULL);
8415 		return (ISC_R_NOMEMORY);
8416 	}
8417 	rbtdb->current_version->rbtdb = rbtdb;
8418 	rbtdb->current_version->secure = dns_db_insecure;
8419 	rbtdb->current_version->havensec3 = ISC_FALSE;
8420 	rbtdb->current_version->flags = 0;
8421 	rbtdb->current_version->iterations = 0;
8422 	rbtdb->current_version->hash = 0;
8423 	rbtdb->current_version->salt_length = 0;
8424 	memset(rbtdb->current_version->salt, 0,
8425 	       sizeof(rbtdb->current_version->salt));
8426 	rbtdb->future_version = NULL;
8427 	ISC_LIST_INIT(rbtdb->open_versions);
8428 	/*
8429 	 * Keep the current version in the open list so that list operation
8430 	 * won't happen in normal lookup operations.
8431 	 */
8432 	PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
8433 
8434 	rbtdb->common.magic = DNS_DB_MAGIC;
8435 	rbtdb->common.impmagic = RBTDB_MAGIC;
8436 
8437 	*dbp = (dns_db_t *)rbtdb;
8438 
8439 	return (ISC_R_SUCCESS);
8440 
8441  cleanup_deadnodes:
8442 	isc_mem_put(mctx, rbtdb->deadnodes,
8443 		    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
8444 
8445  cleanup_heaps:
8446 	if (rbtdb->heaps != NULL) {
8447 		for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
8448 			if (rbtdb->heaps[i] != NULL)
8449 				isc_heap_destroy(&rbtdb->heaps[i]);
8450 		isc_mem_put(hmctx, rbtdb->heaps,
8451 			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
8452 	}
8453 
8454  cleanup_rdatasets:
8455 	if (rbtdb->rdatasets != NULL)
8456 		isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
8457 			    sizeof(rdatasetheaderlist_t));
8458  cleanup_rrsetstats:
8459 	if (rbtdb->rrsetstats != NULL)
8460 		dns_stats_detach(&rbtdb->rrsetstats);
8461 
8462  cleanup_node_locks:
8463 	isc_mem_put(mctx, rbtdb->node_locks,
8464 		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
8465 
8466  cleanup_tree_lock:
8467 	isc_rwlock_destroy(&rbtdb->tree_lock);
8468 
8469  cleanup_lock:
8470 	RBTDB_DESTROYLOCK(&rbtdb->lock);
8471 
8472  cleanup_rbtdb:
8473 	isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
8474 	return (result);
8475 }
8476 
8477 
8478 /*
8479  * Slabbed Rdataset Methods
8480  */
8481 
8482 static void
rdataset_disassociate(dns_rdataset_t * rdataset)8483 rdataset_disassociate(dns_rdataset_t *rdataset) {
8484 	dns_db_t *db = rdataset->private1;
8485 	dns_dbnode_t *node = rdataset->private2;
8486 
8487 	detachnode(db, &node);
8488 }
8489 
8490 static isc_result_t
rdataset_first(dns_rdataset_t * rdataset)8491 rdataset_first(dns_rdataset_t *rdataset) {
8492 	unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8493 	unsigned int count;
8494 
8495 	count = raw[0] * 256 + raw[1];
8496 	if (count == 0) {
8497 		rdataset->private5 = NULL;
8498 		return (ISC_R_NOMORE);
8499 	}
8500 
8501 #if DNS_RDATASET_FIXED
8502 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
8503 		raw += 2 + (4 * count);
8504 	else
8505 #endif
8506 		raw += 2;
8507 
8508 	/*
8509 	 * The privateuint4 field is the number of rdata beyond the
8510 	 * cursor position, so we decrement the total count by one
8511 	 * before storing it.
8512 	 *
8513 	 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
8514 	 * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
8515 	 * to the first entry in the offset table.
8516 	 */
8517 	count--;
8518 	rdataset->privateuint4 = count;
8519 	rdataset->private5 = raw;
8520 
8521 	return (ISC_R_SUCCESS);
8522 }
8523 
8524 static isc_result_t
rdataset_next(dns_rdataset_t * rdataset)8525 rdataset_next(dns_rdataset_t *rdataset) {
8526 	unsigned int count;
8527 	unsigned int length;
8528 	unsigned char *raw;     /* RDATASLAB */
8529 
8530 	count = rdataset->privateuint4;
8531 	if (count == 0)
8532 		return (ISC_R_NOMORE);
8533 	count--;
8534 	rdataset->privateuint4 = count;
8535 
8536 	/*
8537 	 * Skip forward one record (length + 4) or one offset (4).
8538 	 */
8539 	raw = rdataset->private5;
8540 #if DNS_RDATASET_FIXED
8541 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
8542 #endif
8543 		length = raw[0] * 256 + raw[1];
8544 		raw += length;
8545 #if DNS_RDATASET_FIXED
8546 	}
8547 	rdataset->private5 = raw + 4;           /* length(2) + order(2) */
8548 #else
8549 	rdataset->private5 = raw + 2;           /* length(2) */
8550 #endif
8551 
8552 	return (ISC_R_SUCCESS);
8553 }
8554 
8555 static void
rdataset_current(dns_rdataset_t * rdataset,dns_rdata_t * rdata)8556 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
8557 	unsigned char *raw = rdataset->private5;        /* RDATASLAB */
8558 #if DNS_RDATASET_FIXED
8559 	unsigned int offset;
8560 #endif
8561 	unsigned int length;
8562 	isc_region_t r;
8563 	unsigned int flags = 0;
8564 
8565 	REQUIRE(raw != NULL);
8566 
8567 	/*
8568 	 * Find the start of the record if not already in private5
8569 	 * then skip the length and order fields.
8570 	 */
8571 #if DNS_RDATASET_FIXED
8572 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
8573 		offset = (raw[0] << 24) + (raw[1] << 16) +
8574 			 (raw[2] << 8) + raw[3];
8575 		raw = rdataset->private3;
8576 		raw += offset;
8577 	}
8578 #endif
8579 	length = raw[0] * 256 + raw[1];
8580 #if DNS_RDATASET_FIXED
8581 	raw += 4;
8582 #else
8583 	raw += 2;
8584 #endif
8585 	if (rdataset->type == dns_rdatatype_rrsig) {
8586 		if (*raw & DNS_RDATASLAB_OFFLINE)
8587 			flags |= DNS_RDATA_OFFLINE;
8588 		length--;
8589 		raw++;
8590 	}
8591 	r.length = length;
8592 	r.base = raw;
8593 	dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
8594 	rdata->flags |= flags;
8595 }
8596 
8597 static void
rdataset_clone(dns_rdataset_t * source,dns_rdataset_t * target)8598 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
8599 	dns_db_t *db = source->private1;
8600 	dns_dbnode_t *node = source->private2;
8601 	dns_dbnode_t *cloned_node = NULL;
8602 
8603 	attachnode(db, node, &cloned_node);
8604 	INSIST(!ISC_LINK_LINKED(target, link));
8605 	*target = *source;
8606 	ISC_LINK_INIT(target, link);
8607 
8608 	/*
8609 	 * Reset iterator state.
8610 	 */
8611 	target->privateuint4 = 0;
8612 	target->private5 = NULL;
8613 }
8614 
8615 static unsigned int
rdataset_count(dns_rdataset_t * rdataset)8616 rdataset_count(dns_rdataset_t *rdataset) {
8617 	unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8618 	unsigned int count;
8619 
8620 	count = raw[0] * 256 + raw[1];
8621 
8622 	return (count);
8623 }
8624 
8625 static isc_result_t
rdataset_getnoqname(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)8626 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
8627 		    dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8628 {
8629 	dns_db_t *db = rdataset->private1;
8630 	dns_dbnode_t *node = rdataset->private2;
8631 	dns_dbnode_t *cloned_node;
8632 	struct noqname *noqname = rdataset->private6;
8633 
8634 	cloned_node = NULL;
8635 	attachnode(db, node, &cloned_node);
8636 	nsec->methods = &rdataset_methods;
8637 	nsec->rdclass = db->rdclass;
8638 	nsec->type = noqname->type;
8639 	nsec->covers = 0;
8640 	nsec->ttl = rdataset->ttl;
8641 	nsec->trust = rdataset->trust;
8642 	nsec->private1 = rdataset->private1;
8643 	nsec->private2 = rdataset->private2;
8644 	nsec->private3 = noqname->neg;
8645 	nsec->privateuint4 = 0;
8646 	nsec->private5 = NULL;
8647 	nsec->private6 = NULL;
8648 	nsec->private7 = NULL;
8649 
8650 	cloned_node = NULL;
8651 	attachnode(db, node, &cloned_node);
8652 	nsecsig->methods = &rdataset_methods;
8653 	nsecsig->rdclass = db->rdclass;
8654 	nsecsig->type = dns_rdatatype_rrsig;
8655 	nsecsig->covers = noqname->type;
8656 	nsecsig->ttl = rdataset->ttl;
8657 	nsecsig->trust = rdataset->trust;
8658 	nsecsig->private1 = rdataset->private1;
8659 	nsecsig->private2 = rdataset->private2;
8660 	nsecsig->private3 = noqname->negsig;
8661 	nsecsig->privateuint4 = 0;
8662 	nsecsig->private5 = NULL;
8663 	nsec->private6 = NULL;
8664 	nsec->private7 = NULL;
8665 
8666 	dns_name_clone(&noqname->name, name);
8667 
8668 	return (ISC_R_SUCCESS);
8669 }
8670 
8671 static isc_result_t
rdataset_getclosest(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)8672 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8673 		    dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8674 {
8675 	dns_db_t *db = rdataset->private1;
8676 	dns_dbnode_t *node = rdataset->private2;
8677 	dns_dbnode_t *cloned_node;
8678 	struct noqname *closest = rdataset->private7;
8679 
8680 	cloned_node = NULL;
8681 	attachnode(db, node, &cloned_node);
8682 	nsec->methods = &rdataset_methods;
8683 	nsec->rdclass = db->rdclass;
8684 	nsec->type = closest->type;
8685 	nsec->covers = 0;
8686 	nsec->ttl = rdataset->ttl;
8687 	nsec->trust = rdataset->trust;
8688 	nsec->private1 = rdataset->private1;
8689 	nsec->private2 = rdataset->private2;
8690 	nsec->private3 = closest->neg;
8691 	nsec->privateuint4 = 0;
8692 	nsec->private5 = NULL;
8693 	nsec->private6 = NULL;
8694 	nsec->private7 = NULL;
8695 
8696 	cloned_node = NULL;
8697 	attachnode(db, node, &cloned_node);
8698 	nsecsig->methods = &rdataset_methods;
8699 	nsecsig->rdclass = db->rdclass;
8700 	nsecsig->type = dns_rdatatype_rrsig;
8701 	nsecsig->covers = closest->type;
8702 	nsecsig->ttl = rdataset->ttl;
8703 	nsecsig->trust = rdataset->trust;
8704 	nsecsig->private1 = rdataset->private1;
8705 	nsecsig->private2 = rdataset->private2;
8706 	nsecsig->private3 = closest->negsig;
8707 	nsecsig->privateuint4 = 0;
8708 	nsecsig->private5 = NULL;
8709 	nsec->private6 = NULL;
8710 	nsec->private7 = NULL;
8711 
8712 	dns_name_clone(&closest->name, name);
8713 
8714 	return (ISC_R_SUCCESS);
8715 }
8716 
8717 static void
rdataset_settrust(dns_rdataset_t * rdataset,dns_trust_t trust)8718 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8719 	dns_rbtdb_t *rbtdb = rdataset->private1;
8720 	dns_rbtnode_t *rbtnode = rdataset->private2;
8721 	rdatasetheader_t *header = rdataset->private3;
8722 
8723 	header--;
8724 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8725 		  isc_rwlocktype_write);
8726 	header->trust = rdataset->trust = trust;
8727 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8728 		  isc_rwlocktype_write);
8729 }
8730 
8731 static void
rdataset_expire(dns_rdataset_t * rdataset)8732 rdataset_expire(dns_rdataset_t *rdataset) {
8733 	dns_rbtdb_t *rbtdb = rdataset->private1;
8734 	dns_rbtnode_t *rbtnode = rdataset->private2;
8735 	rdatasetheader_t *header = rdataset->private3;
8736 
8737 	header--;
8738 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8739 		  isc_rwlocktype_write);
8740 	expire_header(rbtdb, header, ISC_FALSE, expire_flush);
8741 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8742 		  isc_rwlocktype_write);
8743 }
8744 
8745 static void
rdataset_clearprefetch(dns_rdataset_t * rdataset)8746 rdataset_clearprefetch(dns_rdataset_t *rdataset) {
8747 	dns_rbtdb_t *rbtdb = rdataset->private1;
8748 	dns_rbtnode_t *rbtnode = rdataset->private2;
8749 	rdatasetheader_t *header = rdataset->private3;
8750 
8751 	header--;
8752 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8753 		  isc_rwlocktype_write);
8754 	header->attributes &= ~RDATASET_ATTR_PREFETCH;
8755 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8756 		  isc_rwlocktype_write);
8757 }
8758 
8759 /*
8760  * Rdataset Iterator Methods
8761  */
8762 
8763 static void
rdatasetiter_destroy(dns_rdatasetiter_t ** iteratorp)8764 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8765 	rbtdb_rdatasetiter_t *rbtiterator;
8766 
8767 	rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8768 
8769 	if (rbtiterator->common.version != NULL)
8770 		closeversion(rbtiterator->common.db,
8771 			     &rbtiterator->common.version, ISC_FALSE);
8772 	detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8773 	isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8774 		    sizeof(*rbtiterator));
8775 
8776 	*iteratorp = NULL;
8777 }
8778 
8779 static isc_result_t
rdatasetiter_first(dns_rdatasetiter_t * iterator)8780 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8781 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8782 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8783 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8784 	rbtdb_version_t *rbtversion = rbtiterator->common.version;
8785 	rdatasetheader_t *header, *top_next;
8786 	rbtdb_serial_t serial;
8787 	isc_stdtime_t now;
8788 
8789 	if (IS_CACHE(rbtdb)) {
8790 		serial = 1;
8791 		now = rbtiterator->common.now;
8792 	} else {
8793 		serial = rbtversion->serial;
8794 		now = 0;
8795 	}
8796 
8797 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8798 		  isc_rwlocktype_read);
8799 
8800 	for (header = rbtnode->data; header != NULL; header = top_next) {
8801 		top_next = header->next;
8802 		do {
8803 			if (header->serial <= serial && !IGNORE(header)) {
8804 				/*
8805 				 * Is this a "this rdataset doesn't exist"
8806 				 * record?  Or is it too old in the cache?
8807 				 *
8808 				 * Note: unlike everywhere else, we
8809 				 * check for now > header->rdh_ttl instead
8810 				 * of now >= header->rdh_ttl.  This allows
8811 				 * ANY and RRSIG queries for 0 TTL
8812 				 * rdatasets to work.
8813 				 */
8814 				if (NONEXISTENT(header) ||
8815 				    (now != 0 && now > header->rdh_ttl))
8816 					header = NULL;
8817 				break;
8818 			} else
8819 				header = header->down;
8820 		} while (header != NULL);
8821 		if (header != NULL)
8822 			break;
8823 	}
8824 
8825 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8826 		    isc_rwlocktype_read);
8827 
8828 	rbtiterator->current = header;
8829 
8830 	if (header == NULL)
8831 		return (ISC_R_NOMORE);
8832 
8833 	return (ISC_R_SUCCESS);
8834 }
8835 
8836 static isc_result_t
rdatasetiter_next(dns_rdatasetiter_t * iterator)8837 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8838 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8839 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8840 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8841 	rbtdb_version_t *rbtversion = rbtiterator->common.version;
8842 	rdatasetheader_t *header, *top_next;
8843 	rbtdb_serial_t serial;
8844 	isc_stdtime_t now;
8845 	rbtdb_rdatatype_t type, negtype;
8846 	dns_rdatatype_t rdtype, covers;
8847 
8848 	header = rbtiterator->current;
8849 	if (header == NULL)
8850 		return (ISC_R_NOMORE);
8851 
8852 	if (IS_CACHE(rbtdb)) {
8853 		serial = 1;
8854 		now = rbtiterator->common.now;
8855 	} else {
8856 		serial = rbtversion->serial;
8857 		now = 0;
8858 	}
8859 
8860 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8861 		  isc_rwlocktype_read);
8862 
8863 	type = header->type;
8864 	rdtype = RBTDB_RDATATYPE_BASE(header->type);
8865 	if (NEGATIVE(header)) {
8866 		covers = RBTDB_RDATATYPE_EXT(header->type);
8867 		negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8868 	} else
8869 		negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8870 	for (header = header->next; header != NULL; header = top_next) {
8871 		top_next = header->next;
8872 		/*
8873 		 * If not walking back up the down list.
8874 		 */
8875 		if (header->type != type && header->type != negtype) {
8876 			do {
8877 				if (header->serial <= serial &&
8878 				    !IGNORE(header)) {
8879 					/*
8880 					 * Is this a "this rdataset doesn't
8881 					 * exist" record?
8882 					 *
8883 					 * Note: unlike everywhere else, we
8884 					 * check for now > header->ttl instead
8885 					 * of now >= header->ttl.  This allows
8886 					 * ANY and RRSIG queries for 0 TTL
8887 					 * rdatasets to work.
8888 					 */
8889 					if ((header->attributes &
8890 					     RDATASET_ATTR_NONEXISTENT) != 0 ||
8891 					    (now != 0 && now > header->rdh_ttl))
8892 						header = NULL;
8893 					break;
8894 				} else
8895 					header = header->down;
8896 			} while (header != NULL);
8897 			if (header != NULL)
8898 				break;
8899 		}
8900 	}
8901 
8902 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8903 		    isc_rwlocktype_read);
8904 
8905 	rbtiterator->current = header;
8906 
8907 	if (header == NULL)
8908 		return (ISC_R_NOMORE);
8909 
8910 	return (ISC_R_SUCCESS);
8911 }
8912 
8913 static void
rdatasetiter_current(dns_rdatasetiter_t * iterator,dns_rdataset_t * rdataset)8914 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8915 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8916 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8917 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8918 	rdatasetheader_t *header;
8919 
8920 	header = rbtiterator->current;
8921 	REQUIRE(header != NULL);
8922 
8923 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8924 		  isc_rwlocktype_read);
8925 
8926 	bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8927 		      rdataset);
8928 
8929 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8930 		    isc_rwlocktype_read);
8931 }
8932 
8933 
8934 /*
8935  * Database Iterator Methods
8936  */
8937 
8938 static inline void
reference_iter_node(rbtdb_dbiterator_t * rbtdbiter)8939 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8940 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8941 	dns_rbtnode_t *node = rbtdbiter->node;
8942 
8943 	if (node == NULL)
8944 		return;
8945 
8946 	INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8947 	reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8948 }
8949 
8950 static inline void
dereference_iter_node(rbtdb_dbiterator_t * rbtdbiter)8951 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8952 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8953 	dns_rbtnode_t *node = rbtdbiter->node;
8954 	nodelock_t *lock;
8955 
8956 	if (node == NULL)
8957 		return;
8958 
8959 	lock = &rbtdb->node_locks[node->locknum].lock;
8960 	NODE_LOCK(lock, isc_rwlocktype_read);
8961 	decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8962 			    rbtdbiter->tree_locked, ISC_FALSE);
8963 	NODE_UNLOCK(lock, isc_rwlocktype_read);
8964 
8965 	rbtdbiter->node = NULL;
8966 }
8967 
8968 static void
flush_deletions(rbtdb_dbiterator_t * rbtdbiter)8969 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8970 	dns_rbtnode_t *node;
8971 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8972 	isc_boolean_t was_read_locked = ISC_FALSE;
8973 	nodelock_t *lock;
8974 	int i;
8975 
8976 	if (rbtdbiter->delete != 0) {
8977 		/*
8978 		 * Note that "%d node of %d in tree" can report things like
8979 		 * "flush_deletions: 59 nodes of 41 in tree".  This means
8980 		 * That some nodes appear on the deletions list more than
8981 		 * once.  Only the last occurence will actually be deleted.
8982 		 */
8983 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8984 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8985 			      "flush_deletions: %d nodes of %d in tree",
8986 			      rbtdbiter->delete,
8987 			      dns_rbt_nodecount(rbtdb->tree));
8988 
8989 		if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8990 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8991 			was_read_locked = ISC_TRUE;
8992 		}
8993 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8994 		rbtdbiter->tree_locked = isc_rwlocktype_write;
8995 
8996 		for (i = 0; i < rbtdbiter->delete; i++) {
8997 			node = rbtdbiter->deletions[i];
8998 			lock = &rbtdb->node_locks[node->locknum].lock;
8999 
9000 			NODE_LOCK(lock, isc_rwlocktype_read);
9001 			decrement_reference(rbtdb, node, 0,
9002 					    isc_rwlocktype_read,
9003 					    rbtdbiter->tree_locked, ISC_FALSE);
9004 			NODE_UNLOCK(lock, isc_rwlocktype_read);
9005 		}
9006 
9007 		rbtdbiter->delete = 0;
9008 
9009 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
9010 		if (was_read_locked) {
9011 			RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9012 			rbtdbiter->tree_locked = isc_rwlocktype_read;
9013 
9014 		} else {
9015 			rbtdbiter->tree_locked = isc_rwlocktype_none;
9016 		}
9017 	}
9018 }
9019 
9020 static inline void
resume_iteration(rbtdb_dbiterator_t * rbtdbiter)9021 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
9022 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9023 
9024 	REQUIRE(rbtdbiter->paused);
9025 	REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
9026 
9027 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9028 	rbtdbiter->tree_locked = isc_rwlocktype_read;
9029 
9030 	rbtdbiter->paused = ISC_FALSE;
9031 }
9032 
9033 static void
dbiterator_destroy(dns_dbiterator_t ** iteratorp)9034 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
9035 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
9036 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9037 	dns_db_t *db = NULL;
9038 
9039 	if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
9040 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9041 		rbtdbiter->tree_locked = isc_rwlocktype_none;
9042 	} else
9043 		INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
9044 
9045 	dereference_iter_node(rbtdbiter);
9046 
9047 	flush_deletions(rbtdbiter);
9048 
9049 	dns_db_attach(rbtdbiter->common.db, &db);
9050 	dns_db_detach(&rbtdbiter->common.db);
9051 
9052 	dns_rbtnodechain_reset(&rbtdbiter->chain);
9053 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9054 	isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
9055 	dns_db_detach(&db);
9056 
9057 	*iteratorp = NULL;
9058 }
9059 
9060 static isc_result_t
dbiterator_first(dns_dbiterator_t * iterator)9061 dbiterator_first(dns_dbiterator_t *iterator) {
9062 	isc_result_t result;
9063 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9064 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9065 	dns_name_t *name, *origin;
9066 
9067 	if (rbtdbiter->result != ISC_R_SUCCESS &&
9068 	    rbtdbiter->result != ISC_R_NOMORE)
9069 		return (rbtdbiter->result);
9070 
9071 	if (rbtdbiter->paused)
9072 		resume_iteration(rbtdbiter);
9073 
9074 	dereference_iter_node(rbtdbiter);
9075 
9076 	name = dns_fixedname_name(&rbtdbiter->name);
9077 	origin = dns_fixedname_name(&rbtdbiter->origin);
9078 	dns_rbtnodechain_reset(&rbtdbiter->chain);
9079 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9080 
9081 	if (rbtdbiter->nsec3only) {
9082 		rbtdbiter->current = &rbtdbiter->nsec3chain;
9083 		result = dns_rbtnodechain_first(rbtdbiter->current,
9084 						rbtdb->nsec3, name, origin);
9085 	} else {
9086 		rbtdbiter->current = &rbtdbiter->chain;
9087 		result = dns_rbtnodechain_first(rbtdbiter->current,
9088 						rbtdb->tree, name, origin);
9089 		if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
9090 			rbtdbiter->current = &rbtdbiter->nsec3chain;
9091 			result = dns_rbtnodechain_first(rbtdbiter->current,
9092 							rbtdb->nsec3, name,
9093 							origin);
9094 		}
9095 	}
9096 	if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9097 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9098 						  NULL, &rbtdbiter->node);
9099 		if (result == ISC_R_SUCCESS) {
9100 			rbtdbiter->new_origin = ISC_TRUE;
9101 			reference_iter_node(rbtdbiter);
9102 		}
9103 	} else {
9104 		INSIST(result == ISC_R_NOTFOUND);
9105 		result = ISC_R_NOMORE; /* The tree is empty. */
9106 	}
9107 
9108 	rbtdbiter->result = result;
9109 
9110 	return (result);
9111 }
9112 
9113 static isc_result_t
dbiterator_last(dns_dbiterator_t * iterator)9114 dbiterator_last(dns_dbiterator_t *iterator) {
9115 	isc_result_t result;
9116 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9117 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9118 	dns_name_t *name, *origin;
9119 
9120 	if (rbtdbiter->result != ISC_R_SUCCESS &&
9121 	    rbtdbiter->result != ISC_R_NOMORE)
9122 		return (rbtdbiter->result);
9123 
9124 	if (rbtdbiter->paused)
9125 		resume_iteration(rbtdbiter);
9126 
9127 	dereference_iter_node(rbtdbiter);
9128 
9129 	name = dns_fixedname_name(&rbtdbiter->name);
9130 	origin = dns_fixedname_name(&rbtdbiter->origin);
9131 	dns_rbtnodechain_reset(&rbtdbiter->chain);
9132 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9133 
9134 	result = ISC_R_NOTFOUND;
9135 	if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
9136 		rbtdbiter->current = &rbtdbiter->nsec3chain;
9137 		result = dns_rbtnodechain_last(rbtdbiter->current,
9138 					       rbtdb->nsec3, name, origin);
9139 	}
9140 	if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
9141 		rbtdbiter->current = &rbtdbiter->chain;
9142 		result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9143 					       name, origin);
9144 	}
9145 	if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9146 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9147 						  NULL, &rbtdbiter->node);
9148 		if (result == ISC_R_SUCCESS) {
9149 			rbtdbiter->new_origin = ISC_TRUE;
9150 			reference_iter_node(rbtdbiter);
9151 		}
9152 	} else {
9153 		INSIST(result == ISC_R_NOTFOUND);
9154 		result = ISC_R_NOMORE; /* The tree is empty. */
9155 	}
9156 
9157 	rbtdbiter->result = result;
9158 
9159 	return (result);
9160 }
9161 
9162 static isc_result_t
dbiterator_seek(dns_dbiterator_t * iterator,dns_name_t * name)9163 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
9164 	isc_result_t result, tresult;
9165 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9166 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9167 	dns_name_t *iname, *origin;
9168 
9169 	if (rbtdbiter->result != ISC_R_SUCCESS &&
9170 	    rbtdbiter->result != ISC_R_NOTFOUND &&
9171 	    rbtdbiter->result != ISC_R_NOMORE)
9172 		return (rbtdbiter->result);
9173 
9174 	if (rbtdbiter->paused)
9175 		resume_iteration(rbtdbiter);
9176 
9177 	dereference_iter_node(rbtdbiter);
9178 
9179 	iname = dns_fixedname_name(&rbtdbiter->name);
9180 	origin = dns_fixedname_name(&rbtdbiter->origin);
9181 	dns_rbtnodechain_reset(&rbtdbiter->chain);
9182 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9183 
9184 	if (rbtdbiter->nsec3only) {
9185 		rbtdbiter->current = &rbtdbiter->nsec3chain;
9186 		result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
9187 					  &rbtdbiter->node,
9188 					  rbtdbiter->current,
9189 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9190 	} else if (rbtdbiter->nonsec3) {
9191 		rbtdbiter->current = &rbtdbiter->chain;
9192 		result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9193 					  &rbtdbiter->node,
9194 					  rbtdbiter->current,
9195 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9196 	} else {
9197 		/*
9198 		 * Stay on main chain if not found on either chain.
9199 		 */
9200 		rbtdbiter->current = &rbtdbiter->chain;
9201 		result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9202 					  &rbtdbiter->node,
9203 					  rbtdbiter->current,
9204 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9205 		if (result == DNS_R_PARTIALMATCH) {
9206 			dns_rbtnode_t *node = NULL;
9207 			tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
9208 						  &node, &rbtdbiter->nsec3chain,
9209 						  DNS_RBTFIND_EMPTYDATA,
9210 						  NULL, NULL);
9211 			if (tresult == ISC_R_SUCCESS) {
9212 				rbtdbiter->node = node;
9213 				rbtdbiter->current = &rbtdbiter->nsec3chain;
9214 				result = tresult;
9215 			}
9216 		}
9217 	}
9218 
9219 #if 1
9220 	if (result == ISC_R_SUCCESS) {
9221 		result = dns_rbtnodechain_current(rbtdbiter->current, iname,
9222 						  origin, NULL);
9223 		if (result == ISC_R_SUCCESS) {
9224 			rbtdbiter->new_origin = ISC_TRUE;
9225 			reference_iter_node(rbtdbiter);
9226 		}
9227 	} else if (result == DNS_R_PARTIALMATCH) {
9228 		result = ISC_R_NOTFOUND;
9229 		rbtdbiter->node = NULL;
9230 	}
9231 
9232 	rbtdbiter->result = result;
9233 #else
9234 	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
9235 		isc_result_t tresult;
9236 		tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
9237 						   origin, NULL);
9238 		if (tresult == ISC_R_SUCCESS) {
9239 			rbtdbiter->new_origin = ISC_TRUE;
9240 			reference_iter_node(rbtdbiter);
9241 		} else {
9242 			result = tresult;
9243 			rbtdbiter->node = NULL;
9244 		}
9245 	} else
9246 		rbtdbiter->node = NULL;
9247 
9248 	rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
9249 			    ISC_R_SUCCESS : result;
9250 #endif
9251 
9252 	return (result);
9253 }
9254 
9255 static isc_result_t
dbiterator_prev(dns_dbiterator_t * iterator)9256 dbiterator_prev(dns_dbiterator_t *iterator) {
9257 	isc_result_t result;
9258 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9259 	dns_name_t *name, *origin;
9260 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9261 
9262 	REQUIRE(rbtdbiter->node != NULL);
9263 
9264 	if (rbtdbiter->result != ISC_R_SUCCESS)
9265 		return (rbtdbiter->result);
9266 
9267 	if (rbtdbiter->paused)
9268 		resume_iteration(rbtdbiter);
9269 
9270 	name = dns_fixedname_name(&rbtdbiter->name);
9271 	origin = dns_fixedname_name(&rbtdbiter->origin);
9272 	result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
9273 	if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9274 	    !rbtdbiter->nonsec3 &&
9275 	    &rbtdbiter->nsec3chain == rbtdbiter->current) {
9276 		rbtdbiter->current = &rbtdbiter->chain;
9277 		dns_rbtnodechain_reset(rbtdbiter->current);
9278 		result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9279 					       name, origin);
9280 		if (result == ISC_R_NOTFOUND)
9281 			result = ISC_R_NOMORE;
9282 	}
9283 
9284 	dereference_iter_node(rbtdbiter);
9285 
9286 	if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9287 		rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
9288 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9289 						  NULL, &rbtdbiter->node);
9290 	}
9291 
9292 	if (result == ISC_R_SUCCESS)
9293 		reference_iter_node(rbtdbiter);
9294 
9295 	rbtdbiter->result = result;
9296 
9297 	return (result);
9298 }
9299 
9300 static isc_result_t
dbiterator_next(dns_dbiterator_t * iterator)9301 dbiterator_next(dns_dbiterator_t *iterator) {
9302 	isc_result_t result;
9303 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9304 	dns_name_t *name, *origin;
9305 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9306 
9307 	REQUIRE(rbtdbiter->node != NULL);
9308 
9309 	if (rbtdbiter->result != ISC_R_SUCCESS)
9310 		return (rbtdbiter->result);
9311 
9312 	if (rbtdbiter->paused)
9313 		resume_iteration(rbtdbiter);
9314 
9315 	name = dns_fixedname_name(&rbtdbiter->name);
9316 	origin = dns_fixedname_name(&rbtdbiter->origin);
9317 	result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
9318 	if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9319 	    !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
9320 		rbtdbiter->current = &rbtdbiter->nsec3chain;
9321 		dns_rbtnodechain_reset(rbtdbiter->current);
9322 		result = dns_rbtnodechain_first(rbtdbiter->current,
9323 						rbtdb->nsec3, name, origin);
9324 		if (result == ISC_R_NOTFOUND)
9325 			result = ISC_R_NOMORE;
9326 	}
9327 
9328 	dereference_iter_node(rbtdbiter);
9329 
9330 	if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9331 		rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
9332 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9333 						  NULL, &rbtdbiter->node);
9334 	}
9335 	if (result == ISC_R_SUCCESS)
9336 		reference_iter_node(rbtdbiter);
9337 
9338 	rbtdbiter->result = result;
9339 
9340 	return (result);
9341 }
9342 
9343 static isc_result_t
dbiterator_current(dns_dbiterator_t * iterator,dns_dbnode_t ** nodep,dns_name_t * name)9344 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
9345 		   dns_name_t *name)
9346 {
9347 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9348 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9349 	dns_rbtnode_t *node = rbtdbiter->node;
9350 	isc_result_t result;
9351 	dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
9352 	dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9353 
9354 	REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
9355 	REQUIRE(rbtdbiter->node != NULL);
9356 
9357 	if (rbtdbiter->paused)
9358 		resume_iteration(rbtdbiter);
9359 
9360 	if (name != NULL) {
9361 		if (rbtdbiter->common.relative_names)
9362 			origin = NULL;
9363 		result = dns_name_concatenate(nodename, origin, name, NULL);
9364 		if (result != ISC_R_SUCCESS)
9365 			return (result);
9366 		if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
9367 			result = DNS_R_NEWORIGIN;
9368 	} else
9369 		result = ISC_R_SUCCESS;
9370 
9371 	NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
9372 	new_reference(rbtdb, node);
9373 	NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
9374 
9375 	*nodep = rbtdbiter->node;
9376 
9377 	if (iterator->cleaning && result == ISC_R_SUCCESS) {
9378 		isc_result_t expire_result;
9379 
9380 		/*
9381 		 * If the deletion array is full, flush it before trying
9382 		 * to expire the current node.  The current node can't
9383 		 * fully deleted while the iteration cursor is still on it.
9384 		 */
9385 		if (rbtdbiter->delete == DELETION_BATCH_MAX)
9386 			flush_deletions(rbtdbiter);
9387 
9388 		expire_result = expirenode(iterator->db, *nodep, 0);
9389 
9390 		/*
9391 		 * expirenode() currently always returns success.
9392 		 */
9393 		if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
9394 			unsigned int refs;
9395 
9396 			rbtdbiter->deletions[rbtdbiter->delete++] = node;
9397 			NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
9398 			dns_rbtnode_refincrement(node, &refs);
9399 			INSIST(refs != 0);
9400 			NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
9401 		}
9402 	}
9403 
9404 	return (result);
9405 }
9406 
9407 static isc_result_t
dbiterator_pause(dns_dbiterator_t * iterator)9408 dbiterator_pause(dns_dbiterator_t *iterator) {
9409 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9410 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9411 
9412 	if (rbtdbiter->result != ISC_R_SUCCESS &&
9413 	    rbtdbiter->result != ISC_R_NOMORE)
9414 		return (rbtdbiter->result);
9415 
9416 	if (rbtdbiter->paused)
9417 		return (ISC_R_SUCCESS);
9418 
9419 	rbtdbiter->paused = ISC_TRUE;
9420 
9421 	if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
9422 		INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
9423 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9424 		rbtdbiter->tree_locked = isc_rwlocktype_none;
9425 	}
9426 
9427 	flush_deletions(rbtdbiter);
9428 
9429 	return (ISC_R_SUCCESS);
9430 }
9431 
9432 static isc_result_t
dbiterator_origin(dns_dbiterator_t * iterator,dns_name_t * name)9433 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
9434 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9435 	dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9436 
9437 	if (rbtdbiter->result != ISC_R_SUCCESS)
9438 		return (rbtdbiter->result);
9439 
9440 	return (dns_name_copy(origin, name, NULL));
9441 }
9442 
9443 /*%
9444  * Additional cache routines.
9445  */
9446 static isc_result_t
rdataset_getadditional(dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype,dns_acache_t * acache,dns_zone_t ** zonep,dns_db_t ** dbp,dns_dbversion_t ** versionp,dns_dbnode_t ** nodep,dns_name_t * fname,dns_message_t * msg,isc_stdtime_t now)9447 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
9448 		       dns_rdatatype_t qtype, dns_acache_t *acache,
9449 		       dns_zone_t **zonep, dns_db_t **dbp,
9450 		       dns_dbversion_t **versionp, dns_dbnode_t **nodep,
9451 		       dns_name_t *fname, dns_message_t *msg,
9452 		       isc_stdtime_t now)
9453 {
9454 	dns_rbtdb_t *rbtdb = rdataset->private1;
9455 	dns_rbtnode_t *rbtnode = rdataset->private2;
9456 	unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9457 	unsigned int current_count = rdataset->privateuint4;
9458 	unsigned int count;
9459 	rdatasetheader_t *header;
9460 	nodelock_t *nodelock;
9461 	unsigned int total_count;
9462 	acachectl_t *acarray;
9463 	dns_acacheentry_t *entry;
9464 	isc_result_t result;
9465 
9466 	UNUSED(qtype); /* we do not use this value at least for now */
9467 	UNUSED(acache);
9468 
9469 	header = (struct rdatasetheader *)(raw - sizeof(*header));
9470 
9471 	total_count = raw[0] * 256 + raw[1];
9472 	INSIST(total_count > current_count);
9473 	count = total_count - current_count - 1;
9474 
9475 	acarray = NULL;
9476 
9477 	nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9478 	NODE_LOCK(nodelock, isc_rwlocktype_read);
9479 
9480 	switch (type) {
9481 	case dns_rdatasetadditional_fromauth:
9482 		acarray = header->additional_auth;
9483 		break;
9484 	case dns_rdatasetadditional_fromcache:
9485 		acarray = NULL;
9486 		break;
9487 	case dns_rdatasetadditional_fromglue:
9488 		acarray = header->additional_glue;
9489 		break;
9490 	default:
9491 		INSIST(0);
9492 	}
9493 
9494 	if (acarray == NULL) {
9495 		if (type != dns_rdatasetadditional_fromcache)
9496 			dns_acache_countquerymiss(acache);
9497 		NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9498 		return (ISC_R_NOTFOUND);
9499 	}
9500 
9501 	if (acarray[count].entry == NULL) {
9502 		dns_acache_countquerymiss(acache);
9503 		NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9504 		return (ISC_R_NOTFOUND);
9505 	}
9506 
9507 	entry = NULL;
9508 	dns_acache_attachentry(acarray[count].entry, &entry);
9509 
9510 	NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9511 
9512 	result = dns_acache_getentry(entry, zonep, dbp, versionp,
9513 				     nodep, fname, msg, now);
9514 
9515 	dns_acache_detachentry(&entry);
9516 
9517 	return (result);
9518 }
9519 
9520 static void
acache_callback(dns_acacheentry_t * entry,void ** arg)9521 acache_callback(dns_acacheentry_t *entry, void **arg) {
9522 	dns_rbtdb_t *rbtdb;
9523 	dns_rbtnode_t *rbtnode;
9524 	nodelock_t *nodelock;
9525 	acachectl_t *acarray = NULL;
9526 	acache_cbarg_t *cbarg;
9527 	unsigned int count;
9528 
9529 	REQUIRE(arg != NULL);
9530 	cbarg = *arg;
9531 
9532 	/*
9533 	 * The caller must hold the entry lock.
9534 	 */
9535 
9536 	rbtdb = (dns_rbtdb_t *)cbarg->db;
9537 	rbtnode = (dns_rbtnode_t *)cbarg->node;
9538 
9539 	nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9540 	NODE_LOCK(nodelock, isc_rwlocktype_write);
9541 
9542 	switch (cbarg->type) {
9543 	case dns_rdatasetadditional_fromauth:
9544 		acarray = cbarg->header->additional_auth;
9545 		break;
9546 	case dns_rdatasetadditional_fromglue:
9547 		acarray = cbarg->header->additional_glue;
9548 		break;
9549 	default:
9550 		INSIST(0);
9551 	}
9552 
9553 	count = cbarg->count;
9554 	if (acarray != NULL && acarray[count].entry == entry) {
9555 		acarray[count].entry = NULL;
9556 		INSIST(acarray[count].cbarg == cbarg);
9557 		acarray[count].cbarg = NULL;
9558 		isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
9559 		dns_acache_detachentry(&entry);
9560 	}
9561 
9562 	NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9563 
9564 	dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
9565 	dns_db_detach((dns_db_t **)(void*)&rbtdb);
9566 
9567 	*arg = NULL;
9568 }
9569 
9570 static void
acache_cancelentry(isc_mem_t * mctx,dns_acacheentry_t * entry,acache_cbarg_t ** cbargp)9571 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
9572 		      acache_cbarg_t **cbargp)
9573 {
9574 	acache_cbarg_t *cbarg;
9575 
9576 	REQUIRE(mctx != NULL);
9577 	REQUIRE(entry != NULL);
9578 	REQUIRE(cbargp != NULL && *cbargp != NULL);
9579 
9580 	cbarg = *cbargp;
9581 
9582 	if (dns_acache_cancelentry(entry)) {
9583 		dns_db_detachnode(cbarg->db, &cbarg->node);
9584 		dns_db_detach(&cbarg->db);
9585 	}
9586 
9587 	isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
9588 
9589 	*cbargp = NULL;
9590 }
9591 
9592 static isc_result_t
rdataset_setadditional(dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype,dns_acache_t * acache,dns_zone_t * zone,dns_db_t * db,dns_dbversion_t * version,dns_dbnode_t * node,dns_name_t * fname)9593 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
9594 		       dns_rdatatype_t qtype, dns_acache_t *acache,
9595 		       dns_zone_t *zone, dns_db_t *db,
9596 		       dns_dbversion_t *version, dns_dbnode_t *node,
9597 		       dns_name_t *fname)
9598 {
9599 	dns_rbtdb_t *rbtdb = rdataset->private1;
9600 	dns_rbtnode_t *rbtnode = rdataset->private2;
9601 	unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9602 	unsigned int current_count = rdataset->privateuint4;
9603 	rdatasetheader_t *header;
9604 	unsigned int total_count, count;
9605 	nodelock_t *nodelock;
9606 	isc_result_t result;
9607 	acachectl_t *acarray;
9608 	dns_acacheentry_t *newentry, *oldentry = NULL;
9609 	acache_cbarg_t *newcbarg, *oldcbarg = NULL;
9610 
9611 	UNUSED(qtype);
9612 
9613 	if (type == dns_rdatasetadditional_fromcache)
9614 		return (ISC_R_SUCCESS);
9615 
9616 	header = (struct rdatasetheader *)(raw - sizeof(*header));
9617 
9618 	total_count = raw[0] * 256 + raw[1];
9619 	INSIST(total_count > current_count);
9620 	count = total_count - current_count - 1; /* should be private data */
9621 
9622 	newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9623 	if (newcbarg == NULL)
9624 		return (ISC_R_NOMEMORY);
9625 	newcbarg->type = type;
9626 	newcbarg->count = count;
9627 	newcbarg->header = header;
9628 	newcbarg->db = NULL;
9629 	dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9630 	newcbarg->node = NULL;
9631 	dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9632 			  &newcbarg->node);
9633 	newentry = NULL;
9634 	result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9635 					acache_callback, newcbarg, &newentry);
9636 	if (result != ISC_R_SUCCESS)
9637 		goto fail;
9638 
9639 	/* Set cache data in the new entry. */
9640 	result = dns_acache_setentry(acache, newentry, zone, db,
9641 				     version, node, fname);
9642 	if (result != ISC_R_SUCCESS)
9643 		goto fail;
9644 
9645 	nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9646 	NODE_LOCK(nodelock, isc_rwlocktype_write);
9647 
9648 	acarray = NULL;
9649 	switch (type) {
9650 	case dns_rdatasetadditional_fromauth:
9651 		acarray = header->additional_auth;
9652 		break;
9653 	case dns_rdatasetadditional_fromglue:
9654 		acarray = header->additional_glue;
9655 		break;
9656 	default:
9657 		INSIST(0);
9658 	}
9659 
9660 	if (acarray == NULL) {
9661 		unsigned int i;
9662 
9663 		acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9664 				      sizeof(acachectl_t));
9665 
9666 		if (acarray == NULL) {
9667 			NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9668 			goto fail;
9669 		}
9670 
9671 		for (i = 0; i < total_count; i++) {
9672 			acarray[i].entry = NULL;
9673 			acarray[i].cbarg = NULL;
9674 		}
9675 	}
9676 	switch (type) {
9677 	case dns_rdatasetadditional_fromauth:
9678 		header->additional_auth = acarray;
9679 		break;
9680 	case dns_rdatasetadditional_fromglue:
9681 		header->additional_glue = acarray;
9682 		break;
9683 	default:
9684 		INSIST(0);
9685 	}
9686 
9687 	if (acarray[count].entry != NULL) {
9688 		/*
9689 		 * Swap the entry.  Delay cleaning-up the old entry since
9690 		 * it would require a node lock.
9691 		 */
9692 		oldentry = acarray[count].entry;
9693 		INSIST(acarray[count].cbarg != NULL);
9694 		oldcbarg = acarray[count].cbarg;
9695 	}
9696 	acarray[count].entry = newentry;
9697 	acarray[count].cbarg = newcbarg;
9698 
9699 	NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9700 
9701 	if (oldentry != NULL) {
9702 		acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9703 		dns_acache_detachentry(&oldentry);
9704 	}
9705 
9706 	return (ISC_R_SUCCESS);
9707 
9708  fail:
9709 	if (newcbarg != NULL) {
9710 		if (newentry != NULL) {
9711 			acache_cancelentry(rbtdb->common.mctx, newentry,
9712 					   &newcbarg);
9713 			dns_acache_detachentry(&newentry);
9714 		} else {
9715 			dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9716 			dns_db_detach(&newcbarg->db);
9717 			isc_mem_put(rbtdb->common.mctx, newcbarg,
9718 			    sizeof(*newcbarg));
9719 		}
9720 	}
9721 
9722 	return (result);
9723 }
9724 
9725 static isc_result_t
rdataset_putadditional(dns_acache_t * acache,dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype)9726 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9727 		       dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9728 {
9729 	dns_rbtdb_t *rbtdb = rdataset->private1;
9730 	dns_rbtnode_t *rbtnode = rdataset->private2;
9731 	unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9732 	unsigned int current_count = rdataset->privateuint4;
9733 	rdatasetheader_t *header;
9734 	nodelock_t *nodelock;
9735 	unsigned int total_count, count;
9736 	acachectl_t *acarray;
9737 	dns_acacheentry_t *entry;
9738 	acache_cbarg_t *cbarg;
9739 
9740 	UNUSED(qtype);          /* we do not use this value at least for now */
9741 	UNUSED(acache);
9742 
9743 	if (type == dns_rdatasetadditional_fromcache)
9744 		return (ISC_R_SUCCESS);
9745 
9746 	header = (struct rdatasetheader *)(raw - sizeof(*header));
9747 
9748 	total_count = raw[0] * 256 + raw[1];
9749 	INSIST(total_count > current_count);
9750 	count = total_count - current_count - 1;
9751 
9752 	acarray = NULL;
9753 	entry = NULL;
9754 
9755 	nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9756 	NODE_LOCK(nodelock, isc_rwlocktype_write);
9757 
9758 	switch (type) {
9759 	case dns_rdatasetadditional_fromauth:
9760 		acarray = header->additional_auth;
9761 		break;
9762 	case dns_rdatasetadditional_fromglue:
9763 		acarray = header->additional_glue;
9764 		break;
9765 	default:
9766 		INSIST(0);
9767 	}
9768 
9769 	if (acarray == NULL) {
9770 		NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9771 		return (ISC_R_NOTFOUND);
9772 	}
9773 
9774 	entry = acarray[count].entry;
9775 	if (entry == NULL) {
9776 		NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9777 		return (ISC_R_NOTFOUND);
9778 	}
9779 
9780 	acarray[count].entry = NULL;
9781 	cbarg = acarray[count].cbarg;
9782 	acarray[count].cbarg = NULL;
9783 
9784 	NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9785 
9786 	if (entry != NULL) {
9787 		if (cbarg != NULL)
9788 			acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9789 		dns_acache_detachentry(&entry);
9790 	}
9791 
9792 	return (ISC_R_SUCCESS);
9793 }
9794 
9795 /*%
9796  * Routines for LRU-based cache management.
9797  */
9798 
9799 /*%
9800  * See if a given cache entry that is being reused needs to be updated
9801  * in the LRU-list.  From the LRU management point of view, this function is
9802  * expected to return true for almost all cases.  When used with threads,
9803  * however, this may cause a non-negligible performance penalty because a
9804  * writer lock will have to be acquired before updating the list.
9805  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9806  * function returns true if the entry has not been updated for some period of
9807  * time.  We differentiate the NS or glue address case and the others since
9808  * experiments have shown that the former tends to be accessed relatively
9809  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9810  * may cause external queries at a higher level zone, involving more
9811  * transactions).
9812  *
9813  * Caller must hold the node (read or write) lock.
9814  */
9815 static inline isc_boolean_t
need_headerupdate(rdatasetheader_t * header,isc_stdtime_t now)9816 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9817 	if ((header->attributes &
9818 	     (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9819 		return (ISC_FALSE);
9820 
9821 #if DNS_RBTDB_LIMITLRUUPDATE
9822 	if (header->type == dns_rdatatype_ns ||
9823 	    (header->trust == dns_trust_glue &&
9824 	     (header->type == dns_rdatatype_a ||
9825 	      header->type == dns_rdatatype_aaaa))) {
9826 		/*
9827 		 * Glue records are updated if at least 60 seconds have passed
9828 		 * since the previous update time.
9829 		 */
9830 		return (header->last_used + 60 <= now);
9831 	}
9832 
9833 	/* Other records are updated if 5 minutes have passed. */
9834 	return (header->last_used + 300 <= now);
9835 #else
9836 	UNUSED(now);
9837 
9838 	return (ISC_TRUE);
9839 #endif
9840 }
9841 
9842 /*%
9843  * Update the timestamp of a given cache entry and move it to the head
9844  * of the corresponding LRU list.
9845  *
9846  * Caller must hold the node (write) lock.
9847  *
9848  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9849  */
9850 static void
update_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_stdtime_t now)9851 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9852 	      isc_stdtime_t now)
9853 {
9854 	INSIST(IS_CACHE(rbtdb));
9855 
9856 	/* To be checked: can we really assume this? XXXMLG */
9857 	INSIST(ISC_LINK_LINKED(header, link));
9858 
9859 	ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9860 	header->last_used = now;
9861 	ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9862 }
9863 
9864 /*%
9865  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9866  * under an overmem condition.  To recover from this condition quickly, up to
9867  * 2 entries will be purged.  This process is triggered while adding a new
9868  * entry, and we specifically avoid purging entries in the same LRU bucket as
9869  * the one to which the new entry will belong.  Otherwise, we might purge
9870  * entries of the same name of different RR types while adding RRsets from a
9871  * single response (consider the case where we're adding A and AAAA glue records
9872  * of the same NS name).
9873  */
9874 static void
overmem_purge(dns_rbtdb_t * rbtdb,unsigned int locknum_start,isc_stdtime_t now,isc_boolean_t tree_locked)9875 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9876 	      isc_stdtime_t now, isc_boolean_t tree_locked)
9877 {
9878 	rdatasetheader_t *header, *header_prev;
9879 	unsigned int locknum;
9880 	int purgecount = 2;
9881 
9882 	for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9883 	     locknum != locknum_start && purgecount > 0;
9884 	     locknum = (locknum + 1) % rbtdb->node_lock_count) {
9885 		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9886 			  isc_rwlocktype_write);
9887 
9888 		header = isc_heap_element(rbtdb->heaps[locknum], 1);
9889 		if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
9890 			expire_header(rbtdb, header, tree_locked,
9891 				      expire_ttl);
9892 			purgecount--;
9893 		}
9894 
9895 		for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9896 		     header != NULL && purgecount > 0;
9897 		     header = header_prev) {
9898 			header_prev = ISC_LIST_PREV(header, link);
9899 			/*
9900 			 * Unlink the entry at this point to avoid checking it
9901 			 * again even if it's currently used someone else and
9902 			 * cannot be purged at this moment.  This entry won't be
9903 			 * referenced any more (so unlinking is safe) since the
9904 			 * TTL was reset to 0.
9905 			 */
9906 			ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9907 					link);
9908 			expire_header(rbtdb, header, tree_locked,
9909 				      expire_lru);
9910 			purgecount--;
9911 		}
9912 
9913 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9914 				    isc_rwlocktype_write);
9915 	}
9916 }
9917 
9918 static void
expire_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_boolean_t tree_locked,expire_t reason)9919 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9920 	      isc_boolean_t tree_locked, expire_t reason)
9921 {
9922 	set_ttl(rbtdb, header, 0);
9923 	mark_stale_header(rbtdb, header);
9924 
9925 	/*
9926 	 * Caller must hold the node (write) lock.
9927 	 */
9928 
9929 	if (dns_rbtnode_refcurrent(header->node) == 0) {
9930 		/*
9931 		 * If no one else is using the node, we can clean it up now.
9932 		 * We first need to gain a new reference to the node to meet a
9933 		 * requirement of decrement_reference().
9934 		 */
9935 		new_reference(rbtdb, header->node);
9936 		decrement_reference(rbtdb, header->node, 0,
9937 				    isc_rwlocktype_write,
9938 				    tree_locked ? isc_rwlocktype_write :
9939 				    isc_rwlocktype_none, ISC_FALSE);
9940 
9941 		if (rbtdb->cachestats == NULL)
9942 			return;
9943 
9944 		switch (reason) {
9945 		case expire_ttl:
9946 			isc_stats_increment(rbtdb->cachestats,
9947 					    dns_cachestatscounter_deletettl);
9948 			break;
9949 		case expire_lru:
9950 			isc_stats_increment(rbtdb->cachestats,
9951 					    dns_cachestatscounter_deletelru);
9952 			break;
9953 		default:
9954 			break;
9955 		}
9956 
9957 	}
9958 }
9959