1 /* $NetBSD: rbtdb.c,v 1.20 2015/07/08 17:28:59 christos Exp $ */
2
3 /*
4 * Copyright (C) 2004-2015 Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 1999-2003 Internet Software Consortium.
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*! \file */
21
22 /*
23 * Principal Author: Bob Halley
24 */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #ifdef HAVE_INTTYPES_H
31 #include <inttypes.h> /* uintptr_t */
32 #endif
33
34 #include <isc/crc64.h>
35 #include <isc/event.h>
36 #include <isc/heap.h>
37 #include <isc/file.h>
38 #include <isc/hex.h>
39 #include <isc/mem.h>
40 #include <isc/mutex.h>
41 #include <isc/once.h>
42 #include <isc/platform.h>
43 #include <isc/print.h>
44 #include <isc/random.h>
45 #include <isc/refcount.h>
46 #include <isc/rwlock.h>
47 #include <isc/serial.h>
48 #include <isc/socket.h>
49 #include <isc/stdio.h>
50 #include <isc/string.h>
51 #include <isc/task.h>
52 #include <isc/time.h>
53 #include <isc/util.h>
54
55 #include <dns/acache.h>
56 #include <dns/callbacks.h>
57 #include <dns/db.h>
58 #include <dns/dbiterator.h>
59 #include <dns/events.h>
60 #include <dns/fixedname.h>
61 #include <dns/lib.h>
62 #include <dns/log.h>
63 #include <dns/masterdump.h>
64 #include <dns/nsec.h>
65 #include <dns/nsec3.h>
66 #include <dns/rbt.h>
67 #include <dns/rpz.h>
68 #include <dns/rdata.h>
69 #include <dns/rdataset.h>
70 #include <dns/rdatasetiter.h>
71 #include <dns/rdataslab.h>
72 #include <dns/rdatastruct.h>
73 #include <dns/result.h>
74 #include <dns/stats.h>
75 #include <dns/version.h>
76 #include <dns/view.h>
77 #include <dns/zone.h>
78 #include <dns/zonekey.h>
79
80 #ifndef WIN32
81 #include <sys/mman.h>
82 #else
83 #define PROT_READ 0x01
84 #define PROT_WRITE 0x02
85 #define MAP_PRIVATE 0x0002
86 #define MAP_FAILED ((void *)-1)
87 #endif
88
89 #ifdef DNS_RBTDB_VERSION64
90 #include "rbtdb64.h"
91 #else
92 #include "rbtdb.h"
93 #endif
94
/*
 * Implementation magic number.  The last character distinguishes the
 * build with 64-bit serials ('8') from the default build ('4').
 */
#ifndef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC		ISC_MAGIC('R', 'B', 'D', '4')
#else
#define RBTDB_MAGIC		ISC_MAGIC('R', 'B', 'D', '8')
#endif
100
/*
 * Evaluate 'op' once, store its value in the caller's 'result' variable,
 * and jump to the caller's 'failure' label unless it is ISC_R_SUCCESS.
 */
#define CHECK(op) \
	do { \
		if ((result = (op)) != ISC_R_SUCCESS) \
			goto failure; \
	} while (/*CONSTCOND*/0)
105
106 /*
107 * This is the map file header for RBTDB images. It is populated, and then
108 * written, as the LAST thing done to the file. Writing this last (with
109 * zeros in the header area initially) will ensure that the header is only
110 * valid when the RBTDB image is also valid.
111 */
112 typedef struct rbtdb_file_header rbtdb_file_header_t;
113
114 /* Header length, always the same size regardless of structure size */
115 #define RBTDB_HEADER_LENGTH 1024
116
117 struct rbtdb_file_header {
118 char version1[32];
119 isc_uint32_t ptrsize;
120 unsigned int bigendian:1;
121 isc_uint64_t tree;
122 isc_uint64_t nsec;
123 isc_uint64_t nsec3;
124
125 char version2[32]; /* repeated; must match version1 */
126 };
127
128
/*%
 * True when 'rbtdb' is non-NULL and carries this implementation's magic.
 *
 * "impmagic" is not the first four bytes of the struct, so the generic
 * ISC_MAGIC_VALID cannot be used here.
 */
#define VALID_RBTDB(rbtdb) \
	(NULL != (rbtdb) && RBTDB_MAGIC == (rbtdb)->common.impmagic)
135
136 #ifdef DNS_RBTDB_VERSION64
137 typedef isc_uint64_t rbtdb_serial_t;
138 /*%
139 * Make casting easier in symbolic debuggers by using different names
140 * for the 64 bit version.
141 */
142 #define dns_rbtdb_t dns_rbtdb64_t
143 #define rdatasetheader_t rdatasetheader64_t
144 #define rbtdb_version_t rbtdb_version64_t
145
146 #define once once64
147 #define FILE_VERSION FILE_VERSION64
148 #define init_count init_count64
149
150 #define cache_methods cache_methods64
151 #define dbiterator_methods dbiterator_methods64
152 #define rdataset_methods rdataset_methods64
153 #define rdatasetiter_methods rdatasetiter_methods64
154 #define zone_methods zone_methods64
155
156 #define acache_callback acache_callback64
157 #define acache_cancelentry acache_cancelentry64
158 #define activeempty activeempty64
159 #define activeemtpynode activeemtpynode64
160 #define add32 add64
161 #define add_changed add_changed64
162 #define add_empty_wildcards add_empty_wildcards64
163 #define add_wildcard_magic add_wildcard_magic64
164 #define addrdataset addrdataset64
165 #define allrdatasets allrdatasets64
166 #define attach attach64
167 #define attachnode attachnode64
168 #define attachversion attachversion64
169 #define beginload beginload64
170 #define bind_rdataset bind_rdataset64
171 #define cache_find cache_find64
172 #define cache_findrdataset cache_findrdataset64
173 #define cache_findzonecut cache_findzonecut64
174 #define cache_zonecut_callback cache_zonecut_callback64
175 #define cleanup_dead_nodes cleanup_dead_nodes64
176 #define cleanup_dead_nodes_callback cleanup_dead_nodes_callback64
177 #define closeversion closeversion64
178 #define createiterator createiterator64
179 #define currentversion currentversion64
180 #define dbiterator_current dbiterator_current64
181 #define dbiterator_destroy dbiterator_destroy64
182 #define dbiterator_first dbiterator_first64
183 #define dbiterator_last dbiterator_last64
184 #define dbiterator_next dbiterator_next64
185 #define dbiterator_origin dbiterator_origin64
186 #define dbiterator_pause dbiterator_pause64
187 #define dbiterator_prev dbiterator_prev64
188 #define dbiterator_seek dbiterator_seek64
189 #define decrement_reference decrement_reference64
190 #define delete_callback delete_callback64
191 #define delete_node delete_node64
192 #define deleterdataset deleterdataset64
193 #define deserialize32 deserialize64
194 #define detach detach64
195 #define detachnode detachnode64
196 #define dump dump64
197 #define endload endload64
198 #define expire_header expire_header64
199 #define expirenode expirenode64
200 #define find_closest_nsec find_closest_nsec64
201 #define find_coveringnsec find_coveringnsec64
202 #define find_deepest_zonecut find_deepest_zonecut64
203 #define findnode findnode64
204 #define findnodeintree findnodeintree64
205 #define findnsec3node findnsec3node64
206 #define flush_deletions flush_deletions64
207 #define free_acachearray free_acachearray64
208 #define free_noqname free_noqname64
209 #define free_rbtdb free_rbtdb64
210 #define free_rbtdb_callback free_rbtdb_callback64
211 #define free_rdataset free_rdataset64
212 #define getnsec3parameters getnsec3parameters64
213 #define getoriginnode getoriginnode64
214 #define getrrsetstats getrrsetstats64
215 #define getsigningtime getsigningtime64
216 #define hashsize hashsize64
217 #define init_file_version init_file_version64
218 #define isdnssec isdnssec64
219 #define ispersistent ispersistent64
220 #define issecure issecure64
221 #define iszonesecure iszonesecure64
222 #define loading_addrdataset loading_addrdataset64
223 #define loadnode loadnode64
224 #define matchparams matchparams64
225 #define maybe_free_rbtdb maybe_free_rbtdb64
226 #define new_reference new_reference64
227 #define newversion newversion64
228 #define nodecount nodecount64
229 #define overmem overmem64
230 #define previous_closest_nsec previous_closest_nsec64
231 #define printnode printnode64
232 #define prune_tree prune_tree64
233 #define rbt_datafixer rbt_datafixer64
234 #define rbt_datawriter rbt_datawriter64
235 #define rdataset_clearprefetch rdataset_clearprefetch64
236 #define rdataset_clone rdataset_clone64
237 #define rdataset_count rdataset_count64
238 #define rdataset_current rdataset_current64
239 #define rdataset_disassociate rdataset_disassociate64
240 #define rdataset_expire rdataset_expire64
241 #define rdataset_first rdataset_first64
242 #define rdataset_getadditional rdataset_getadditional64
243 #define rdataset_getclosest rdataset_getclosest64
244 #define rdataset_getnoqname rdataset_getnoqname64
245 #define rdataset_next rdataset_next64
246 #define rdataset_putadditional rdataset_putadditional64
247 #define rdataset_setadditional rdataset_setadditional64
248 #define rdataset_settrust rdataset_settrust64
249 #define rdatasetiter_current rdatasetiter_current64
250 #define rdatasetiter_destroy rdatasetiter_destroy64
251 #define rdatasetiter_first rdatasetiter_first64
252 #define rdatasetiter_next rdatasetiter_next64
253 #define reactivate_node reactivate_node64
254 #define resign_delete resign_delete64
255 #define resign_insert resign_insert64
256 #define resign_sooner resign_sooner64
257 #define resigned resigned64
258 #define rpz_attach rpz_attach64
259 #define rpz_ready rpz_ready64
260 #define serialize serialize64
261 #define set_index set_index64
262 #define set_ttl set_ttl64
263 #define setcachestats setcachestats64
264 #define setsigningtime setsigningtime64
265 #define settask settask64
266 #define setup_delegation setup_delegation64
267 #define subtractrdataset subtractrdataset64
268 #define ttl_sooner ttl_sooner64
269 #define update_cachestats update_cachestats64
270 #define update_header update_header64
271 #define update_newheader update_newheader64
272 #define update_rrsetstats update_rrsetstats64
273 #define zone_find zone_find64
274 #define zone_findrdataset zone_findrdataset64
275 #define zone_findzonecut zone_findzonecut64
276 #define zone_zonecut_callback zone_zonecut_callback64
277
278 #else
279 typedef isc_uint32_t rbtdb_serial_t;
280 #endif
281
282 typedef isc_uint32_t rbtdb_rdatatype_t;
283
/*
 * Accessors and constructor for rbtdb_rdatatype_t values.
 *
 * RBTDB_RDATATYPE_BASE(type)	low 16 bits of 'type'
 * RBTDB_RDATATYPE_EXT(type)	high 16 bits of 'type'
 * RBTDB_RDATATYPE_VALUE(b, e)	combine base 'b' (low half) with ext 'e'
 *				(high half)
 *
 * In RBTDB_RDATATYPE_VALUE both operands are converted to the unsigned
 * 32-bit rbtdb_rdatatype_t *before* shifting.  A 16-bit 'e' would
 * otherwise be promoted to (signed) int, and shifting a value >= 0x8000
 * left by 16 overflows int, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((((rbtdb_rdatatype_t)(e)) << 16) | ((rbtdb_rdatatype_t)(b)))
287
/*
 * Frequently used combined types.  The RBTDB_RDATATYPE_SIG* values pair
 * base type RRSIG with the named type as the extended part;
 * RBTDB_RDATATYPE_NCACHEANY pairs base 0 with extended type ANY.
 */
#define RBTDB_RDATATYPE_SIGNSEC		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_SIGDDS		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
#define RBTDB_RDATATYPE_NCACHEANY	RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
302
/*
 * The DB lock is an rwlock only when ISC_RWLOCK_USEATOMIC is defined.
 * An rwlock helps lookup performance only if it is implemented
 * efficiently; otherwise it is wiser to stay with a simple mutex, since
 * an rwlock needs more memory and can even slow lookups down through its
 * own overhead (when it internally takes mutex locks).
 */
#ifdef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

#if !DNS_RBTDB_USERWLOCK
/* Mutex flavour: the lock type argument 't' is ignored. */
#define RBTDB_INITLOCK(l)	isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)	DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)	LOCK(l)
#define RBTDB_UNLOCK(l, t)	UNLOCK(l)
#else
/* rwlock flavour. */
#define RBTDB_INITLOCK(l)	isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)	isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)	RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)	RWUNLOCK((l), (t))
#endif
328
329 /*
330 * Since node locking is sensitive to both performance and memory footprint,
331 * we need some trick here. If we have both high-performance rwlock and
332 * high performance and small-memory reference counters, we use rwlock for
333 * node lock and isc_refcount for node references. In this case, we don't have
334 * to protect the access to the counters by locks.
335 * Otherwise, we simply use ordinary mutex lock for node locking, and use
336 * simple integers as reference counters which is protected by the lock.
337 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
338 * NODE_UNLOCK. In some other cases, however, we need to protect reference
339 * counters first and then protect other parts of a node as read-only data.
340 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
341 * provided for these special cases. When we can use the efficient backend
342 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
343 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
344 * section including the access to the reference counter.
345 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
346 * section is also protected by NODE_STRONGLOCK().
347 */
348 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
349 typedef isc_rwlock_t nodelock_t;
350
351 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
352 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
353 #define NODE_LOCK(l, t) RWLOCK((l), (t))
354 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
355 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
356
357 #define NODE_STRONGLOCK(l) ((void)0)
358 #define NODE_STRONGUNLOCK(l) ((void)0)
359 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
360 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
361 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
362 #else
363 typedef isc_mutex_t nodelock_t;
364
365 #define NODE_INITLOCK(l) isc_mutex_init(l)
366 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
367 #define NODE_LOCK(l, t) LOCK(l)
368 #define NODE_UNLOCK(l, t) UNLOCK(l)
369 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
370
371 #define NODE_STRONGLOCK(l) LOCK(l)
372 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
373 #define NODE_WEAKLOCK(l, t) ((void)0)
374 #define NODE_WEAKUNLOCK(l, t) ((void)0)
375 #define NODE_WEAKDOWNGRADE(l) ((void)0)
376 #endif
377
/*%
 * Whether to rate-limit LRU updates to avoid possible thread contention.
 * Performance measurements have shown the cost to be marginal, so the
 * default is 0 both with and without threads.  May be overridden at
 * compile time.
 */
#if !defined(DNS_RBTDB_LIMITLRUUPDATE)
#define DNS_RBTDB_LIMITLRUUPDATE 0
#endif
386
/*
 * Allow clients with a virtual time of up to 5 minutes (300 seconds) in
 * the past to see records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL	300
392
393 struct noqname {
394 dns_name_t name;
395 void * neg;
396 void * negsig;
397 dns_rdatatype_t type;
398 };
399
400 typedef struct acachectl acachectl_t;
401
402 typedef struct rdatasetheader {
403 /*%
404 * Locked by the owning node's lock.
405 */
406 rbtdb_serial_t serial;
407 dns_ttl_t rdh_ttl;
408 rbtdb_rdatatype_t type;
409 isc_uint16_t attributes;
410 dns_trust_t trust;
411 struct noqname *noqname;
412 struct noqname *closest;
413 unsigned int is_mmapped : 1;
414 unsigned int next_is_relative : 1;
415 unsigned int node_is_relative : 1;
416 /*%<
417 * We don't use the LIST macros, because the LIST structure has
418 * both head and tail pointers, and is doubly linked.
419 */
420
421 struct rdatasetheader *next;
422 /*%<
423 * If this is the top header for an rdataset, 'next' points
424 * to the top header for the next rdataset (i.e., the next type).
425 * Otherwise, it points up to the header whose down pointer points
426 * at this header.
427 */
428
429 struct rdatasetheader *down;
430 /*%<
431 * Points to the header for the next older version of
432 * this rdataset.
433 */
434
435 isc_uint32_t count;
436 /*%<
437 * Monotonously increased every time this rdataset is bound so that
438 * it is used as the base of the starting point in DNS responses
439 * when the "cyclic" rrset-order is required. Since the ordering
440 * should not be so crucial, no lock is set for the counter for
441 * performance reasons.
442 */
443
444 acachectl_t *additional_auth;
445 acachectl_t *additional_glue;
446
447 dns_rbtnode_t *node;
448 isc_stdtime_t last_used;
449 ISC_LINK(struct rdatasetheader) link;
450
451 unsigned int heap_index;
452 /*%<
453 * Used for TTL-based cache cleaning.
454 */
455 isc_stdtime_t resign;
456 } rdatasetheader_t;
457
458 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
459 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
460
/*
 * Bit flags stored in the 'attributes' member of an rdataset header.
 */
#define RDATASET_ATTR_NONEXISTENT	0x0001
#define RDATASET_ATTR_STALE		0x0002
#define RDATASET_ATTR_IGNORE		0x0004
#define RDATASET_ATTR_RETAIN		0x0008
#define RDATASET_ATTR_NXDOMAIN		0x0010
#define RDATASET_ATTR_RESIGN		0x0020
#define RDATASET_ATTR_STATCOUNT		0x0040
#define RDATASET_ATTR_OPTOUT		0x0080
#define RDATASET_ATTR_NEGATIVE		0x0100
#define RDATASET_ATTR_PREFETCH		0x0200
471
472 typedef struct acache_cbarg {
473 dns_rdatasetadditional_t type;
474 unsigned int count;
475 dns_db_t *db;
476 dns_dbnode_t *node;
477 rdatasetheader_t *header;
478 } acache_cbarg_t;
479
480 struct acachectl {
481 dns_acacheentry_t *entry;
482 acache_cbarg_t *cbarg;
483 };
484
/*
 * XXX
 * When the cache pre-expires data (because memory is low, or for any
 * other reason) before the rdataset's TTL has expired, it MUST
 * respect the RETAIN bit and not expire the data until its TTL has
 * expired.
 */
492
#undef IGNORE			/* WIN32 winbase.h defines this. */

/*
 * Predicates on the 'attributes' bits of an rdataset header.  Each one
 * yields 1 or 0 and evaluates 'header' exactly once.
 */
#define HEADER_ATTR_ISSET(header, bit)	(((header)->attributes & (bit)) != 0)

#define NONEXISTENT(header)	HEADER_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT)
#define EXISTS(header)		(!NONEXISTENT(header))
#define IGNORE(header)		HEADER_ATTR_ISSET(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header)		HEADER_ATTR_ISSET(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header)	HEADER_ATTR_ISSET(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header)		HEADER_ATTR_ISSET(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header)		HEADER_ATTR_ISSET(header, RDATASET_ATTR_OPTOUT)
#define NEGATIVE(header)	HEADER_ATTR_ISSET(header, RDATASET_ATTR_NEGATIVE)
#define PREFETCH(header)	HEADER_ATTR_ISSET(header, RDATASET_ATTR_PREFETCH)
513
#define DEFAULT_NODE_LOCK_COUNT		7	/*%< Should be prime. */

/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * Choosing this value is a trade-off: if it is too small, contention
 * between threads may become heavier; if it is too large, the LRU purge
 * algorithm won't work well (entries tend to be purged prematurely).
 * The default should work well for most environments, but it can be
 * overridden at compilation time via DNS_RBTDB_CACHE_NODE_LOCK_COUNT.
 * The value must be larger than 1 due to an assumption made by
 * overmem_purge().
 */
#ifndef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#define DEFAULT_CACHE_NODE_LOCK_COUNT	16
#elif DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT	DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
535
536 typedef struct {
537 nodelock_t lock;
538 /* Protected in the refcount routines. */
539 isc_refcount_t references;
540 /* Locked by lock. */
541 isc_boolean_t exiting;
542 } rbtdb_nodelock_t;
543
544 typedef struct rbtdb_changed {
545 dns_rbtnode_t * node;
546 isc_boolean_t dirty;
547 ISC_LINK(struct rbtdb_changed) link;
548 } rbtdb_changed_t;
549
550 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
551
/* Values of the 'secure' member of a database version. */
typedef enum {
	dns_db_insecure,	/* 0 */
	dns_db_partial,		/* 1 */
	dns_db_secure		/* 2 */
} dns_db_secure_t;
557
typedef struct dns_rbtdb dns_rbtdb_t;

/* Reason for expiring a record from cache */
typedef enum {
	expire_lru,	/* 0 */
	expire_ttl,	/* 1 */
	expire_flush	/* 2 */
} expire_t;
566
567 typedef struct rbtdb_version {
568 /* Not locked */
569 rbtdb_serial_t serial;
570 dns_rbtdb_t * rbtdb;
571 /*
572 * Protected in the refcount routines.
573 * XXXJT: should we change the lock policy based on the refcount
574 * performance?
575 */
576 isc_refcount_t references;
577 /* Locked by database lock. */
578 isc_boolean_t writer;
579 isc_boolean_t commit_ok;
580 rbtdb_changedlist_t changed_list;
581 rdatasetheaderlist_t resigned_list;
582 ISC_LINK(struct rbtdb_version) link;
583 dns_db_secure_t secure;
584 isc_boolean_t havensec3;
585 /* NSEC3 parameters */
586 dns_hash_t hash;
587 isc_uint8_t flags;
588 isc_uint16_t iterations;
589 isc_uint8_t salt_length;
590 unsigned char salt[DNS_NSEC3_SALTSIZE];
591 } rbtdb_version_t;
592
593 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
594
595 struct dns_rbtdb {
596 /* Unlocked. */
597 dns_db_t common;
598 /* Locks the data in this struct */
599 #if DNS_RBTDB_USERWLOCK
600 isc_rwlock_t lock;
601 #else
602 isc_mutex_t lock;
603 #endif
604 /* Locks the tree structure (prevents nodes appearing/disappearing) */
605 isc_rwlock_t tree_lock;
606 /* Locks for individual tree nodes */
607 unsigned int node_lock_count;
608 rbtdb_nodelock_t * node_locks;
609 dns_rbtnode_t * origin_node;
610 dns_stats_t * rrsetstats; /* cache DB only */
611 isc_stats_t * cachestats; /* cache DB only */
612 /* Locked by lock. */
613 unsigned int active;
614 isc_refcount_t references;
615 unsigned int attributes;
616 rbtdb_serial_t current_serial;
617 rbtdb_serial_t least_serial;
618 rbtdb_serial_t next_serial;
619 rbtdb_version_t * current_version;
620 rbtdb_version_t * future_version;
621 rbtdb_versionlist_t open_versions;
622 isc_task_t * task;
623 dns_dbnode_t *soanode;
624 dns_dbnode_t *nsnode;
625
626 /*
627 * This is a linked list used to implement the LRU cache. There will
628 * be node_lock_count linked lists here. Nodes in bucket 1 will be
629 * placed on the linked list rdatasets[1].
630 */
631 rdatasetheaderlist_t *rdatasets;
632
633 /*%
634 * Temporary storage for stale cache nodes and dynamically deleted
635 * nodes that await being cleaned up.
636 */
637 rbtnodelist_t *deadnodes;
638
639 /*
640 * Heaps. These are used for TTL based expiry in a cache,
641 * or for zone resigning in a zone DB. hmctx is the memory
642 * context to use for the heap (which differs from the main
643 * database memory context in the case of a cache).
644 */
645 isc_mem_t * hmctx;
646 isc_heap_t **heaps;
647
648 /*
649 * Base values for the mmap() code.
650 */
651 void * mmap_location;
652 size_t mmap_size;
653
654 /* Locked by tree_lock. */
655 dns_rbt_t * tree;
656 dns_rbt_t * nsec;
657 dns_rbt_t * nsec3;
658 dns_rpz_zones_t *rpzs;
659 dns_rpz_num_t rpz_num;
660 dns_rpz_zones_t *load_rpzs;
661
662 /* Unlocked */
663 unsigned int quantum;
664 };
665
/* Bit flags with the RBTDB_ATTR_ prefix. */
#define RBTDB_ATTR_LOADED	0x01
#define RBTDB_ATTR_LOADING	0x02
668
669 /*%
670 * Search Context
671 */
672 typedef struct {
673 dns_rbtdb_t * rbtdb;
674 rbtdb_version_t * rbtversion;
675 rbtdb_serial_t serial;
676 unsigned int options;
677 dns_rbtnodechain_t chain;
678 isc_boolean_t copy_name;
679 isc_boolean_t need_cleanup;
680 isc_boolean_t wild;
681 dns_rbtnode_t * zonecut;
682 rdatasetheader_t * zonecut_rdataset;
683 rdatasetheader_t * zonecut_sigrdataset;
684 dns_fixedname_t zonecut_name;
685 isc_stdtime_t now;
686 } rbtdb_search_t;
687
688 /*%
689 * Load Context
690 */
691 typedef struct {
692 dns_rbtdb_t * rbtdb;
693 isc_stdtime_t now;
694 } rbtdb_load_t;
695
696 static void delete_callback(void *data, void *arg);
697 static void rdataset_disassociate(dns_rdataset_t *rdataset);
698 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
699 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
700 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
701 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
702 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
703 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
704 dns_name_t *name,
705 dns_rdataset_t *neg,
706 dns_rdataset_t *negsig);
707 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
708 dns_name_t *name,
709 dns_rdataset_t *neg,
710 dns_rdataset_t *negsig);
711 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
712 dns_rdatasetadditional_t type,
713 dns_rdatatype_t qtype,
714 dns_acache_t *acache,
715 dns_zone_t **zonep,
716 dns_db_t **dbp,
717 dns_dbversion_t **versionp,
718 dns_dbnode_t **nodep,
719 dns_name_t *fname,
720 dns_message_t *msg,
721 isc_stdtime_t now);
722 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
723 dns_rdatasetadditional_t type,
724 dns_rdatatype_t qtype,
725 dns_acache_t *acache,
726 dns_zone_t *zone,
727 dns_db_t *db,
728 dns_dbversion_t *version,
729 dns_dbnode_t *node,
730 dns_name_t *fname);
731 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
732 dns_rdataset_t *rdataset,
733 dns_rdatasetadditional_t type,
734 dns_rdatatype_t qtype);
735 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
736 isc_stdtime_t now);
737 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
738 isc_stdtime_t now);
739 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
740 isc_boolean_t tree_locked, expire_t reason);
741 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
742 isc_stdtime_t now, isc_boolean_t tree_locked);
743 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
744 rdatasetheader_t *newheader);
745 static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
746 rdatasetheader_t *header);
747 static void prune_tree(isc_task_t *task, isc_event_t *event);
748 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
749 static void rdataset_expire(dns_rdataset_t *rdataset);
750 static void rdataset_clearprefetch(dns_rdataset_t *rdataset);
751
752 static dns_rdatasetmethods_t rdataset_methods = {
753 rdataset_disassociate,
754 rdataset_first,
755 rdataset_next,
756 rdataset_current,
757 rdataset_clone,
758 rdataset_count,
759 NULL,
760 rdataset_getnoqname,
761 NULL,
762 rdataset_getclosest,
763 rdataset_getadditional,
764 rdataset_setadditional,
765 rdataset_putadditional,
766 rdataset_settrust,
767 rdataset_expire,
768 rdataset_clearprefetch
769 };
770
771 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
772 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
773 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
774 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
775 dns_rdataset_t *rdataset);
776
777 static dns_rdatasetitermethods_t rdatasetiter_methods = {
778 rdatasetiter_destroy,
779 rdatasetiter_first,
780 rdatasetiter_next,
781 rdatasetiter_current
782 };
783
784 typedef struct rbtdb_rdatasetiter {
785 dns_rdatasetiter_t common;
786 rdatasetheader_t * current;
787 } rbtdb_rdatasetiter_t;
788
789 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
790 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
791 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
792 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
793 dns_name_t *name);
794 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
795 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
796 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
797 dns_dbnode_t **nodep,
798 dns_name_t *name);
799 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
800 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
801 dns_name_t *name);
802
803 static dns_dbiteratormethods_t dbiterator_methods = {
804 dbiterator_destroy,
805 dbiterator_first,
806 dbiterator_last,
807 dbiterator_seek,
808 dbiterator_prev,
809 dbiterator_next,
810 dbiterator_current,
811 dbiterator_pause,
812 dbiterator_origin
813 };
814
815 #define DELETION_BATCH_MAX 64
816
817 /*
818 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
819 */
820 typedef struct rbtdb_dbiterator {
821 dns_dbiterator_t common;
822 isc_boolean_t paused;
823 isc_boolean_t new_origin;
824 isc_rwlocktype_t tree_locked;
825 isc_result_t result;
826 dns_fixedname_t name;
827 dns_fixedname_t origin;
828 dns_rbtnodechain_t chain;
829 dns_rbtnodechain_t nsec3chain;
830 dns_rbtnodechain_t *current;
831 dns_rbtnode_t *node;
832 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
833 int delete;
834 isc_boolean_t nsec3only;
835 isc_boolean_t nonsec3;
836 } rbtdb_dbiterator_t;
837
838
/*
 * Database kind tests on the common attributes: each yields 1 when the
 * corresponding DNS_DBATTR_* bit is set, 0 otherwise.
 */
#define IS_STUB(rbtdb) \
	(((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
#define IS_CACHE(rbtdb) \
	(((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
841
842 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
843 isc_event_t *event);
844 static void overmem(dns_db_t *db, isc_boolean_t overmem);
845 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
846
/* 32-byte, zero-filled buffer for the file version string. */
static char FILE_VERSION[32] = { '\0' };
849
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * It is deliberately not locked, as simultaneous updates do not matter.
 *
 * Note:
 *	Both init_count and header->count can be ISC_UINT32_MAX.
 *	The count on the returned rdataset however can't be, as
 *	that value indicates that the database does not implement
 *	cyclic processing.
 */
static unsigned int init_count;
862
863 /*
864 * Locking
865 *
866 * If a routine is going to lock more than one lock in this module, then
867 * the locking must be done in the following order:
868 *
869 * Tree Lock
870 *
871 * Node Lock (Only one from the set may be locked at one time by
872 * any caller)
873 *
874 * Database Lock
875 *
876 * Failure to follow this hierarchy can result in deadlock.
877 */
878
879 /*
880 * Deleting Nodes
881 *
882 * For zone databases the node for the origin of the zone MUST NOT be deleted.
883 */
884
885 /*
886 * Debugging routines
887 */
#ifdef DEBUG
/*
 * Debugging aid: print "<desc>: " followed by the hexadecimal text of
 * the 'size' bytes at 'data' and a newline to stderr.  The data is
 * converted in pieces of at most BUFSIZ bytes, so any length can be
 * dumped through the fixed-size text buffer.
 */
static void
hexdump(const char *desc, unsigned char *data, size_t size) {
	char text[BUFSIZ * 2 + 1];	/* two hex digits per byte + NUL */
	isc_buffer_t target;
	isc_region_t chunk;
	isc_result_t result;
	size_t take;

	fprintf(stderr, "%s: ", desc);
	for (;;) {
		take = (size > BUFSIZ) ? BUFSIZ : size;

		isc_buffer_init(&target, text, sizeof(text));
		chunk.base = data;
		chunk.length = take;
		result = isc_hex_totext(&chunk, 0, "", &target);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
		/* NUL-terminate the text so it can be printed with %s. */
		isc_buffer_putuint8(&target, 0);
		fprintf(stderr, "%s", text);

		data += take;
		size -= take;
		if (size == 0)
			break;
	}
	fprintf(stderr, "\n");
}
#endif
912
913
914 /*
915 * DB Routines
916 */
917
918 static void
attach(dns_db_t * source,dns_db_t ** targetp)919 attach(dns_db_t *source, dns_db_t **targetp) {
920 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
921
922 REQUIRE(VALID_RBTDB(rbtdb));
923
924 isc_refcount_increment(&rbtdb->references, NULL);
925
926 *targetp = source;
927 }
928
929 static void
free_rbtdb_callback(isc_task_t * task,isc_event_t * event)930 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
931 dns_rbtdb_t *rbtdb = event->ev_arg;
932
933 UNUSED(task);
934
935 free_rbtdb(rbtdb, ISC_TRUE, event);
936 }
937
938 static void
update_cachestats(dns_rbtdb_t * rbtdb,isc_result_t result)939 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
940 INSIST(IS_CACHE(rbtdb));
941
942 if (rbtdb->cachestats == NULL)
943 return;
944
945 switch (result) {
946 case ISC_R_SUCCESS:
947 case DNS_R_CNAME:
948 case DNS_R_DNAME:
949 case DNS_R_DELEGATION:
950 case DNS_R_NCACHENXDOMAIN:
951 case DNS_R_NCACHENXRRSET:
952 isc_stats_increment(rbtdb->cachestats,
953 dns_cachestatscounter_hits);
954 break;
955 default:
956 isc_stats_increment(rbtdb->cachestats,
957 dns_cachestatscounter_misses);
958 }
959 }
960
961 static void
update_rrsetstats(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_boolean_t increment)962 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
963 isc_boolean_t increment)
964 {
965 dns_rdatastatstype_t statattributes = 0;
966 dns_rdatastatstype_t base = 0;
967 dns_rdatastatstype_t type;
968
969 /* At the moment we count statistics only for cache DB */
970 INSIST(IS_CACHE(rbtdb));
971
972 if (NEGATIVE(header)) {
973 if (NXDOMAIN(header))
974 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
975 else {
976 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
977 base = RBTDB_RDATATYPE_EXT(header->type);
978 }
979 } else
980 base = RBTDB_RDATATYPE_BASE(header->type);
981
982 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
983 if (increment)
984 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
985 else
986 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
987 }
988
989 static void
set_ttl(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,dns_ttl_t newttl)990 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
991 int idx;
992 isc_heap_t *heap;
993 dns_ttl_t oldttl;
994
995 oldttl = header->rdh_ttl;
996 header->rdh_ttl = newttl;
997
998 if (!IS_CACHE(rbtdb))
999 return;
1000
1001 /*
1002 * It's possible the rbtdb is not a cache. If this is the case,
1003 * we will not have a heap, and we move on. If we do, though,
1004 * we might need to adjust things.
1005 */
1006 if (header->heap_index == 0 || newttl == oldttl)
1007 return;
1008 idx = header->node->locknum;
1009 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
1010 return;
1011 heap = rbtdb->heaps[idx];
1012
1013 if (newttl < oldttl)
1014 isc_heap_increased(heap, header->heap_index);
1015 else
1016 isc_heap_decreased(heap, header->heap_index);
1017 }
1018
1019 /*%
1020 * These functions allow the heap code to rank the priority of each
1021 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
1022 */
1023 static isc_boolean_t
ttl_sooner(void * v1,void * v2)1024 ttl_sooner(void *v1, void *v2) {
1025 rdatasetheader_t *h1 = v1;
1026 rdatasetheader_t *h2 = v2;
1027
1028 if (h1->rdh_ttl < h2->rdh_ttl)
1029 return (ISC_TRUE);
1030 return (ISC_FALSE);
1031 }
1032
1033 static isc_boolean_t
resign_sooner(void * v1,void * v2)1034 resign_sooner(void *v1, void *v2) {
1035 rdatasetheader_t *h1 = v1;
1036 rdatasetheader_t *h2 = v2;
1037
1038 if (isc_serial_lt(h1->resign, h2->resign))
1039 return (ISC_TRUE);
1040 return (ISC_FALSE);
1041 }
1042
1043 /*%
1044 * This function sets the heap index into the header.
1045 */
1046 static void
set_index(void * what,unsigned int index)1047 set_index(void *what, unsigned int index) {
1048 rdatasetheader_t *h = what;
1049
1050 h->heap_index = index;
1051 }
1052
1053 /*%
1054 * Work out how many nodes can be deleted in the time between two
1055 * requests to the nameserver. Smooth the resulting number and use it
1056 * as a estimate for the number of nodes to be deleted in the next
1057 * iteration.
1058 */
1059 static unsigned int
adjust_quantum(unsigned int old,isc_time_t * start)1060 adjust_quantum(unsigned int old, isc_time_t *start) {
1061 unsigned int pps = dns_pps; /* packets per second */
1062 unsigned int interval;
1063 isc_uint64_t usecs;
1064 isc_time_t end;
1065 unsigned int new;
1066
1067 if (pps < 100)
1068 pps = 100;
1069 isc_time_now(&end);
1070
1071 interval = 1000000 / pps; /* interval in usec */
1072 if (interval == 0)
1073 interval = 1;
1074 usecs = isc_time_microdiff(&end, start);
1075 if (usecs == 0) {
1076 /*
1077 * We were unable to measure the amount of time taken.
1078 * Double the nodes deleted next time.
1079 */
1080 old *= 2;
1081 if (old > 1000)
1082 old = 1000;
1083 return (old);
1084 }
1085 new = old * interval;
1086 new /= (unsigned int)usecs;
1087 if (new == 0)
1088 new = 1;
1089 else if (new > 1000)
1090 new = 1000;
1091
1092 /* Smooth */
1093 new = (new + old * 3) / 4;
1094
1095 if (new != old)
1096 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1097 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1098 "adjust_quantum: old=%d, new=%d", old, new);
1099
1100 return (new);
1101 }
1102
/*
 * Tear down 'rbtdb' and release its memory.
 *
 * 'log' selects whether a "done free_rbtdb" debug message is written
 * once the trees are gone.  'event' is NULL on the initial call; when
 * this function is re-entered through free_rbtdb_callback() it is the
 * event that was posted to resume the work, and it is either sent again
 * or freed here.
 *
 * The trees are destroyed through dns_rbt_destroy2() with
 * 'rbtdb->quantum'.  When that returns ISC_R_QUOTA the rest of the work
 * is deferred by sending an event to 'rbtdb->task' and returning early;
 * everything after the loop runs only once all three trees are NULL.
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
	unsigned int i;
	isc_ondestroy_t ondest;
	isc_result_t result;
	char buf[DNS_NAME_FORMATSIZE];
	dns_rbt_t **treep;
	isc_time_t start;

	/*
	 * NOTE(review): overmem() is defined elsewhere; the meaning of the
	 * (isc_boolean_t)-1 argument cannot be determined from here.
	 */
	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
		overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	/*
	 * Release the current version.  The reference dropped here must be
	 * the last one.
	 */
	if (rbtdb->current_version != NULL) {
		unsigned int refs;

		isc_refcount_decrement(&rbtdb->current_version->references,
				       &refs);
		INSIST(refs == 0);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}

	/*
	 * We assume the number of remaining dead nodes is reasonably small;
	 * the overhead of unlinking all nodes here should be negligible.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		dns_rbtnode_t *node;

		node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		while (node != NULL) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
			node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		}
	}

	/*
	 * Initial call only (not a resumed callback): use a quantum of 100
	 * when there is a task that can continue the work later, otherwise
	 * 0 (presumably "no limit" -- confirm against dns_rbt_destroy2()).
	 */
	if (event == NULL)
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;

	for (;;) {
		/*
		 * pick the next tree to (start to) destroy
		 */
		treep = &rbtdb->tree;
		if (*treep == NULL) {
			treep = &rbtdb->nsec;
			if (*treep == NULL) {
				treep = &rbtdb->nsec3;
				/*
				 * we're finished after clear cutting
				 */
				if (*treep == NULL)
					break;
			}
		}

		isc_time_now(&start);
		result = dns_rbt_destroy2(treep, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			/*
			 * The quantum was used up before the tree was
			 * gone.  Re-tune the quantum from the time this
			 * step took and schedule the continuation.
			 */
			INSIST(rbtdb->task != NULL);
			if (rbtdb->quantum != 0)
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			if (event == NULL)
				event = isc_event_allocate(rbtdb->common.mctx,
							   NULL,
							 DNS_EVENT_FREESTORAGE,
							   free_rbtdb_callback,
							   rbtdb,
							   sizeof(isc_event_t));
			/*
			 * No event could be allocated: keep destroying in
			 * this call and retry the allocation the next time
			 * the quantum runs out.
			 */
			if (event == NULL)
				continue;
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && *treep == NULL);
	}

	/* All trees are gone; a resume event is no longer needed. */
	if (event != NULL)
		isc_event_free(&event);
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin))
			dns_name_format(&rbtdb->common.origin, buf,
					sizeof(buf));
		else
			strcpy(buf, "<UNKNOWN>");
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", buf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin))
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		isc_refcount_destroy(&rbtdb->node_locks[i].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
	}

	/*
	 * Clean up LRU / re-signing order lists.
	 */
	if (rbtdb->rdatasets != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
			    rbtdb->node_lock_count *
			    sizeof(rdatasetheaderlist_t));
	}
	/*
	 * Clean up dead node buckets.
	 */
	if (rbtdb->deadnodes != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
		    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
	}
	/*
	 * Clean up heap objects.  Note that the heap array is returned to
	 * 'hmctx', not to 'common.mctx'.
	 */
	if (rbtdb->heaps != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			isc_heap_destroy(&rbtdb->heaps[i]);
		isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
	}

	if (rbtdb->rrsetstats != NULL)
		dns_stats_detach(&rbtdb->rrsetstats);
	if (rbtdb->cachestats != NULL)
		isc_stats_detach(&rbtdb->cachestats);

	if (rbtdb->load_rpzs != NULL) {
		/*
		 * We must be cleaning up after a failed zone loading.
		 */
		REQUIRE(rbtdb->rpzs != NULL &&
			rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->load_rpzs);
	}
	if (rbtdb->rpzs != NULL) {
		REQUIRE(rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->rpzs);
	}

	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL)
		isc_task_detach(&rbtdb->task);

	RBTDB_DESTROYLOCK(&rbtdb->lock);
	/* Invalidate the magic numbers so stale handles fail validation. */
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	/*
	 * Copy the on-destroy list out of the structure: the notification
	 * below runs after 'rbtdb' itself has been returned to the memory
	 * context.
	 */
	ondest = rbtdb->common.ondest;
	isc_mem_detach(&rbtdb->hmctx);

	if (rbtdb->mmap_location != NULL)
		isc_file_munmap(rbtdb->mmap_location,
				(size_t) rbtdb->mmap_size);

	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
	isc_ondestroy_notify(&ondest, rbtdb);
}
1272
1273 static inline void
maybe_free_rbtdb(dns_rbtdb_t * rbtdb)1274 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
1275 isc_boolean_t want_free = ISC_FALSE;
1276 unsigned int i;
1277 unsigned int inactive = 0;
1278
1279 /* XXX check for open versions here */
1280
1281 if (rbtdb->soanode != NULL)
1282 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1283 if (rbtdb->nsnode != NULL)
1284 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1285
1286 /*
1287 * Even though there are no external direct references, there still
1288 * may be nodes in use.
1289 */
1290 for (i = 0; i < rbtdb->node_lock_count; i++) {
1291 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1292 rbtdb->node_locks[i].exiting = ISC_TRUE;
1293 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1294 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1295 == 0) {
1296 inactive++;
1297 }
1298 }
1299
1300 if (inactive != 0) {
1301 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1302 rbtdb->active -= inactive;
1303 if (rbtdb->active == 0)
1304 want_free = ISC_TRUE;
1305 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1306 if (want_free) {
1307 char buf[DNS_NAME_FORMATSIZE];
1308 if (dns_name_dynamic(&rbtdb->common.origin))
1309 dns_name_format(&rbtdb->common.origin, buf,
1310 sizeof(buf));
1311 else
1312 strcpy(buf, "<UNKNOWN>");
1313 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1314 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1315 "calling free_rbtdb(%s)", buf);
1316 free_rbtdb(rbtdb, ISC_TRUE, NULL);
1317 }
1318 }
1319 }
1320
1321 static void
detach(dns_db_t ** dbp)1322 detach(dns_db_t **dbp) {
1323 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1324 unsigned int refs;
1325
1326 REQUIRE(VALID_RBTDB(rbtdb));
1327
1328 isc_refcount_decrement(&rbtdb->references, &refs);
1329
1330 if (refs == 0)
1331 maybe_free_rbtdb(rbtdb);
1332
1333 *dbp = NULL;
1334 }
1335
1336 static void
currentversion(dns_db_t * db,dns_dbversion_t ** versionp)1337 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1338 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1339 rbtdb_version_t *version;
1340 unsigned int refs;
1341
1342 REQUIRE(VALID_RBTDB(rbtdb));
1343
1344 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1345 version = rbtdb->current_version;
1346 isc_refcount_increment(&version->references, &refs);
1347 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1348
1349 *versionp = (dns_dbversion_t *)version;
1350 }
1351
1352 static inline rbtdb_version_t *
allocate_version(isc_mem_t * mctx,rbtdb_serial_t serial,unsigned int references,isc_boolean_t writer)1353 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1354 unsigned int references, isc_boolean_t writer)
1355 {
1356 isc_result_t result;
1357 rbtdb_version_t *version;
1358
1359 version = isc_mem_get(mctx, sizeof(*version));
1360 if (version == NULL)
1361 return (NULL);
1362 version->serial = serial;
1363 result = isc_refcount_init(&version->references, references);
1364 if (result != ISC_R_SUCCESS) {
1365 isc_mem_put(mctx, version, sizeof(*version));
1366 return (NULL);
1367 }
1368 version->writer = writer;
1369 version->commit_ok = ISC_FALSE;
1370 ISC_LIST_INIT(version->changed_list);
1371 ISC_LIST_INIT(version->resigned_list);
1372 ISC_LINK_INIT(version, link);
1373
1374 return (version);
1375 }
1376
1377 static isc_result_t
newversion(dns_db_t * db,dns_dbversion_t ** versionp)1378 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1379 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1380 rbtdb_version_t *version;
1381
1382 REQUIRE(VALID_RBTDB(rbtdb));
1383 REQUIRE(versionp != NULL && *versionp == NULL);
1384 REQUIRE(rbtdb->future_version == NULL);
1385
1386 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1387 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1388 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1389 ISC_TRUE);
1390 if (version != NULL) {
1391 version->rbtdb = rbtdb;
1392 version->commit_ok = ISC_TRUE;
1393 version->secure = rbtdb->current_version->secure;
1394 version->havensec3 = rbtdb->current_version->havensec3;
1395 if (version->havensec3) {
1396 version->flags = rbtdb->current_version->flags;
1397 version->iterations =
1398 rbtdb->current_version->iterations;
1399 version->hash = rbtdb->current_version->hash;
1400 version->salt_length =
1401 rbtdb->current_version->salt_length;
1402 memmove(version->salt, rbtdb->current_version->salt,
1403 version->salt_length);
1404 } else {
1405 version->flags = 0;
1406 version->iterations = 0;
1407 version->hash = 0;
1408 version->salt_length = 0;
1409 memset(version->salt, 0, sizeof(version->salt));
1410 }
1411 rbtdb->next_serial++;
1412 rbtdb->future_version = version;
1413 }
1414 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1415
1416 if (version == NULL)
1417 return (ISC_R_NOMEMORY);
1418
1419 *versionp = version;
1420
1421 return (ISC_R_SUCCESS);
1422 }
1423
1424 static void
attachversion(dns_db_t * db,dns_dbversion_t * source,dns_dbversion_t ** targetp)1425 attachversion(dns_db_t *db, dns_dbversion_t *source,
1426 dns_dbversion_t **targetp)
1427 {
1428 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1429 rbtdb_version_t *rbtversion = source;
1430 unsigned int refs;
1431
1432 REQUIRE(VALID_RBTDB(rbtdb));
1433 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1434
1435 isc_refcount_increment(&rbtversion->references, &refs);
1436 INSIST(refs > 1);
1437
1438 *targetp = rbtversion;
1439 }
1440
1441 static rbtdb_changed_t *
add_changed(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,dns_rbtnode_t * node)1442 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1443 dns_rbtnode_t *node)
1444 {
1445 rbtdb_changed_t *changed;
1446 unsigned int refs;
1447
1448 /*
1449 * Caller must be holding the node lock if its reference must be
1450 * protected by the lock.
1451 */
1452
1453 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1454
1455 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1456
1457 REQUIRE(version->writer);
1458
1459 if (changed != NULL) {
1460 dns_rbtnode_refincrement(node, &refs);
1461 INSIST(refs != 0);
1462 changed->node = node;
1463 changed->dirty = ISC_FALSE;
1464 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1465 } else
1466 version->commit_ok = ISC_FALSE;
1467
1468 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1469
1470 return (changed);
1471 }
1472
1473 static void
free_acachearray(isc_mem_t * mctx,rdatasetheader_t * header,acachectl_t * array)1474 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1475 acachectl_t *array)
1476 {
1477 unsigned int count;
1478 unsigned int i;
1479 unsigned char *raw; /* RDATASLAB */
1480
1481 /*
1482 * The caller must be holding the corresponding node lock.
1483 */
1484
1485 if (array == NULL)
1486 return;
1487
1488 raw = (unsigned char *)header + sizeof(*header);
1489 count = raw[0] * 256 + raw[1];
1490
1491 /*
1492 * Sanity check: since an additional cache entry has a reference to
1493 * the original DB node (in the callback arg), there should be no
1494 * acache entries when the node can be freed.
1495 */
1496 for (i = 0; i < count; i++)
1497 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1498
1499 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1500 }
1501
1502 static inline void
free_noqname(isc_mem_t * mctx,struct noqname ** noqname)1503 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1504
1505 if (dns_name_dynamic(&(*noqname)->name))
1506 dns_name_free(&(*noqname)->name, mctx);
1507 if ((*noqname)->neg != NULL)
1508 isc_mem_put(mctx, (*noqname)->neg,
1509 dns_rdataslab_size((*noqname)->neg, 0));
1510 if ((*noqname)->negsig != NULL)
1511 isc_mem_put(mctx, (*noqname)->negsig,
1512 dns_rdataslab_size((*noqname)->negsig, 0));
1513 isc_mem_put(mctx, *noqname, sizeof(**noqname));
1514 *noqname = NULL;
1515 }
1516
1517 static inline void
init_rdataset(dns_rbtdb_t * rbtdb,rdatasetheader_t * h)1518 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
1519 ISC_LINK_INIT(h, link);
1520 h->heap_index = 0;
1521 h->is_mmapped = 0;
1522 h->next_is_relative = 0;
1523 h->node_is_relative = 0;
1524
1525 #if TRACE_HEADER
1526 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1527 fprintf(stderr, "initialized header: %p\n", h);
1528 #else
1529 UNUSED(rbtdb);
1530 #endif
1531 }
1532
1533 /*
1534 * Update the copied values of 'next' and 'node' if they are relative.
1535 */
1536 static void
update_newheader(rdatasetheader_t * new,rdatasetheader_t * old)1537 update_newheader(rdatasetheader_t *new, rdatasetheader_t *old) {
1538 char *p;
1539
1540 if (old->next_is_relative) {
1541 p = (char *) old;
1542 p += (uintptr_t)old->next;
1543 new->next = (rdatasetheader_t *)p;
1544 }
1545 if (old->node_is_relative) {
1546 p = (char *) old;
1547 p += (uintptr_t)old->node;
1548 new->node = (dns_rbtnode_t *)p;
1549 }
1550 }
1551
1552 static inline rdatasetheader_t *
new_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx)1553 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
1554 rdatasetheader_t *h;
1555
1556 h = isc_mem_get(mctx, sizeof(*h));
1557 if (h == NULL)
1558 return (NULL);
1559
1560 #if TRACE_HEADER
1561 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1562 fprintf(stderr, "allocated header: %p\n", h);
1563 #endif
1564 init_rdataset(rbtdb, h);
1565 return (h);
1566 }
1567
1568 static inline void
free_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * rdataset)1569 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
1570 unsigned int size;
1571 int idx;
1572
1573 if (EXISTS(rdataset) &&
1574 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1575 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1576 }
1577
1578 idx = rdataset->node->locknum;
1579 if (ISC_LINK_LINKED(rdataset, link)) {
1580 INSIST(IS_CACHE(rbtdb));
1581 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1582 }
1583
1584 if (rdataset->heap_index != 0)
1585 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1586 rdataset->heap_index = 0;
1587
1588 if (rdataset->noqname != NULL)
1589 free_noqname(mctx, &rdataset->noqname);
1590 if (rdataset->closest != NULL)
1591 free_noqname(mctx, &rdataset->closest);
1592
1593 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1594 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1595
1596 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1597 size = sizeof(*rdataset);
1598 else
1599 size = dns_rdataslab_size((unsigned char *)rdataset,
1600 sizeof(*rdataset));
1601
1602 if (rdataset->is_mmapped == 1)
1603 return;
1604
1605 isc_mem_put(mctx, rdataset, size);
1606 }
1607
1608 static inline void
rollback_node(dns_rbtnode_t * node,rbtdb_serial_t serial)1609 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1610 rdatasetheader_t *header, *dcurrent;
1611 isc_boolean_t make_dirty = ISC_FALSE;
1612
1613 /*
1614 * Caller must hold the node lock.
1615 */
1616
1617 /*
1618 * We set the IGNORE attribute on rdatasets with serial number
1619 * 'serial'. When the reference count goes to zero, these rdatasets
1620 * will be cleaned up; until that time, they will be ignored.
1621 */
1622 for (header = node->data; header != NULL; header = header->next) {
1623 if (header->serial == serial) {
1624 header->attributes |= RDATASET_ATTR_IGNORE;
1625 make_dirty = ISC_TRUE;
1626 }
1627 for (dcurrent = header->down;
1628 dcurrent != NULL;
1629 dcurrent = dcurrent->down) {
1630 if (dcurrent->serial == serial) {
1631 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1632 make_dirty = ISC_TRUE;
1633 }
1634 }
1635 }
1636 if (make_dirty)
1637 node->dirty = 1;
1638 }
1639
1640 static inline void
mark_stale_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header)1641 mark_stale_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
1642
1643 /*
1644 * If we are already stale there is nothing to do.
1645 */
1646 if ((header->attributes & RDATASET_ATTR_STALE) != 0)
1647 return;
1648
1649 header->attributes |= RDATASET_ATTR_STALE;
1650 header->node->dirty = 1;
1651
1652 /*
1653 * If we have not been counted then there is nothing to do.
1654 */
1655 if ((header->attributes & RDATASET_ATTR_STATCOUNT) == 0)
1656 return;
1657
1658 if (EXISTS(header))
1659 update_rrsetstats(rbtdb, header, ISC_TRUE);
1660 }
1661
1662 static inline void
clean_stale_headers(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * top)1663 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1664 {
1665 rdatasetheader_t *d, *down_next;
1666
1667 for (d = top->down; d != NULL; d = down_next) {
1668 down_next = d->down;
1669 free_rdataset(rbtdb, mctx, d);
1670 }
1671 top->down = NULL;
1672 }
1673
1674 static inline void
clean_cache_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1675 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1676 rdatasetheader_t *current, *top_prev, *top_next;
1677 isc_mem_t *mctx = rbtdb->common.mctx;
1678
1679 /*
1680 * Caller must be holding the node lock.
1681 */
1682
1683 top_prev = NULL;
1684 for (current = node->data; current != NULL; current = top_next) {
1685 top_next = current->next;
1686 clean_stale_headers(rbtdb, mctx, current);
1687 /*
1688 * If current is nonexistent or stale, we can clean it up.
1689 */
1690 if ((current->attributes &
1691 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1692 if (top_prev != NULL)
1693 top_prev->next = current->next;
1694 else
1695 node->data = current->next;
1696 free_rdataset(rbtdb, mctx, current);
1697 } else
1698 top_prev = current;
1699 }
1700 node->dirty = 0;
1701 }
1702
/*
 * Remove rdataset versions of a zone node that are no longer needed.
 *
 * The node's data is a list of top-level headers linked through
 * 'next'; each of them heads a chain of older versions linked through
 * 'down'.  For every top-level header this function:
 *   - frees 'down' entries that duplicate their parent's serial or
 *     carry the IGNORE attribute;
 *   - if the top-level header itself is IGNOREd, removes it, promoting
 *     its first 'down' entry (if any) to the top level;
 *   - frees the first 'down' entry whose serial is below
 *     'least_serial' together with everything beneath it;
 *   - frees a top-level NONEXISTENT header that has no older versions.
 * The node's dirty flag is cleared unless some header still has older
 * versions below it.
 *
 * 'least_serial' must be non-zero.
 */
static inline void
clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		rbtdb_serial_t least_serial)
{
	rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
	rdatasetheader_t *top_prev, *top_next;
	isc_mem_t *mctx = rbtdb->common.mctx;
	isc_boolean_t still_dirty = ISC_FALSE;

	/*
	 * Caller must be holding the node lock.
	 */
	REQUIRE(least_serial != 0);

	top_prev = NULL;
	for (current = node->data; current != NULL; current = top_next) {
		/* Saved up front: 'current' may be freed below. */
		top_next = current->next;

		/*
		 * First, we clean up any instances of multiple rdatasets
		 * with the same serial number, or that have the IGNORE
		 * attribute.
		 */
		dparent = current;
		for (dcurrent = current->down;
		     dcurrent != NULL;
		     dcurrent = down_next) {
			down_next = dcurrent->down;
			INSIST(dcurrent->serial <= dparent->serial);
			if (dcurrent->serial == dparent->serial ||
			    IGNORE(dcurrent)) {
				/*
				 * Splice 'dcurrent' out of the chain.  Note
				 * that the 'next' field of the entry below
				 * is pointed back at its new parent.
				 */
				if (down_next != NULL)
					down_next->next = dparent;
				dparent->down = down_next;
				free_rdataset(rbtdb, mctx, dcurrent);
			} else
				dparent = dcurrent;
		}

		/*
		 * We've now eliminated all IGNORE datasets with the possible
		 * exception of current, which we now check.
		 */
		if (IGNORE(current)) {
			down_next = current->down;
			if (down_next == NULL) {
				if (top_prev != NULL)
					top_prev->next = current->next;
				else
					node->data = current->next;
				free_rdataset(rbtdb, mctx, current);
				/*
				 * current no longer exists, so we can
				 * just continue with the loop.
				 */
				continue;
			} else {
				/*
				 * Pull up current->down, making it the new
				 * current.
				 */
				if (top_prev != NULL)
					top_prev->next = down_next;
				else
					node->data = down_next;
				down_next->next = top_next;
				free_rdataset(rbtdb, mctx, current);
				current = down_next;
			}
		}

		/*
		 * We now try to find the first down node less than the
		 * least serial.
		 */
		dparent = current;
		for (dcurrent = current->down;
		     dcurrent != NULL;
		     dcurrent = down_next) {
			down_next = dcurrent->down;
			if (dcurrent->serial < least_serial)
				break;
			dparent = dcurrent;
		}

		/*
		 * If there is such an rdataset, delete it and any older
		 * versions.
		 */
		if (dcurrent != NULL) {
			do {
				down_next = dcurrent->down;
				INSIST(dcurrent->serial <= least_serial);
				free_rdataset(rbtdb, mctx, dcurrent);
				dcurrent = down_next;
			} while (dcurrent != NULL);
			/* Terminate the chain at the last entry kept. */
			dparent->down = NULL;
		}

		/*
		 * Note.  The serial number of 'current' might be less than
		 * least_serial too, but we cannot delete it because it is
		 * the most recent version, unless it is a NONEXISTENT
		 * rdataset.
		 */
		if (current->down != NULL) {
			/* Older versions remain: another pass is needed. */
			still_dirty = ISC_TRUE;
			top_prev = current;
		} else {
			/*
			 * If this is a NONEXISTENT rdataset, we can delete it.
			 */
			if (NONEXISTENT(current)) {
				if (top_prev != NULL)
					top_prev->next = current->next;
				else
					node->data = current->next;
				free_rdataset(rbtdb, mctx, current);
			} else
				top_prev = current;
		}
	}
	if (!still_dirty)
		node->dirty = 0;
}
1828
1829 static void
delete_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1830 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1831 dns_rbtnode_t *nsecnode;
1832 dns_fixedname_t fname;
1833 dns_name_t *name;
1834 isc_result_t result = ISC_R_UNEXPECTED;
1835 unsigned int node_has_rpz;
1836
1837 INSIST(!ISC_LINK_LINKED(node, deadlink));
1838
1839 switch (node->nsec) {
1840 case DNS_RBT_NSEC_NORMAL:
1841 /*
1842 * Though this may be wasteful, it has to be done before
1843 * node is deleted.
1844 */
1845 dns_fixedname_init(&fname);
1846 name = dns_fixedname_name(&fname);
1847 dns_rbt_fullnamefromnode(node, name);
1848
1849 node_has_rpz = node->rpz;
1850 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1851 if (result == ISC_R_SUCCESS &&
1852 rbtdb->rpzs != NULL && node_has_rpz)
1853 dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
1854 break;
1855 case DNS_RBT_NSEC_HAS_NSEC:
1856 dns_fixedname_init(&fname);
1857 name = dns_fixedname_name(&fname);
1858 dns_rbt_fullnamefromnode(node, name);
1859 /*
1860 * Delete the corresponding node from the auxiliary NSEC
1861 * tree before deleting from the main tree.
1862 */
1863 nsecnode = NULL;
1864 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1865 NULL, DNS_RBTFIND_EMPTYDATA,
1866 NULL, NULL);
1867 if (result != ISC_R_SUCCESS) {
1868 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1869 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1870 "delete_node: "
1871 "dns_rbt_findnode(nsec): %s",
1872 isc_result_totext(result));
1873 } else {
1874 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1875 ISC_FALSE);
1876 if (result != ISC_R_SUCCESS) {
1877 isc_log_write(dns_lctx,
1878 DNS_LOGCATEGORY_DATABASE,
1879 DNS_LOGMODULE_CACHE,
1880 ISC_LOG_WARNING,
1881 "delete_node(): "
1882 "dns_rbt_deletenode(nsecnode): %s",
1883 isc_result_totext(result));
1884 }
1885 }
1886 node_has_rpz = node->rpz;
1887 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1888 if (result == ISC_R_SUCCESS &&
1889 rbtdb->rpzs != NULL && node_has_rpz)
1890 dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
1891 break;
1892 case DNS_RBT_NSEC_NSEC:
1893 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1894 break;
1895 case DNS_RBT_NSEC_NSEC3:
1896 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1897 break;
1898 }
1899 if (result != ISC_R_SUCCESS) {
1900 isc_log_write(dns_lctx,
1901 DNS_LOGCATEGORY_DATABASE,
1902 DNS_LOGMODULE_CACHE,
1903 ISC_LOG_WARNING,
1904 "delete_node(): "
1905 "dns_rbt_deletenode: %s",
1906 isc_result_totext(result));
1907 }
1908 }
1909
1910 /*%
1911 * Clean up dead nodes. These are nodes which have no references, and
1912 * have no data. They are dead but we could not or chose not to delete
1913 * them when we deleted all the data at that node because we did not want
1914 * to wait for the tree write lock.
1915 *
1916 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1917 */
1918 static void
cleanup_dead_nodes(dns_rbtdb_t * rbtdb,int bucketnum)1919 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1920 dns_rbtnode_t *node;
1921 int count = 10; /* XXXJT: should be adjustable */
1922
1923 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1924 while (node != NULL && count > 0) {
1925 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1926
1927 /*
1928 * Since we're holding a tree write lock, it should be
1929 * impossible for this node to be referenced by others.
1930 */
1931 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1932 node->data == NULL);
1933
1934 delete_node(rbtdb, node);
1935
1936 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1937 count--;
1938 }
1939 }
1940
1941 /*
1942 * Caller must be holding the node lock.
1943 */
1944 static inline void
new_reference(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1945 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1946 unsigned int lockrefs, noderefs;
1947 isc_refcount_t *lockref;
1948
1949 INSIST(!ISC_LINK_LINKED(node, deadlink));
1950 dns_rbtnode_refincrement0(node, &noderefs);
1951 if (noderefs == 1) { /* this is the first reference to the node */
1952 lockref = &rbtdb->node_locks[node->locknum].references;
1953 isc_refcount_increment0(lockref, &lockrefs);
1954 INSIST(lockrefs != 0);
1955 }
1956 INSIST(noderefs != 0);
1957 }
1958
/*
 * This function is assumed to be called when a node is newly referenced
 * and can be in the deadnode list.  In that case the node must be retrieved
 * from the list because it is going to be used.  In addition, if the caller
 * happens to hold a write lock on the tree, it's a good chance to purge dead
 * nodes.
 * Note: while a new reference is gained in multiple places, there are only very
 * few cases where the node can be in the deadnode list (only empty nodes can
 * have been added to the list).
 *
 * 'treelocktype' is the type of tree lock the caller holds; dead nodes
 * are purged only when it is isc_rwlocktype_write.  A reference to
 * 'node' is always added before returning.
 */
static inline void
reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		isc_rwlocktype_t treelocktype)
{
	isc_rwlocktype_t locktype = isc_rwlocktype_read;
	nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
	isc_boolean_t maybe_cleanup = ISC_FALSE;

	POST(locktype);

	/* Start with the "weak" lock taken for reading. */
	NODE_STRONGLOCK(nodelock);
	NODE_WEAKLOCK(nodelock, locktype);

	/*
	 * Check if we can possibly cleanup the dead node.  If so, upgrade
	 * the node lock below to perform the cleanup.
	 */
	if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
	    treelocktype == isc_rwlocktype_write) {
		maybe_cleanup = ISC_TRUE;
	}

	if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
		/*
		 * Upgrade the lock and test if we still need to unlink.
		 * The "upgrade" releases the read lock and takes the write
		 * lock, so the linkage is checked again afterwards.
		 */
		NODE_WEAKUNLOCK(nodelock, locktype);
		locktype = isc_rwlocktype_write;
		POST(locktype);
		NODE_WEAKLOCK(nodelock, locktype);
		if (ISC_LINK_LINKED(node, deadlink))
			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
					node, deadlink);
		if (maybe_cleanup)
			cleanup_dead_nodes(rbtdb, node->locknum);
	}

	new_reference(rbtdb, node);

	/* 'locktype' reflects whichever lock mode is held at this point. */
	NODE_WEAKUNLOCK(nodelock, locktype);
	NODE_STRONGUNLOCK(nodelock);
}
2011
2012 /*
2013 * Caller must be holding the node lock; either the "strong", read or write
2014 * lock. Note that the lock must be held even when node references are
2015 * atomically modified; in that case the decrement operation itself does not
2016 * have to be protected, but we must avoid a race condition where multiple
2017 * threads are decreasing the reference to zero simultaneously and at least
2018 * one of them is going to free the node.
2019 * This function returns ISC_TRUE if and only if the node reference decreases
2020 * to zero.
2021 */
2022 static isc_boolean_t
decrement_reference(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rbtdb_serial_t least_serial,isc_rwlocktype_t nlock,isc_rwlocktype_t tlock,isc_boolean_t pruning)2023 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2024 rbtdb_serial_t least_serial,
2025 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
2026 isc_boolean_t pruning)
2027 {
2028 isc_result_t result;
2029 isc_boolean_t write_locked;
2030 rbtdb_nodelock_t *nodelock;
2031 unsigned int refs, nrefs;
2032 int bucket = node->locknum;
2033 isc_boolean_t no_reference = ISC_TRUE;
2034
2035 nodelock = &rbtdb->node_locks[bucket];
2036
2037 #define KEEP_NODE(n, r) \
2038 ((n)->data != NULL || (n)->down != NULL || (n) == (r)->origin_node)
2039
2040 /* Handle easy and typical case first. */
2041 if (!node->dirty && KEEP_NODE(node, rbtdb)) {
2042 dns_rbtnode_refdecrement(node, &nrefs);
2043 INSIST((int)nrefs >= 0);
2044 if (nrefs == 0) {
2045 isc_refcount_decrement(&nodelock->references, &refs);
2046 INSIST((int)refs >= 0);
2047 }
2048 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
2049 }
2050
2051 /* Upgrade the lock? */
2052 if (nlock == isc_rwlocktype_read) {
2053 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
2054 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
2055 }
2056
2057 dns_rbtnode_refdecrement(node, &nrefs);
2058 INSIST((int)nrefs >= 0);
2059 if (nrefs > 0) {
2060 /* Restore the lock? */
2061 if (nlock == isc_rwlocktype_read)
2062 NODE_WEAKDOWNGRADE(&nodelock->lock);
2063 return (ISC_FALSE);
2064 }
2065
2066 if (node->dirty) {
2067 if (IS_CACHE(rbtdb))
2068 clean_cache_node(rbtdb, node);
2069 else {
2070 if (least_serial == 0) {
2071 /*
2072 * Caller doesn't know the least serial.
2073 * Get it.
2074 */
2075 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2076 least_serial = rbtdb->least_serial;
2077 RBTDB_UNLOCK(&rbtdb->lock,
2078 isc_rwlocktype_read);
2079 }
2080 clean_zone_node(rbtdb, node, least_serial);
2081 }
2082 }
2083
2084 /*
2085 * Attempt to switch to a write lock on the tree. If this fails,
2086 * we will add this node to a linked list of nodes in this locking
2087 * bucket which we will free later.
2088 */
2089 if (tlock != isc_rwlocktype_write) {
2090 /*
2091 * Locking hierarchy notwithstanding, we don't need to free
2092 * the node lock before acquiring the tree write lock because
2093 * we only do a trylock.
2094 */
2095 if (tlock == isc_rwlocktype_read)
2096 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
2097 else
2098 result = isc_rwlock_trylock(&rbtdb->tree_lock,
2099 isc_rwlocktype_write);
2100 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
2101 result == ISC_R_LOCKBUSY);
2102
2103 write_locked = ISC_TF(result == ISC_R_SUCCESS);
2104 } else
2105 write_locked = ISC_TRUE;
2106
2107 isc_refcount_decrement(&nodelock->references, &refs);
2108 INSIST((int)refs >= 0);
2109
2110 if (KEEP_NODE(node, rbtdb))
2111 goto restore_locks;
2112
2113 #undef KEEP_NODE
2114
2115 if (write_locked) {
2116 /*
2117 * We can now delete the node.
2118 */
2119
2120 /*
2121 * If this node is the only one in the level it's in, deleting
2122 * this node may recursively make its parent the only node in
2123 * the parent level; if so, and if no one is currently using
2124 * the parent node, this is almost the only opportunity to
2125 * clean it up. But the recursive cleanup is not that trivial
2126 * since the child and parent may be in different lock buckets,
2127 * which would cause a lock order reversal problem. To avoid
2128 * the trouble, we'll dispatch a separate event for batch
2129 * cleaning. We need to check whether we're deleting the node
2130 * as a result of pruning to avoid infinite dispatching.
2131 * Note: pruning happens only when a task has been set for the
2132 * rbtdb. If the user of the rbtdb chooses not to set a task,
2133 * it's their responsibility to purge stale leaves (e.g. by
2134 * periodic walk-through).
2135 */
2136 if (!pruning && node->parent != NULL &&
2137 node->parent->down == node && node->left == NULL &&
2138 node->right == NULL && rbtdb->task != NULL) {
2139 isc_event_t *ev;
2140 dns_db_t *db;
2141
2142 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
2143 DNS_EVENT_RBTPRUNE,
2144 prune_tree, node,
2145 sizeof(isc_event_t));
2146 if (ev != NULL) {
2147 new_reference(rbtdb, node);
2148 db = NULL;
2149 attach((dns_db_t *)rbtdb, &db);
2150 ev->ev_sender = db;
2151 isc_task_send(rbtdb->task, &ev);
2152 no_reference = ISC_FALSE;
2153 } else {
2154 /*
2155 * XXX: this is a weird situation. We could
2156 * ignore this error case, but then the stale
2157 * node will unlikely be purged except via a
2158 * rare condition such as manual cleanup. So
2159 * we queue it in the deadnodes list, hoping
2160 * the memory shortage is temporary and the node
2161 * will be deleted later.
2162 */
2163 isc_log_write(dns_lctx,
2164 DNS_LOGCATEGORY_DATABASE,
2165 DNS_LOGMODULE_CACHE,
2166 ISC_LOG_INFO,
2167 "decrement_reference: failed to "
2168 "allocate pruning event");
2169 INSIST(node->data == NULL);
2170 INSIST(!ISC_LINK_LINKED(node, deadlink));
2171 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
2172 deadlink);
2173 }
2174 } else {
2175 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
2176 char printname[DNS_NAME_FORMATSIZE];
2177
2178 isc_log_write(dns_lctx,
2179 DNS_LOGCATEGORY_DATABASE,
2180 DNS_LOGMODULE_CACHE,
2181 ISC_LOG_DEBUG(1),
2182 "decrement_reference: "
2183 "delete from rbt: %p %s",
2184 node,
2185 dns_rbt_formatnodename(node,
2186 printname,
2187 sizeof(printname)));
2188 }
2189
2190 delete_node(rbtdb, node);
2191 }
2192 } else {
2193 INSIST(node->data == NULL);
2194 INSIST(!ISC_LINK_LINKED(node, deadlink));
2195 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
2196 }
2197
2198 restore_locks:
2199 /* Restore the lock? */
2200 if (nlock == isc_rwlocktype_read)
2201 NODE_WEAKDOWNGRADE(&nodelock->lock);
2202
2203 /*
2204 * Relock a read lock, or unlock the write lock if no lock was held.
2205 */
2206 if (tlock == isc_rwlocktype_none)
2207 if (write_locked)
2208 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2209
2210 if (tlock == isc_rwlocktype_read)
2211 if (write_locked)
2212 isc_rwlock_downgrade(&rbtdb->tree_lock);
2213
2214 return (no_reference);
2215 }
2216
2217 /*
2218 * Prune the tree by recursively cleaning-up single leaves. In the worst
2219 * case, the number of iteration is the number of tree levels, which is at
2220 * most the maximum number of domain name labels, i.e, 127. In practice, this
2221 * should be much smaller (only a few times), and even the worst case would be
2222 * acceptable for a single event.
2223 */
2224 static void
prune_tree(isc_task_t * task,isc_event_t * event)2225 prune_tree(isc_task_t *task, isc_event_t *event) {
2226 dns_rbtdb_t *rbtdb = event->ev_sender;
2227 dns_rbtnode_t *node = event->ev_arg;
2228 dns_rbtnode_t *parent;
2229 unsigned int locknum;
2230
2231 UNUSED(task);
2232
2233 isc_event_free(&event);
2234
2235 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2236 locknum = node->locknum;
2237 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
2238 do {
2239 parent = node->parent;
2240 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
2241 isc_rwlocktype_write, ISC_TRUE);
2242
2243 if (parent != NULL && parent->down == NULL) {
2244 /*
2245 * node was the only down child of the parent and has
2246 * just been removed. We'll then need to examine the
2247 * parent. Keep the lock if possible; otherwise,
2248 * release the old lock and acquire one for the parent.
2249 */
2250 if (parent->locknum != locknum) {
2251 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2252 isc_rwlocktype_write);
2253 locknum = parent->locknum;
2254 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2255 isc_rwlocktype_write);
2256 }
2257
2258 /*
2259 * We need to gain a reference to the node before
2260 * decrementing it in the next iteration. In addition,
2261 * if the node is in the dead-nodes list, extract it
2262 * from the list beforehand as we do in
2263 * reactivate_node().
2264 */
2265 if (ISC_LINK_LINKED(parent, deadlink))
2266 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
2267 parent, deadlink);
2268 new_reference(rbtdb, parent);
2269 } else
2270 parent = NULL;
2271
2272 node = parent;
2273 } while (node != NULL);
2274 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
2275 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2276
2277 detach((dns_db_t **)(void *)&rbtdb);
2278 }
2279
2280 static inline void
make_least_version(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2281 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
2282 rbtdb_changedlist_t *cleanup_list)
2283 {
2284 /*
2285 * Caller must be holding the database lock.
2286 */
2287
2288 rbtdb->least_serial = version->serial;
2289 *cleanup_list = version->changed_list;
2290 ISC_LIST_INIT(version->changed_list);
2291 }
2292
2293 static inline void
cleanup_nondirty(rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2294 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
2295 rbtdb_changed_t *changed, *next_changed;
2296
2297 /*
2298 * If the changed record is dirty, then
2299 * an update created multiple versions of
2300 * a given rdataset. We keep this list
2301 * until we're the least open version, at
2302 * which point it's safe to get rid of any
2303 * older versions.
2304 *
2305 * If the changed record isn't dirty, then
2306 * we don't need it anymore since we're
2307 * committing and not rolling back.
2308 *
2309 * The caller must be holding the database lock.
2310 */
2311 for (changed = HEAD(version->changed_list);
2312 changed != NULL;
2313 changed = next_changed) {
2314 next_changed = NEXT(changed, link);
2315 if (!changed->dirty) {
2316 UNLINK(version->changed_list,
2317 changed, link);
2318 APPEND(*cleanup_list,
2319 changed, link);
2320 }
2321 }
2322 }
2323
2324 static void
iszonesecure(dns_db_t * db,rbtdb_version_t * version,dns_dbnode_t * origin)2325 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
2326 dns_rdataset_t keyset;
2327 dns_rdataset_t nsecset, signsecset;
2328 isc_boolean_t haszonekey = ISC_FALSE;
2329 isc_boolean_t hasnsec = ISC_FALSE;
2330 isc_result_t result;
2331
2332 dns_rdataset_init(&keyset);
2333 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2334 0, 0, &keyset, NULL);
2335 if (result == ISC_R_SUCCESS) {
2336 result = dns_rdataset_first(&keyset);
2337 while (result == ISC_R_SUCCESS) {
2338 dns_rdata_t keyrdata = DNS_RDATA_INIT;
2339 dns_rdataset_current(&keyset, &keyrdata);
2340 if (dns_zonekey_iszonekey(&keyrdata)) {
2341 haszonekey = ISC_TRUE;
2342 break;
2343 }
2344 result = dns_rdataset_next(&keyset);
2345 }
2346 dns_rdataset_disassociate(&keyset);
2347 }
2348 if (!haszonekey) {
2349 version->secure = dns_db_insecure;
2350 version->havensec3 = ISC_FALSE;
2351 return;
2352 }
2353
2354 dns_rdataset_init(&nsecset);
2355 dns_rdataset_init(&signsecset);
2356 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2357 0, 0, &nsecset, &signsecset);
2358 if (result == ISC_R_SUCCESS) {
2359 if (dns_rdataset_isassociated(&signsecset)) {
2360 hasnsec = ISC_TRUE;
2361 dns_rdataset_disassociate(&signsecset);
2362 }
2363 dns_rdataset_disassociate(&nsecset);
2364 }
2365
2366 setnsec3parameters(db, version);
2367
2368 /*
2369 * Do we have a valid NSEC/NSEC3 chain?
2370 */
2371 if (version->havensec3 || hasnsec)
2372 version->secure = dns_db_secure;
2373 else
2374 version->secure = dns_db_insecure;
2375 }
2376
2377 /*%<
2378 * Walk the origin node looking for NSEC3PARAM records.
2379 * Cache the nsec3 parameters.
2380 */
2381 static void
setnsec3parameters(dns_db_t * db,rbtdb_version_t * version)2382 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2383 dns_rbtnode_t *node;
2384 dns_rdata_nsec3param_t nsec3param;
2385 dns_rdata_t rdata = DNS_RDATA_INIT;
2386 isc_region_t region;
2387 isc_result_t result;
2388 rdatasetheader_t *header, *header_next;
2389 unsigned char *raw; /* RDATASLAB */
2390 unsigned int count, length;
2391 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2392
2393 version->havensec3 = ISC_FALSE;
2394 node = rbtdb->origin_node;
2395 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2396 isc_rwlocktype_read);
2397 for (header = node->data;
2398 header != NULL;
2399 header = header_next) {
2400 header_next = header->next;
2401 do {
2402 if (header->serial <= version->serial &&
2403 !IGNORE(header)) {
2404 if (NONEXISTENT(header))
2405 header = NULL;
2406 break;
2407 } else
2408 header = header->down;
2409 } while (header != NULL);
2410
2411 if (header != NULL &&
2412 (header->type == dns_rdatatype_nsec3param)) {
2413 /*
2414 * Find A NSEC3PARAM with a supported algorithm.
2415 */
2416 raw = (unsigned char *)header + sizeof(*header);
2417 count = raw[0] * 256 + raw[1]; /* count */
2418 #if DNS_RDATASET_FIXED
2419 raw += count * 4 + 2;
2420 #else
2421 raw += 2;
2422 #endif
2423 while (count-- > 0U) {
2424 length = raw[0] * 256 + raw[1];
2425 #if DNS_RDATASET_FIXED
2426 raw += 4;
2427 #else
2428 raw += 2;
2429 #endif
2430 region.base = raw;
2431 region.length = length;
2432 raw += length;
2433 dns_rdata_fromregion(&rdata,
2434 rbtdb->common.rdclass,
2435 dns_rdatatype_nsec3param,
2436 ®ion);
2437 result = dns_rdata_tostruct(&rdata,
2438 &nsec3param,
2439 NULL);
2440 INSIST(result == ISC_R_SUCCESS);
2441 dns_rdata_reset(&rdata);
2442
2443 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2444 !dns_nsec3_supportedhash(nsec3param.hash))
2445 continue;
2446
2447 if (nsec3param.flags != 0)
2448 continue;
2449
2450 memmove(version->salt, nsec3param.salt,
2451 nsec3param.salt_length);
2452 version->hash = nsec3param.hash;
2453 version->salt_length = nsec3param.salt_length;
2454 version->iterations = nsec3param.iterations;
2455 version->flags = nsec3param.flags;
2456 version->havensec3 = ISC_TRUE;
2457 /*
2458 * Look for a better algorithm than the
2459 * unknown test algorithm.
2460 */
2461 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2462 goto unlock;
2463 }
2464 }
2465 }
2466 unlock:
2467 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2468 isc_rwlocktype_read);
2469 }
2470
2471 static void
cleanup_dead_nodes_callback(isc_task_t * task,isc_event_t * event)2472 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2473 dns_rbtdb_t *rbtdb = event->ev_arg;
2474 isc_boolean_t again = ISC_FALSE;
2475 unsigned int locknum;
2476 unsigned int refs;
2477
2478 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2479 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2480 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2481 isc_rwlocktype_write);
2482 cleanup_dead_nodes(rbtdb, locknum);
2483 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2484 again = ISC_TRUE;
2485 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2486 isc_rwlocktype_write);
2487 }
2488 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2489 if (again)
2490 isc_task_send(task, &event);
2491 else {
2492 isc_event_free(&event);
2493 isc_refcount_decrement(&rbtdb->references, &refs);
2494 if (refs == 0)
2495 maybe_free_rbtdb(rbtdb);
2496 }
2497 }
2498
2499 static void
closeversion(dns_db_t * db,dns_dbversion_t ** versionp,isc_boolean_t commit)2500 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2501 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2502 rbtdb_version_t *version, *cleanup_version, *least_greater;
2503 isc_boolean_t rollback = ISC_FALSE;
2504 rbtdb_changedlist_t cleanup_list;
2505 rdatasetheaderlist_t resigned_list;
2506 rbtdb_changed_t *changed, *next_changed;
2507 rbtdb_serial_t serial, least_serial;
2508 dns_rbtnode_t *rbtnode;
2509 unsigned int refs;
2510 rdatasetheader_t *header;
2511
2512 REQUIRE(VALID_RBTDB(rbtdb));
2513 version = (rbtdb_version_t *)*versionp;
2514 INSIST(version->rbtdb == rbtdb);
2515
2516 cleanup_version = NULL;
2517 ISC_LIST_INIT(cleanup_list);
2518 ISC_LIST_INIT(resigned_list);
2519
2520 isc_refcount_decrement(&version->references, &refs);
2521 if (refs > 0) { /* typical and easy case first */
2522 if (commit) {
2523 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2524 INSIST(!version->writer);
2525 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2526 }
2527 goto end;
2528 }
2529
2530 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2531 serial = version->serial;
2532 if (version->writer) {
2533 if (commit) {
2534 unsigned cur_ref;
2535 rbtdb_version_t *cur_version;
2536
2537 INSIST(version->commit_ok);
2538 INSIST(version == rbtdb->future_version);
2539 /*
2540 * The current version is going to be replaced.
2541 * Release the (likely last) reference to it from the
2542 * DB itself and unlink it from the open list.
2543 */
2544 cur_version = rbtdb->current_version;
2545 isc_refcount_decrement(&cur_version->references,
2546 &cur_ref);
2547 if (cur_ref == 0) {
2548 if (cur_version->serial == rbtdb->least_serial)
2549 INSIST(EMPTY(cur_version->changed_list));
2550 UNLINK(rbtdb->open_versions,
2551 cur_version, link);
2552 }
2553 if (EMPTY(rbtdb->open_versions)) {
2554 /*
2555 * We're going to become the least open
2556 * version.
2557 */
2558 make_least_version(rbtdb, version,
2559 &cleanup_list);
2560 } else {
2561 /*
2562 * Some other open version is the
2563 * least version. We can't cleanup
2564 * records that were changed in this
2565 * version because the older versions
2566 * may still be in use by an open
2567 * version.
2568 *
2569 * We can, however, discard the
2570 * changed records for things that
2571 * we've added that didn't exist in
2572 * prior versions.
2573 */
2574 cleanup_nondirty(version, &cleanup_list);
2575 }
2576 /*
2577 * If the (soon to be former) current version
2578 * isn't being used by anyone, we can clean
2579 * it up.
2580 */
2581 if (cur_ref == 0) {
2582 cleanup_version = cur_version;
2583 APPENDLIST(version->changed_list,
2584 cleanup_version->changed_list,
2585 link);
2586 }
2587 /*
2588 * Update the zone's secure status.
2589 */
2590 if (!IS_CACHE(rbtdb))
2591 iszonesecure(db, version, rbtdb->origin_node);
2592 /*
2593 * Become the current version.
2594 */
2595 version->writer = ISC_FALSE;
2596 rbtdb->current_version = version;
2597 rbtdb->current_serial = version->serial;
2598 rbtdb->future_version = NULL;
2599
2600 /*
2601 * Keep the current version in the open list, and
2602 * gain a reference for the DB itself (see the DB
2603 * creation function below). This must be the only
2604 * case where we need to increment the counter from
2605 * zero and need to use isc_refcount_increment0().
2606 */
2607 isc_refcount_increment0(&version->references,
2608 &cur_ref);
2609 INSIST(cur_ref == 1);
2610 PREPEND(rbtdb->open_versions,
2611 rbtdb->current_version, link);
2612 resigned_list = version->resigned_list;
2613 ISC_LIST_INIT(version->resigned_list);
2614 } else {
2615 /*
2616 * We're rolling back this transaction.
2617 */
2618 cleanup_list = version->changed_list;
2619 ISC_LIST_INIT(version->changed_list);
2620 resigned_list = version->resigned_list;
2621 ISC_LIST_INIT(version->resigned_list);
2622 rollback = ISC_TRUE;
2623 cleanup_version = version;
2624 rbtdb->future_version = NULL;
2625 }
2626 } else {
2627 if (version != rbtdb->current_version) {
2628 /*
2629 * There are no external or internal references
2630 * to this version and it can be cleaned up.
2631 */
2632 cleanup_version = version;
2633
2634 /*
2635 * Find the version with the least serial
2636 * number greater than ours.
2637 */
2638 least_greater = PREV(version, link);
2639 if (least_greater == NULL)
2640 least_greater = rbtdb->current_version;
2641
2642 INSIST(version->serial < least_greater->serial);
2643 /*
2644 * Is this the least open version?
2645 */
2646 if (version->serial == rbtdb->least_serial) {
2647 /*
2648 * Yes. Install the new least open
2649 * version.
2650 */
2651 make_least_version(rbtdb,
2652 least_greater,
2653 &cleanup_list);
2654 } else {
2655 /*
2656 * Add any unexecuted cleanups to
2657 * those of the least greater version.
2658 */
2659 APPENDLIST(least_greater->changed_list,
2660 version->changed_list,
2661 link);
2662 }
2663 } else if (version->serial == rbtdb->least_serial)
2664 INSIST(EMPTY(version->changed_list));
2665 UNLINK(rbtdb->open_versions, version, link);
2666 }
2667 least_serial = rbtdb->least_serial;
2668 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2669
2670 if (cleanup_version != NULL) {
2671 INSIST(EMPTY(cleanup_version->changed_list));
2672 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2673 sizeof(*cleanup_version));
2674 }
2675
2676 /*
2677 * Commit/rollback re-signed headers.
2678 */
2679 for (header = HEAD(resigned_list);
2680 header != NULL;
2681 header = HEAD(resigned_list)) {
2682 nodelock_t *lock;
2683
2684 ISC_LIST_UNLINK(resigned_list, header, link);
2685
2686 lock = &rbtdb->node_locks[header->node->locknum].lock;
2687 NODE_LOCK(lock, isc_rwlocktype_write);
2688 if (rollback && !IGNORE(header)) {
2689 isc_result_t result;
2690 result = resign_insert(rbtdb, header->node->locknum,
2691 header);
2692 if (result != ISC_R_SUCCESS)
2693 isc_log_write(dns_lctx,
2694 DNS_LOGCATEGORY_DATABASE,
2695 DNS_LOGMODULE_ZONE, ISC_LOG_ERROR,
2696 "Unable to reinsert header to "
2697 "re-signing heap: %s\n",
2698 dns_result_totext(result));
2699 }
2700 decrement_reference(rbtdb, header->node, least_serial,
2701 isc_rwlocktype_write, isc_rwlocktype_none,
2702 ISC_FALSE);
2703 NODE_UNLOCK(lock, isc_rwlocktype_write);
2704 }
2705
2706 if (!EMPTY(cleanup_list)) {
2707 isc_event_t *event = NULL;
2708 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2709
2710 if (rbtdb->task != NULL)
2711 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2712 DNS_EVENT_RBTDEADNODES,
2713 cleanup_dead_nodes_callback,
2714 rbtdb, sizeof(isc_event_t));
2715 if (event == NULL) {
2716 /*
2717 * We acquire a tree write lock here in order to make
2718 * sure that stale nodes will be removed in
2719 * decrement_reference(). If we didn't have the lock,
2720 * those nodes could miss the chance to be removed
2721 * until the server stops. The write lock is
2722 * expensive, but this event should be rare enough
2723 * to justify the cost.
2724 */
2725 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2726 tlock = isc_rwlocktype_write;
2727 }
2728
2729 for (changed = HEAD(cleanup_list);
2730 changed != NULL;
2731 changed = next_changed) {
2732 nodelock_t *lock;
2733
2734 next_changed = NEXT(changed, link);
2735 rbtnode = changed->node;
2736 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2737
2738 NODE_LOCK(lock, isc_rwlocktype_write);
2739 /*
2740 * This is a good opportunity to purge any dead nodes,
2741 * so use it.
2742 */
2743 if (event == NULL)
2744 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2745
2746 if (rollback)
2747 rollback_node(rbtnode, serial);
2748 decrement_reference(rbtdb, rbtnode, least_serial,
2749 isc_rwlocktype_write, tlock,
2750 ISC_FALSE);
2751
2752 NODE_UNLOCK(lock, isc_rwlocktype_write);
2753
2754 isc_mem_put(rbtdb->common.mctx, changed,
2755 sizeof(*changed));
2756 }
2757 if (event != NULL) {
2758 isc_refcount_increment(&rbtdb->references, NULL);
2759 isc_task_send(rbtdb->task, &event);
2760 } else
2761 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2762 }
2763
2764 end:
2765 *versionp = NULL;
2766 }
2767
2768 /*
2769 * Add the necessary magic for the wildcard name 'name'
2770 * to be found in 'rbtdb'.
2771 *
2772 * In order for wildcard matching to work correctly in
2773 * zone_find(), we must ensure that a node for the wildcarding
2774 * level exists in the database, and has its 'find_callback'
2775 * and 'wild' bits set.
2776 *
2777 * E.g. if the wildcard name is "*.sub.example." then we
2778 * must ensure that "sub.example." exists and is marked as
2779 * a wildcard level.
2780 */
2781 static isc_result_t
add_wildcard_magic(dns_rbtdb_t * rbtdb,dns_name_t * name)2782 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2783 isc_result_t result;
2784 dns_name_t foundname;
2785 dns_offsets_t offsets;
2786 unsigned int n;
2787 dns_rbtnode_t *node = NULL;
2788
2789 dns_name_init(&foundname, offsets);
2790 n = dns_name_countlabels(name);
2791 INSIST(n >= 2);
2792 n--;
2793 dns_name_getlabelsequence(name, 1, n, &foundname);
2794 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2795 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2796 return (result);
2797 if (result == ISC_R_SUCCESS)
2798 node->nsec = DNS_RBT_NSEC_NORMAL;
2799 node->find_callback = 1;
2800 node->wild = 1;
2801 return (ISC_R_SUCCESS);
2802 }
2803
2804 static isc_result_t
add_empty_wildcards(dns_rbtdb_t * rbtdb,dns_name_t * name)2805 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2806 isc_result_t result;
2807 dns_name_t foundname;
2808 dns_offsets_t offsets;
2809 unsigned int n, l, i;
2810
2811 dns_name_init(&foundname, offsets);
2812 n = dns_name_countlabels(name);
2813 l = dns_name_countlabels(&rbtdb->common.origin);
2814 i = l + 1;
2815 while (i < n) {
2816 dns_rbtnode_t *node = NULL; /* dummy */
2817 dns_name_getlabelsequence(name, n - i, i, &foundname);
2818 if (dns_name_iswildcard(&foundname)) {
2819 result = add_wildcard_magic(rbtdb, &foundname);
2820 if (result != ISC_R_SUCCESS)
2821 return (result);
2822 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2823 &node);
2824 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2825 return (result);
2826 if (result == ISC_R_SUCCESS)
2827 node->nsec = DNS_RBT_NSEC_NORMAL;
2828 }
2829 i++;
2830 }
2831 return (ISC_R_SUCCESS);
2832 }
2833
2834 static isc_result_t
findnodeintree(dns_rbtdb_t * rbtdb,dns_rbt_t * tree,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2835 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2836 isc_boolean_t create, dns_dbnode_t **nodep)
2837 {
2838 dns_rbtnode_t *node = NULL;
2839 dns_name_t nodename;
2840 isc_result_t result;
2841 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2842
2843 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2844
2845 dns_name_init(&nodename, NULL);
2846 RWLOCK(&rbtdb->tree_lock, locktype);
2847 result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2848 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2849 if (result != ISC_R_SUCCESS) {
2850 RWUNLOCK(&rbtdb->tree_lock, locktype);
2851 if (!create) {
2852 if (result == DNS_R_PARTIALMATCH)
2853 result = ISC_R_NOTFOUND;
2854 return (result);
2855 }
2856 /*
2857 * It would be nice to try to upgrade the lock instead of
2858 * unlocking then relocking.
2859 */
2860 locktype = isc_rwlocktype_write;
2861 RWLOCK(&rbtdb->tree_lock, locktype);
2862 node = NULL;
2863 result = dns_rbt_addnode(tree, name, &node);
2864 if (result == ISC_R_SUCCESS) {
2865 dns_rbt_namefromnode(node, &nodename);
2866 #ifdef DNS_RBT_USEHASH
2867 node->locknum = node->hashval % rbtdb->node_lock_count;
2868 #else
2869 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2870 rbtdb->node_lock_count;
2871 #endif
2872 if (tree == rbtdb->tree) {
2873 add_empty_wildcards(rbtdb, name);
2874
2875 if (dns_name_iswildcard(name)) {
2876 result = add_wildcard_magic(rbtdb, name);
2877 if (result != ISC_R_SUCCESS) {
2878 RWUNLOCK(&rbtdb->tree_lock, locktype);
2879 return (result);
2880 }
2881 }
2882 }
2883 if (tree == rbtdb->nsec3)
2884 node->nsec = DNS_RBT_NSEC_NSEC3;
2885 } else if (result != ISC_R_EXISTS) {
2886 RWUNLOCK(&rbtdb->tree_lock, locktype);
2887 return (result);
2888 }
2889 }
2890
2891 if (tree == rbtdb->nsec3)
2892 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2893
2894 reactivate_node(rbtdb, node, locktype);
2895
2896 /*
2897 * Always try to add the policy zone data, because this node might
2898 * already have been implicitly created by the previous addition of
2899 * a longer domain. A common example is adding *.example.com
2900 * (implicitly creating example.com) followed by explicitly adding
2901 * example.com.
2902 */
2903 if (create && rbtdb->rpzs != NULL && tree == rbtdb->tree) {
2904 dns_fixedname_t fnamef;
2905 dns_name_t *fname;
2906
2907 dns_fixedname_init(&fnamef);
2908 fname = dns_fixedname_name(&fnamef);
2909 dns_rbt_fullnamefromnode(node, fname);
2910 result = dns_rpz_add(rbtdb->rpzs, rbtdb->rpz_num, fname);
2911 if (result == ISC_R_SUCCESS)
2912 node->rpz = 1;
2913 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
2914 /*
2915 * It is too late to give up, so merely complain.
2916 */
2917 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
2918 DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
2919 "dns_rpz_add(): %s",
2920 isc_result_totext(result));
2921 }
2922 }
2923
2924 RWUNLOCK(&rbtdb->tree_lock, locktype);
2925
2926 *nodep = (dns_dbnode_t *)node;
2927
2928 return (ISC_R_SUCCESS);
2929 }
2930
2931 static isc_result_t
findnode(dns_db_t * db,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2932 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2933 dns_dbnode_t **nodep)
2934 {
2935 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2936
2937 REQUIRE(VALID_RBTDB(rbtdb));
2938
2939 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2940 }
2941
2942 static isc_result_t
findnsec3node(dns_db_t * db,dns_name_t * name,isc_boolean_t create,dns_dbnode_t ** nodep)2943 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2944 dns_dbnode_t **nodep)
2945 {
2946 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2947
2948 REQUIRE(VALID_RBTDB(rbtdb));
2949
2950 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2951 }
2952
2953 static isc_result_t
zone_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)2954 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2955 rbtdb_search_t *search = arg;
2956 rdatasetheader_t *header, *header_next;
2957 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2958 rdatasetheader_t *found;
2959 isc_result_t result;
2960 dns_rbtnode_t *onode;
2961
2962 /*
2963 * We only want to remember the topmost zone cut, since it's the one
2964 * that counts, so we'll just continue if we've already found a
2965 * zonecut.
2966 */
2967 if (search->zonecut != NULL)
2968 return (DNS_R_CONTINUE);
2969
2970 found = NULL;
2971 result = DNS_R_CONTINUE;
2972 onode = search->rbtdb->origin_node;
2973
2974 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2975 isc_rwlocktype_read);
2976
2977 /*
2978 * Look for an NS or DNAME rdataset active in our version.
2979 */
2980 ns_header = NULL;
2981 dname_header = NULL;
2982 sigdname_header = NULL;
2983 for (header = node->data; header != NULL; header = header_next) {
2984 header_next = header->next;
2985 if (header->type == dns_rdatatype_ns ||
2986 header->type == dns_rdatatype_dname ||
2987 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2988 do {
2989 if (header->serial <= search->serial &&
2990 !IGNORE(header)) {
2991 /*
2992 * Is this a "this rdataset doesn't
2993 * exist" record?
2994 */
2995 if (NONEXISTENT(header))
2996 header = NULL;
2997 break;
2998 } else
2999 header = header->down;
3000 } while (header != NULL);
3001 if (header != NULL) {
3002 if (header->type == dns_rdatatype_dname)
3003 dname_header = header;
3004 else if (header->type ==
3005 RBTDB_RDATATYPE_SIGDNAME)
3006 sigdname_header = header;
3007 else if (node != onode ||
3008 IS_STUB(search->rbtdb)) {
3009 /*
3010 * We've found an NS rdataset that
3011 * isn't at the origin node. We check
3012 * that they're not at the origin node,
3013 * because otherwise we'd erroneously
3014 * treat the zone top as if it were
3015 * a delegation.
3016 */
3017 ns_header = header;
3018 }
3019 }
3020 }
3021 }
3022
3023 /*
3024 * Did we find anything?
3025 */
3026 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
3027 ns_header != NULL) {
3028 /*
3029 * Note that NS has precedence over DNAME if both exist
3030 * in a zone. Otherwise DNAME take precedence over NS.
3031 */
3032 found = ns_header;
3033 search->zonecut_sigrdataset = NULL;
3034 } else if (dname_header != NULL) {
3035 found = dname_header;
3036 search->zonecut_sigrdataset = sigdname_header;
3037 } else if (ns_header != NULL) {
3038 found = ns_header;
3039 search->zonecut_sigrdataset = NULL;
3040 }
3041
3042 if (found != NULL) {
3043 /*
3044 * We increment the reference count on node to ensure that
3045 * search->zonecut_rdataset will still be valid later.
3046 */
3047 new_reference(search->rbtdb, node);
3048 search->zonecut = node;
3049 search->zonecut_rdataset = found;
3050 search->need_cleanup = ISC_TRUE;
3051 /*
3052 * Since we've found a zonecut, anything beneath it is
3053 * glue and is not subject to wildcard matching, so we
3054 * may clear search->wild.
3055 */
3056 search->wild = ISC_FALSE;
3057 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
3058 /*
3059 * If the caller does not want to find glue, then
3060 * this is the best answer and the search should
3061 * stop now.
3062 */
3063 result = DNS_R_PARTIALMATCH;
3064 } else {
3065 dns_name_t *zcname;
3066
3067 /*
3068 * The search will continue beneath the zone cut.
3069 * This may or may not be the best match. In case it
3070 * is, we need to remember the node name.
3071 */
3072 zcname = dns_fixedname_name(&search->zonecut_name);
3073 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
3074 ISC_R_SUCCESS);
3075 search->copy_name = ISC_TRUE;
3076 }
3077 } else {
3078 /*
3079 * There is no zonecut at this node which is active in this
3080 * version.
3081 *
3082 * If this is a "wild" node and the caller hasn't disabled
3083 * wildcard matching, remember that we've seen a wild node
3084 * in case we need to go searching for wildcard matches
3085 * later on.
3086 */
3087 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
3088 search->wild = ISC_TRUE;
3089 }
3090
3091 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3092 isc_rwlocktype_read);
3093
3094 return (result);
3095 }
3096
3097 static inline void
bind_rdataset(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rdatasetheader_t * header,isc_stdtime_t now,dns_rdataset_t * rdataset)3098 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
3099 rdatasetheader_t *header, isc_stdtime_t now,
3100 dns_rdataset_t *rdataset)
3101 {
3102 unsigned char *raw; /* RDATASLAB */
3103
3104 /*
3105 * Caller must be holding the node reader lock.
3106 * XXXJT: technically, we need a writer lock, since we'll increment
3107 * the header count below. However, since the actual counter value
3108 * doesn't matter, we prioritize performance here. (We may want to
3109 * use atomic increment when available).
3110 */
3111
3112 if (rdataset == NULL)
3113 return;
3114
3115 new_reference(rbtdb, node);
3116
3117 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
3118
3119 rdataset->methods = &rdataset_methods;
3120 rdataset->rdclass = rbtdb->common.rdclass;
3121 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
3122 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
3123 rdataset->ttl = header->rdh_ttl - now;
3124 rdataset->trust = header->trust;
3125 if (NEGATIVE(header))
3126 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
3127 if (NXDOMAIN(header))
3128 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
3129 if (OPTOUT(header))
3130 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
3131 if (PREFETCH(header))
3132 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
3133 rdataset->private1 = rbtdb;
3134 rdataset->private2 = node;
3135 raw = (unsigned char *)header + sizeof(*header);
3136 rdataset->private3 = raw;
3137 rdataset->count = header->count++;
3138 if (rdataset->count == ISC_UINT32_MAX)
3139 rdataset->count = 0;
3140
3141 /*
3142 * Reset iterator state.
3143 */
3144 rdataset->privateuint4 = 0;
3145 rdataset->private5 = NULL;
3146
3147 /*
3148 * Add noqname proof.
3149 */
3150 rdataset->private6 = header->noqname;
3151 if (rdataset->private6 != NULL)
3152 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
3153 rdataset->private7 = header->closest;
3154 if (rdataset->private7 != NULL)
3155 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
3156
3157 /*
3158 * Copy out re-signing information.
3159 */
3160 if (RESIGN(header)) {
3161 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
3162 rdataset->resign = header->resign;
3163 } else
3164 rdataset->resign = 0;
3165 }
3166
3167 static inline isc_result_t
setup_delegation(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)3168 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
3169 dns_name_t *foundname, dns_rdataset_t *rdataset,
3170 dns_rdataset_t *sigrdataset)
3171 {
3172 isc_result_t result;
3173 dns_name_t *zcname;
3174 rbtdb_rdatatype_t type;
3175 dns_rbtnode_t *node;
3176
3177 /*
3178 * The caller MUST NOT be holding any node locks.
3179 */
3180
3181 node = search->zonecut;
3182 type = search->zonecut_rdataset->type;
3183
3184 /*
3185 * If we have to set foundname, we do it before anything else.
3186 * If we were to set foundname after we had set nodep or bound the
3187 * rdataset, then we'd have to undo that work if dns_name_copy()
3188 * failed. By setting foundname first, there's nothing to undo if
3189 * we have trouble.
3190 */
3191 if (foundname != NULL && search->copy_name) {
3192 zcname = dns_fixedname_name(&search->zonecut_name);
3193 result = dns_name_copy(zcname, foundname, NULL);
3194 if (result != ISC_R_SUCCESS)
3195 return (result);
3196 }
3197 if (nodep != NULL) {
3198 /*
3199 * Note that we don't have to increment the node's reference
3200 * count here because we're going to use the reference we
3201 * already have in the search block.
3202 */
3203 *nodep = node;
3204 search->need_cleanup = ISC_FALSE;
3205 }
3206 if (rdataset != NULL) {
3207 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3208 isc_rwlocktype_read);
3209 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
3210 search->now, rdataset);
3211 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
3212 bind_rdataset(search->rbtdb, node,
3213 search->zonecut_sigrdataset,
3214 search->now, sigrdataset);
3215 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3216 isc_rwlocktype_read);
3217 }
3218
3219 if (type == dns_rdatatype_dname)
3220 return (DNS_R_DNAME);
3221 return (DNS_R_DELEGATION);
3222 }
3223
3224 static inline isc_boolean_t
valid_glue(rbtdb_search_t * search,dns_name_t * name,rbtdb_rdatatype_t type,dns_rbtnode_t * node)3225 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
3226 dns_rbtnode_t *node)
3227 {
3228 unsigned char *raw; /* RDATASLAB */
3229 unsigned int count, size;
3230 dns_name_t ns_name;
3231 isc_boolean_t valid = ISC_FALSE;
3232 dns_offsets_t offsets;
3233 isc_region_t region;
3234 rdatasetheader_t *header;
3235
3236 /*
3237 * No additional locking is required.
3238 */
3239
3240 /*
3241 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
3242 * if it occurs at a zone cut, but is not valid below it.
3243 */
3244 if (type == dns_rdatatype_ns) {
3245 if (node != search->zonecut) {
3246 return (ISC_FALSE);
3247 }
3248 } else if (type != dns_rdatatype_a &&
3249 type != dns_rdatatype_aaaa &&
3250 type != dns_rdatatype_a6) {
3251 return (ISC_FALSE);
3252 }
3253
3254 header = search->zonecut_rdataset;
3255 raw = (unsigned char *)header + sizeof(*header);
3256 count = raw[0] * 256 + raw[1];
3257 #if DNS_RDATASET_FIXED
3258 raw += 2 + (4 * count);
3259 #else
3260 raw += 2;
3261 #endif
3262
3263 while (count > 0) {
3264 count--;
3265 size = raw[0] * 256 + raw[1];
3266 #if DNS_RDATASET_FIXED
3267 raw += 4;
3268 #else
3269 raw += 2;
3270 #endif
3271 region.base = raw;
3272 region.length = size;
3273 raw += size;
3274 /*
3275 * XXX Until we have rdata structures, we have no choice but
3276 * to directly access the rdata format.
3277 */
3278 dns_name_init(&ns_name, offsets);
3279 dns_name_fromregion(&ns_name, ®ion);
3280 if (dns_name_compare(&ns_name, name) == 0) {
3281 valid = ISC_TRUE;
3282 break;
3283 }
3284 }
3285
3286 return (valid);
3287 }
3288
3289 static inline isc_boolean_t
activeempty(rbtdb_search_t * search,dns_rbtnodechain_t * chain,dns_name_t * name)3290 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
3291 dns_name_t *name)
3292 {
3293 dns_fixedname_t fnext;
3294 dns_fixedname_t forigin;
3295 dns_name_t *next;
3296 dns_name_t *origin;
3297 dns_name_t prefix;
3298 dns_rbtdb_t *rbtdb;
3299 dns_rbtnode_t *node;
3300 isc_result_t result;
3301 isc_boolean_t answer = ISC_FALSE;
3302 rdatasetheader_t *header;
3303
3304 rbtdb = search->rbtdb;
3305
3306 dns_name_init(&prefix, NULL);
3307 dns_fixedname_init(&fnext);
3308 next = dns_fixedname_name(&fnext);
3309 dns_fixedname_init(&forigin);
3310 origin = dns_fixedname_name(&forigin);
3311
3312 result = dns_rbtnodechain_next(chain, NULL, NULL);
3313 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3314 node = NULL;
3315 result = dns_rbtnodechain_current(chain, &prefix,
3316 origin, &node);
3317 if (result != ISC_R_SUCCESS)
3318 break;
3319 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3320 isc_rwlocktype_read);
3321 for (header = node->data;
3322 header != NULL;
3323 header = header->next) {
3324 if (header->serial <= search->serial &&
3325 !IGNORE(header) && EXISTS(header))
3326 break;
3327 }
3328 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3329 isc_rwlocktype_read);
3330 if (header != NULL)
3331 break;
3332 result = dns_rbtnodechain_next(chain, NULL, NULL);
3333 }
3334 if (result == ISC_R_SUCCESS)
3335 result = dns_name_concatenate(&prefix, origin, next, NULL);
3336 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
3337 answer = ISC_TRUE;
3338 return (answer);
3339 }
3340
3341 static inline isc_boolean_t
activeemtpynode(rbtdb_search_t * search,dns_name_t * qname,dns_name_t * wname)3342 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3343 dns_fixedname_t fnext;
3344 dns_fixedname_t forigin;
3345 dns_fixedname_t fprev;
3346 dns_name_t *next;
3347 dns_name_t *origin;
3348 dns_name_t *prev;
3349 dns_name_t name;
3350 dns_name_t rname;
3351 dns_name_t tname;
3352 dns_rbtdb_t *rbtdb;
3353 dns_rbtnode_t *node;
3354 dns_rbtnodechain_t chain;
3355 isc_boolean_t check_next = ISC_TRUE;
3356 isc_boolean_t check_prev = ISC_TRUE;
3357 isc_boolean_t answer = ISC_FALSE;
3358 isc_result_t result;
3359 rdatasetheader_t *header;
3360 unsigned int n;
3361
3362 rbtdb = search->rbtdb;
3363
3364 dns_name_init(&name, NULL);
3365 dns_name_init(&tname, NULL);
3366 dns_name_init(&rname, NULL);
3367 dns_fixedname_init(&fnext);
3368 next = dns_fixedname_name(&fnext);
3369 dns_fixedname_init(&fprev);
3370 prev = dns_fixedname_name(&fprev);
3371 dns_fixedname_init(&forigin);
3372 origin = dns_fixedname_name(&forigin);
3373
3374 /*
3375 * Find if qname is at or below a empty node.
3376 * Use our own copy of the chain.
3377 */
3378
3379 chain = search->chain;
3380 do {
3381 node = NULL;
3382 result = dns_rbtnodechain_current(&chain, &name,
3383 origin, &node);
3384 if (result != ISC_R_SUCCESS)
3385 break;
3386 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3387 isc_rwlocktype_read);
3388 for (header = node->data;
3389 header != NULL;
3390 header = header->next) {
3391 if (header->serial <= search->serial &&
3392 !IGNORE(header) && EXISTS(header))
3393 break;
3394 }
3395 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3396 isc_rwlocktype_read);
3397 if (header != NULL)
3398 break;
3399 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3400 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3401 if (result == ISC_R_SUCCESS)
3402 result = dns_name_concatenate(&name, origin, prev, NULL);
3403 if (result != ISC_R_SUCCESS)
3404 check_prev = ISC_FALSE;
3405
3406 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3407 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3408 node = NULL;
3409 result = dns_rbtnodechain_current(&chain, &name,
3410 origin, &node);
3411 if (result != ISC_R_SUCCESS)
3412 break;
3413 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3414 isc_rwlocktype_read);
3415 for (header = node->data;
3416 header != NULL;
3417 header = header->next) {
3418 if (header->serial <= search->serial &&
3419 !IGNORE(header) && EXISTS(header))
3420 break;
3421 }
3422 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3423 isc_rwlocktype_read);
3424 if (header != NULL)
3425 break;
3426 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3427 }
3428 if (result == ISC_R_SUCCESS)
3429 result = dns_name_concatenate(&name, origin, next, NULL);
3430 if (result != ISC_R_SUCCESS)
3431 check_next = ISC_FALSE;
3432
3433 dns_name_clone(qname, &rname);
3434
3435 /*
3436 * Remove the wildcard label to find the terminal name.
3437 */
3438 n = dns_name_countlabels(wname);
3439 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3440
3441 do {
3442 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3443 (check_next && dns_name_issubdomain(next, &rname))) {
3444 answer = ISC_TRUE;
3445 break;
3446 }
3447 /*
3448 * Remove the left hand label.
3449 */
3450 n = dns_name_countlabels(&rname);
3451 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3452 } while (!dns_name_equal(&rname, &tname));
3453 return (answer);
3454 }
3455
3456 static inline isc_result_t
find_wildcard(rbtdb_search_t * search,dns_rbtnode_t ** nodep,dns_name_t * qname)3457 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3458 dns_name_t *qname)
3459 {
3460 unsigned int i, j;
3461 dns_rbtnode_t *node, *level_node, *wnode;
3462 rdatasetheader_t *header;
3463 isc_result_t result = ISC_R_NOTFOUND;
3464 dns_name_t name;
3465 dns_name_t *wname;
3466 dns_fixedname_t fwname;
3467 dns_rbtdb_t *rbtdb;
3468 isc_boolean_t done, wild, active;
3469 dns_rbtnodechain_t wchain;
3470
3471 /*
3472 * Caller must be holding the tree lock and MUST NOT be holding
3473 * any node locks.
3474 */
3475
3476 /*
3477 * Examine each ancestor level. If the level's wild bit
3478 * is set, then construct the corresponding wildcard name and
3479 * search for it. If the wildcard node exists, and is active in
3480 * this version, we're done. If not, then we next check to see
3481 * if the ancestor is active in this version. If so, then there
3482 * can be no possible wildcard match and again we're done. If not,
3483 * continue the search.
3484 */
3485
3486 rbtdb = search->rbtdb;
3487 i = search->chain.level_matches;
3488 done = ISC_FALSE;
3489 node = *nodep;
3490 do {
3491 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3492 isc_rwlocktype_read);
3493
3494 /*
3495 * First we try to figure out if this node is active in
3496 * the search's version. We do this now, even though we
3497 * may not need the information, because it simplifies the
3498 * locking and code flow.
3499 */
3500 for (header = node->data;
3501 header != NULL;
3502 header = header->next) {
3503 if (header->serial <= search->serial &&
3504 !IGNORE(header) && EXISTS(header))
3505 break;
3506 }
3507 if (header != NULL)
3508 active = ISC_TRUE;
3509 else
3510 active = ISC_FALSE;
3511
3512 if (node->wild)
3513 wild = ISC_TRUE;
3514 else
3515 wild = ISC_FALSE;
3516
3517 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3518 isc_rwlocktype_read);
3519
3520 if (wild) {
3521 /*
3522 * Construct the wildcard name for this level.
3523 */
3524 dns_name_init(&name, NULL);
3525 dns_rbt_namefromnode(node, &name);
3526 dns_fixedname_init(&fwname);
3527 wname = dns_fixedname_name(&fwname);
3528 result = dns_name_concatenate(dns_wildcardname, &name,
3529 wname, NULL);
3530 j = i;
3531 while (result == ISC_R_SUCCESS && j != 0) {
3532 j--;
3533 level_node = search->chain.levels[j];
3534 dns_name_init(&name, NULL);
3535 dns_rbt_namefromnode(level_node, &name);
3536 result = dns_name_concatenate(wname,
3537 &name,
3538 wname,
3539 NULL);
3540 }
3541 if (result != ISC_R_SUCCESS)
3542 break;
3543
3544 wnode = NULL;
3545 dns_rbtnodechain_init(&wchain, NULL);
3546 result = dns_rbt_findnode(rbtdb->tree, wname,
3547 NULL, &wnode, &wchain,
3548 DNS_RBTFIND_EMPTYDATA,
3549 NULL, NULL);
3550 if (result == ISC_R_SUCCESS) {
3551 nodelock_t *lock;
3552
3553 /*
3554 * We have found the wildcard node. If it
3555 * is active in the search's version, we're
3556 * done.
3557 */
3558 lock = &rbtdb->node_locks[wnode->locknum].lock;
3559 NODE_LOCK(lock, isc_rwlocktype_read);
3560 for (header = wnode->data;
3561 header != NULL;
3562 header = header->next) {
3563 if (header->serial <= search->serial &&
3564 !IGNORE(header) && EXISTS(header))
3565 break;
3566 }
3567 NODE_UNLOCK(lock, isc_rwlocktype_read);
3568 if (header != NULL ||
3569 activeempty(search, &wchain, wname)) {
3570 if (activeemtpynode(search, qname,
3571 wname)) {
3572 return (ISC_R_NOTFOUND);
3573 }
3574 /*
3575 * The wildcard node is active!
3576 *
3577 * Note: result is still ISC_R_SUCCESS
3578 * so we don't have to set it.
3579 */
3580 *nodep = wnode;
3581 break;
3582 }
3583 } else if (result != ISC_R_NOTFOUND &&
3584 result != DNS_R_PARTIALMATCH) {
3585 /*
3586 * An error has occurred. Bail out.
3587 */
3588 break;
3589 }
3590 }
3591
3592 if (active) {
3593 /*
3594 * The level node is active. Any wildcarding
3595 * present at higher levels has no
3596 * effect and we're done.
3597 */
3598 result = ISC_R_NOTFOUND;
3599 break;
3600 }
3601
3602 if (i > 0) {
3603 i--;
3604 node = search->chain.levels[i];
3605 } else
3606 done = ISC_TRUE;
3607 } while (!done);
3608
3609 return (result);
3610 }
3611
3612 static isc_boolean_t
matchparams(rdatasetheader_t * header,rbtdb_search_t * search)3613 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3614 {
3615 dns_rdata_t rdata = DNS_RDATA_INIT;
3616 dns_rdata_nsec3_t nsec3;
3617 unsigned char *raw; /* RDATASLAB */
3618 unsigned int rdlen, count;
3619 isc_region_t region;
3620 isc_result_t result;
3621
3622 REQUIRE(header->type == dns_rdatatype_nsec3);
3623
3624 raw = (unsigned char *)header + sizeof(*header);
3625 count = raw[0] * 256 + raw[1]; /* count */
3626 #if DNS_RDATASET_FIXED
3627 raw += count * 4 + 2;
3628 #else
3629 raw += 2;
3630 #endif
3631 while (count-- > 0) {
3632 rdlen = raw[0] * 256 + raw[1];
3633 #if DNS_RDATASET_FIXED
3634 raw += 4;
3635 #else
3636 raw += 2;
3637 #endif
3638 region.base = raw;
3639 region.length = rdlen;
3640 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3641 dns_rdatatype_nsec3, ®ion);
3642 raw += rdlen;
3643 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3644 INSIST(result == ISC_R_SUCCESS);
3645 if (nsec3.hash == search->rbtversion->hash &&
3646 nsec3.iterations == search->rbtversion->iterations &&
3647 nsec3.salt_length == search->rbtversion->salt_length &&
3648 memcmp(nsec3.salt, search->rbtversion->salt,
3649 nsec3.salt_length) == 0)
3650 return (ISC_TRUE);
3651 dns_rdata_reset(&rdata);
3652 }
3653 return (ISC_FALSE);
3654 }
3655
3656 /*
3657 * Find node of the NSEC/NSEC3 record that is 'name'.
3658 */
3659 static inline isc_result_t
previous_closest_nsec(dns_rdatatype_t type,rbtdb_search_t * search,dns_name_t * name,dns_name_t * origin,dns_rbtnode_t ** nodep,dns_rbtnodechain_t * nsecchain,isc_boolean_t * firstp)3660 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3661 dns_name_t *name, dns_name_t *origin,
3662 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3663 isc_boolean_t *firstp)
3664 {
3665 dns_fixedname_t ftarget;
3666 dns_name_t *target;
3667 dns_rbtnode_t *nsecnode;
3668 isc_result_t result;
3669
3670 REQUIRE(nodep != NULL && *nodep == NULL);
3671
3672 if (type == dns_rdatatype_nsec3) {
3673 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3674 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3675 return (result);
3676 result = dns_rbtnodechain_current(&search->chain, name, origin,
3677 nodep);
3678 return (result);
3679 }
3680
3681 dns_fixedname_init(&ftarget);
3682 target = dns_fixedname_name(&ftarget);
3683
3684 for (;;) {
3685 if (*firstp) {
3686 /*
3687 * Construct the name of the second node to check.
3688 * It is the first node sought in the NSEC tree.
3689 */
3690 *firstp = ISC_FALSE;
3691 dns_rbtnodechain_init(nsecchain, NULL);
3692 result = dns_name_concatenate(name, origin,
3693 target, NULL);
3694 if (result != ISC_R_SUCCESS)
3695 return (result);
3696 nsecnode = NULL;
3697 result = dns_rbt_findnode(search->rbtdb->nsec,
3698 target, NULL,
3699 &nsecnode, nsecchain,
3700 DNS_RBTFIND_NOOPTIONS,
3701 NULL, NULL);
3702 if (result == ISC_R_SUCCESS) {
3703 /*
3704 * Since this was the first loop, finding the
3705 * name in the NSEC tree implies that the first
3706 * node checked in the main tree had an
3707 * unacceptable NSEC record.
3708 * Try the previous node in the NSEC tree.
3709 */
3710 result = dns_rbtnodechain_prev(nsecchain,
3711 name, origin);
3712 if (result == DNS_R_NEWORIGIN)
3713 result = ISC_R_SUCCESS;
3714 } else if (result == ISC_R_NOTFOUND ||
3715 result == DNS_R_PARTIALMATCH) {
3716 result = dns_rbtnodechain_current(nsecchain,
3717 name, origin, NULL);
3718 if (result == ISC_R_NOTFOUND)
3719 result = ISC_R_NOMORE;
3720 }
3721 } else {
3722 /*
3723 * This is a second or later trip through the auxiliary
3724 * tree for the name of a third or earlier NSEC node in
3725 * the main tree. Previous trips through the NSEC tree
3726 * must have found nodes in the main tree with NSEC
3727 * records. Perhaps they lacked signature records.
3728 */
3729 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3730 if (result == DNS_R_NEWORIGIN)
3731 result = ISC_R_SUCCESS;
3732 }
3733 if (result != ISC_R_SUCCESS)
3734 return (result);
3735
3736 /*
3737 * Construct the name to seek in the main tree.
3738 */
3739 result = dns_name_concatenate(name, origin, target, NULL);
3740 if (result != ISC_R_SUCCESS)
3741 return (result);
3742
3743 *nodep = NULL;
3744 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3745 nodep, &search->chain,
3746 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3747 if (result == ISC_R_SUCCESS)
3748 return (result);
3749
3750 /*
3751 * There should always be a node in the main tree with the
3752 * same name as the node in the auxiliary NSEC tree, except for
3753 * nodes in the auxiliary tree that are awaiting deletion.
3754 */
3755 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3756 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3757 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3758 "previous_closest_nsec(): %s",
3759 isc_result_totext(result));
3760 return (DNS_R_BADDB);
3761 }
3762 }
3763 }
3764
3765 /*
3766 * Find the NSEC/NSEC3 which is or before the current point on the
3767 * search chain. For NSEC3 records only NSEC3 records that match the
3768 * current NSEC3PARAM record are considered.
3769 */
3770 static inline isc_result_t
find_closest_nsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset,dns_rbt_t * tree,dns_db_secure_t secure)3771 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3772 dns_name_t *foundname, dns_rdataset_t *rdataset,
3773 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3774 dns_db_secure_t secure)
3775 {
3776 dns_rbtnode_t *node, *prevnode;
3777 rdatasetheader_t *header, *header_next, *found, *foundsig;
3778 dns_rbtnodechain_t nsecchain;
3779 isc_boolean_t empty_node;
3780 isc_result_t result;
3781 dns_fixedname_t fname, forigin;
3782 dns_name_t *name, *origin;
3783 dns_rdatatype_t type;
3784 rbtdb_rdatatype_t sigtype;
3785 isc_boolean_t wraps;
3786 isc_boolean_t first = ISC_TRUE;
3787 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3788
3789 if (tree == search->rbtdb->nsec3) {
3790 type = dns_rdatatype_nsec3;
3791 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3792 wraps = ISC_TRUE;
3793 } else {
3794 type = dns_rdatatype_nsec;
3795 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3796 wraps = ISC_FALSE;
3797 }
3798
3799 /*
3800 * Use the auxiliary tree only starting with the second node in the
3801 * hope that the original node will be right much of the time.
3802 */
3803 dns_fixedname_init(&fname);
3804 name = dns_fixedname_name(&fname);
3805 dns_fixedname_init(&forigin);
3806 origin = dns_fixedname_name(&forigin);
3807 again:
3808 node = NULL;
3809 prevnode = NULL;
3810 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3811 if (result != ISC_R_SUCCESS)
3812 return (result);
3813 do {
3814 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3815 isc_rwlocktype_read);
3816 found = NULL;
3817 foundsig = NULL;
3818 empty_node = ISC_TRUE;
3819 for (header = node->data;
3820 header != NULL;
3821 header = header_next) {
3822 header_next = header->next;
3823 /*
3824 * Look for an active, extant NSEC or RRSIG NSEC.
3825 */
3826 do {
3827 if (header->serial <= search->serial &&
3828 !IGNORE(header)) {
3829 /*
3830 * Is this a "this rdataset doesn't
3831 * exist" record?
3832 */
3833 if (NONEXISTENT(header))
3834 header = NULL;
3835 break;
3836 } else
3837 header = header->down;
3838 } while (header != NULL);
3839 if (header != NULL) {
3840 /*
3841 * We now know that there is at least one
3842 * active rdataset at this node.
3843 */
3844 empty_node = ISC_FALSE;
3845 if (header->type == type) {
3846 found = header;
3847 if (foundsig != NULL)
3848 break;
3849 } else if (header->type == sigtype) {
3850 foundsig = header;
3851 if (found != NULL)
3852 break;
3853 }
3854 }
3855 }
3856 if (!empty_node) {
3857 if (found != NULL && search->rbtversion->havensec3 &&
3858 found->type == dns_rdatatype_nsec3 &&
3859 !matchparams(found, search)) {
3860 empty_node = ISC_TRUE;
3861 found = NULL;
3862 foundsig = NULL;
3863 result = previous_closest_nsec(type, search,
3864 name, origin,
3865 &prevnode, NULL,
3866 NULL);
3867 } else if (found != NULL &&
3868 (foundsig != NULL || !need_sig)) {
3869 /*
3870 * We've found the right NSEC/NSEC3 record.
3871 *
3872 * Note: for this to really be the right
3873 * NSEC record, it's essential that the NSEC
3874 * records of any nodes obscured by a zone
3875 * cut have been removed; we assume this is
3876 * the case.
3877 */
3878 result = dns_name_concatenate(name, origin,
3879 foundname, NULL);
3880 if (result == ISC_R_SUCCESS) {
3881 if (nodep != NULL) {
3882 new_reference(search->rbtdb,
3883 node);
3884 *nodep = node;
3885 }
3886 bind_rdataset(search->rbtdb, node,
3887 found, search->now,
3888 rdataset);
3889 if (foundsig != NULL)
3890 bind_rdataset(search->rbtdb,
3891 node,
3892 foundsig,
3893 search->now,
3894 sigrdataset);
3895 }
3896 } else if (found == NULL && foundsig == NULL) {
3897 /*
3898 * This node is active, but has no NSEC or
3899 * RRSIG NSEC. That means it's glue or
3900 * other obscured zone data that isn't
3901 * relevant for our search. Treat the
3902 * node as if it were empty and keep looking.
3903 */
3904 empty_node = ISC_TRUE;
3905 result = previous_closest_nsec(type, search,
3906 name, origin,
3907 &prevnode,
3908 &nsecchain,
3909 &first);
3910 } else {
3911 /*
3912 * We found an active node, but either the
3913 * NSEC or the RRSIG NSEC is missing. This
3914 * shouldn't happen.
3915 */
3916 result = DNS_R_BADDB;
3917 }
3918 } else {
3919 /*
3920 * This node isn't active. We've got to keep
3921 * looking.
3922 */
3923 result = previous_closest_nsec(type, search,
3924 name, origin, &prevnode,
3925 &nsecchain, &first);
3926 }
3927 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3928 isc_rwlocktype_read);
3929 node = prevnode;
3930 prevnode = NULL;
3931 } while (empty_node && result == ISC_R_SUCCESS);
3932
3933 if (!first)
3934 dns_rbtnodechain_invalidate(&nsecchain);
3935
3936 if (result == ISC_R_NOMORE && wraps) {
3937 result = dns_rbtnodechain_last(&search->chain, tree,
3938 NULL, NULL);
3939 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3940 wraps = ISC_FALSE;
3941 goto again;
3942 }
3943 }
3944
3945 /*
3946 * If the result is ISC_R_NOMORE, then we got to the beginning of
3947 * the database and didn't find a NSEC record. This shouldn't
3948 * happen.
3949 */
3950 if (result == ISC_R_NOMORE)
3951 result = DNS_R_BADDB;
3952
3953 return (result);
3954 }
3955
3956 static isc_result_t
zone_find(dns_db_t * db,dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)3957 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3958 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3959 dns_dbnode_t **nodep, dns_name_t *foundname,
3960 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3961 {
3962 dns_rbtnode_t *node = NULL;
3963 isc_result_t result;
3964 rbtdb_search_t search;
3965 isc_boolean_t cname_ok = ISC_TRUE;
3966 isc_boolean_t close_version = ISC_FALSE;
3967 isc_boolean_t maybe_zonecut = ISC_FALSE;
3968 isc_boolean_t at_zonecut = ISC_FALSE;
3969 isc_boolean_t wild;
3970 isc_boolean_t empty_node;
3971 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3972 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3973 rbtdb_rdatatype_t sigtype;
3974 isc_boolean_t active;
3975 dns_rbtnodechain_t chain;
3976 nodelock_t *lock;
3977 dns_rbt_t *tree;
3978
3979 search.rbtdb = (dns_rbtdb_t *)db;
3980
3981 REQUIRE(VALID_RBTDB(search.rbtdb));
3982 INSIST(version == NULL ||
3983 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3984
3985 /*
3986 * We don't care about 'now'.
3987 */
3988 UNUSED(now);
3989
3990 /*
3991 * If the caller didn't supply a version, attach to the current
3992 * version.
3993 */
3994 if (version == NULL) {
3995 currentversion(db, &version);
3996 close_version = ISC_TRUE;
3997 }
3998
3999 search.rbtversion = version;
4000 search.serial = search.rbtversion->serial;
4001 search.options = options;
4002 search.copy_name = ISC_FALSE;
4003 search.need_cleanup = ISC_FALSE;
4004 search.wild = ISC_FALSE;
4005 search.zonecut = NULL;
4006 dns_fixedname_init(&search.zonecut_name);
4007 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4008 search.now = 0;
4009
4010 /*
4011 * 'wild' will be true iff. we've matched a wildcard.
4012 */
4013 wild = ISC_FALSE;
4014
4015 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4016
4017 /*
4018 * Search down from the root of the tree. If, while going down, we
4019 * encounter a callback node, zone_zonecut_callback() will search the
4020 * rdatasets at the zone cut for active DNAME or NS rdatasets.
4021 */
4022 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
4023 search.rbtdb->tree;
4024 result = dns_rbt_findnode(tree, name, foundname, &node,
4025 &search.chain, DNS_RBTFIND_EMPTYDATA,
4026 zone_zonecut_callback, &search);
4027
4028 if (result == DNS_R_PARTIALMATCH) {
4029 partial_match:
4030 if (search.zonecut != NULL) {
4031 result = setup_delegation(&search, nodep, foundname,
4032 rdataset, sigrdataset);
4033 goto tree_exit;
4034 }
4035
4036 if (search.wild) {
4037 /*
4038 * At least one of the levels in the search chain
4039 * potentially has a wildcard. For each such level,
4040 * we must see if there's a matching wildcard active
4041 * in the current version.
4042 */
4043 result = find_wildcard(&search, &node, name);
4044 if (result == ISC_R_SUCCESS) {
4045 result = dns_name_copy(name, foundname, NULL);
4046 if (result != ISC_R_SUCCESS)
4047 goto tree_exit;
4048 wild = ISC_TRUE;
4049 goto found;
4050 }
4051 else if (result != ISC_R_NOTFOUND)
4052 goto tree_exit;
4053 }
4054
4055 chain = search.chain;
4056 active = activeempty(&search, &chain, name);
4057
4058 /*
4059 * If we're here, then the name does not exist, is not
4060 * beneath a zonecut, and there's no matching wildcard.
4061 */
4062 if ((search.rbtversion->secure == dns_db_secure &&
4063 !search.rbtversion->havensec3) ||
4064 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
4065 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
4066 {
4067 result = find_closest_nsec(&search, nodep, foundname,
4068 rdataset, sigrdataset, tree,
4069 search.rbtversion->secure);
4070 if (result == ISC_R_SUCCESS)
4071 result = active ? DNS_R_EMPTYNAME :
4072 DNS_R_NXDOMAIN;
4073 } else
4074 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
4075 goto tree_exit;
4076 } else if (result != ISC_R_SUCCESS)
4077 goto tree_exit;
4078
4079 found:
4080 /*
4081 * We have found a node whose name is the desired name, or we
4082 * have matched a wildcard.
4083 */
4084
4085 if (search.zonecut != NULL) {
4086 /*
4087 * If we're beneath a zone cut, we don't want to look for
4088 * CNAMEs because they're not legitimate zone glue.
4089 */
4090 cname_ok = ISC_FALSE;
4091 } else {
4092 /*
4093 * The node may be a zone cut itself. If it might be one,
4094 * make sure we check for it later.
4095 *
4096 * DS records live above the zone cut in ordinary zone so
4097 * we want to ignore any referral.
4098 *
4099 * Stub zones don't have anything "above" the delgation so
4100 * we always return a referral.
4101 */
4102 if (node->find_callback &&
4103 ((node != search.rbtdb->origin_node &&
4104 !dns_rdatatype_atparent(type)) ||
4105 IS_STUB(search.rbtdb)))
4106 maybe_zonecut = ISC_TRUE;
4107 }
4108
4109 /*
4110 * Certain DNSSEC types are not subject to CNAME matching
4111 * (RFC4035, section 2.5 and RFC3007).
4112 *
4113 * We don't check for RRSIG, because we don't store RRSIG records
4114 * directly.
4115 */
4116 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4117 cname_ok = ISC_FALSE;
4118
4119 /*
4120 * We now go looking for rdata...
4121 */
4122
4123 lock = &search.rbtdb->node_locks[node->locknum].lock;
4124 NODE_LOCK(lock, isc_rwlocktype_read);
4125
4126 found = NULL;
4127 foundsig = NULL;
4128 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4129 nsecheader = NULL;
4130 nsecsig = NULL;
4131 cnamesig = NULL;
4132 empty_node = ISC_TRUE;
4133 for (header = node->data; header != NULL; header = header_next) {
4134 header_next = header->next;
4135 /*
4136 * Look for an active, extant rdataset.
4137 */
4138 do {
4139 if (header->serial <= search.serial &&
4140 !IGNORE(header)) {
4141 /*
4142 * Is this a "this rdataset doesn't
4143 * exist" record?
4144 */
4145 if (NONEXISTENT(header))
4146 header = NULL;
4147 break;
4148 } else
4149 header = header->down;
4150 } while (header != NULL);
4151 if (header != NULL) {
4152 /*
4153 * We now know that there is at least one active
4154 * rdataset at this node.
4155 */
4156 empty_node = ISC_FALSE;
4157
4158 /*
4159 * Do special zone cut handling, if requested.
4160 */
4161 if (maybe_zonecut &&
4162 header->type == dns_rdatatype_ns) {
4163 /*
4164 * We increment the reference count on node to
4165 * ensure that search->zonecut_rdataset will
4166 * still be valid later.
4167 */
4168 new_reference(search.rbtdb, node);
4169 search.zonecut = node;
4170 search.zonecut_rdataset = header;
4171 search.zonecut_sigrdataset = NULL;
4172 search.need_cleanup = ISC_TRUE;
4173 maybe_zonecut = ISC_FALSE;
4174 at_zonecut = ISC_TRUE;
4175 /*
4176 * It is not clear if KEY should still be
4177 * allowed at the parent side of the zone
4178 * cut or not. It is needed for RFC3007
4179 * validated updates.
4180 */
4181 if ((search.options & DNS_DBFIND_GLUEOK) == 0
4182 && type != dns_rdatatype_nsec
4183 && type != dns_rdatatype_key) {
4184 /*
4185 * Glue is not OK, but any answer we
4186 * could return would be glue. Return
4187 * the delegation.
4188 */
4189 found = NULL;
4190 break;
4191 }
4192 if (found != NULL && foundsig != NULL)
4193 break;
4194 }
4195
4196
4197 /*
4198 * If the NSEC3 record doesn't match the chain
4199 * we are using behave as if it isn't here.
4200 */
4201 if (header->type == dns_rdatatype_nsec3 &&
4202 !matchparams(header, &search)) {
4203 NODE_UNLOCK(lock, isc_rwlocktype_read);
4204 goto partial_match;
4205 }
4206 /*
4207 * If we found a type we were looking for,
4208 * remember it.
4209 */
4210 if (header->type == type ||
4211 type == dns_rdatatype_any ||
4212 (header->type == dns_rdatatype_cname &&
4213 cname_ok)) {
4214 /*
4215 * We've found the answer!
4216 */
4217 found = header;
4218 if (header->type == dns_rdatatype_cname &&
4219 cname_ok) {
4220 /*
4221 * We may be finding a CNAME instead
4222 * of the desired type.
4223 *
4224 * If we've already got the CNAME RRSIG,
4225 * use it, otherwise change sigtype
4226 * so that we find it.
4227 */
4228 if (cnamesig != NULL)
4229 foundsig = cnamesig;
4230 else
4231 sigtype =
4232 RBTDB_RDATATYPE_SIGCNAME;
4233 }
4234 /*
4235 * If we've got all we need, end the search.
4236 */
4237 if (!maybe_zonecut && foundsig != NULL)
4238 break;
4239 } else if (header->type == sigtype) {
4240 /*
4241 * We've found the RRSIG rdataset for our
4242 * target type. Remember it.
4243 */
4244 foundsig = header;
4245 /*
4246 * If we've got all we need, end the search.
4247 */
4248 if (!maybe_zonecut && found != NULL)
4249 break;
4250 } else if (header->type == dns_rdatatype_nsec &&
4251 !search.rbtversion->havensec3) {
4252 /*
4253 * Remember a NSEC rdataset even if we're
4254 * not specifically looking for it, because
4255 * we might need it later.
4256 */
4257 nsecheader = header;
4258 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
4259 !search.rbtversion->havensec3) {
4260 /*
4261 * If we need the NSEC rdataset, we'll also
4262 * need its signature.
4263 */
4264 nsecsig = header;
4265 } else if (cname_ok &&
4266 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4267 /*
4268 * If we get a CNAME match, we'll also need
4269 * its signature.
4270 */
4271 cnamesig = header;
4272 }
4273 }
4274 }
4275
4276 if (empty_node) {
4277 /*
4278 * We have an exact match for the name, but there are no
4279 * active rdatasets in the desired version. That means that
4280 * this node doesn't exist in the desired version, and that
4281 * we really have a partial match.
4282 */
4283 if (!wild) {
4284 NODE_UNLOCK(lock, isc_rwlocktype_read);
4285 goto partial_match;
4286 }
4287 }
4288
4289 /*
4290 * If we didn't find what we were looking for...
4291 */
4292 if (found == NULL) {
4293 if (search.zonecut != NULL) {
4294 /*
4295 * We were trying to find glue at a node beneath a
4296 * zone cut, but didn't.
4297 *
4298 * Return the delegation.
4299 */
4300 NODE_UNLOCK(lock, isc_rwlocktype_read);
4301 result = setup_delegation(&search, nodep, foundname,
4302 rdataset, sigrdataset);
4303 goto tree_exit;
4304 }
4305 /*
4306 * The desired type doesn't exist.
4307 */
4308 result = DNS_R_NXRRSET;
4309 if (search.rbtversion->secure == dns_db_secure &&
4310 !search.rbtversion->havensec3 &&
4311 (nsecheader == NULL || nsecsig == NULL)) {
4312 /*
4313 * The zone is secure but there's no NSEC,
4314 * or the NSEC has no signature!
4315 */
4316 if (!wild) {
4317 result = DNS_R_BADDB;
4318 goto node_exit;
4319 }
4320
4321 NODE_UNLOCK(lock, isc_rwlocktype_read);
4322 result = find_closest_nsec(&search, nodep, foundname,
4323 rdataset, sigrdataset,
4324 search.rbtdb->tree,
4325 search.rbtversion->secure);
4326 if (result == ISC_R_SUCCESS)
4327 result = DNS_R_EMPTYWILD;
4328 goto tree_exit;
4329 }
4330 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
4331 nsecheader == NULL)
4332 {
4333 /*
4334 * There's no NSEC record, and we were told
4335 * to find one.
4336 */
4337 result = DNS_R_BADDB;
4338 goto node_exit;
4339 }
4340 if (nodep != NULL) {
4341 new_reference(search.rbtdb, node);
4342 *nodep = node;
4343 }
4344 if ((search.rbtversion->secure == dns_db_secure &&
4345 !search.rbtversion->havensec3) ||
4346 (search.options & DNS_DBFIND_FORCENSEC) != 0)
4347 {
4348 bind_rdataset(search.rbtdb, node, nsecheader,
4349 0, rdataset);
4350 if (nsecsig != NULL)
4351 bind_rdataset(search.rbtdb, node,
4352 nsecsig, 0, sigrdataset);
4353 }
4354 if (wild)
4355 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4356 goto node_exit;
4357 }
4358
4359 /*
4360 * We found what we were looking for, or we found a CNAME.
4361 */
4362
4363 if (type != found->type &&
4364 type != dns_rdatatype_any &&
4365 found->type == dns_rdatatype_cname) {
4366 /*
4367 * We weren't doing an ANY query and we found a CNAME instead
4368 * of the type we were looking for, so we need to indicate
4369 * that result to the caller.
4370 */
4371 result = DNS_R_CNAME;
4372 } else if (search.zonecut != NULL) {
4373 /*
4374 * If we're beneath a zone cut, we must indicate that the
4375 * result is glue, unless we're actually at the zone cut
4376 * and the type is NSEC or KEY.
4377 */
4378 if (search.zonecut == node) {
4379 /*
4380 * It is not clear if KEY should still be
4381 * allowed at the parent side of the zone
4382 * cut or not. It is needed for RFC3007
4383 * validated updates.
4384 */
4385 if (type == dns_rdatatype_nsec ||
4386 type == dns_rdatatype_nsec3 ||
4387 type == dns_rdatatype_key)
4388 result = ISC_R_SUCCESS;
4389 else if (type == dns_rdatatype_any)
4390 result = DNS_R_ZONECUT;
4391 else
4392 result = DNS_R_GLUE;
4393 } else
4394 result = DNS_R_GLUE;
4395 /*
4396 * We might have found data that isn't glue, but was occluded
4397 * by a dynamic update. If the caller cares about this, they
4398 * will have told us to validate glue.
4399 *
4400 * XXX We should cache the glue validity state!
4401 */
4402 if (result == DNS_R_GLUE &&
4403 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4404 !valid_glue(&search, foundname, type, node)) {
4405 NODE_UNLOCK(lock, isc_rwlocktype_read);
4406 result = setup_delegation(&search, nodep, foundname,
4407 rdataset, sigrdataset);
4408 goto tree_exit;
4409 }
4410 } else {
4411 /*
4412 * An ordinary successful query!
4413 */
4414 result = ISC_R_SUCCESS;
4415 }
4416
4417 if (nodep != NULL) {
4418 if (!at_zonecut)
4419 new_reference(search.rbtdb, node);
4420 else
4421 search.need_cleanup = ISC_FALSE;
4422 *nodep = node;
4423 }
4424
4425 if (type != dns_rdatatype_any) {
4426 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4427 if (foundsig != NULL)
4428 bind_rdataset(search.rbtdb, node, foundsig, 0,
4429 sigrdataset);
4430 }
4431
4432 if (wild)
4433 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4434
4435 node_exit:
4436 NODE_UNLOCK(lock, isc_rwlocktype_read);
4437
4438 tree_exit:
4439 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4440
4441 /*
4442 * If we found a zonecut but aren't going to use it, we have to
4443 * let go of it.
4444 */
4445 if (search.need_cleanup) {
4446 node = search.zonecut;
4447 INSIST(node != NULL);
4448 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4449
4450 NODE_LOCK(lock, isc_rwlocktype_read);
4451 decrement_reference(search.rbtdb, node, 0,
4452 isc_rwlocktype_read, isc_rwlocktype_none,
4453 ISC_FALSE);
4454 NODE_UNLOCK(lock, isc_rwlocktype_read);
4455 }
4456
4457 if (close_version)
4458 closeversion(db, &version, ISC_FALSE);
4459
4460 dns_rbtnodechain_reset(&search.chain);
4461
4462 return (result);
4463 }
4464
4465 static isc_result_t
zone_findzonecut(dns_db_t * db,dns_name_t * name,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4466 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4467 isc_stdtime_t now, dns_dbnode_t **nodep,
4468 dns_name_t *foundname,
4469 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4470 {
4471 UNUSED(db);
4472 UNUSED(name);
4473 UNUSED(options);
4474 UNUSED(now);
4475 UNUSED(nodep);
4476 UNUSED(foundname);
4477 UNUSED(rdataset);
4478 UNUSED(sigrdataset);
4479
4480 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4481
4482 /* NOTREACHED */
4483 return (ISC_R_NOTIMPLEMENTED);
4484 }
4485
4486 static isc_result_t
cache_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)4487 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4488 rbtdb_search_t *search = arg;
4489 rdatasetheader_t *header, *header_prev, *header_next;
4490 rdatasetheader_t *dname_header, *sigdname_header;
4491 isc_result_t result;
4492 nodelock_t *lock;
4493 isc_rwlocktype_t locktype;
4494
4495 /* XXX comment */
4496
4497 REQUIRE(search->zonecut == NULL);
4498
4499 /*
4500 * Keep compiler silent.
4501 */
4502 UNUSED(name);
4503
4504 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4505 locktype = isc_rwlocktype_read;
4506 NODE_LOCK(lock, locktype);
4507
4508 /*
4509 * Look for a DNAME or RRSIG DNAME rdataset.
4510 */
4511 dname_header = NULL;
4512 sigdname_header = NULL;
4513 header_prev = NULL;
4514 for (header = node->data; header != NULL; header = header_next) {
4515 header_next = header->next;
4516 if (header->rdh_ttl < search->now) {
4517 /*
4518 * This rdataset is stale. If no one else is
4519 * using the node, we can clean it up right
4520 * now, otherwise we mark it as stale, and
4521 * the node as dirty, so it will get cleaned
4522 * up later.
4523 */
4524 if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) &&
4525 (locktype == isc_rwlocktype_write ||
4526 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4527 /*
4528 * We update the node's status only when we
4529 * can get write access; otherwise, we leave
4530 * others to this work. Periodical cleaning
4531 * will eventually take the job as the last
4532 * resort.
4533 * We won't downgrade the lock, since other
4534 * rdatasets are probably stale, too.
4535 */
4536 locktype = isc_rwlocktype_write;
4537
4538 if (dns_rbtnode_refcurrent(node) == 0) {
4539 isc_mem_t *mctx;
4540
4541 /*
4542 * header->down can be non-NULL if the
4543 * refcount has just decremented to 0
4544 * but decrement_reference() has not
4545 * performed clean_cache_node(), in
4546 * which case we need to purge the
4547 * stale headers first.
4548 */
4549 mctx = search->rbtdb->common.mctx;
4550 clean_stale_headers(search->rbtdb,
4551 mctx,
4552 header);
4553 if (header_prev != NULL)
4554 header_prev->next =
4555 header->next;
4556 else
4557 node->data = header->next;
4558 free_rdataset(search->rbtdb, mctx,
4559 header);
4560 } else {
4561 mark_stale_header(search->rbtdb,
4562 header);
4563 header_prev = header;
4564 }
4565 } else
4566 header_prev = header;
4567 } else if (header->type == dns_rdatatype_dname &&
4568 EXISTS(header)) {
4569 dname_header = header;
4570 header_prev = header;
4571 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4572 EXISTS(header)) {
4573 sigdname_header = header;
4574 header_prev = header;
4575 } else
4576 header_prev = header;
4577 }
4578
4579 if (dname_header != NULL &&
4580 (!DNS_TRUST_PENDING(dname_header->trust) ||
4581 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4582 /*
4583 * We increment the reference count on node to ensure that
4584 * search->zonecut_rdataset will still be valid later.
4585 */
4586 new_reference(search->rbtdb, node);
4587 INSIST(!ISC_LINK_LINKED(node, deadlink));
4588 search->zonecut = node;
4589 search->zonecut_rdataset = dname_header;
4590 search->zonecut_sigrdataset = sigdname_header;
4591 search->need_cleanup = ISC_TRUE;
4592 result = DNS_R_PARTIALMATCH;
4593 } else
4594 result = DNS_R_CONTINUE;
4595
4596 NODE_UNLOCK(lock, locktype);
4597
4598 return (result);
4599 }
4600
4601 static inline isc_result_t
find_deepest_zonecut(rbtdb_search_t * search,dns_rbtnode_t * node,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4602 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4603 dns_dbnode_t **nodep, dns_name_t *foundname,
4604 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4605 {
4606 unsigned int i;
4607 dns_rbtnode_t *level_node;
4608 rdatasetheader_t *header, *header_prev, *header_next;
4609 rdatasetheader_t *found, *foundsig;
4610 isc_result_t result = ISC_R_NOTFOUND;
4611 dns_name_t name;
4612 dns_rbtdb_t *rbtdb;
4613 isc_boolean_t done;
4614 nodelock_t *lock;
4615 isc_rwlocktype_t locktype;
4616
4617 /*
4618 * Caller must be holding the tree lock.
4619 */
4620
4621 rbtdb = search->rbtdb;
4622 i = search->chain.level_matches;
4623 done = ISC_FALSE;
4624 do {
4625 locktype = isc_rwlocktype_read;
4626 lock = &rbtdb->node_locks[node->locknum].lock;
4627 NODE_LOCK(lock, locktype);
4628
4629 /*
4630 * Look for NS and RRSIG NS rdatasets.
4631 */
4632 found = NULL;
4633 foundsig = NULL;
4634 header_prev = NULL;
4635 for (header = node->data;
4636 header != NULL;
4637 header = header_next) {
4638 header_next = header->next;
4639 if (header->rdh_ttl < search->now) {
4640 /*
4641 * This rdataset is stale. If no one else is
4642 * using the node, we can clean it up right
4643 * now, otherwise we mark it as stale, and
4644 * the node as dirty, so it will get cleaned
4645 * up later.
4646 */
4647 if ((header->rdh_ttl < search->now -
4648 RBTDB_VIRTUAL) &&
4649 (locktype == isc_rwlocktype_write ||
4650 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4651 /*
4652 * We update the node's status only
4653 * when we can get write access.
4654 */
4655 locktype = isc_rwlocktype_write;
4656
4657 if (dns_rbtnode_refcurrent(node)
4658 == 0) {
4659 isc_mem_t *m;
4660
4661 m = search->rbtdb->common.mctx;
4662 clean_stale_headers(
4663 search->rbtdb,
4664 m, header);
4665 if (header_prev != NULL)
4666 header_prev->next =
4667 header->next;
4668 else
4669 node->data =
4670 header->next;
4671 free_rdataset(rbtdb, m,
4672 header);
4673 } else {
4674 mark_stale_header(rbtdb,
4675 header);
4676 header_prev = header;
4677 }
4678 } else
4679 header_prev = header;
4680 } else if (EXISTS(header)) {
4681 /*
4682 * We've found an extant rdataset. See if
4683 * we're interested in it.
4684 */
4685 if (header->type == dns_rdatatype_ns) {
4686 found = header;
4687 if (foundsig != NULL)
4688 break;
4689 } else if (header->type ==
4690 RBTDB_RDATATYPE_SIGNS) {
4691 foundsig = header;
4692 if (found != NULL)
4693 break;
4694 }
4695 header_prev = header;
4696 } else
4697 header_prev = header;
4698 }
4699
4700 if (found != NULL) {
4701 /*
4702 * If we have to set foundname, we do it before
4703 * anything else. If we were to set foundname after
4704 * we had set nodep or bound the rdataset, then we'd
4705 * have to undo that work if dns_name_concatenate()
4706 * failed. By setting foundname first, there's
4707 * nothing to undo if we have trouble.
4708 */
4709 if (foundname != NULL) {
4710 dns_name_init(&name, NULL);
4711 dns_rbt_namefromnode(node, &name);
4712 result = dns_name_copy(&name, foundname, NULL);
4713 while (result == ISC_R_SUCCESS && i > 0) {
4714 i--;
4715 level_node = search->chain.levels[i];
4716 dns_name_init(&name, NULL);
4717 dns_rbt_namefromnode(level_node,
4718 &name);
4719 result =
4720 dns_name_concatenate(foundname,
4721 &name,
4722 foundname,
4723 NULL);
4724 }
4725 if (result != ISC_R_SUCCESS) {
4726 *nodep = NULL;
4727 goto node_exit;
4728 }
4729 }
4730 result = DNS_R_DELEGATION;
4731 if (nodep != NULL) {
4732 new_reference(search->rbtdb, node);
4733 *nodep = node;
4734 }
4735 bind_rdataset(search->rbtdb, node, found, search->now,
4736 rdataset);
4737 if (foundsig != NULL)
4738 bind_rdataset(search->rbtdb, node, foundsig,
4739 search->now, sigrdataset);
4740 if (need_headerupdate(found, search->now) ||
4741 (foundsig != NULL &&
4742 need_headerupdate(foundsig, search->now))) {
4743 if (locktype != isc_rwlocktype_write) {
4744 NODE_UNLOCK(lock, locktype);
4745 NODE_LOCK(lock, isc_rwlocktype_write);
4746 locktype = isc_rwlocktype_write;
4747 POST(locktype);
4748 }
4749 if (need_headerupdate(found, search->now))
4750 update_header(search->rbtdb, found,
4751 search->now);
4752 if (foundsig != NULL &&
4753 need_headerupdate(foundsig, search->now)) {
4754 update_header(search->rbtdb, foundsig,
4755 search->now);
4756 }
4757 }
4758 }
4759
4760 node_exit:
4761 NODE_UNLOCK(lock, locktype);
4762
4763 if (found == NULL && i > 0) {
4764 i--;
4765 node = search->chain.levels[i];
4766 } else
4767 done = ISC_TRUE;
4768
4769 } while (!done);
4770
4771 return (result);
4772 }
4773
4774 static isc_result_t
find_coveringnsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,isc_stdtime_t now,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4775 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4776 isc_stdtime_t now, dns_name_t *foundname,
4777 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4778 {
4779 dns_rbtnode_t *node;
4780 rdatasetheader_t *header, *header_next, *header_prev;
4781 rdatasetheader_t *found, *foundsig;
4782 isc_boolean_t empty_node;
4783 isc_result_t result;
4784 dns_fixedname_t fname, forigin;
4785 dns_name_t *name, *origin;
4786 rbtdb_rdatatype_t matchtype, sigmatchtype;
4787 nodelock_t *lock;
4788 isc_rwlocktype_t locktype;
4789
4790 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4791 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4792 dns_rdatatype_nsec);
4793
4794 do {
4795 node = NULL;
4796 dns_fixedname_init(&fname);
4797 name = dns_fixedname_name(&fname);
4798 dns_fixedname_init(&forigin);
4799 origin = dns_fixedname_name(&forigin);
4800 result = dns_rbtnodechain_current(&search->chain, name,
4801 origin, &node);
4802 if (result != ISC_R_SUCCESS)
4803 return (result);
4804 locktype = isc_rwlocktype_read;
4805 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4806 NODE_LOCK(lock, locktype);
4807 found = NULL;
4808 foundsig = NULL;
4809 empty_node = ISC_TRUE;
4810 header_prev = NULL;
4811 for (header = node->data;
4812 header != NULL;
4813 header = header_next) {
4814 header_next = header->next;
4815 if (header->rdh_ttl < now) {
4816 /*
4817 * This rdataset is stale. If no one else is
4818 * using the node, we can clean it up right
4819 * now, otherwise we mark it as stale, and the
4820 * node as dirty, so it will get cleaned up
4821 * later.
4822 */
4823 if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
4824 (locktype == isc_rwlocktype_write ||
4825 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4826 /*
4827 * We update the node's status only
4828 * when we can get write access.
4829 */
4830 locktype = isc_rwlocktype_write;
4831
4832 if (dns_rbtnode_refcurrent(node)
4833 == 0) {
4834 isc_mem_t *m;
4835
4836 m = search->rbtdb->common.mctx;
4837 clean_stale_headers(
4838 search->rbtdb,
4839 m, header);
4840 if (header_prev != NULL)
4841 header_prev->next =
4842 header->next;
4843 else
4844 node->data = header->next;
4845 free_rdataset(search->rbtdb, m,
4846 header);
4847 } else {
4848 mark_stale_header(search->rbtdb,
4849 header);
4850 header_prev = header;
4851 }
4852 } else
4853 header_prev = header;
4854 continue;
4855 }
4856 if (NONEXISTENT(header) ||
4857 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4858 header_prev = header;
4859 continue;
4860 }
4861 empty_node = ISC_FALSE;
4862 if (header->type == matchtype)
4863 found = header;
4864 else if (header->type == sigmatchtype)
4865 foundsig = header;
4866 header_prev = header;
4867 }
4868 if (found != NULL) {
4869 result = dns_name_concatenate(name, origin,
4870 foundname, NULL);
4871 if (result != ISC_R_SUCCESS)
4872 goto unlock_node;
4873 bind_rdataset(search->rbtdb, node, found,
4874 now, rdataset);
4875 if (foundsig != NULL)
4876 bind_rdataset(search->rbtdb, node, foundsig,
4877 now, sigrdataset);
4878 new_reference(search->rbtdb, node);
4879 *nodep = node;
4880 result = DNS_R_COVERINGNSEC;
4881 } else if (!empty_node) {
4882 result = ISC_R_NOTFOUND;
4883 } else
4884 result = dns_rbtnodechain_prev(&search->chain, NULL,
4885 NULL);
4886 unlock_node:
4887 NODE_UNLOCK(lock, locktype);
4888 } while (empty_node && result == ISC_R_SUCCESS);
4889 return (result);
4890 }
4891
4892 /*
4893 * Connect this RBTDB to the response policy zone summary data for the view.
4894 */
4895 static void
rpz_attach(dns_db_t * db,dns_rpz_zones_t * rpzs,dns_rpz_num_t rpz_num)4896 rpz_attach(dns_db_t *db, dns_rpz_zones_t *rpzs, dns_rpz_num_t rpz_num) {
4897 dns_rbtdb_t * rbtdb;
4898
4899 rbtdb = (dns_rbtdb_t *)db;
4900 REQUIRE(VALID_RBTDB(rbtdb));
4901
4902 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4903 REQUIRE(rbtdb->rpzs == NULL && rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
4904 dns_rpz_attach_rpzs(rpzs, &rbtdb->rpzs);
4905 rbtdb->rpz_num = rpz_num;
4906 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4907 }
4908
4909 /*
4910 * Enable this RBTDB as a response policy zone.
4911 */
4912 static isc_result_t
rpz_ready(dns_db_t * db)4913 rpz_ready(dns_db_t *db) {
4914 dns_rbtdb_t * rbtdb;
4915 isc_result_t result;
4916
4917 rbtdb = (dns_rbtdb_t *)db;
4918 REQUIRE(VALID_RBTDB(rbtdb));
4919
4920 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4921 if (rbtdb->rpzs == NULL) {
4922 INSIST(rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
4923 result = ISC_R_SUCCESS;
4924 } else {
4925 result = dns_rpz_ready(rbtdb->rpzs, &rbtdb->load_rpzs,
4926 rbtdb->rpz_num);
4927 }
4928 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4929 return (result);
4930 }
4931
4932 static isc_result_t
cache_find(dns_db_t * db,dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4933 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4934 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4935 dns_dbnode_t **nodep, dns_name_t *foundname,
4936 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4937 {
4938 dns_rbtnode_t *node = NULL;
4939 isc_result_t result;
4940 rbtdb_search_t search;
4941 isc_boolean_t cname_ok = ISC_TRUE;
4942 isc_boolean_t empty_node;
4943 nodelock_t *lock;
4944 isc_rwlocktype_t locktype;
4945 rdatasetheader_t *header, *header_prev, *header_next;
4946 rdatasetheader_t *found, *nsheader;
4947 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4948 rdatasetheader_t *update, *updatesig;
4949 rbtdb_rdatatype_t sigtype, negtype;
4950
4951 UNUSED(version);
4952
4953 search.rbtdb = (dns_rbtdb_t *)db;
4954
4955 REQUIRE(VALID_RBTDB(search.rbtdb));
4956 REQUIRE(version == NULL);
4957
4958 if (now == 0)
4959 isc_stdtime_get(&now);
4960
4961 search.rbtversion = NULL;
4962 search.serial = 1;
4963 search.options = options;
4964 search.copy_name = ISC_FALSE;
4965 search.need_cleanup = ISC_FALSE;
4966 search.wild = ISC_FALSE;
4967 search.zonecut = NULL;
4968 dns_fixedname_init(&search.zonecut_name);
4969 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4970 search.now = now;
4971 update = NULL;
4972 updatesig = NULL;
4973
4974 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4975
4976 /*
4977 * Search down from the root of the tree. If, while going down, we
4978 * encounter a callback node, cache_zonecut_callback() will search the
4979 * rdatasets at the zone cut for a DNAME rdataset.
4980 */
4981 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4982 &search.chain, DNS_RBTFIND_EMPTYDATA,
4983 cache_zonecut_callback, &search);
4984
4985 if (result == DNS_R_PARTIALMATCH) {
4986 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4987 result = find_coveringnsec(&search, nodep, now,
4988 foundname, rdataset,
4989 sigrdataset);
4990 if (result == DNS_R_COVERINGNSEC)
4991 goto tree_exit;
4992 }
4993 if (search.zonecut != NULL) {
4994 result = setup_delegation(&search, nodep, foundname,
4995 rdataset, sigrdataset);
4996 goto tree_exit;
4997 } else {
4998 find_ns:
4999 result = find_deepest_zonecut(&search, node, nodep,
5000 foundname, rdataset,
5001 sigrdataset);
5002 goto tree_exit;
5003 }
5004 } else if (result != ISC_R_SUCCESS)
5005 goto tree_exit;
5006
5007 /*
5008 * Certain DNSSEC types are not subject to CNAME matching
5009 * (RFC4035, section 2.5 and RFC3007).
5010 *
5011 * We don't check for RRSIG, because we don't store RRSIG records
5012 * directly.
5013 */
5014 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
5015 cname_ok = ISC_FALSE;
5016
5017 /*
5018 * We now go looking for rdata...
5019 */
5020
5021 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5022 locktype = isc_rwlocktype_read;
5023 NODE_LOCK(lock, locktype);
5024
5025 found = NULL;
5026 foundsig = NULL;
5027 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5028 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5029 nsheader = NULL;
5030 nssig = NULL;
5031 cnamesig = NULL;
5032 empty_node = ISC_TRUE;
5033 header_prev = NULL;
5034 for (header = node->data; header != NULL; header = header_next) {
5035 header_next = header->next;
5036 if (header->rdh_ttl < now) {
5037 /*
5038 * This rdataset is stale. If no one else is using the
5039 * node, we can clean it up right now, otherwise we
5040 * mark it as stale, and the node as dirty, so it will
5041 * get cleaned up later.
5042 */
5043 if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5044 (locktype == isc_rwlocktype_write ||
5045 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5046 /*
5047 * We update the node's status only when we
5048 * can get write access.
5049 */
5050 locktype = isc_rwlocktype_write;
5051
5052 if (dns_rbtnode_refcurrent(node) == 0) {
5053 isc_mem_t *mctx;
5054
5055 mctx = search.rbtdb->common.mctx;
5056 clean_stale_headers(search.rbtdb, mctx,
5057 header);
5058 if (header_prev != NULL)
5059 header_prev->next =
5060 header->next;
5061 else
5062 node->data = header->next;
5063 free_rdataset(search.rbtdb, mctx,
5064 header);
5065 } else {
5066 mark_stale_header(search.rbtdb, header);
5067 header_prev = header;
5068 }
5069 } else
5070 header_prev = header;
5071 } else if (EXISTS(header)) {
5072 /*
5073 * We now know that there is at least one active
5074 * non-stale rdataset at this node.
5075 */
5076 empty_node = ISC_FALSE;
5077
5078 /*
5079 * If we found a type we were looking for, remember
5080 * it.
5081 */
5082 if (header->type == type ||
5083 (type == dns_rdatatype_any &&
5084 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
5085 (cname_ok && header->type ==
5086 dns_rdatatype_cname)) {
5087 /*
5088 * We've found the answer.
5089 */
5090 found = header;
5091 if (header->type == dns_rdatatype_cname &&
5092 cname_ok &&
5093 cnamesig != NULL) {
5094 /*
5095 * If we've already got the
5096 * CNAME RRSIG, use it.
5097 */
5098 foundsig = cnamesig;
5099 }
5100 } else if (header->type == sigtype) {
5101 /*
5102 * We've found the RRSIG rdataset for our
5103 * target type. Remember it.
5104 */
5105 foundsig = header;
5106 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5107 header->type == negtype) {
5108 /*
5109 * We've found a negative cache entry.
5110 */
5111 found = header;
5112 } else if (header->type == dns_rdatatype_ns) {
5113 /*
5114 * Remember a NS rdataset even if we're
5115 * not specifically looking for it, because
5116 * we might need it later.
5117 */
5118 nsheader = header;
5119 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5120 /*
5121 * If we need the NS rdataset, we'll also
5122 * need its signature.
5123 */
5124 nssig = header;
5125 } else if (cname_ok &&
5126 header->type == RBTDB_RDATATYPE_SIGCNAME) {
5127 /*
5128 * If we get a CNAME match, we'll also need
5129 * its signature.
5130 */
5131 cnamesig = header;
5132 }
5133 header_prev = header;
5134 } else
5135 header_prev = header;
5136 }
5137
5138 if (empty_node) {
5139 /*
5140 * We have an exact match for the name, but there are no
5141 * extant rdatasets. That means that this node doesn't
5142 * meaningfully exist, and that we really have a partial match.
5143 */
5144 NODE_UNLOCK(lock, locktype);
5145 goto find_ns;
5146 }
5147
5148 /*
5149 * If we didn't find what we were looking for...
5150 */
5151 if (found == NULL ||
5152 (DNS_TRUST_ADDITIONAL(found->trust) &&
5153 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
5154 (found->trust == dns_trust_glue &&
5155 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
5156 (DNS_TRUST_PENDING(found->trust) &&
5157 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
5158 /*
5159 * If there is an NS rdataset at this node, then this is the
5160 * deepest zone cut.
5161 */
5162 if (nsheader != NULL) {
5163 if (nodep != NULL) {
5164 new_reference(search.rbtdb, node);
5165 INSIST(!ISC_LINK_LINKED(node, deadlink));
5166 *nodep = node;
5167 }
5168 bind_rdataset(search.rbtdb, node, nsheader, search.now,
5169 rdataset);
5170 if (need_headerupdate(nsheader, search.now))
5171 update = nsheader;
5172 if (nssig != NULL) {
5173 bind_rdataset(search.rbtdb, node, nssig,
5174 search.now, sigrdataset);
5175 if (need_headerupdate(nssig, search.now))
5176 updatesig = nssig;
5177 }
5178 result = DNS_R_DELEGATION;
5179 goto node_exit;
5180 }
5181
5182 /*
5183 * Go find the deepest zone cut.
5184 */
5185 NODE_UNLOCK(lock, locktype);
5186 goto find_ns;
5187 }
5188
5189 /*
5190 * We found what we were looking for, or we found a CNAME.
5191 */
5192
5193 if (nodep != NULL) {
5194 new_reference(search.rbtdb, node);
5195 INSIST(!ISC_LINK_LINKED(node, deadlink));
5196 *nodep = node;
5197 }
5198
5199 if (NEGATIVE(found)) {
5200 /*
5201 * We found a negative cache entry.
5202 */
5203 if (NXDOMAIN(found))
5204 result = DNS_R_NCACHENXDOMAIN;
5205 else
5206 result = DNS_R_NCACHENXRRSET;
5207 } else if (type != found->type &&
5208 type != dns_rdatatype_any &&
5209 found->type == dns_rdatatype_cname) {
5210 /*
5211 * We weren't doing an ANY query and we found a CNAME instead
5212 * of the type we were looking for, so we need to indicate
5213 * that result to the caller.
5214 */
5215 result = DNS_R_CNAME;
5216 } else {
5217 /*
5218 * An ordinary successful query!
5219 */
5220 result = ISC_R_SUCCESS;
5221 }
5222
5223 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5224 result == DNS_R_NCACHENXRRSET) {
5225 bind_rdataset(search.rbtdb, node, found, search.now,
5226 rdataset);
5227 if (need_headerupdate(found, search.now))
5228 update = found;
5229 if (!NEGATIVE(found) && foundsig != NULL) {
5230 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5231 sigrdataset);
5232 if (need_headerupdate(foundsig, search.now))
5233 updatesig = foundsig;
5234 }
5235 }
5236
5237 node_exit:
5238 if ((update != NULL || updatesig != NULL) &&
5239 locktype != isc_rwlocktype_write) {
5240 NODE_UNLOCK(lock, locktype);
5241 NODE_LOCK(lock, isc_rwlocktype_write);
5242 locktype = isc_rwlocktype_write;
5243 POST(locktype);
5244 }
5245 if (update != NULL && need_headerupdate(update, search.now))
5246 update_header(search.rbtdb, update, search.now);
5247 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5248 update_header(search.rbtdb, updatesig, search.now);
5249
5250 NODE_UNLOCK(lock, locktype);
5251
5252 tree_exit:
5253 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5254
5255 /*
5256 * If we found a zonecut but aren't going to use it, we have to
5257 * let go of it.
5258 */
5259 if (search.need_cleanup) {
5260 node = search.zonecut;
5261 INSIST(node != NULL);
5262 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5263
5264 NODE_LOCK(lock, isc_rwlocktype_read);
5265 decrement_reference(search.rbtdb, node, 0,
5266 isc_rwlocktype_read, isc_rwlocktype_none,
5267 ISC_FALSE);
5268 NODE_UNLOCK(lock, isc_rwlocktype_read);
5269 }
5270
5271 dns_rbtnodechain_reset(&search.chain);
5272
5273 update_cachestats(search.rbtdb, result);
5274 return (result);
5275 }
5276
/*
 * Find an NS rdataset (and the RRSIG rdataset covering it, when one is
 * present) for 'name' in the cache tree.
 *
 * The tree is searched with dns_rbt_findnode().  On an exact match the
 * node's header list is scanned for NS / RRSIG(NS); expired headers seen
 * along the way are freed or marked stale when a write lock on the node
 * can be obtained.  If the node has no usable NS rdataset, or the tree
 * search only produced a partial match, the work is handed to
 * find_deepest_zonecut().
 *
 * 'now' == 0 means "use the current time".  'nodep' may be NULL, in which
 * case no node reference is returned.  When an NS rdataset is found at the
 * matched node it is bound to 'rdataset', and the signature (if any) to
 * 'sigrdataset'.
 *
 * Returns the result of the search; DNS_R_DELEGATION is reported to the
 * caller as ISC_R_SUCCESS.  Any other non-success result of the tree
 * search is returned unchanged.
 */
static isc_result_t
cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
		  isc_stdtime_t now, dns_dbnode_t **nodep,
		  dns_name_t *foundname,
		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
{
	dns_rbtnode_t *node = NULL;
	nodelock_t *lock;
	isc_result_t result;
	rbtdb_search_t search;
	rdatasetheader_t *header, *header_prev, *header_next;
	rdatasetheader_t *found, *foundsig;
	unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
	isc_rwlocktype_t locktype;

	search.rbtdb = (dns_rbtdb_t *)db;

	REQUIRE(VALID_RBTDB(search.rbtdb));

	if (now == 0)
		isc_stdtime_get(&now);

	/*
	 * Set up the search context shared with find_deepest_zonecut().
	 */
	search.rbtversion = NULL;
	search.serial = 1;
	search.options = options;
	search.copy_name = ISC_FALSE;
	search.need_cleanup = ISC_FALSE;
	search.wild = ISC_FALSE;
	search.zonecut = NULL;
	dns_fixedname_init(&search.zonecut_name);
	dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
	search.now = now;

	if ((options & DNS_DBFIND_NOEXACT) != 0)
		rbtoptions |= DNS_RBTFIND_NOEXACT;

	/*
	 * The tree lock is held (as a reader) until tree_exit.
	 */
	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

	/*
	 * Search down from the root of the tree.
	 */
	result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
				  &search.chain, rbtoptions, NULL, &search);

	if (result == DNS_R_PARTIALMATCH) {
		/*
		 * 'find_ns' is also jumped to from below, after the node
		 * lock has been released, when the exactly matching node
		 * turns out to hold no usable NS rdataset.
		 */
	find_ns:
		result = find_deepest_zonecut(&search, node, nodep, foundname,
					      rdataset, sigrdataset);
		goto tree_exit;
	} else if (result != ISC_R_SUCCESS)
		goto tree_exit;

	/*
	 * We now go looking for an NS rdataset at the node.
	 */

	lock = &(search.rbtdb->node_locks[node->locknum].lock);
	locktype = isc_rwlocktype_read;
	NODE_LOCK(lock, locktype);

	found = NULL;
	foundsig = NULL;
	header_prev = NULL;
	for (header = node->data; header != NULL; header = header_next) {
		/*
		 * Save the successor first: 'header' may be freed below.
		 */
		header_next = header->next;
		if (header->rdh_ttl < now) {
			/*
			 * This rdataset is stale.  If no one else is using the
			 * node, we can clean it up right now, otherwise we
			 * mark it as stale, and the node as dirty, so it will
			 * get cleaned up later.
			 */
			if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
			    (locktype == isc_rwlocktype_write ||
			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
				/*
				 * We update the node's status only when we
				 * can get write access.
				 */
				locktype = isc_rwlocktype_write;

				if (dns_rbtnode_refcurrent(node) == 0) {
					isc_mem_t *mctx;

					/*
					 * Unlink 'header' from the node's
					 * list and free it; 'header_prev'
					 * is deliberately left unchanged.
					 */
					mctx = search.rbtdb->common.mctx;
					clean_stale_headers(search.rbtdb, mctx,
							    header);
					if (header_prev != NULL)
						header_prev->next =
							header->next;
					else
						node->data = header->next;
					free_rdataset(search.rbtdb, mctx,
						      header);
				} else {
					mark_stale_header(search.rbtdb, header);
					header_prev = header;
				}
			} else
				header_prev = header;
		} else if (EXISTS(header)) {
			/*
			 * An unexpired, extant rdataset: remember it if it is
			 * one of the two types we want.
			 */
			if (header->type == dns_rdatatype_ns) {
				/*
				 * This is the NS rdataset we are looking
				 * for.
				 */
				found = header;
			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
				/*
				 * Remember the signature of the NS rdataset
				 * so it can be returned alongside it.
				 */
				foundsig = header;
			}
			header_prev = header;
		} else
			header_prev = header;
	}

	if (found == NULL) {
		/*
		 * No NS records here.
		 */
		NODE_UNLOCK(lock, locktype);
		goto find_ns;
	}

	if (nodep != NULL) {
		new_reference(search.rbtdb, node);
		INSIST(!ISC_LINK_LINKED(node, deadlink));
		*nodep = node;
	}

	bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
	if (foundsig != NULL)
		bind_rdataset(search.rbtdb, node, foundsig, search.now,
			      sigrdataset);

	/*
	 * update_header() is only called with the node write-locked.  The
	 * lock is dropped and re-acquired rather than upgraded, so
	 * need_headerupdate() is evaluated again once the write lock is held.
	 */
	if (need_headerupdate(found, search.now) ||
	    (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
		if (locktype != isc_rwlocktype_write) {
			NODE_UNLOCK(lock, locktype);
			NODE_LOCK(lock, isc_rwlocktype_write);
			locktype = isc_rwlocktype_write;
			POST(locktype);
		}
		if (need_headerupdate(found, search.now))
			update_header(search.rbtdb, found, search.now);
		if (foundsig != NULL &&
		    need_headerupdate(foundsig, search.now)) {
			update_header(search.rbtdb, foundsig, search.now);
		}
	}

	NODE_UNLOCK(lock, locktype);

 tree_exit:
	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

	INSIST(!search.need_cleanup);

	dns_rbtnodechain_reset(&search.chain);

	/*
	 * Callers of this function see a found zone cut as plain success.
	 */
	if (result == DNS_R_DELEGATION)
		result = ISC_R_SUCCESS;

	return (result);
}
5450
5451 static void
attachnode(dns_db_t * db,dns_dbnode_t * source,dns_dbnode_t ** targetp)5452 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5453 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5454 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5455 unsigned int refs;
5456
5457 REQUIRE(VALID_RBTDB(rbtdb));
5458 REQUIRE(targetp != NULL && *targetp == NULL);
5459
5460 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5461 dns_rbtnode_refincrement(node, &refs);
5462 INSIST(refs != 0);
5463 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5464
5465 *targetp = source;
5466 }
5467
5468 static void
detachnode(dns_db_t * db,dns_dbnode_t ** targetp)5469 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5470 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5471 dns_rbtnode_t *node;
5472 isc_boolean_t want_free = ISC_FALSE;
5473 isc_boolean_t inactive = ISC_FALSE;
5474 rbtdb_nodelock_t *nodelock;
5475
5476 REQUIRE(VALID_RBTDB(rbtdb));
5477 REQUIRE(targetp != NULL && *targetp != NULL);
5478
5479 node = (dns_rbtnode_t *)(*targetp);
5480 nodelock = &rbtdb->node_locks[node->locknum];
5481
5482 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5483
5484 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5485 isc_rwlocktype_none, ISC_FALSE)) {
5486 if (isc_refcount_current(&nodelock->references) == 0 &&
5487 nodelock->exiting) {
5488 inactive = ISC_TRUE;
5489 }
5490 }
5491
5492 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5493
5494 *targetp = NULL;
5495
5496 if (inactive) {
5497 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5498 rbtdb->active--;
5499 if (rbtdb->active == 0)
5500 want_free = ISC_TRUE;
5501 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5502 if (want_free) {
5503 char buf[DNS_NAME_FORMATSIZE];
5504 if (dns_name_dynamic(&rbtdb->common.origin))
5505 dns_name_format(&rbtdb->common.origin, buf,
5506 sizeof(buf));
5507 else
5508 strcpy(buf, "<UNKNOWN>");
5509 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5510 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5511 "calling free_rbtdb(%s)", buf);
5512 free_rbtdb(rbtdb, ISC_TRUE, NULL);
5513 }
5514 }
5515 }
5516
5517 static isc_result_t
expirenode(dns_db_t * db,dns_dbnode_t * node,isc_stdtime_t now)5518 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5519 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5520 dns_rbtnode_t *rbtnode = node;
5521 rdatasetheader_t *header;
5522 isc_boolean_t force_expire = ISC_FALSE;
5523 /*
5524 * These are the category and module used by the cache cleaner.
5525 */
5526 isc_boolean_t log = ISC_FALSE;
5527 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5528 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5529 int level = ISC_LOG_DEBUG(2);
5530 char printname[DNS_NAME_FORMATSIZE];
5531
5532 REQUIRE(VALID_RBTDB(rbtdb));
5533
5534 /*
5535 * Caller must hold a tree lock.
5536 */
5537
5538 if (now == 0)
5539 isc_stdtime_get(&now);
5540
5541 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5542 isc_uint32_t val;
5543
5544 isc_random_get(&val);
5545 /*
5546 * XXXDCL Could stand to have a better policy, like LRU.
5547 */
5548 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5549
5550 /*
5551 * Note that 'log' can be true IFF overmem is also true.
5552 * overmem can currently only be true for cache
5553 * databases -- hence all of the "overmem cache" log strings.
5554 */
5555 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5556 if (log)
5557 isc_log_write(dns_lctx, category, module, level,
5558 "overmem cache: %s %s",
5559 force_expire ? "FORCE" : "check",
5560 dns_rbt_formatnodename(rbtnode,
5561 printname,
5562 sizeof(printname)));
5563 }
5564
5565 /*
5566 * We may not need write access, but this code path is not performance
5567 * sensitive, so it should be okay to always lock as a writer.
5568 */
5569 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5570 isc_rwlocktype_write);
5571
5572 for (header = rbtnode->data; header != NULL; header = header->next)
5573 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5574 /*
5575 * We don't check if refcurrent(rbtnode) == 0 and try
5576 * to free like we do in cache_find(), because
5577 * refcurrent(rbtnode) must be non-zero. This is so
5578 * because 'node' is an argument to the function.
5579 */
5580 mark_stale_header(rbtdb, header);
5581 if (log)
5582 isc_log_write(dns_lctx, category, module,
5583 level, "overmem cache: stale %s",
5584 printname);
5585 } else if (force_expire) {
5586 if (! RETAIN(header)) {
5587 set_ttl(rbtdb, header, 0);
5588 mark_stale_header(rbtdb, header);
5589 } else if (log) {
5590 isc_log_write(dns_lctx, category, module,
5591 level, "overmem cache: "
5592 "reprieve by RETAIN() %s",
5593 printname);
5594 }
5595 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5596 isc_log_write(dns_lctx, category, module, level,
5597 "overmem cache: saved %s", printname);
5598
5599 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5600 isc_rwlocktype_write);
5601
5602 return (ISC_R_SUCCESS);
5603 }
5604
5605 static void
overmem(dns_db_t * db,isc_boolean_t overmem)5606 overmem(dns_db_t *db, isc_boolean_t overmem) {
5607 /* This is an empty callback. See adb.c:water() */
5608
5609 UNUSED(db);
5610 UNUSED(overmem);
5611
5612 return;
5613 }
5614
5615 static void
printnode(dns_db_t * db,dns_dbnode_t * node,FILE * out)5616 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5617 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5618 dns_rbtnode_t *rbtnode = node;
5619 isc_boolean_t first;
5620
5621 REQUIRE(VALID_RBTDB(rbtdb));
5622
5623 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5624 isc_rwlocktype_read);
5625
5626 fprintf(out, "node %p, %u references, locknum = %u\n",
5627 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5628 rbtnode->locknum);
5629 if (rbtnode->data != NULL) {
5630 rdatasetheader_t *current, *top_next;
5631
5632 for (current = rbtnode->data; current != NULL;
5633 current = top_next) {
5634 top_next = current->next;
5635 first = ISC_TRUE;
5636 fprintf(out, "\ttype %u", current->type);
5637 do {
5638 if (!first)
5639 fprintf(out, "\t");
5640 first = ISC_FALSE;
5641 fprintf(out,
5642 "\tserial = %lu, ttl = %u, "
5643 "trust = %u, attributes = %u, "
5644 "resign = %u\n",
5645 (unsigned long)current->serial,
5646 current->rdh_ttl,
5647 current->trust,
5648 current->attributes,
5649 current->resign);
5650 current = current->down;
5651 } while (current != NULL);
5652 }
5653 } else
5654 fprintf(out, "(empty)\n");
5655
5656 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5657 isc_rwlocktype_read);
5658 }
5659
5660 static isc_result_t
createiterator(dns_db_t * db,unsigned int options,dns_dbiterator_t ** iteratorp)5661 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5662 {
5663 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5664 rbtdb_dbiterator_t *rbtdbiter;
5665
5666 REQUIRE(VALID_RBTDB(rbtdb));
5667
5668 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5669 if (rbtdbiter == NULL)
5670 return (ISC_R_NOMEMORY);
5671
5672 rbtdbiter->common.methods = &dbiterator_methods;
5673 rbtdbiter->common.db = NULL;
5674 dns_db_attach(db, &rbtdbiter->common.db);
5675 rbtdbiter->common.relative_names =
5676 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5677 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5678 rbtdbiter->common.cleaning = ISC_FALSE;
5679 rbtdbiter->paused = ISC_TRUE;
5680 rbtdbiter->tree_locked = isc_rwlocktype_none;
5681 rbtdbiter->result = ISC_R_SUCCESS;
5682 dns_fixedname_init(&rbtdbiter->name);
5683 dns_fixedname_init(&rbtdbiter->origin);
5684 rbtdbiter->node = NULL;
5685 rbtdbiter->delete = 0;
5686 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5687 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5688 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5689 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5690 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5691 if (rbtdbiter->nsec3only)
5692 rbtdbiter->current = &rbtdbiter->nsec3chain;
5693 else
5694 rbtdbiter->current = &rbtdbiter->chain;
5695
5696 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5697
5698 return (ISC_R_SUCCESS);
5699 }
5700
5701 static isc_result_t
zone_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5702 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5703 dns_rdatatype_t type, dns_rdatatype_t covers,
5704 isc_stdtime_t now, dns_rdataset_t *rdataset,
5705 dns_rdataset_t *sigrdataset)
5706 {
5707 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5708 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5709 rdatasetheader_t *header, *header_next, *found, *foundsig;
5710 rbtdb_serial_t serial;
5711 rbtdb_version_t *rbtversion = version;
5712 isc_boolean_t close_version = ISC_FALSE;
5713 rbtdb_rdatatype_t matchtype, sigmatchtype;
5714
5715 REQUIRE(VALID_RBTDB(rbtdb));
5716 REQUIRE(type != dns_rdatatype_any);
5717 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5718
5719 if (rbtversion == NULL) {
5720 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5721 close_version = ISC_TRUE;
5722 }
5723 serial = rbtversion->serial;
5724 now = 0;
5725
5726 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5727 isc_rwlocktype_read);
5728
5729 found = NULL;
5730 foundsig = NULL;
5731 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5732 if (covers == 0)
5733 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5734 else
5735 sigmatchtype = 0;
5736
5737 for (header = rbtnode->data; header != NULL; header = header_next) {
5738 header_next = header->next;
5739 do {
5740 if (header->serial <= serial &&
5741 !IGNORE(header)) {
5742 /*
5743 * Is this a "this rdataset doesn't
5744 * exist" record?
5745 */
5746 if (NONEXISTENT(header))
5747 header = NULL;
5748 break;
5749 } else
5750 header = header->down;
5751 } while (header != NULL);
5752 if (header != NULL) {
5753 /*
5754 * We have an active, extant rdataset. If it's a
5755 * type we're looking for, remember it.
5756 */
5757 if (header->type == matchtype) {
5758 found = header;
5759 if (foundsig != NULL)
5760 break;
5761 } else if (header->type == sigmatchtype) {
5762 foundsig = header;
5763 if (found != NULL)
5764 break;
5765 }
5766 }
5767 }
5768 if (found != NULL) {
5769 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5770 if (foundsig != NULL)
5771 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5772 sigrdataset);
5773 }
5774
5775 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5776 isc_rwlocktype_read);
5777
5778 if (close_version)
5779 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5780 ISC_FALSE);
5781
5782 if (found == NULL)
5783 return (ISC_R_NOTFOUND);
5784
5785 return (ISC_R_SUCCESS);
5786 }
5787
5788 static isc_result_t
cache_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5789 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5790 dns_rdatatype_t type, dns_rdatatype_t covers,
5791 isc_stdtime_t now, dns_rdataset_t *rdataset,
5792 dns_rdataset_t *sigrdataset)
5793 {
5794 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5795 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5796 rdatasetheader_t *header, *header_next, *found, *foundsig;
5797 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5798 isc_result_t result;
5799 nodelock_t *lock;
5800 isc_rwlocktype_t locktype;
5801
5802 REQUIRE(VALID_RBTDB(rbtdb));
5803 REQUIRE(type != dns_rdatatype_any);
5804
5805 UNUSED(version);
5806
5807 result = ISC_R_SUCCESS;
5808
5809 if (now == 0)
5810 isc_stdtime_get(&now);
5811
5812 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5813 locktype = isc_rwlocktype_read;
5814 NODE_LOCK(lock, locktype);
5815
5816 found = NULL;
5817 foundsig = NULL;
5818 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5819 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5820 if (covers == 0)
5821 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5822 else
5823 sigmatchtype = 0;
5824
5825 for (header = rbtnode->data; header != NULL; header = header_next) {
5826 header_next = header->next;
5827 if (header->rdh_ttl < now) {
5828 if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5829 (locktype == isc_rwlocktype_write ||
5830 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5831 /*
5832 * We update the node's status only when we
5833 * can get write access.
5834 */
5835 locktype = isc_rwlocktype_write;
5836
5837 /*
5838 * We don't check if refcurrent(rbtnode) == 0
5839 * and try to free like we do in cache_find(),
5840 * because refcurrent(rbtnode) must be
5841 * non-zero. This is so because 'node' is an
5842 * argument to the function.
5843 */
5844 mark_stale_header(rbtdb, header);
5845 }
5846 } else if (EXISTS(header)) {
5847 if (header->type == matchtype)
5848 found = header;
5849 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5850 header->type == negtype)
5851 found = header;
5852 else if (header->type == sigmatchtype)
5853 foundsig = header;
5854 }
5855 }
5856 if (found != NULL) {
5857 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5858 if (!NEGATIVE(found) && foundsig != NULL)
5859 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5860 sigrdataset);
5861 }
5862
5863 NODE_UNLOCK(lock, locktype);
5864
5865 if (found == NULL)
5866 return (ISC_R_NOTFOUND);
5867
5868 if (NEGATIVE(found)) {
5869 /*
5870 * We found a negative cache entry.
5871 */
5872 if (NXDOMAIN(found))
5873 result = DNS_R_NCACHENXDOMAIN;
5874 else
5875 result = DNS_R_NCACHENXRRSET;
5876 }
5877
5878 update_cachestats(rbtdb, result);
5879
5880 return (result);
5881 }
5882
5883 static isc_result_t
allrdatasets(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,isc_stdtime_t now,dns_rdatasetiter_t ** iteratorp)5884 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5885 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5886 {
5887 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5888 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5889 rbtdb_version_t *rbtversion = version;
5890 rbtdb_rdatasetiter_t *iterator;
5891 unsigned int refs;
5892
5893 REQUIRE(VALID_RBTDB(rbtdb));
5894
5895 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5896 if (iterator == NULL)
5897 return (ISC_R_NOMEMORY);
5898
5899 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5900 now = 0;
5901 if (rbtversion == NULL)
5902 currentversion(db,
5903 (dns_dbversion_t **) (void *)(&rbtversion));
5904 else {
5905 INSIST(rbtversion->rbtdb == rbtdb);
5906
5907 isc_refcount_increment(&rbtversion->references,
5908 &refs);
5909 INSIST(refs > 1);
5910 }
5911 } else {
5912 if (now == 0)
5913 isc_stdtime_get(&now);
5914 rbtversion = NULL;
5915 }
5916
5917 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5918 iterator->common.methods = &rdatasetiter_methods;
5919 iterator->common.db = db;
5920 iterator->common.node = node;
5921 iterator->common.version = (dns_dbversion_t *)rbtversion;
5922 iterator->common.now = now;
5923
5924 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5925
5926 dns_rbtnode_refincrement(rbtnode, &refs);
5927 INSIST(refs != 0);
5928
5929 iterator->current = NULL;
5930
5931 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5932
5933 *iteratorp = (dns_rdatasetiter_t *)iterator;
5934
5935 return (ISC_R_SUCCESS);
5936 }
5937
5938 static isc_boolean_t
cname_and_other_data(dns_rbtnode_t * node,rbtdb_serial_t serial)5939 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5940 rdatasetheader_t *header, *header_next;
5941 isc_boolean_t cname, other_data;
5942 dns_rdatatype_t rdtype;
5943
5944 /*
5945 * The caller must hold the node lock.
5946 */
5947
5948 /*
5949 * Look for CNAME and "other data" rdatasets active in our version.
5950 */
5951 cname = ISC_FALSE;
5952 other_data = ISC_FALSE;
5953 for (header = node->data; header != NULL; header = header_next) {
5954 header_next = header->next;
5955 if (header->type == dns_rdatatype_cname) {
5956 /*
5957 * Look for an active extant CNAME.
5958 */
5959 do {
5960 if (header->serial <= serial &&
5961 !IGNORE(header)) {
5962 /*
5963 * Is this a "this rdataset doesn't
5964 * exist" record?
5965 */
5966 if (NONEXISTENT(header))
5967 header = NULL;
5968 break;
5969 } else
5970 header = header->down;
5971 } while (header != NULL);
5972 if (header != NULL)
5973 cname = ISC_TRUE;
5974 } else {
5975 /*
5976 * Look for active extant "other data".
5977 *
5978 * "Other data" is any rdataset whose type is not
5979 * KEY, NSEC, SIG or RRSIG.
5980 */
5981 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5982 if (rdtype != dns_rdatatype_key &&
5983 rdtype != dns_rdatatype_sig &&
5984 rdtype != dns_rdatatype_nsec &&
5985 rdtype != dns_rdatatype_rrsig) {
5986 /*
5987 * Is it active and extant?
5988 */
5989 do {
5990 if (header->serial <= serial &&
5991 !IGNORE(header)) {
5992 /*
5993 * Is this a "this rdataset
5994 * doesn't exist" record?
5995 */
5996 if (NONEXISTENT(header))
5997 header = NULL;
5998 break;
5999 } else
6000 header = header->down;
6001 } while (header != NULL);
6002 if (header != NULL)
6003 other_data = ISC_TRUE;
6004 }
6005 }
6006 }
6007
6008 if (cname && other_data)
6009 return (ISC_TRUE);
6010
6011 return (ISC_FALSE);
6012 }
6013
6014 static isc_result_t
resign_insert(dns_rbtdb_t * rbtdb,int idx,rdatasetheader_t * newheader)6015 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
6016 isc_result_t result;
6017
6018 INSIST(!IS_CACHE(rbtdb));
6019 INSIST(newheader->heap_index == 0);
6020 INSIST(!ISC_LINK_LINKED(newheader, link));
6021
6022 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
6023 return (result);
6024 }
6025
6026 static void
resign_delete(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rdatasetheader_t * header)6027 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
6028 rdatasetheader_t *header)
6029 {
6030 /*
6031 * Remove the old header from the heap
6032 */
6033 if (header != NULL && header->heap_index != 0) {
6034 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6035 header->heap_index);
6036 header->heap_index = 0;
6037 if (version != NULL) {
6038 new_reference(rbtdb, header->node);
6039 ISC_LIST_APPEND(version->resigned_list, header, link);
6040 }
6041 }
6042 }
6043
6044 static isc_result_t
add32(dns_rbtdb_t * rbtdb,dns_rbtnode_t * rbtnode,rbtdb_version_t * rbtversion,rdatasetheader_t * newheader,unsigned int options,isc_boolean_t loading,dns_rdataset_t * addedrdataset,isc_stdtime_t now)6045 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
6046 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
6047 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
6048 {
6049 rbtdb_changed_t *changed = NULL;
6050 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
6051 unsigned char *merged;
6052 isc_result_t result;
6053 isc_boolean_t header_nx;
6054 isc_boolean_t newheader_nx;
6055 isc_boolean_t merge;
6056 dns_rdatatype_t rdtype, covers;
6057 rbtdb_rdatatype_t negtype, sigtype;
6058 dns_trust_t trust;
6059 int idx;
6060
6061 /*
6062 * Add an rdatasetheader_t to a node.
6063 */
6064
6065 /*
6066 * Caller must be holding the node lock.
6067 */
6068
6069 if ((options & DNS_DBADD_MERGE) != 0) {
6070 REQUIRE(rbtversion != NULL);
6071 merge = ISC_TRUE;
6072 } else
6073 merge = ISC_FALSE;
6074
6075 if ((options & DNS_DBADD_FORCE) != 0)
6076 trust = dns_trust_ultimate;
6077 else
6078 trust = newheader->trust;
6079
6080 if (rbtversion != NULL && !loading) {
6081 /*
6082 * We always add a changed record, even if no changes end up
6083 * being made to this node, because it's harmless and
6084 * simplifies the code.
6085 */
6086 changed = add_changed(rbtdb, rbtversion, rbtnode);
6087 if (changed == NULL) {
6088 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6089 return (ISC_R_NOMEMORY);
6090 }
6091 }
6092
6093 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
6094 topheader_prev = NULL;
6095 sigheader = NULL;
6096 negtype = 0;
6097 if (rbtversion == NULL && !newheader_nx) {
6098 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
6099 covers = RBTDB_RDATATYPE_EXT(newheader->type);
6100 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
6101 if (NEGATIVE(newheader)) {
6102 /*
6103 * We're adding a negative cache entry.
6104 */
6105 for (topheader = rbtnode->data;
6106 topheader != NULL;
6107 topheader = topheader->next) {
6108 /*
6109 * If we're adding an negative cache entry
6110 * which covers all types (NXDOMAIN,
6111 * NODATA(QTYPE=ANY)).
6112 *
6113 * We make all other data stale so that the
6114 * only rdataset that can be found at this
6115 * node is the negative cache entry.
6116 *
6117 * Otherwise look for any RRSIGs of the
6118 * given type so they can be marked stale
6119 * later.
6120 */
6121 if (covers == dns_rdatatype_any) {
6122 set_ttl(rbtdb, topheader, 0);
6123 mark_stale_header(rbtdb, topheader);
6124 } else if (topheader->type == sigtype)
6125 sigheader = topheader;
6126 }
6127 if (covers == dns_rdatatype_any)
6128 goto find_header;
6129 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
6130 } else {
6131 /*
6132 * We're adding something that isn't a
6133 * negative cache entry. Look for an extant
6134 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
6135 * cache entry. If we're adding an RRSIG, also
6136 * check for an extant non-stale NODATA ncache
6137 * entry which covers the same type as the RRSIG.
6138 */
6139 for (topheader = rbtnode->data;
6140 topheader != NULL;
6141 topheader = topheader->next) {
6142 if ((topheader->type ==
6143 RBTDB_RDATATYPE_NCACHEANY) ||
6144 (newheader->type == sigtype &&
6145 topheader->type ==
6146 RBTDB_RDATATYPE_VALUE(0, covers))) {
6147 break;
6148 }
6149 }
6150 if (topheader != NULL && EXISTS(topheader) &&
6151 topheader->rdh_ttl >= now) {
6152 /*
6153 * Found one.
6154 */
6155 if (trust < topheader->trust) {
6156 /*
6157 * The NXDOMAIN/NODATA(QTYPE=ANY)
6158 * is more trusted.
6159 */
6160 free_rdataset(rbtdb,
6161 rbtdb->common.mctx,
6162 newheader);
6163 if (addedrdataset != NULL)
6164 bind_rdataset(rbtdb, rbtnode,
6165 topheader, now,
6166 addedrdataset);
6167 return (DNS_R_UNCHANGED);
6168 }
6169 /*
6170 * The new rdataset is better. Expire the
6171 * ncache entry.
6172 */
6173 set_ttl(rbtdb, topheader, 0);
6174 mark_stale_header(rbtdb, topheader);
6175 topheader = NULL;
6176 goto find_header;
6177 }
6178 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6179 }
6180 }
6181
6182 for (topheader = rbtnode->data;
6183 topheader != NULL;
6184 topheader = topheader->next) {
6185 if (topheader->type == newheader->type ||
6186 topheader->type == negtype)
6187 break;
6188 topheader_prev = topheader;
6189 }
6190
6191 find_header:
6192 /*
6193 * If header isn't NULL, we've found the right type. There may be
6194 * IGNORE rdatasets between the top of the chain and the first real
6195 * data. We skip over them.
6196 */
6197 header = topheader;
6198 while (header != NULL && IGNORE(header))
6199 header = header->down;
6200 if (header != NULL) {
6201 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6202
6203 /*
6204 * Deleting an already non-existent rdataset has no effect.
6205 */
6206 if (header_nx && newheader_nx) {
6207 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6208 return (DNS_R_UNCHANGED);
6209 }
6210
6211 /*
6212 * Trying to add an rdataset with lower trust to a cache DB
6213 * has no effect, provided that the cache data isn't stale.
6214 */
6215 if (rbtversion == NULL && trust < header->trust &&
6216 (header->rdh_ttl >= now || header_nx)) {
6217 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6218 if (addedrdataset != NULL)
6219 bind_rdataset(rbtdb, rbtnode, header, now,
6220 addedrdataset);
6221 return (DNS_R_UNCHANGED);
6222 }
6223
6224 /*
6225 * Don't merge if a nonexistent rdataset is involved.
6226 */
6227 if (merge && (header_nx || newheader_nx))
6228 merge = ISC_FALSE;
6229
6230 /*
6231 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6232 * that is the union of 'newheader' and 'header'.
6233 */
6234 if (merge) {
6235 unsigned int flags = 0;
6236 INSIST(rbtversion->serial >= header->serial);
6237 merged = NULL;
6238 result = ISC_R_SUCCESS;
6239
6240 if ((options & DNS_DBADD_EXACT) != 0)
6241 flags |= DNS_RDATASLAB_EXACT;
6242 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6243 newheader->rdh_ttl != header->rdh_ttl)
6244 result = DNS_R_NOTEXACT;
6245 else if (newheader->rdh_ttl != header->rdh_ttl)
6246 flags |= DNS_RDATASLAB_FORCE;
6247 if (result == ISC_R_SUCCESS)
6248 result = dns_rdataslab_merge(
6249 (unsigned char *)header,
6250 (unsigned char *)newheader,
6251 (unsigned int)(sizeof(*newheader)),
6252 rbtdb->common.mctx,
6253 rbtdb->common.rdclass,
6254 (dns_rdatatype_t)header->type,
6255 flags, &merged);
6256 if (result == ISC_R_SUCCESS) {
6257 /*
6258 * If 'header' has the same serial number as
6259 * we do, we could clean it up now if we knew
6260 * that our caller had no references to it.
6261 * We don't know this, however, so we leave it
6262 * alone. It will get cleaned up when
6263 * clean_zone_node() runs.
6264 */
6265 free_rdataset(rbtdb, rbtdb->common.mctx,
6266 newheader);
6267 newheader = (rdatasetheader_t *)merged;
6268 init_rdataset(rbtdb, newheader);
6269 update_newheader(newheader, header);
6270 if (loading && RESIGN(newheader) &&
6271 RESIGN(header) &&
6272 header->resign < newheader->resign)
6273 newheader->resign = header->resign;
6274 } else {
6275 free_rdataset(rbtdb, rbtdb->common.mctx,
6276 newheader);
6277 return (result);
6278 }
6279 }
6280 /*
6281 * Don't replace existing NS, A and AAAA RRsets
6282 * in the cache if they are already exist. This
6283 * prevents named being locked to old servers.
6284 * Don't lower trust of existing record if the
6285 * update is forced.
6286 */
6287 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6288 header->type == dns_rdatatype_ns &&
6289 !header_nx && !newheader_nx &&
6290 header->trust >= newheader->trust &&
6291 dns_rdataslab_equalx((unsigned char *)header,
6292 (unsigned char *)newheader,
6293 (unsigned int)(sizeof(*newheader)),
6294 rbtdb->common.rdclass,
6295 (dns_rdatatype_t)header->type)) {
6296 /*
6297 * Honour the new ttl if it is less than the
6298 * older one.
6299 */
6300 if (header->rdh_ttl > newheader->rdh_ttl)
6301 set_ttl(rbtdb, header, newheader->rdh_ttl);
6302 if (header->noqname == NULL &&
6303 newheader->noqname != NULL) {
6304 header->noqname = newheader->noqname;
6305 newheader->noqname = NULL;
6306 }
6307 if (header->closest == NULL &&
6308 newheader->closest != NULL) {
6309 header->closest = newheader->closest;
6310 newheader->closest = NULL;
6311 }
6312 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6313 if (addedrdataset != NULL)
6314 bind_rdataset(rbtdb, rbtnode, header, now,
6315 addedrdataset);
6316 return (ISC_R_SUCCESS);
6317 }
6318 /*
6319 * If we have will be replacing a NS RRset force its TTL
6320 * to be no more than the current NS RRset's TTL. This
6321 * ensures the delegations that are withdrawn are honoured.
6322 */
6323 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6324 header->type == dns_rdatatype_ns &&
6325 !header_nx && !newheader_nx &&
6326 header->trust <= newheader->trust) {
6327 if (newheader->rdh_ttl > header->rdh_ttl) {
6328 newheader->rdh_ttl = header->rdh_ttl;
6329 }
6330 }
6331 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6332 (options & DNS_DBADD_PREFETCH) == 0 &&
6333 (header->type == dns_rdatatype_a ||
6334 header->type == dns_rdatatype_aaaa ||
6335 header->type == dns_rdatatype_ds ||
6336 header->type == RBTDB_RDATATYPE_SIGDDS) &&
6337 !header_nx && !newheader_nx &&
6338 header->trust >= newheader->trust &&
6339 dns_rdataslab_equal((unsigned char *)header,
6340 (unsigned char *)newheader,
6341 (unsigned int)(sizeof(*newheader)))) {
6342 /*
6343 * Honour the new ttl if it is less than the
6344 * older one.
6345 */
6346 if (header->rdh_ttl > newheader->rdh_ttl)
6347 set_ttl(rbtdb, header, newheader->rdh_ttl);
6348 if (header->noqname == NULL &&
6349 newheader->noqname != NULL) {
6350 header->noqname = newheader->noqname;
6351 newheader->noqname = NULL;
6352 }
6353 if (header->closest == NULL &&
6354 newheader->closest != NULL) {
6355 header->closest = newheader->closest;
6356 newheader->closest = NULL;
6357 }
6358 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6359 if (addedrdataset != NULL)
6360 bind_rdataset(rbtdb, rbtnode, header, now,
6361 addedrdataset);
6362 return (ISC_R_SUCCESS);
6363 }
6364 INSIST(rbtversion == NULL ||
6365 rbtversion->serial >= topheader->serial);
6366 if (topheader_prev != NULL)
6367 topheader_prev->next = newheader;
6368 else
6369 rbtnode->data = newheader;
6370 newheader->next = topheader->next;
6371 if (loading) {
6372 /*
6373 * There are no other references to 'header' when
6374 * loading, so we MAY clean up 'header' now.
6375 * Since we don't generate changed records when
6376 * loading, we MUST clean up 'header' now.
6377 */
6378 newheader->down = NULL;
6379 free_rdataset(rbtdb, rbtdb->common.mctx, header);
6380
6381 idx = newheader->node->locknum;
6382 if (IS_CACHE(rbtdb)) {
6383 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6384 newheader, link);
6385 INSIST(rbtdb->heaps != NULL);
6386 (void)isc_heap_insert(rbtdb->heaps[idx],
6387 newheader);
6388 } else if (RESIGN(newheader)) {
6389 result = resign_insert(rbtdb, idx, newheader);
6390 if (result != ISC_R_SUCCESS)
6391 return (result);
6392 }
6393 } else {
6394 newheader->down = topheader;
6395 topheader->next = newheader;
6396 rbtnode->dirty = 1;
6397 if (changed != NULL)
6398 changed->dirty = ISC_TRUE;
6399 if (rbtversion == NULL) {
6400 set_ttl(rbtdb, header, 0);
6401 mark_stale_header(rbtdb, header);
6402 if (sigheader != NULL) {
6403 set_ttl(rbtdb, sigheader, 0);
6404 mark_stale_header(rbtdb, sigheader);
6405 }
6406 }
6407 idx = newheader->node->locknum;
6408 if (IS_CACHE(rbtdb)) {
6409 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6410 newheader, link);
6411 /*
6412 * XXXMLG We don't check the return value
6413 * here. If it fails, we will not do TTL
6414 * based expiry on this node. However, we
6415 * will do it on the LRU side, so memory
6416 * will not leak... for long.
6417 */
6418 INSIST(rbtdb->heaps != NULL);
6419 (void)isc_heap_insert(rbtdb->heaps[idx],
6420 newheader);
6421 } else if (RESIGN(newheader)) {
6422 resign_delete(rbtdb, rbtversion, header);
6423 result = resign_insert(rbtdb, idx, newheader);
6424 if (result != ISC_R_SUCCESS)
6425 return (result);
6426 }
6427 }
6428 } else {
6429 /*
6430 * No non-IGNORED rdatasets of the given type exist at
6431 * this node.
6432 */
6433
6434 /*
6435 * If we're trying to delete the type, don't bother.
6436 */
6437 if (newheader_nx) {
6438 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6439 return (DNS_R_UNCHANGED);
6440 }
6441
6442 if (topheader != NULL) {
6443 /*
6444 * We have an list of rdatasets of the given type,
6445 * but they're all marked IGNORE. We simply insert
6446 * the new rdataset at the head of the list.
6447 *
6448 * Ignored rdatasets cannot occur during loading, so
6449 * we INSIST on it.
6450 */
6451 INSIST(!loading);
6452 INSIST(rbtversion == NULL ||
6453 rbtversion->serial >= topheader->serial);
6454 if (topheader_prev != NULL)
6455 topheader_prev->next = newheader;
6456 else
6457 rbtnode->data = newheader;
6458 newheader->next = topheader->next;
6459 newheader->down = topheader;
6460 topheader->next = newheader;
6461 rbtnode->dirty = 1;
6462 if (changed != NULL)
6463 changed->dirty = ISC_TRUE;
6464 } else {
6465 /*
6466 * No rdatasets of the given type exist at the node.
6467 */
6468 newheader->next = rbtnode->data;
6469 newheader->down = NULL;
6470 rbtnode->data = newheader;
6471 }
6472 idx = newheader->node->locknum;
6473 if (IS_CACHE(rbtdb)) {
6474 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6475 newheader, link);
6476 isc_heap_insert(rbtdb->heaps[idx], newheader);
6477 } else if (RESIGN(newheader)) {
6478 resign_delete(rbtdb, rbtversion, header);
6479 result = resign_insert(rbtdb, idx, newheader);
6480 if (result != ISC_R_SUCCESS)
6481 return (result);
6482 }
6483 }
6484
6485 /*
6486 * Check if the node now contains CNAME and other data.
6487 */
6488 if (rbtversion != NULL &&
6489 cname_and_other_data(rbtnode, rbtversion->serial))
6490 return (DNS_R_CNAMEANDOTHER);
6491
6492 if (addedrdataset != NULL)
6493 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6494
6495 return (ISC_R_SUCCESS);
6496 }
6497
6498 static inline isc_boolean_t
delegating_type(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rbtdb_rdatatype_t type)6499 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6500 rbtdb_rdatatype_t type)
6501 {
6502 if (IS_CACHE(rbtdb)) {
6503 if (type == dns_rdatatype_dname)
6504 return (ISC_TRUE);
6505 else
6506 return (ISC_FALSE);
6507 } else if (type == dns_rdatatype_dname ||
6508 (type == dns_rdatatype_ns &&
6509 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6510 return (ISC_TRUE);
6511 return (ISC_FALSE);
6512 }
6513
6514 static inline isc_result_t
addnoqname(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6515 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6516 dns_rdataset_t *rdataset)
6517 {
6518 struct noqname *noqname;
6519 isc_mem_t *mctx = rbtdb->common.mctx;
6520 dns_name_t name;
6521 dns_rdataset_t neg, negsig;
6522 isc_result_t result;
6523 isc_region_t r;
6524
6525 dns_name_init(&name, NULL);
6526 dns_rdataset_init(&neg);
6527 dns_rdataset_init(&negsig);
6528
6529 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6530 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6531
6532 noqname = isc_mem_get(mctx, sizeof(*noqname));
6533 if (noqname == NULL) {
6534 result = ISC_R_NOMEMORY;
6535 goto cleanup;
6536 }
6537 dns_name_init(&noqname->name, NULL);
6538 noqname->neg = NULL;
6539 noqname->negsig = NULL;
6540 noqname->type = neg.type;
6541 result = dns_name_dup(&name, mctx, &noqname->name);
6542 if (result != ISC_R_SUCCESS)
6543 goto cleanup;
6544 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6545 if (result != ISC_R_SUCCESS)
6546 goto cleanup;
6547 noqname->neg = r.base;
6548 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6549 if (result != ISC_R_SUCCESS)
6550 goto cleanup;
6551 noqname->negsig = r.base;
6552 dns_rdataset_disassociate(&neg);
6553 dns_rdataset_disassociate(&negsig);
6554 newheader->noqname = noqname;
6555 return (ISC_R_SUCCESS);
6556
6557 cleanup:
6558 dns_rdataset_disassociate(&neg);
6559 dns_rdataset_disassociate(&negsig);
6560 if (noqname != NULL)
6561 free_noqname(mctx, &noqname);
6562 return(result);
6563 }
6564
6565 static inline isc_result_t
addclosest(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6566 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6567 dns_rdataset_t *rdataset)
6568 {
6569 struct noqname *closest;
6570 isc_mem_t *mctx = rbtdb->common.mctx;
6571 dns_name_t name;
6572 dns_rdataset_t neg, negsig;
6573 isc_result_t result;
6574 isc_region_t r;
6575
6576 dns_name_init(&name, NULL);
6577 dns_rdataset_init(&neg);
6578 dns_rdataset_init(&negsig);
6579
6580 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6581 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6582
6583 closest = isc_mem_get(mctx, sizeof(*closest));
6584 if (closest == NULL) {
6585 result = ISC_R_NOMEMORY;
6586 goto cleanup;
6587 }
6588 dns_name_init(&closest->name, NULL);
6589 closest->neg = NULL;
6590 closest->negsig = NULL;
6591 closest->type = neg.type;
6592 result = dns_name_dup(&name, mctx, &closest->name);
6593 if (result != ISC_R_SUCCESS)
6594 goto cleanup;
6595 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6596 if (result != ISC_R_SUCCESS)
6597 goto cleanup;
6598 closest->neg = r.base;
6599 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6600 if (result != ISC_R_SUCCESS)
6601 goto cleanup;
6602 closest->negsig = r.base;
6603 dns_rdataset_disassociate(&neg);
6604 dns_rdataset_disassociate(&negsig);
6605 newheader->closest = closest;
6606 return (ISC_R_SUCCESS);
6607
6608 cleanup:
6609 dns_rdataset_disassociate(&neg);
6610 dns_rdataset_disassociate(&negsig);
6611 if (closest != NULL)
6612 free_noqname(mctx, &closest);
6613 return(result);
6614 }
6615
6616 static dns_dbmethods_t zone_methods;
6617
6618 static isc_result_t
addrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,isc_stdtime_t now,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * addedrdataset)6619 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6620 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6621 dns_rdataset_t *addedrdataset)
6622 {
6623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6624 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6625 rbtdb_version_t *rbtversion = version;
6626 isc_region_t region;
6627 rdatasetheader_t *newheader;
6628 rdatasetheader_t *header;
6629 isc_result_t result;
6630 isc_boolean_t delegating;
6631 isc_boolean_t newnsec;
6632 isc_boolean_t tree_locked = ISC_FALSE;
6633 isc_boolean_t cache_is_overmem = ISC_FALSE;
6634
6635 REQUIRE(VALID_RBTDB(rbtdb));
6636 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6637
6638 if (rbtdb->common.methods == &zone_methods)
6639 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6640 (rdataset->type == dns_rdatatype_nsec3 ||
6641 rdataset->covers == dns_rdatatype_nsec3)) ||
6642 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6643 rdataset->type != dns_rdatatype_nsec3 &&
6644 rdataset->covers != dns_rdatatype_nsec3)));
6645
6646 if (rbtversion == NULL) {
6647 if (now == 0)
6648 isc_stdtime_get(&now);
6649 } else
6650 now = 0;
6651
6652 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6653 ®ion, sizeof(rdatasetheader_t));
6654 if (result != ISC_R_SUCCESS)
6655 return (result);
6656
6657 newheader = (rdatasetheader_t *)region.base;
6658 init_rdataset(rbtdb, newheader);
6659 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6660 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6661 rdataset->covers);
6662 newheader->attributes = 0;
6663 newheader->noqname = NULL;
6664 newheader->closest = NULL;
6665 newheader->count = init_count++;
6666 newheader->trust = rdataset->trust;
6667 newheader->additional_auth = NULL;
6668 newheader->additional_glue = NULL;
6669 newheader->last_used = now;
6670 newheader->node = rbtnode;
6671 if (rbtversion != NULL) {
6672 newheader->serial = rbtversion->serial;
6673 now = 0;
6674
6675 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6676 newheader->attributes |= RDATASET_ATTR_RESIGN;
6677 newheader->resign = rdataset->resign;
6678 } else
6679 newheader->resign = 0;
6680 } else {
6681 newheader->serial = 1;
6682 newheader->resign = 0;
6683 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0)
6684 newheader->attributes |= RDATASET_ATTR_PREFETCH;
6685 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6686 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6687 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6688 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6689 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6690 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6691 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6692 result = addnoqname(rbtdb, newheader, rdataset);
6693 if (result != ISC_R_SUCCESS) {
6694 free_rdataset(rbtdb, rbtdb->common.mctx,
6695 newheader);
6696 return (result);
6697 }
6698 }
6699 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6700 result = addclosest(rbtdb, newheader, rdataset);
6701 if (result != ISC_R_SUCCESS) {
6702 free_rdataset(rbtdb, rbtdb->common.mctx,
6703 newheader);
6704 return (result);
6705 }
6706 }
6707 }
6708
6709 /*
6710 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6711 * just DNAME for the cache), then we need to set the callback bit
6712 * on the node.
6713 */
6714 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6715 delegating = ISC_TRUE;
6716 else
6717 delegating = ISC_FALSE;
6718
6719 /*
6720 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6721 */
6722 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6723 rdataset->type == dns_rdatatype_nsec)
6724 newnsec = ISC_TRUE;
6725 else
6726 newnsec = ISC_FALSE;
6727
6728 /*
6729 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6730 * or the DB is a cache in an overmem state, hold an exclusive lock on
6731 * the tree. In the latter case the lock does not necessarily have to
6732 * be acquired but it will help purge stale entries more effectively.
6733 */
6734 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6735 cache_is_overmem = ISC_TRUE;
6736 if (delegating || newnsec || cache_is_overmem) {
6737 tree_locked = ISC_TRUE;
6738 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6739 }
6740
6741 if (cache_is_overmem)
6742 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6743
6744 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6745 isc_rwlocktype_write);
6746
6747 if (rbtdb->rrsetstats != NULL) {
6748 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6749 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6750 }
6751
6752 if (IS_CACHE(rbtdb)) {
6753 if (tree_locked)
6754 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6755
6756 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6757 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
6758 expire_header(rbtdb, header, tree_locked,
6759 expire_ttl);
6760
6761 /*
6762 * If we've been holding a write lock on the tree just for
6763 * cleaning, we can release it now. However, we still need the
6764 * node lock.
6765 */
6766 if (tree_locked && !delegating && !newnsec) {
6767 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6768 tree_locked = ISC_FALSE;
6769 }
6770 }
6771
6772 result = ISC_R_SUCCESS;
6773 if (newnsec) {
6774 dns_fixedname_t fname;
6775 dns_name_t *name;
6776 dns_rbtnode_t *nsecnode;
6777
6778 dns_fixedname_init(&fname);
6779 name = dns_fixedname_name(&fname);
6780 dns_rbt_fullnamefromnode(rbtnode, name);
6781 nsecnode = NULL;
6782 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6783 if (result == ISC_R_SUCCESS) {
6784 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6785 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6786 } else if (result == ISC_R_EXISTS) {
6787 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6788 result = ISC_R_SUCCESS;
6789 }
6790 }
6791
6792 if (result == ISC_R_SUCCESS)
6793 result = add32(rbtdb, rbtnode, rbtversion, newheader, options,
6794 ISC_FALSE, addedrdataset, now);
6795 if (result == ISC_R_SUCCESS && delegating)
6796 rbtnode->find_callback = 1;
6797
6798 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6799 isc_rwlocktype_write);
6800
6801 if (tree_locked)
6802 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6803
6804 /*
6805 * Update the zone's secure status. If version is non-NULL
6806 * this is deferred until closeversion() is called.
6807 */
6808 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6809 iszonesecure(db, version, rbtdb->origin_node);
6810
6811 return (result);
6812 }
6813
6814 static isc_result_t
subtractrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * newrdataset)6815 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6816 dns_rdataset_t *rdataset, unsigned int options,
6817 dns_rdataset_t *newrdataset)
6818 {
6819 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6820 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6821 rbtdb_version_t *rbtversion = version;
6822 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6823 unsigned char *subresult;
6824 isc_region_t region;
6825 isc_result_t result;
6826 rbtdb_changed_t *changed;
6827
6828 REQUIRE(VALID_RBTDB(rbtdb));
6829 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6830
6831 if (rbtdb->common.methods == &zone_methods)
6832 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6833 (rdataset->type == dns_rdatatype_nsec3 ||
6834 rdataset->covers == dns_rdatatype_nsec3)) ||
6835 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6836 rdataset->type != dns_rdatatype_nsec3 &&
6837 rdataset->covers != dns_rdatatype_nsec3)));
6838
6839 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6840 ®ion,
6841 sizeof(rdatasetheader_t));
6842 if (result != ISC_R_SUCCESS)
6843 return (result);
6844 newheader = (rdatasetheader_t *)region.base;
6845 init_rdataset(rbtdb, newheader);
6846 set_ttl(rbtdb, newheader, rdataset->ttl);
6847 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6848 rdataset->covers);
6849 newheader->attributes = 0;
6850 newheader->serial = rbtversion->serial;
6851 newheader->trust = 0;
6852 newheader->noqname = NULL;
6853 newheader->closest = NULL;
6854 newheader->count = init_count++;
6855 newheader->additional_auth = NULL;
6856 newheader->additional_glue = NULL;
6857 newheader->last_used = 0;
6858 newheader->node = rbtnode;
6859 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6860 newheader->attributes |= RDATASET_ATTR_RESIGN;
6861 newheader->resign = rdataset->resign;
6862 } else
6863 newheader->resign = 0;
6864
6865 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6866 isc_rwlocktype_write);
6867
6868 changed = add_changed(rbtdb, rbtversion, rbtnode);
6869 if (changed == NULL) {
6870 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6871 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6872 isc_rwlocktype_write);
6873 return (ISC_R_NOMEMORY);
6874 }
6875
6876 topheader_prev = NULL;
6877 for (topheader = rbtnode->data;
6878 topheader != NULL;
6879 topheader = topheader->next) {
6880 if (topheader->type == newheader->type)
6881 break;
6882 topheader_prev = topheader;
6883 }
6884 /*
6885 * If header isn't NULL, we've found the right type. There may be
6886 * IGNORE rdatasets between the top of the chain and the first real
6887 * data. We skip over them.
6888 */
6889 header = topheader;
6890 while (header != NULL && IGNORE(header))
6891 header = header->down;
6892 if (header != NULL && EXISTS(header)) {
6893 unsigned int flags = 0;
6894 subresult = NULL;
6895 result = ISC_R_SUCCESS;
6896 if ((options & DNS_DBSUB_EXACT) != 0) {
6897 flags |= DNS_RDATASLAB_EXACT;
6898 if (newheader->rdh_ttl != header->rdh_ttl)
6899 result = DNS_R_NOTEXACT;
6900 }
6901 if (result == ISC_R_SUCCESS)
6902 result = dns_rdataslab_subtract(
6903 (unsigned char *)header,
6904 (unsigned char *)newheader,
6905 (unsigned int)(sizeof(*newheader)),
6906 rbtdb->common.mctx,
6907 rbtdb->common.rdclass,
6908 (dns_rdatatype_t)header->type,
6909 flags, &subresult);
6910 if (result == ISC_R_SUCCESS) {
6911 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6912 newheader = (rdatasetheader_t *)subresult;
6913 init_rdataset(rbtdb, newheader);
6914 update_newheader(newheader, header);
6915 /*
6916 * We have to set the serial since the rdataslab
6917 * subtraction routine copies the reserved portion of
6918 * header, not newheader.
6919 */
6920 newheader->serial = rbtversion->serial;
6921 /*
6922 * XXXJT: dns_rdataslab_subtract() copied the pointers
6923 * to additional info. We need to clear these fields
6924 * to avoid having duplicated references.
6925 */
6926 newheader->additional_auth = NULL;
6927 newheader->additional_glue = NULL;
6928 } else if (result == DNS_R_NXRRSET) {
6929 /*
6930 * This subtraction would remove all of the rdata;
6931 * add a nonexistent header instead.
6932 */
6933 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6934 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6935 if (newheader == NULL) {
6936 result = ISC_R_NOMEMORY;
6937 goto unlock;
6938 }
6939 init_rdataset(rbtdb, newheader);
6940 set_ttl(rbtdb, newheader, 0);
6941 newheader->type = topheader->type;
6942 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6943 newheader->trust = 0;
6944 newheader->serial = rbtversion->serial;
6945 newheader->noqname = NULL;
6946 newheader->closest = NULL;
6947 newheader->count = 0;
6948 newheader->additional_auth = NULL;
6949 newheader->additional_glue = NULL;
6950 newheader->node = rbtnode;
6951 newheader->resign = 0;
6952 newheader->last_used = 0;
6953 } else {
6954 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6955 goto unlock;
6956 }
6957
6958 /*
6959 * If we're here, we want to link newheader in front of
6960 * topheader.
6961 */
6962 INSIST(rbtversion->serial >= topheader->serial);
6963 if (topheader_prev != NULL)
6964 topheader_prev->next = newheader;
6965 else
6966 rbtnode->data = newheader;
6967 newheader->next = topheader->next;
6968 newheader->down = topheader;
6969 topheader->next = newheader;
6970 rbtnode->dirty = 1;
6971 changed->dirty = ISC_TRUE;
6972 resign_delete(rbtdb, rbtversion, header);
6973 } else {
6974 /*
6975 * The rdataset doesn't exist, so we don't need to do anything
6976 * to satisfy the deletion request.
6977 */
6978 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6979 if ((options & DNS_DBSUB_EXACT) != 0)
6980 result = DNS_R_NOTEXACT;
6981 else
6982 result = DNS_R_UNCHANGED;
6983 }
6984
6985 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6986 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6987
6988 unlock:
6989 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6990 isc_rwlocktype_write);
6991
6992 /*
6993 * Update the zone's secure status. If version is non-NULL
6994 * this is deferred until closeversion() is called.
6995 */
6996 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6997 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6998
6999 return (result);
7000 }
7001
7002 static isc_result_t
deleterdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers)7003 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
7004 dns_rdatatype_t type, dns_rdatatype_t covers)
7005 {
7006 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7007 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
7008 rbtdb_version_t *rbtversion = version;
7009 isc_result_t result;
7010 rdatasetheader_t *newheader;
7011
7012 REQUIRE(VALID_RBTDB(rbtdb));
7013 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7014
7015 if (type == dns_rdatatype_any)
7016 return (ISC_R_NOTIMPLEMENTED);
7017 if (type == dns_rdatatype_rrsig && covers == 0)
7018 return (ISC_R_NOTIMPLEMENTED);
7019
7020 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
7021 if (newheader == NULL)
7022 return (ISC_R_NOMEMORY);
7023 init_rdataset(rbtdb, newheader);
7024 set_ttl(rbtdb, newheader, 0);
7025 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
7026 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
7027 newheader->trust = 0;
7028 newheader->noqname = NULL;
7029 newheader->closest = NULL;
7030 newheader->additional_auth = NULL;
7031 newheader->additional_glue = NULL;
7032 if (rbtversion != NULL)
7033 newheader->serial = rbtversion->serial;
7034 else
7035 newheader->serial = 0;
7036 newheader->count = 0;
7037 newheader->last_used = 0;
7038 newheader->node = rbtnode;
7039
7040 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7041 isc_rwlocktype_write);
7042
7043 result = add32(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
7044 ISC_FALSE, NULL, 0);
7045
7046 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7047 isc_rwlocktype_write);
7048
7049 /*
7050 * Update the zone's secure status. If version is non-NULL
7051 * this is deferred until closeversion() is called.
7052 */
7053 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
7054 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7055
7056 return (result);
7057 }
7058
7059 /*
7060 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
7061 */
7062 static isc_result_t
loadnode(dns_rbtdb_t * rbtdb,dns_name_t * name,dns_rbtnode_t ** nodep,isc_boolean_t hasnsec)7063 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
7064 isc_boolean_t hasnsec)
7065 {
7066 isc_result_t noderesult, nsecresult, tmpresult;
7067 dns_rbtnode_t *nsecnode = NULL, *node = NULL;
7068
7069 noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
7070 if (rbtdb->rpzs != NULL && noderesult == ISC_R_SUCCESS) {
7071 noderesult = dns_rpz_add(rbtdb->load_rpzs, rbtdb->rpz_num,
7072 name);
7073 if (noderesult == ISC_R_SUCCESS) {
7074 node->rpz = 1;
7075 } else {
7076 /*
7077 * Remove the node we just added above.
7078 */
7079 tmpresult = dns_rbt_deletenode(rbtdb->tree, node,
7080 ISC_FALSE);
7081 if (tmpresult != ISC_R_SUCCESS)
7082 isc_log_write(dns_lctx,
7083 DNS_LOGCATEGORY_DATABASE,
7084 DNS_LOGMODULE_CACHE,
7085 ISC_LOG_WARNING,
7086 "loading_addrdataset: "
7087 "dns_rbt_deletenode: %s after "
7088 "dns_rbt_addnode(NSEC): %s",
7089 isc_result_totext(tmpresult),
7090 isc_result_totext(ISC_R_SUCCESS));
7091 }
7092 }
7093 if (!hasnsec)
7094 goto done;
7095 if (noderesult == ISC_R_EXISTS) {
7096 /*
7097 * Add a node to the auxiliary NSEC tree for an old node
7098 * just now getting an NSEC record.
7099 */
7100 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC)
7101 goto done;
7102 } else if (noderesult != ISC_R_SUCCESS)
7103 goto done;
7104
7105 /*
7106 * Build the auxiliary tree for NSECs as we go.
7107 * This tree speeds searches for closest NSECs that would otherwise
7108 * need to examine many irrelevant nodes in large TLDs.
7109 *
7110 * Add nodes to the auxiliary tree after corresponding nodes have
7111 * been added to the main tree.
7112 */
7113 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
7114 if (nsecresult == ISC_R_SUCCESS) {
7115 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
7116 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7117 goto done;
7118 }
7119
7120 if (nsecresult == ISC_R_EXISTS) {
7121 #if 1 /* 0 */
7122 isc_log_write(dns_lctx,
7123 DNS_LOGCATEGORY_DATABASE,
7124 DNS_LOGMODULE_CACHE,
7125 ISC_LOG_WARNING,
7126 "addnode: NSEC node already exists");
7127 #endif
7128 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7129 goto done;
7130 }
7131
7132 if (noderesult == ISC_R_SUCCESS) {
7133 unsigned int node_has_rpz;
7134
7135 /*
7136 * Remove the node we just added above.
7137 */
7138 node_has_rpz = node->rpz;
7139 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
7140 if (tmpresult == ISC_R_SUCCESS) {
7141 /*
7142 * Clean rpz entries added above.
7143 */
7144 if (rbtdb->rpzs != NULL && node_has_rpz)
7145 dns_rpz_delete(rbtdb->load_rpzs,
7146 rbtdb->rpz_num, name);
7147 } else {
7148 isc_log_write(dns_lctx,
7149 DNS_LOGCATEGORY_DATABASE,
7150 DNS_LOGMODULE_CACHE,
7151 ISC_LOG_WARNING,
7152 "loading_addrdataset: "
7153 "dns_rbt_deletenode: %s after "
7154 "dns_rbt_addnode(NSEC): %s",
7155 isc_result_totext(tmpresult),
7156 isc_result_totext(noderesult));
7157 }
7158 }
7159
7160 /*
7161 * Set the error condition to be returned.
7162 */
7163 noderesult = nsecresult;
7164
7165 done:
7166 if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS)
7167 *nodep = node;
7168
7169 return (noderesult);
7170 }
7171
7172 static isc_result_t
loading_addrdataset(void * arg,dns_name_t * name,dns_rdataset_t * rdataset)7173 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
7174 rbtdb_load_t *loadctx = arg;
7175 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7176 dns_rbtnode_t *node;
7177 isc_result_t result;
7178 isc_region_t region;
7179 rdatasetheader_t *newheader;
7180
7181 /*
7182 * This routine does no node locking. See comments in
7183 * 'load' below for more information on loading and
7184 * locking.
7185 */
7186
7187
7188 /*
7189 * SOA records are only allowed at top of zone.
7190 */
7191 if (rdataset->type == dns_rdatatype_soa &&
7192 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
7193 return (DNS_R_NOTZONETOP);
7194
7195 if (rdataset->type != dns_rdatatype_nsec3 &&
7196 rdataset->covers != dns_rdatatype_nsec3)
7197 add_empty_wildcards(rbtdb, name);
7198
7199 if (dns_name_iswildcard(name)) {
7200 /*
7201 * NS record owners cannot legally be wild cards.
7202 */
7203 if (rdataset->type == dns_rdatatype_ns)
7204 return (DNS_R_INVALIDNS);
7205 /*
7206 * NSEC3 record owners cannot legally be wild cards.
7207 */
7208 if (rdataset->type == dns_rdatatype_nsec3)
7209 return (DNS_R_INVALIDNSEC3);
7210 result = add_wildcard_magic(rbtdb, name);
7211 if (result != ISC_R_SUCCESS)
7212 return (result);
7213 }
7214
7215 node = NULL;
7216 if (rdataset->type == dns_rdatatype_nsec3 ||
7217 rdataset->covers == dns_rdatatype_nsec3) {
7218 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
7219 if (result == ISC_R_SUCCESS)
7220 node->nsec = DNS_RBT_NSEC_NSEC3;
7221 } else if (rdataset->type == dns_rdatatype_nsec) {
7222 result = loadnode(rbtdb, name, &node, ISC_TRUE);
7223 } else {
7224 result = loadnode(rbtdb, name, &node, ISC_FALSE);
7225 }
7226 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
7227 return (result);
7228 if (result == ISC_R_SUCCESS) {
7229 dns_name_t foundname;
7230 dns_name_init(&foundname, NULL);
7231 dns_rbt_namefromnode(node, &foundname);
7232 #ifdef DNS_RBT_USEHASH
7233 node->locknum = node->hashval % rbtdb->node_lock_count;
7234 #else
7235 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
7236 rbtdb->node_lock_count;
7237 #endif
7238 }
7239
7240 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
7241 ®ion,
7242 sizeof(rdatasetheader_t));
7243 if (result != ISC_R_SUCCESS)
7244 return (result);
7245 newheader = (rdatasetheader_t *)region.base;
7246 init_rdataset(rbtdb, newheader);
7247 set_ttl(rbtdb, newheader,
7248 rdataset->ttl + loadctx->now); /* XXX overflow check */
7249 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7250 rdataset->covers);
7251 newheader->attributes = 0;
7252 newheader->trust = rdataset->trust;
7253 newheader->serial = 1;
7254 newheader->noqname = NULL;
7255 newheader->closest = NULL;
7256 newheader->count = init_count++;
7257 newheader->additional_auth = NULL;
7258 newheader->additional_glue = NULL;
7259 newheader->last_used = 0;
7260 newheader->node = node;
7261 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7262 newheader->attributes |= RDATASET_ATTR_RESIGN;
7263 newheader->resign = rdataset->resign;
7264 } else
7265 newheader->resign = 0;
7266
7267 result = add32(rbtdb, node, rbtdb->current_version, newheader,
7268 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7269 if (result == ISC_R_SUCCESS &&
7270 delegating_type(rbtdb, node, rdataset->type))
7271 node->find_callback = 1;
7272 else if (result == DNS_R_UNCHANGED)
7273 result = ISC_R_SUCCESS;
7274
7275 return (result);
7276 }
7277
7278 static isc_result_t
rbt_datafixer(dns_rbtnode_t * rbtnode,void * base,size_t filesize,void * arg,isc_uint64_t * crc)7279 rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize,
7280 void *arg, isc_uint64_t *crc)
7281 {
7282 isc_result_t result;
7283 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *) arg;
7284 rdatasetheader_t *header;
7285 unsigned char *limit = ((unsigned char *) base) + filesize;
7286 unsigned char *p;
7287 size_t size;
7288
7289 REQUIRE(rbtnode != NULL);
7290
7291 for (header = rbtnode->data; header != NULL; header = header->next) {
7292 p = (unsigned char *) header;
7293
7294 size = dns_rdataslab_size(p, sizeof(*header));
7295 isc_crc64_update(crc, p, size);
7296 #ifdef DEBUG
7297 hexdump("hashing header", p, sizeof(rdatasetheader_t));
7298 hexdump("hashing slab", p + sizeof(rdatasetheader_t),
7299 size - sizeof(rdatasetheader_t));
7300 #endif
7301 header->serial = 1;
7302 header->is_mmapped = 1;
7303 header->node = rbtnode;
7304 header->node_is_relative = 0;
7305
7306 if (rbtdb != NULL && RESIGN(header) && header->resign != 0) {
7307 int idx = header->node->locknum;
7308 result = isc_heap_insert(rbtdb->heaps[idx], header);
7309 if (result != ISC_R_SUCCESS)
7310 return (result);
7311 }
7312
7313 if (header->next != NULL) {
7314 size_t cooked = dns_rbt_serialize_align(size);
7315 if ((uintptr_t)header->next !=
7316 (p - (unsigned char *)base) + cooked)
7317 return (ISC_R_INVALIDFILE);
7318 header->next = (rdatasetheader_t *)(p + cooked);
7319 header->next_is_relative = 0;
7320 if ((header->next < (rdatasetheader_t *) base) ||
7321 (header->next > (rdatasetheader_t *) limit))
7322 return (ISC_R_INVALIDFILE);
7323 }
7324 }
7325
7326 return (ISC_R_SUCCESS);
7327 }
7328
7329 /*
7330 * Load the RBT database from the image in 'f'
7331 */
7332 static isc_result_t
deserialize32(void * arg,FILE * f,off_t offset)7333 deserialize32(void *arg, FILE *f, off_t offset) {
7334 isc_result_t result;
7335 rbtdb_load_t *loadctx = arg;
7336 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7337 rbtdb_file_header_t *header;
7338 int fd;
7339 off_t filesize = 0;
7340 char *base;
7341 dns_rbt_t *temporary_rbt = NULL;
7342 int protect, flags;
7343
7344 REQUIRE(VALID_RBTDB(rbtdb));
7345
7346 /*
7347 * TODO CKB: since this is read-write (had to be to add nodes later)
7348 * we will need to lock the file or the nodes in it before modifying
7349 * the nodes in the file.
7350 */
7351
7352 /* Map in the whole file in one go */
7353 fd = fileno(f);
7354 isc_file_getsizefd(fd, &filesize);
7355 protect = PROT_READ|PROT_WRITE;
7356 flags = MAP_PRIVATE;
7357 #ifdef MAP_FILE
7358 flags |= MAP_FILE;
7359 #endif
7360
7361 base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0);
7362 if (base == NULL || base == MAP_FAILED)
7363 return (ISC_R_FAILURE);
7364
7365 header = (rbtdb_file_header_t *)(base + offset);
7366
7367 rbtdb->mmap_location = base;
7368 rbtdb->mmap_size = (size_t) filesize;
7369 rbtdb->origin_node = NULL;
7370
7371 if (header->tree != 0) {
7372 result = dns_rbt_deserialize_tree(base, filesize,
7373 (off_t) header->tree,
7374 rbtdb->common.mctx,
7375 delete_callback, rbtdb,
7376 rbt_datafixer, rbtdb,
7377 &rbtdb->origin_node,
7378 &temporary_rbt);
7379 if (temporary_rbt != NULL) {
7380 dns_rbt_destroy(&rbtdb->tree);
7381 rbtdb->tree = temporary_rbt;
7382 temporary_rbt = NULL;
7383
7384 rbtdb->origin_node =
7385 (dns_rbtnode_t *)(header->tree + base + 1024);
7386 }
7387 if (result != ISC_R_SUCCESS)
7388 return (result);
7389 }
7390
7391 if (header->nsec != 0) {
7392 result = dns_rbt_deserialize_tree(base, filesize,
7393 (off_t) header->nsec,
7394 rbtdb->common.mctx,
7395 delete_callback, rbtdb,
7396 rbt_datafixer, rbtdb,
7397 NULL, &temporary_rbt);
7398 if (temporary_rbt != NULL) {
7399 dns_rbt_destroy(&rbtdb->nsec);
7400 rbtdb->nsec = temporary_rbt;
7401 temporary_rbt = NULL;
7402 }
7403 if (result != ISC_R_SUCCESS)
7404 return (result);
7405 }
7406
7407 if (header->nsec3 != 0) {
7408 result = dns_rbt_deserialize_tree(base, filesize,
7409 (off_t) header->nsec3,
7410 rbtdb->common.mctx,
7411 delete_callback, rbtdb,
7412 rbt_datafixer, rbtdb,
7413 NULL, &temporary_rbt);
7414 if (temporary_rbt != NULL) {
7415 dns_rbt_destroy(&rbtdb->nsec3);
7416 rbtdb->nsec3 = temporary_rbt;
7417 temporary_rbt = NULL;
7418 }
7419 if (result != ISC_R_SUCCESS)
7420 return (result);
7421 }
7422
7423 return (ISC_R_SUCCESS);
7424 }
7425
7426 static isc_result_t
beginload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7427 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7428 rbtdb_load_t *loadctx;
7429 dns_rbtdb_t *rbtdb;
7430 rbtdb = (dns_rbtdb_t *)db;
7431
7432 REQUIRE(DNS_CALLBACK_VALID(callbacks));
7433 REQUIRE(VALID_RBTDB(rbtdb));
7434
7435 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7436 if (loadctx == NULL)
7437 return (ISC_R_NOMEMORY);
7438
7439 loadctx->rbtdb = rbtdb;
7440 if (IS_CACHE(rbtdb))
7441 isc_stdtime_get(&loadctx->now);
7442 else
7443 loadctx->now = 0;
7444
7445 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7446
7447 if (rbtdb->rpzs != NULL) {
7448 isc_result_t result;
7449
7450 result = dns_rpz_beginload(&rbtdb->load_rpzs,
7451 rbtdb->rpzs, rbtdb->rpz_num);
7452 if (result != ISC_R_SUCCESS) {
7453 isc_mem_put(rbtdb->common.mctx, loadctx,
7454 sizeof(*loadctx));
7455 return (result);
7456 }
7457 }
7458
7459 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7460 == 0);
7461 rbtdb->attributes |= RBTDB_ATTR_LOADING;
7462
7463 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7464
7465 callbacks->add = loading_addrdataset;
7466 callbacks->add_private = loadctx;
7467 callbacks->deserialize = deserialize32;
7468 callbacks->deserialize_private = loadctx;
7469
7470 return (ISC_R_SUCCESS);
7471 }
7472
7473 static isc_result_t
endload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7474 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7475 rbtdb_load_t *loadctx;
7476 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7477
7478 REQUIRE(VALID_RBTDB(rbtdb));
7479 REQUIRE(DNS_CALLBACK_VALID(callbacks));
7480 loadctx = callbacks->add_private;
7481 REQUIRE(loadctx != NULL);
7482 REQUIRE(loadctx->rbtdb == rbtdb);
7483
7484 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7485
7486 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7487 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7488
7489 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7490 rbtdb->attributes |= RBTDB_ATTR_LOADED;
7491
7492 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7493
7494 /*
7495 * If there's a KEY rdataset at the zone origin containing a
7496 * zone key, we consider the zone secure.
7497 */
7498 if (! IS_CACHE(rbtdb) && rbtdb->origin_node != NULL)
7499 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7500
7501 callbacks->add = NULL;
7502 callbacks->add_private = NULL;
7503 callbacks->deserialize = NULL;
7504 callbacks->deserialize_private = NULL;
7505
7506 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7507
7508 return (ISC_R_SUCCESS);
7509 }
7510
7511 /*
7512 * helper function to handle writing out the rdataset data pointed to
7513 * by the void *data pointer in the dns_rbtnode
7514 */
7515 static isc_result_t
rbt_datawriter(FILE * rbtfile,unsigned char * data,void * arg,isc_uint64_t * crc)7516 rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg,
7517 isc_uint64_t *crc)
7518 {
7519 rbtdb_version_t *version = (rbtdb_version_t *) arg;
7520 rbtdb_serial_t serial;
7521 rdatasetheader_t newheader;
7522 rdatasetheader_t *header = (rdatasetheader_t *) data, *next;
7523 off_t where;
7524 size_t cooked, size;
7525 unsigned char *p;
7526 isc_result_t result = ISC_R_SUCCESS;
7527 char pad[sizeof(char *)];
7528 uintptr_t off;
7529
7530 REQUIRE(rbtfile != NULL);
7531 REQUIRE(data != NULL);
7532 REQUIRE(version != NULL);
7533
7534 serial = version->serial;
7535
7536 for (; header != NULL; header = next) {
7537 next = header->next;
7538 do {
7539 if (header->serial <= serial && !IGNORE(header)) {
7540 if (NONEXISTENT(header))
7541 header = NULL;
7542 break;
7543 } else
7544 header = header->down;
7545 } while (header != NULL);
7546
7547 if (header == NULL)
7548 continue;
7549
7550 CHECK(isc_stdio_tell(rbtfile, &where));
7551 size = dns_rdataslab_size((unsigned char *) header,
7552 sizeof(rdatasetheader_t));
7553
7554 p = (unsigned char *) header;
7555 memmove(&newheader, p, sizeof(rdatasetheader_t));
7556 newheader.down = NULL;
7557 newheader.next = NULL;
7558 off = where;
7559 if ((off_t)off != where)
7560 return (ISC_R_RANGE);
7561 newheader.node = (dns_rbtnode_t *) off;
7562 newheader.node_is_relative = 1;
7563 newheader.serial = 1;
7564
7565 /*
7566 * Round size up to the next pointer sized offset so it
7567 * will be properly aligned when read back in.
7568 */
7569 cooked = dns_rbt_serialize_align(size);
7570 if (next != NULL) {
7571 newheader.next = (rdatasetheader_t *) (off + cooked);
7572 newheader.next_is_relative = 1;
7573 }
7574
7575 #ifdef DEBUG
7576 hexdump("writing header", (unsigned char *) &newheader,
7577 sizeof(rdatasetheader_t));
7578 hexdump("writing slab", p + sizeof(rdatasetheader_t),
7579 size - sizeof(rdatasetheader_t));
7580 #endif
7581 isc_crc64_update(crc, (unsigned char *) &newheader,
7582 sizeof(rdatasetheader_t));
7583 CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1,
7584 rbtfile, NULL));
7585
7586 isc_crc64_update(crc, p + sizeof(rdatasetheader_t),
7587 size - sizeof(rdatasetheader_t));
7588 CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t),
7589 size - sizeof(rdatasetheader_t), 1,
7590 rbtfile, NULL));
7591 /*
7592 * Pad to force alignment.
7593 */
7594 if (size != (size_t) cooked) {
7595 memset(pad, 0, sizeof(pad));
7596 CHECK(isc_stdio_write(pad, cooked - size, 1,
7597 rbtfile, NULL));
7598 }
7599 }
7600
7601 failure:
7602 return (result);
7603 }
7604
7605 /*
7606 * Write out a zeroed header as a placeholder. Doing this ensures
7607 * that the file will not read while it is partially written, should
7608 * writing fail or be interrupted.
7609 */
7610 static isc_result_t
rbtdb_zero_header(FILE * rbtfile)7611 rbtdb_zero_header(FILE *rbtfile) {
7612 char buffer[RBTDB_HEADER_LENGTH];
7613 isc_result_t result;
7614
7615 memset(buffer, 0, RBTDB_HEADER_LENGTH);
7616 result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL);
7617 fflush(rbtfile);
7618
7619 return (result);
7620 }
7621
7622 static isc_once_t once = ISC_ONCE_INIT;
7623
7624 static void
init_file_version(void)7625 init_file_version(void) {
7626 int n;
7627
7628 memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
7629 n = snprintf(FILE_VERSION, sizeof(FILE_VERSION),
7630 "RBTDB Image %s %s", dns_major, dns_mapapi);
7631 INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
7632 }
7633
7634 /*
7635 * Write the file header out, recording the locations of the three
7636 * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
7637 * version information and any information stored in the rbtdb object
7638 * itself that should be stored here.
7639 */
7640 static isc_result_t
rbtdb_write_header(FILE * rbtfile,off_t tree_location,off_t nsec_location,off_t nsec3_location)7641 rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location,
7642 off_t nsec3_location)
7643 {
7644 rbtdb_file_header_t header;
7645 isc_result_t result;
7646
7647 RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
7648
7649 memset(&header, 0, sizeof(rbtdb_file_header_t));
7650 memmove(header.version1, FILE_VERSION, sizeof(header.version1));
7651 memmove(header.version2, FILE_VERSION, sizeof(header.version2));
7652 header.ptrsize = (isc_uint32_t) sizeof(void *);
7653 header.bigendian = (1 == htonl(1)) ? 1 : 0;
7654 header.tree = (isc_uint64_t) tree_location;
7655 header.nsec = (isc_uint64_t) nsec_location;
7656 header.nsec3 = (isc_uint64_t) nsec3_location;
7657 result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t),
7658 rbtfile, NULL);
7659 fflush(rbtfile);
7660
7661 return (result);
7662 }
7663
7664 static isc_result_t
serialize(dns_db_t * db,dns_dbversion_t * ver,FILE * rbtfile)7665 serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) {
7666 rbtdb_version_t *version = (rbtdb_version_t *) ver;
7667 dns_rbtdb_t *rbtdb;
7668 isc_result_t result;
7669 off_t tree_location, nsec_location, nsec3_location, header_location;
7670
7671 rbtdb = (dns_rbtdb_t *)db;
7672
7673 REQUIRE(VALID_RBTDB(rbtdb));
7674 REQUIRE(rbtfile != NULL);
7675
7676 /* Ensure we're writing to a plain file */
7677 CHECK(isc_file_isplainfilefd(fileno(rbtfile)));
7678
7679 /*
7680 * first, write out a zeroed header to store rbtdb information
7681 *
7682 * then for each of the three trees, store the current position
7683 * in the file and call dns_rbt_serialize_tree
7684 *
7685 * finally, write out the rbtdb header, storing the locations of the
7686 * rbtheaders
7687 *
7688 * NOTE: need to do something better with the return codes, &= will
7689 * not work.
7690 */
7691 CHECK(isc_stdio_tell(rbtfile, &header_location));
7692 CHECK(rbtdb_zero_header(rbtfile));
7693 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter,
7694 version, &tree_location));
7695 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter,
7696 version, &nsec_location));
7697 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter,
7698 version, &nsec3_location));
7699
7700 CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET));
7701 CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location,
7702 nsec3_location));
7703 failure:
7704 return (result);
7705 }
7706
7707 static isc_result_t
dump(dns_db_t * db,dns_dbversion_t * version,const char * filename,dns_masterformat_t masterformat)7708 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7709 dns_masterformat_t masterformat)
7710 {
7711 dns_rbtdb_t *rbtdb;
7712 rbtdb_version_t *rbtversion = version;
7713
7714 rbtdb = (dns_rbtdb_t *)db;
7715
7716 REQUIRE(VALID_RBTDB(rbtdb));
7717 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7718
7719 return (dns_master_dump2(rbtdb->common.mctx, db, version,
7720 &dns_master_style_default,
7721 filename, masterformat));
7722 }
7723
7724 static void
delete_callback(void * data,void * arg)7725 delete_callback(void *data, void *arg) {
7726 dns_rbtdb_t *rbtdb = arg;
7727 rdatasetheader_t *current, *next;
7728 unsigned int locknum;
7729
7730 current = data;
7731 locknum = current->node->locknum;
7732 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7733 while (current != NULL) {
7734 next = current->next;
7735 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7736 current = next;
7737 }
7738 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7739 }
7740
7741 static isc_boolean_t
issecure(dns_db_t * db)7742 issecure(dns_db_t *db) {
7743 dns_rbtdb_t *rbtdb;
7744 isc_boolean_t secure;
7745
7746 rbtdb = (dns_rbtdb_t *)db;
7747
7748 REQUIRE(VALID_RBTDB(rbtdb));
7749
7750 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7751 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7752 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7753
7754 return (secure);
7755 }
7756
7757 static isc_boolean_t
isdnssec(dns_db_t * db)7758 isdnssec(dns_db_t *db) {
7759 dns_rbtdb_t *rbtdb;
7760 isc_boolean_t dnssec;
7761
7762 rbtdb = (dns_rbtdb_t *)db;
7763
7764 REQUIRE(VALID_RBTDB(rbtdb));
7765
7766 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7767 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7768 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7769
7770 return (dnssec);
7771 }
7772
7773 static unsigned int
nodecount(dns_db_t * db)7774 nodecount(dns_db_t *db) {
7775 dns_rbtdb_t *rbtdb;
7776 unsigned int count;
7777
7778 rbtdb = (dns_rbtdb_t *)db;
7779
7780 REQUIRE(VALID_RBTDB(rbtdb));
7781
7782 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7783 count = dns_rbt_nodecount(rbtdb->tree);
7784 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7785
7786 return (count);
7787 }
7788
7789 static unsigned int
hashsize(dns_db_t * db)7790 hashsize(dns_db_t *db) {
7791 dns_rbtdb_t *rbtdb;
7792 unsigned int count;
7793
7794 rbtdb = (dns_rbtdb_t *)db;
7795
7796 REQUIRE(VALID_RBTDB(rbtdb));
7797
7798 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7799 count = dns_rbt_hashsize(rbtdb->tree);
7800 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7801
7802 return (count);
7803 }
7804
7805 static void
settask(dns_db_t * db,isc_task_t * task)7806 settask(dns_db_t *db, isc_task_t *task) {
7807 dns_rbtdb_t *rbtdb;
7808
7809 rbtdb = (dns_rbtdb_t *)db;
7810
7811 REQUIRE(VALID_RBTDB(rbtdb));
7812
7813 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7814 if (rbtdb->task != NULL)
7815 isc_task_detach(&rbtdb->task);
7816 if (task != NULL)
7817 isc_task_attach(task, &rbtdb->task);
7818 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7819 }
7820
7821 static isc_boolean_t
ispersistent(dns_db_t * db)7822 ispersistent(dns_db_t *db) {
7823 UNUSED(db);
7824 return (ISC_FALSE);
7825 }
7826
7827 static isc_result_t
getoriginnode(dns_db_t * db,dns_dbnode_t ** nodep)7828 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7829 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7830 dns_rbtnode_t *onode;
7831 isc_result_t result = ISC_R_SUCCESS;
7832
7833 REQUIRE(VALID_RBTDB(rbtdb));
7834 REQUIRE(nodep != NULL && *nodep == NULL);
7835
7836 /* Note that the access to origin_node doesn't require a DB lock */
7837 onode = (dns_rbtnode_t *)rbtdb->origin_node;
7838 if (onode != NULL) {
7839 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7840 new_reference(rbtdb, onode);
7841 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7842
7843 *nodep = rbtdb->origin_node;
7844 } else {
7845 INSIST(IS_CACHE(rbtdb));
7846 result = ISC_R_NOTFOUND;
7847 }
7848
7849 return (result);
7850 }
7851
7852 static isc_result_t
getnsec3parameters(dns_db_t * db,dns_dbversion_t * version,dns_hash_t * hash,isc_uint8_t * flags,isc_uint16_t * iterations,unsigned char * salt,size_t * salt_length)7853 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7854 isc_uint8_t *flags, isc_uint16_t *iterations,
7855 unsigned char *salt, size_t *salt_length)
7856 {
7857 dns_rbtdb_t *rbtdb;
7858 isc_result_t result = ISC_R_NOTFOUND;
7859 rbtdb_version_t *rbtversion = version;
7860
7861 rbtdb = (dns_rbtdb_t *)db;
7862
7863 REQUIRE(VALID_RBTDB(rbtdb));
7864 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7865
7866 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7867
7868 if (rbtversion == NULL)
7869 rbtversion = rbtdb->current_version;
7870
7871 if (rbtversion->havensec3) {
7872 if (hash != NULL)
7873 *hash = rbtversion->hash;
7874 if (salt != NULL && salt_length != NULL) {
7875 REQUIRE(*salt_length >= rbtversion->salt_length);
7876 memmove(salt, rbtversion->salt,
7877 rbtversion->salt_length);
7878 }
7879 if (salt_length != NULL)
7880 *salt_length = rbtversion->salt_length;
7881 if (iterations != NULL)
7882 *iterations = rbtversion->iterations;
7883 if (flags != NULL)
7884 *flags = rbtversion->flags;
7885 result = ISC_R_SUCCESS;
7886 }
7887 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7888
7889 return (result);
7890 }
7891
7892 static isc_result_t
setsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,isc_stdtime_t resign)7893 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7894 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7895 isc_stdtime_t oldresign;
7896 isc_result_t result = ISC_R_SUCCESS;
7897 rdatasetheader_t *header;
7898
7899 REQUIRE(VALID_RBTDB(rbtdb));
7900 REQUIRE(!IS_CACHE(rbtdb));
7901 REQUIRE(rdataset != NULL);
7902
7903 header = rdataset->private3;
7904 header--;
7905
7906 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7907 isc_rwlocktype_write);
7908
7909 oldresign = header->resign;
7910 header->resign = resign;
7911 if (header->heap_index != 0) {
7912 INSIST(RESIGN(header));
7913 if (resign == 0) {
7914 isc_heap_delete(rbtdb->heaps[header->node->locknum],
7915 header->heap_index);
7916 header->heap_index = 0;
7917 } else if (resign < oldresign)
7918 isc_heap_increased(rbtdb->heaps[header->node->locknum],
7919 header->heap_index);
7920 else if (resign > oldresign)
7921 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7922 header->heap_index);
7923 } else if (resign && header->heap_index == 0) {
7924 header->attributes |= RDATASET_ATTR_RESIGN;
7925 result = resign_insert(rbtdb, header->node->locknum, header);
7926 }
7927 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7928 isc_rwlocktype_write);
7929 return (result);
7930 }
7931
7932 static isc_result_t
getsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,dns_name_t * foundname)7933 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7934 dns_name_t *foundname)
7935 {
7936 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7937 rdatasetheader_t *header = NULL, *this;
7938 unsigned int i;
7939 isc_result_t result = ISC_R_NOTFOUND;
7940 unsigned int locknum;
7941
7942 REQUIRE(VALID_RBTDB(rbtdb));
7943
7944 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7945
7946 for (i = 0; i < rbtdb->node_lock_count; i++) {
7947 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7948 this = isc_heap_element(rbtdb->heaps[i], 1);
7949 if (this == NULL) {
7950 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7951 isc_rwlocktype_read);
7952 continue;
7953 }
7954 if (header == NULL)
7955 header = this;
7956 else if (isc_serial_lt(this->resign, header->resign)) {
7957 locknum = header->node->locknum;
7958 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7959 isc_rwlocktype_read);
7960 header = this;
7961 } else
7962 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7963 isc_rwlocktype_read);
7964 }
7965
7966 if (header == NULL)
7967 goto unlock;
7968
7969 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7970
7971 if (foundname != NULL)
7972 dns_rbt_fullnamefromnode(header->node, foundname);
7973
7974 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7975 isc_rwlocktype_read);
7976
7977 result = ISC_R_SUCCESS;
7978
7979 unlock:
7980 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7981
7982 return (result);
7983 }
7984
7985 static void
resigned(dns_db_t * db,dns_rdataset_t * rdataset,dns_dbversion_t * version)7986 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7987 {
7988 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7989 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7990 dns_rbtnode_t *node;
7991 rdatasetheader_t *header;
7992
7993 REQUIRE(VALID_RBTDB(rbtdb));
7994 REQUIRE(rdataset != NULL);
7995 REQUIRE(rdataset->methods == &rdataset_methods);
7996 REQUIRE(rbtdb->future_version == rbtversion);
7997 REQUIRE(rbtversion != NULL);
7998 REQUIRE(rbtversion->writer);
7999 REQUIRE(rbtversion->rbtdb == rbtdb);
8000
8001 node = rdataset->private2;
8002 INSIST(node != NULL);
8003 header = rdataset->private3;
8004 INSIST(header != NULL);
8005 header--;
8006
8007 if (header->heap_index == 0)
8008 return;
8009
8010 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8011 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
8012 isc_rwlocktype_write);
8013 /*
8014 * Delete from heap and save to re-signed list so that it can
8015 * be restored if we backout of this change.
8016 */
8017 resign_delete(rbtdb, rbtversion, header);
8018 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
8019 isc_rwlocktype_write);
8020 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8021 }
8022
8023 static isc_result_t
setcachestats(dns_db_t * db,isc_stats_t * stats)8024 setcachestats(dns_db_t *db, isc_stats_t *stats) {
8025 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8026
8027 REQUIRE(VALID_RBTDB(rbtdb));
8028 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8029 REQUIRE(stats != NULL);
8030
8031 isc_stats_attach(stats, &rbtdb->cachestats);
8032 return (ISC_R_SUCCESS);
8033 }
8034
8035 static dns_stats_t *
getrrsetstats(dns_db_t * db)8036 getrrsetstats(dns_db_t *db) {
8037 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8038
8039 REQUIRE(VALID_RBTDB(rbtdb));
8040 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8041
8042 return (rbtdb->rrsetstats);
8043 }
8044
/*
 * Method table used for zone and stub databases (see the type dispatch
 * in the create function).  The initializer is positional: entries
 * must stay in the order of the members of dns_dbmethods_t, and
 * a NULL marks an operation this database type does not provide.
 * The slot order is shared with cache_methods.
 */
static dns_dbmethods_t zone_methods = {
	attach,
	detach,
	beginload,
	endload,
	serialize,
	dump,
	currentversion,
	newversion,
	attachversion,
	closeversion,
	findnode,
	zone_find,
	zone_findzonecut,
	attachnode,
	detachnode,
	expirenode,
	printnode,
	createiterator,
	zone_findrdataset,
	allrdatasets,
	addrdataset,
	subtractrdataset,
	deleterdataset,
	issecure,
	nodecount,
	ispersistent,
	overmem,
	settask,
	getoriginnode,
	NULL,		/* unset in both method tables */
	getnsec3parameters,
	findnsec3node,
	setsigningtime,
	getsigningtime,
	resigned,
	isdnssec,
	NULL,		/* slot filled by getrrsetstats in cache_methods */
	rpz_attach,
	rpz_ready,
	NULL,		/* unset in both method tables */
	NULL,		/* unset in both method tables */
	NULL,		/* slot filled by setcachestats in cache_methods */
	hashsize
};
8090
/*
 * Method table used for cache databases (see the type dispatch in the
 * create function).  The initializer is positional: entries must stay
 * in the order of the members of dns_dbmethods_t, and a NULL
 * marks an operation this database type does not provide.  The slot
 * order is shared with zone_methods.
 */
static dns_dbmethods_t cache_methods = {
	attach,
	detach,
	beginload,
	endload,
	NULL,		/* slot filled by serialize in zone_methods */
	dump,
	currentversion,
	newversion,
	attachversion,
	closeversion,
	findnode,
	cache_find,
	cache_findzonecut,
	attachnode,
	detachnode,
	expirenode,
	printnode,
	createiterator,
	cache_findrdataset,
	allrdatasets,
	addrdataset,
	subtractrdataset,
	deleterdataset,
	issecure,
	nodecount,
	ispersistent,
	overmem,
	settask,
	getoriginnode,
	NULL,		/* unset in both method tables */
	NULL,		/* slot filled by getnsec3parameters in zone_methods */
	NULL,		/* slot filled by findnsec3node in zone_methods */
	NULL,		/* slot filled by setsigningtime in zone_methods */
	NULL,		/* slot filled by getsigningtime in zone_methods */
	NULL,		/* slot filled by resigned in zone_methods */
	isdnssec,
	getrrsetstats,
	NULL,		/* slot filled by rpz_attach in zone_methods */
	NULL,		/* slot filled by rpz_ready in zone_methods */
	NULL,		/* unset in both method tables */
	NULL,		/* unset in both method tables */
	setcachestats,
	hashsize
};
8136
8137 isc_result_t
8138 #ifdef DNS_RBTDB_VERSION64
dns_rbtdb64_create(isc_mem_t * mctx,dns_name_t * origin,dns_dbtype_t type,dns_rdataclass_t rdclass,unsigned int argc,char * argv[],void * driverarg,dns_db_t ** dbp)8139 dns_rbtdb64_create
8140 #else
8141 dns_rbtdb_create
8142 #endif
8143 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
8144 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
8145 void *driverarg, dns_db_t **dbp)
8146 {
8147 dns_rbtdb_t *rbtdb;
8148 isc_result_t result;
8149 int i;
8150 dns_name_t name;
8151 isc_boolean_t (*sooner)(void *, void *);
8152 isc_mem_t *hmctx = mctx;
8153
8154 /* Keep the compiler happy. */
8155 UNUSED(driverarg);
8156
8157 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
8158 if (rbtdb == NULL)
8159 return (ISC_R_NOMEMORY);
8160
8161 /*
8162 * If argv[0] exists, it points to a memory context to use for heap
8163 */
8164 if (argc != 0)
8165 hmctx = (isc_mem_t *) argv[0];
8166
8167 memset(rbtdb, '\0', sizeof(*rbtdb));
8168 dns_name_init(&rbtdb->common.origin, NULL);
8169 rbtdb->common.attributes = 0;
8170 if (type == dns_dbtype_cache) {
8171 rbtdb->common.methods = &cache_methods;
8172 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
8173 } else if (type == dns_dbtype_stub) {
8174 rbtdb->common.methods = &zone_methods;
8175 rbtdb->common.attributes |= DNS_DBATTR_STUB;
8176 } else
8177 rbtdb->common.methods = &zone_methods;
8178 rbtdb->common.rdclass = rdclass;
8179 rbtdb->common.mctx = NULL;
8180
8181 result = RBTDB_INITLOCK(&rbtdb->lock);
8182 if (result != ISC_R_SUCCESS)
8183 goto cleanup_rbtdb;
8184
8185 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
8186 if (result != ISC_R_SUCCESS)
8187 goto cleanup_lock;
8188
8189 /*
8190 * Initialize node_lock_count in a generic way to support future
8191 * extension which allows the user to specify this value on creation.
8192 * Note that when specified for a cache DB it must be larger than 1
8193 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
8194 */
8195 if (rbtdb->node_lock_count == 0) {
8196 if (IS_CACHE(rbtdb))
8197 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
8198 else
8199 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
8200 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
8201 result = ISC_R_RANGE;
8202 goto cleanup_tree_lock;
8203 }
8204 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
8205 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
8206 sizeof(rbtdb_nodelock_t));
8207 if (rbtdb->node_locks == NULL) {
8208 result = ISC_R_NOMEMORY;
8209 goto cleanup_tree_lock;
8210 }
8211
8212 rbtdb->cachestats = NULL;
8213 rbtdb->rrsetstats = NULL;
8214 if (IS_CACHE(rbtdb)) {
8215 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
8216 if (result != ISC_R_SUCCESS)
8217 goto cleanup_node_locks;
8218 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
8219 sizeof(rdatasetheaderlist_t));
8220 if (rbtdb->rdatasets == NULL) {
8221 result = ISC_R_NOMEMORY;
8222 goto cleanup_rrsetstats;
8223 }
8224 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8225 ISC_LIST_INIT(rbtdb->rdatasets[i]);
8226 } else
8227 rbtdb->rdatasets = NULL;
8228
8229 /*
8230 * Create the heaps.
8231 */
8232 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
8233 sizeof(isc_heap_t *));
8234 if (rbtdb->heaps == NULL) {
8235 result = ISC_R_NOMEMORY;
8236 goto cleanup_rdatasets;
8237 }
8238 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8239 rbtdb->heaps[i] = NULL;
8240 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
8241 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8242 result = isc_heap_create(hmctx, sooner, set_index, 0,
8243 &rbtdb->heaps[i]);
8244 if (result != ISC_R_SUCCESS)
8245 goto cleanup_heaps;
8246 }
8247
8248 /*
8249 * Create deadnode lists.
8250 */
8251 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
8252 sizeof(rbtnodelist_t));
8253 if (rbtdb->deadnodes == NULL) {
8254 result = ISC_R_NOMEMORY;
8255 goto cleanup_heaps;
8256 }
8257 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
8258 ISC_LIST_INIT(rbtdb->deadnodes[i]);
8259
8260 rbtdb->active = rbtdb->node_lock_count;
8261
8262 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
8263 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
8264 if (result == ISC_R_SUCCESS) {
8265 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
8266 if (result != ISC_R_SUCCESS)
8267 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
8268 }
8269 if (result != ISC_R_SUCCESS) {
8270 while (i-- > 0) {
8271 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
8272 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
8273 isc_refcount_destroy(&rbtdb->node_locks[i].references);
8274 }
8275 goto cleanup_deadnodes;
8276 }
8277 rbtdb->node_locks[i].exiting = ISC_FALSE;
8278 }
8279
8280 /*
8281 * Attach to the mctx. The database will persist so long as there
8282 * are references to it, and attaching to the mctx ensures that our
8283 * mctx won't disappear out from under us.
8284 */
8285 isc_mem_attach(mctx, &rbtdb->common.mctx);
8286 isc_mem_attach(hmctx, &rbtdb->hmctx);
8287
8288 /*
8289 * Must be initialized before free_rbtdb() is called.
8290 */
8291 isc_ondestroy_init(&rbtdb->common.ondest);
8292
8293 /*
8294 * Make a copy of the origin name.
8295 */
8296 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
8297 if (result != ISC_R_SUCCESS) {
8298 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8299 return (result);
8300 }
8301
8302 /*
8303 * Make the Red-Black Trees.
8304 */
8305 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
8306 if (result != ISC_R_SUCCESS) {
8307 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8308 return (result);
8309 }
8310
8311 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
8312 if (result != ISC_R_SUCCESS) {
8313 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8314 return (result);
8315 }
8316
8317 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
8318 if (result != ISC_R_SUCCESS) {
8319 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8320 return (result);
8321 }
8322
8323 /*
8324 * In order to set the node callback bit correctly in zone databases,
8325 * we need to know if the node has the origin name of the zone.
8326 * In loading_addrdataset() we could simply compare the new name
8327 * to the origin name, but this is expensive. Also, we don't know the
8328 * node name in addrdataset(), so we need another way of knowing the
8329 * zone's top.
8330 *
8331 * We now explicitly create a node for the zone's origin, and then
8332 * we simply remember the node's address. This is safe, because
8333 * the top-of-zone node can never be deleted, nor can its address
8334 * change.
8335 */
8336 if (!IS_CACHE(rbtdb)) {
8337 dns_rbtnode_t *nsec3node;
8338
8339 rbtdb->origin_node = NULL;
8340 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
8341 &rbtdb->origin_node);
8342 if (result != ISC_R_SUCCESS) {
8343 INSIST(result != ISC_R_EXISTS);
8344 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8345 return (result);
8346 }
8347 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
8348 /*
8349 * We need to give the origin node the right locknum.
8350 */
8351 dns_name_init(&name, NULL);
8352 dns_rbt_namefromnode(rbtdb->origin_node, &name);
8353 #ifdef DNS_RBT_USEHASH
8354 rbtdb->origin_node->locknum =
8355 rbtdb->origin_node->hashval %
8356 rbtdb->node_lock_count;
8357 #else
8358 rbtdb->origin_node->locknum =
8359 dns_name_hash(&name, ISC_TRUE) %
8360 rbtdb->node_lock_count;
8361 #endif
8362 /*
8363 * Add an apex node to the NSEC3 tree so that NSEC3 searches
8364 * return partial matches when there is only a single NSEC3
8365 * record in the tree.
8366 */
8367 nsec3node = NULL;
8368 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
8369 &nsec3node);
8370 if (result != ISC_R_SUCCESS) {
8371 INSIST(result != ISC_R_EXISTS);
8372 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8373 return (result);
8374 }
8375 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
8376 /*
8377 * We need to give the nsec3 origin node the right locknum.
8378 */
8379 dns_name_init(&name, NULL);
8380 dns_rbt_namefromnode(nsec3node, &name);
8381 #ifdef DNS_RBT_USEHASH
8382 nsec3node->locknum = nsec3node->hashval %
8383 rbtdb->node_lock_count;
8384 #else
8385 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
8386 rbtdb->node_lock_count;
8387 #endif
8388 }
8389
8390 /*
8391 * Misc. Initialization.
8392 */
8393 result = isc_refcount_init(&rbtdb->references, 1);
8394 if (result != ISC_R_SUCCESS) {
8395 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8396 return (result);
8397 }
8398 rbtdb->attributes = 0;
8399 rbtdb->task = NULL;
8400 rbtdb->rpzs = NULL;
8401 rbtdb->load_rpzs = NULL;
8402 rbtdb->rpz_num = DNS_RPZ_INVALID_NUM;
8403
8404 /*
8405 * Version Initialization.
8406 */
8407 rbtdb->current_serial = 1;
8408 rbtdb->least_serial = 1;
8409 rbtdb->next_serial = 2;
8410 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
8411 if (rbtdb->current_version == NULL) {
8412 isc_refcount_decrement(&rbtdb->references, NULL);
8413 isc_refcount_destroy(&rbtdb->references);
8414 free_rbtdb(rbtdb, ISC_FALSE, NULL);
8415 return (ISC_R_NOMEMORY);
8416 }
8417 rbtdb->current_version->rbtdb = rbtdb;
8418 rbtdb->current_version->secure = dns_db_insecure;
8419 rbtdb->current_version->havensec3 = ISC_FALSE;
8420 rbtdb->current_version->flags = 0;
8421 rbtdb->current_version->iterations = 0;
8422 rbtdb->current_version->hash = 0;
8423 rbtdb->current_version->salt_length = 0;
8424 memset(rbtdb->current_version->salt, 0,
8425 sizeof(rbtdb->current_version->salt));
8426 rbtdb->future_version = NULL;
8427 ISC_LIST_INIT(rbtdb->open_versions);
8428 /*
8429 * Keep the current version in the open list so that list operation
8430 * won't happen in normal lookup operations.
8431 */
8432 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
8433
8434 rbtdb->common.magic = DNS_DB_MAGIC;
8435 rbtdb->common.impmagic = RBTDB_MAGIC;
8436
8437 *dbp = (dns_db_t *)rbtdb;
8438
8439 return (ISC_R_SUCCESS);
8440
8441 cleanup_deadnodes:
8442 isc_mem_put(mctx, rbtdb->deadnodes,
8443 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
8444
8445 cleanup_heaps:
8446 if (rbtdb->heaps != NULL) {
8447 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
8448 if (rbtdb->heaps[i] != NULL)
8449 isc_heap_destroy(&rbtdb->heaps[i]);
8450 isc_mem_put(hmctx, rbtdb->heaps,
8451 rbtdb->node_lock_count * sizeof(isc_heap_t *));
8452 }
8453
8454 cleanup_rdatasets:
8455 if (rbtdb->rdatasets != NULL)
8456 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
8457 sizeof(rdatasetheaderlist_t));
8458 cleanup_rrsetstats:
8459 if (rbtdb->rrsetstats != NULL)
8460 dns_stats_detach(&rbtdb->rrsetstats);
8461
8462 cleanup_node_locks:
8463 isc_mem_put(mctx, rbtdb->node_locks,
8464 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
8465
8466 cleanup_tree_lock:
8467 isc_rwlock_destroy(&rbtdb->tree_lock);
8468
8469 cleanup_lock:
8470 RBTDB_DESTROYLOCK(&rbtdb->lock);
8471
8472 cleanup_rbtdb:
8473 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
8474 return (result);
8475 }
8476
8477
8478 /*
8479 * Slabbed Rdataset Methods
8480 */
8481
8482 static void
rdataset_disassociate(dns_rdataset_t * rdataset)8483 rdataset_disassociate(dns_rdataset_t *rdataset) {
8484 dns_db_t *db = rdataset->private1;
8485 dns_dbnode_t *node = rdataset->private2;
8486
8487 detachnode(db, &node);
8488 }
8489
8490 static isc_result_t
rdataset_first(dns_rdataset_t * rdataset)8491 rdataset_first(dns_rdataset_t *rdataset) {
8492 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8493 unsigned int count;
8494
8495 count = raw[0] * 256 + raw[1];
8496 if (count == 0) {
8497 rdataset->private5 = NULL;
8498 return (ISC_R_NOMORE);
8499 }
8500
8501 #if DNS_RDATASET_FIXED
8502 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
8503 raw += 2 + (4 * count);
8504 else
8505 #endif
8506 raw += 2;
8507
8508 /*
8509 * The privateuint4 field is the number of rdata beyond the
8510 * cursor position, so we decrement the total count by one
8511 * before storing it.
8512 *
8513 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
8514 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
8515 * to the first entry in the offset table.
8516 */
8517 count--;
8518 rdataset->privateuint4 = count;
8519 rdataset->private5 = raw;
8520
8521 return (ISC_R_SUCCESS);
8522 }
8523
8524 static isc_result_t
rdataset_next(dns_rdataset_t * rdataset)8525 rdataset_next(dns_rdataset_t *rdataset) {
8526 unsigned int count;
8527 unsigned int length;
8528 unsigned char *raw; /* RDATASLAB */
8529
8530 count = rdataset->privateuint4;
8531 if (count == 0)
8532 return (ISC_R_NOMORE);
8533 count--;
8534 rdataset->privateuint4 = count;
8535
8536 /*
8537 * Skip forward one record (length + 4) or one offset (4).
8538 */
8539 raw = rdataset->private5;
8540 #if DNS_RDATASET_FIXED
8541 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
8542 #endif
8543 length = raw[0] * 256 + raw[1];
8544 raw += length;
8545 #if DNS_RDATASET_FIXED
8546 }
8547 rdataset->private5 = raw + 4; /* length(2) + order(2) */
8548 #else
8549 rdataset->private5 = raw + 2; /* length(2) */
8550 #endif
8551
8552 return (ISC_R_SUCCESS);
8553 }
8554
8555 static void
rdataset_current(dns_rdataset_t * rdataset,dns_rdata_t * rdata)8556 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
8557 unsigned char *raw = rdataset->private5; /* RDATASLAB */
8558 #if DNS_RDATASET_FIXED
8559 unsigned int offset;
8560 #endif
8561 unsigned int length;
8562 isc_region_t r;
8563 unsigned int flags = 0;
8564
8565 REQUIRE(raw != NULL);
8566
8567 /*
8568 * Find the start of the record if not already in private5
8569 * then skip the length and order fields.
8570 */
8571 #if DNS_RDATASET_FIXED
8572 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
8573 offset = (raw[0] << 24) + (raw[1] << 16) +
8574 (raw[2] << 8) + raw[3];
8575 raw = rdataset->private3;
8576 raw += offset;
8577 }
8578 #endif
8579 length = raw[0] * 256 + raw[1];
8580 #if DNS_RDATASET_FIXED
8581 raw += 4;
8582 #else
8583 raw += 2;
8584 #endif
8585 if (rdataset->type == dns_rdatatype_rrsig) {
8586 if (*raw & DNS_RDATASLAB_OFFLINE)
8587 flags |= DNS_RDATA_OFFLINE;
8588 length--;
8589 raw++;
8590 }
8591 r.length = length;
8592 r.base = raw;
8593 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
8594 rdata->flags |= flags;
8595 }
8596
8597 static void
rdataset_clone(dns_rdataset_t * source,dns_rdataset_t * target)8598 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
8599 dns_db_t *db = source->private1;
8600 dns_dbnode_t *node = source->private2;
8601 dns_dbnode_t *cloned_node = NULL;
8602
8603 attachnode(db, node, &cloned_node);
8604 INSIST(!ISC_LINK_LINKED(target, link));
8605 *target = *source;
8606 ISC_LINK_INIT(target, link);
8607
8608 /*
8609 * Reset iterator state.
8610 */
8611 target->privateuint4 = 0;
8612 target->private5 = NULL;
8613 }
8614
8615 static unsigned int
rdataset_count(dns_rdataset_t * rdataset)8616 rdataset_count(dns_rdataset_t *rdataset) {
8617 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8618 unsigned int count;
8619
8620 count = raw[0] * 256 + raw[1];
8621
8622 return (count);
8623 }
8624
8625 static isc_result_t
rdataset_getnoqname(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)8626 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
8627 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8628 {
8629 dns_db_t *db = rdataset->private1;
8630 dns_dbnode_t *node = rdataset->private2;
8631 dns_dbnode_t *cloned_node;
8632 struct noqname *noqname = rdataset->private6;
8633
8634 cloned_node = NULL;
8635 attachnode(db, node, &cloned_node);
8636 nsec->methods = &rdataset_methods;
8637 nsec->rdclass = db->rdclass;
8638 nsec->type = noqname->type;
8639 nsec->covers = 0;
8640 nsec->ttl = rdataset->ttl;
8641 nsec->trust = rdataset->trust;
8642 nsec->private1 = rdataset->private1;
8643 nsec->private2 = rdataset->private2;
8644 nsec->private3 = noqname->neg;
8645 nsec->privateuint4 = 0;
8646 nsec->private5 = NULL;
8647 nsec->private6 = NULL;
8648 nsec->private7 = NULL;
8649
8650 cloned_node = NULL;
8651 attachnode(db, node, &cloned_node);
8652 nsecsig->methods = &rdataset_methods;
8653 nsecsig->rdclass = db->rdclass;
8654 nsecsig->type = dns_rdatatype_rrsig;
8655 nsecsig->covers = noqname->type;
8656 nsecsig->ttl = rdataset->ttl;
8657 nsecsig->trust = rdataset->trust;
8658 nsecsig->private1 = rdataset->private1;
8659 nsecsig->private2 = rdataset->private2;
8660 nsecsig->private3 = noqname->negsig;
8661 nsecsig->privateuint4 = 0;
8662 nsecsig->private5 = NULL;
8663 nsec->private6 = NULL;
8664 nsec->private7 = NULL;
8665
8666 dns_name_clone(&noqname->name, name);
8667
8668 return (ISC_R_SUCCESS);
8669 }
8670
8671 static isc_result_t
rdataset_getclosest(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)8672 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8673 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8674 {
8675 dns_db_t *db = rdataset->private1;
8676 dns_dbnode_t *node = rdataset->private2;
8677 dns_dbnode_t *cloned_node;
8678 struct noqname *closest = rdataset->private7;
8679
8680 cloned_node = NULL;
8681 attachnode(db, node, &cloned_node);
8682 nsec->methods = &rdataset_methods;
8683 nsec->rdclass = db->rdclass;
8684 nsec->type = closest->type;
8685 nsec->covers = 0;
8686 nsec->ttl = rdataset->ttl;
8687 nsec->trust = rdataset->trust;
8688 nsec->private1 = rdataset->private1;
8689 nsec->private2 = rdataset->private2;
8690 nsec->private3 = closest->neg;
8691 nsec->privateuint4 = 0;
8692 nsec->private5 = NULL;
8693 nsec->private6 = NULL;
8694 nsec->private7 = NULL;
8695
8696 cloned_node = NULL;
8697 attachnode(db, node, &cloned_node);
8698 nsecsig->methods = &rdataset_methods;
8699 nsecsig->rdclass = db->rdclass;
8700 nsecsig->type = dns_rdatatype_rrsig;
8701 nsecsig->covers = closest->type;
8702 nsecsig->ttl = rdataset->ttl;
8703 nsecsig->trust = rdataset->trust;
8704 nsecsig->private1 = rdataset->private1;
8705 nsecsig->private2 = rdataset->private2;
8706 nsecsig->private3 = closest->negsig;
8707 nsecsig->privateuint4 = 0;
8708 nsecsig->private5 = NULL;
8709 nsec->private6 = NULL;
8710 nsec->private7 = NULL;
8711
8712 dns_name_clone(&closest->name, name);
8713
8714 return (ISC_R_SUCCESS);
8715 }
8716
8717 static void
rdataset_settrust(dns_rdataset_t * rdataset,dns_trust_t trust)8718 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8719 dns_rbtdb_t *rbtdb = rdataset->private1;
8720 dns_rbtnode_t *rbtnode = rdataset->private2;
8721 rdatasetheader_t *header = rdataset->private3;
8722
8723 header--;
8724 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8725 isc_rwlocktype_write);
8726 header->trust = rdataset->trust = trust;
8727 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8728 isc_rwlocktype_write);
8729 }
8730
8731 static void
rdataset_expire(dns_rdataset_t * rdataset)8732 rdataset_expire(dns_rdataset_t *rdataset) {
8733 dns_rbtdb_t *rbtdb = rdataset->private1;
8734 dns_rbtnode_t *rbtnode = rdataset->private2;
8735 rdatasetheader_t *header = rdataset->private3;
8736
8737 header--;
8738 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8739 isc_rwlocktype_write);
8740 expire_header(rbtdb, header, ISC_FALSE, expire_flush);
8741 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8742 isc_rwlocktype_write);
8743 }
8744
8745 static void
rdataset_clearprefetch(dns_rdataset_t * rdataset)8746 rdataset_clearprefetch(dns_rdataset_t *rdataset) {
8747 dns_rbtdb_t *rbtdb = rdataset->private1;
8748 dns_rbtnode_t *rbtnode = rdataset->private2;
8749 rdatasetheader_t *header = rdataset->private3;
8750
8751 header--;
8752 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8753 isc_rwlocktype_write);
8754 header->attributes &= ~RDATASET_ATTR_PREFETCH;
8755 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8756 isc_rwlocktype_write);
8757 }
8758
8759 /*
8760 * Rdataset Iterator Methods
8761 */
8762
8763 static void
rdatasetiter_destroy(dns_rdatasetiter_t ** iteratorp)8764 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8765 rbtdb_rdatasetiter_t *rbtiterator;
8766
8767 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8768
8769 if (rbtiterator->common.version != NULL)
8770 closeversion(rbtiterator->common.db,
8771 &rbtiterator->common.version, ISC_FALSE);
8772 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8773 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8774 sizeof(*rbtiterator));
8775
8776 *iteratorp = NULL;
8777 }
8778
8779 static isc_result_t
rdatasetiter_first(dns_rdatasetiter_t * iterator)8780 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8781 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8782 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8783 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8784 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8785 rdatasetheader_t *header, *top_next;
8786 rbtdb_serial_t serial;
8787 isc_stdtime_t now;
8788
8789 if (IS_CACHE(rbtdb)) {
8790 serial = 1;
8791 now = rbtiterator->common.now;
8792 } else {
8793 serial = rbtversion->serial;
8794 now = 0;
8795 }
8796
8797 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8798 isc_rwlocktype_read);
8799
8800 for (header = rbtnode->data; header != NULL; header = top_next) {
8801 top_next = header->next;
8802 do {
8803 if (header->serial <= serial && !IGNORE(header)) {
8804 /*
8805 * Is this a "this rdataset doesn't exist"
8806 * record? Or is it too old in the cache?
8807 *
8808 * Note: unlike everywhere else, we
8809 * check for now > header->rdh_ttl instead
8810 * of now >= header->rdh_ttl. This allows
8811 * ANY and RRSIG queries for 0 TTL
8812 * rdatasets to work.
8813 */
8814 if (NONEXISTENT(header) ||
8815 (now != 0 && now > header->rdh_ttl))
8816 header = NULL;
8817 break;
8818 } else
8819 header = header->down;
8820 } while (header != NULL);
8821 if (header != NULL)
8822 break;
8823 }
8824
8825 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8826 isc_rwlocktype_read);
8827
8828 rbtiterator->current = header;
8829
8830 if (header == NULL)
8831 return (ISC_R_NOMORE);
8832
8833 return (ISC_R_SUCCESS);
8834 }
8835
8836 static isc_result_t
rdatasetiter_next(dns_rdatasetiter_t * iterator)8837 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8838 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8839 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8840 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8841 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8842 rdatasetheader_t *header, *top_next;
8843 rbtdb_serial_t serial;
8844 isc_stdtime_t now;
8845 rbtdb_rdatatype_t type, negtype;
8846 dns_rdatatype_t rdtype, covers;
8847
8848 header = rbtiterator->current;
8849 if (header == NULL)
8850 return (ISC_R_NOMORE);
8851
8852 if (IS_CACHE(rbtdb)) {
8853 serial = 1;
8854 now = rbtiterator->common.now;
8855 } else {
8856 serial = rbtversion->serial;
8857 now = 0;
8858 }
8859
8860 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8861 isc_rwlocktype_read);
8862
8863 type = header->type;
8864 rdtype = RBTDB_RDATATYPE_BASE(header->type);
8865 if (NEGATIVE(header)) {
8866 covers = RBTDB_RDATATYPE_EXT(header->type);
8867 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8868 } else
8869 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8870 for (header = header->next; header != NULL; header = top_next) {
8871 top_next = header->next;
8872 /*
8873 * If not walking back up the down list.
8874 */
8875 if (header->type != type && header->type != negtype) {
8876 do {
8877 if (header->serial <= serial &&
8878 !IGNORE(header)) {
8879 /*
8880 * Is this a "this rdataset doesn't
8881 * exist" record?
8882 *
8883 * Note: unlike everywhere else, we
8884 * check for now > header->ttl instead
8885 * of now >= header->ttl. This allows
8886 * ANY and RRSIG queries for 0 TTL
8887 * rdatasets to work.
8888 */
8889 if ((header->attributes &
8890 RDATASET_ATTR_NONEXISTENT) != 0 ||
8891 (now != 0 && now > header->rdh_ttl))
8892 header = NULL;
8893 break;
8894 } else
8895 header = header->down;
8896 } while (header != NULL);
8897 if (header != NULL)
8898 break;
8899 }
8900 }
8901
8902 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8903 isc_rwlocktype_read);
8904
8905 rbtiterator->current = header;
8906
8907 if (header == NULL)
8908 return (ISC_R_NOMORE);
8909
8910 return (ISC_R_SUCCESS);
8911 }
8912
8913 static void
rdatasetiter_current(dns_rdatasetiter_t * iterator,dns_rdataset_t * rdataset)8914 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8915 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8916 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8917 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8918 rdatasetheader_t *header;
8919
8920 header = rbtiterator->current;
8921 REQUIRE(header != NULL);
8922
8923 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8924 isc_rwlocktype_read);
8925
8926 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8927 rdataset);
8928
8929 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8930 isc_rwlocktype_read);
8931 }
8932
8933
8934 /*
8935 * Database Iterator Methods
8936 */
8937
8938 static inline void
reference_iter_node(rbtdb_dbiterator_t * rbtdbiter)8939 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8940 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8941 dns_rbtnode_t *node = rbtdbiter->node;
8942
8943 if (node == NULL)
8944 return;
8945
8946 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8947 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8948 }
8949
8950 static inline void
dereference_iter_node(rbtdb_dbiterator_t * rbtdbiter)8951 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8952 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8953 dns_rbtnode_t *node = rbtdbiter->node;
8954 nodelock_t *lock;
8955
8956 if (node == NULL)
8957 return;
8958
8959 lock = &rbtdb->node_locks[node->locknum].lock;
8960 NODE_LOCK(lock, isc_rwlocktype_read);
8961 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8962 rbtdbiter->tree_locked, ISC_FALSE);
8963 NODE_UNLOCK(lock, isc_rwlocktype_read);
8964
8965 rbtdbiter->node = NULL;
8966 }
8967
8968 static void
flush_deletions(rbtdb_dbiterator_t * rbtdbiter)8969 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8970 dns_rbtnode_t *node;
8971 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8972 isc_boolean_t was_read_locked = ISC_FALSE;
8973 nodelock_t *lock;
8974 int i;
8975
8976 if (rbtdbiter->delete != 0) {
8977 /*
8978 * Note that "%d node of %d in tree" can report things like
8979 * "flush_deletions: 59 nodes of 41 in tree". This means
8980 * That some nodes appear on the deletions list more than
8981 * once. Only the last occurence will actually be deleted.
8982 */
8983 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8984 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8985 "flush_deletions: %d nodes of %d in tree",
8986 rbtdbiter->delete,
8987 dns_rbt_nodecount(rbtdb->tree));
8988
8989 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8990 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8991 was_read_locked = ISC_TRUE;
8992 }
8993 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8994 rbtdbiter->tree_locked = isc_rwlocktype_write;
8995
8996 for (i = 0; i < rbtdbiter->delete; i++) {
8997 node = rbtdbiter->deletions[i];
8998 lock = &rbtdb->node_locks[node->locknum].lock;
8999
9000 NODE_LOCK(lock, isc_rwlocktype_read);
9001 decrement_reference(rbtdb, node, 0,
9002 isc_rwlocktype_read,
9003 rbtdbiter->tree_locked, ISC_FALSE);
9004 NODE_UNLOCK(lock, isc_rwlocktype_read);
9005 }
9006
9007 rbtdbiter->delete = 0;
9008
9009 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
9010 if (was_read_locked) {
9011 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9012 rbtdbiter->tree_locked = isc_rwlocktype_read;
9013
9014 } else {
9015 rbtdbiter->tree_locked = isc_rwlocktype_none;
9016 }
9017 }
9018 }
9019
9020 static inline void
resume_iteration(rbtdb_dbiterator_t * rbtdbiter)9021 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
9022 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9023
9024 REQUIRE(rbtdbiter->paused);
9025 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
9026
9027 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9028 rbtdbiter->tree_locked = isc_rwlocktype_read;
9029
9030 rbtdbiter->paused = ISC_FALSE;
9031 }
9032
9033 static void
dbiterator_destroy(dns_dbiterator_t ** iteratorp)9034 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
9035 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
9036 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9037 dns_db_t *db = NULL;
9038
9039 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
9040 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9041 rbtdbiter->tree_locked = isc_rwlocktype_none;
9042 } else
9043 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
9044
9045 dereference_iter_node(rbtdbiter);
9046
9047 flush_deletions(rbtdbiter);
9048
9049 dns_db_attach(rbtdbiter->common.db, &db);
9050 dns_db_detach(&rbtdbiter->common.db);
9051
9052 dns_rbtnodechain_reset(&rbtdbiter->chain);
9053 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9054 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
9055 dns_db_detach(&db);
9056
9057 *iteratorp = NULL;
9058 }
9059
9060 static isc_result_t
dbiterator_first(dns_dbiterator_t * iterator)9061 dbiterator_first(dns_dbiterator_t *iterator) {
9062 isc_result_t result;
9063 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9064 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9065 dns_name_t *name, *origin;
9066
9067 if (rbtdbiter->result != ISC_R_SUCCESS &&
9068 rbtdbiter->result != ISC_R_NOMORE)
9069 return (rbtdbiter->result);
9070
9071 if (rbtdbiter->paused)
9072 resume_iteration(rbtdbiter);
9073
9074 dereference_iter_node(rbtdbiter);
9075
9076 name = dns_fixedname_name(&rbtdbiter->name);
9077 origin = dns_fixedname_name(&rbtdbiter->origin);
9078 dns_rbtnodechain_reset(&rbtdbiter->chain);
9079 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9080
9081 if (rbtdbiter->nsec3only) {
9082 rbtdbiter->current = &rbtdbiter->nsec3chain;
9083 result = dns_rbtnodechain_first(rbtdbiter->current,
9084 rbtdb->nsec3, name, origin);
9085 } else {
9086 rbtdbiter->current = &rbtdbiter->chain;
9087 result = dns_rbtnodechain_first(rbtdbiter->current,
9088 rbtdb->tree, name, origin);
9089 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
9090 rbtdbiter->current = &rbtdbiter->nsec3chain;
9091 result = dns_rbtnodechain_first(rbtdbiter->current,
9092 rbtdb->nsec3, name,
9093 origin);
9094 }
9095 }
9096 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9097 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9098 NULL, &rbtdbiter->node);
9099 if (result == ISC_R_SUCCESS) {
9100 rbtdbiter->new_origin = ISC_TRUE;
9101 reference_iter_node(rbtdbiter);
9102 }
9103 } else {
9104 INSIST(result == ISC_R_NOTFOUND);
9105 result = ISC_R_NOMORE; /* The tree is empty. */
9106 }
9107
9108 rbtdbiter->result = result;
9109
9110 return (result);
9111 }
9112
9113 static isc_result_t
dbiterator_last(dns_dbiterator_t * iterator)9114 dbiterator_last(dns_dbiterator_t *iterator) {
9115 isc_result_t result;
9116 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9117 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9118 dns_name_t *name, *origin;
9119
9120 if (rbtdbiter->result != ISC_R_SUCCESS &&
9121 rbtdbiter->result != ISC_R_NOMORE)
9122 return (rbtdbiter->result);
9123
9124 if (rbtdbiter->paused)
9125 resume_iteration(rbtdbiter);
9126
9127 dereference_iter_node(rbtdbiter);
9128
9129 name = dns_fixedname_name(&rbtdbiter->name);
9130 origin = dns_fixedname_name(&rbtdbiter->origin);
9131 dns_rbtnodechain_reset(&rbtdbiter->chain);
9132 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9133
9134 result = ISC_R_NOTFOUND;
9135 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
9136 rbtdbiter->current = &rbtdbiter->nsec3chain;
9137 result = dns_rbtnodechain_last(rbtdbiter->current,
9138 rbtdb->nsec3, name, origin);
9139 }
9140 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
9141 rbtdbiter->current = &rbtdbiter->chain;
9142 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9143 name, origin);
9144 }
9145 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9146 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9147 NULL, &rbtdbiter->node);
9148 if (result == ISC_R_SUCCESS) {
9149 rbtdbiter->new_origin = ISC_TRUE;
9150 reference_iter_node(rbtdbiter);
9151 }
9152 } else {
9153 INSIST(result == ISC_R_NOTFOUND);
9154 result = ISC_R_NOMORE; /* The tree is empty. */
9155 }
9156
9157 rbtdbiter->result = result;
9158
9159 return (result);
9160 }
9161
9162 static isc_result_t
dbiterator_seek(dns_dbiterator_t * iterator,dns_name_t * name)9163 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
9164 isc_result_t result, tresult;
9165 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9166 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9167 dns_name_t *iname, *origin;
9168
9169 if (rbtdbiter->result != ISC_R_SUCCESS &&
9170 rbtdbiter->result != ISC_R_NOTFOUND &&
9171 rbtdbiter->result != ISC_R_NOMORE)
9172 return (rbtdbiter->result);
9173
9174 if (rbtdbiter->paused)
9175 resume_iteration(rbtdbiter);
9176
9177 dereference_iter_node(rbtdbiter);
9178
9179 iname = dns_fixedname_name(&rbtdbiter->name);
9180 origin = dns_fixedname_name(&rbtdbiter->origin);
9181 dns_rbtnodechain_reset(&rbtdbiter->chain);
9182 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9183
9184 if (rbtdbiter->nsec3only) {
9185 rbtdbiter->current = &rbtdbiter->nsec3chain;
9186 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
9187 &rbtdbiter->node,
9188 rbtdbiter->current,
9189 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9190 } else if (rbtdbiter->nonsec3) {
9191 rbtdbiter->current = &rbtdbiter->chain;
9192 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9193 &rbtdbiter->node,
9194 rbtdbiter->current,
9195 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9196 } else {
9197 /*
9198 * Stay on main chain if not found on either chain.
9199 */
9200 rbtdbiter->current = &rbtdbiter->chain;
9201 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9202 &rbtdbiter->node,
9203 rbtdbiter->current,
9204 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9205 if (result == DNS_R_PARTIALMATCH) {
9206 dns_rbtnode_t *node = NULL;
9207 tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
9208 &node, &rbtdbiter->nsec3chain,
9209 DNS_RBTFIND_EMPTYDATA,
9210 NULL, NULL);
9211 if (tresult == ISC_R_SUCCESS) {
9212 rbtdbiter->node = node;
9213 rbtdbiter->current = &rbtdbiter->nsec3chain;
9214 result = tresult;
9215 }
9216 }
9217 }
9218
9219 #if 1
9220 if (result == ISC_R_SUCCESS) {
9221 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
9222 origin, NULL);
9223 if (result == ISC_R_SUCCESS) {
9224 rbtdbiter->new_origin = ISC_TRUE;
9225 reference_iter_node(rbtdbiter);
9226 }
9227 } else if (result == DNS_R_PARTIALMATCH) {
9228 result = ISC_R_NOTFOUND;
9229 rbtdbiter->node = NULL;
9230 }
9231
9232 rbtdbiter->result = result;
9233 #else
9234 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
9235 isc_result_t tresult;
9236 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
9237 origin, NULL);
9238 if (tresult == ISC_R_SUCCESS) {
9239 rbtdbiter->new_origin = ISC_TRUE;
9240 reference_iter_node(rbtdbiter);
9241 } else {
9242 result = tresult;
9243 rbtdbiter->node = NULL;
9244 }
9245 } else
9246 rbtdbiter->node = NULL;
9247
9248 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
9249 ISC_R_SUCCESS : result;
9250 #endif
9251
9252 return (result);
9253 }
9254
9255 static isc_result_t
dbiterator_prev(dns_dbiterator_t * iterator)9256 dbiterator_prev(dns_dbiterator_t *iterator) {
9257 isc_result_t result;
9258 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9259 dns_name_t *name, *origin;
9260 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9261
9262 REQUIRE(rbtdbiter->node != NULL);
9263
9264 if (rbtdbiter->result != ISC_R_SUCCESS)
9265 return (rbtdbiter->result);
9266
9267 if (rbtdbiter->paused)
9268 resume_iteration(rbtdbiter);
9269
9270 name = dns_fixedname_name(&rbtdbiter->name);
9271 origin = dns_fixedname_name(&rbtdbiter->origin);
9272 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
9273 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9274 !rbtdbiter->nonsec3 &&
9275 &rbtdbiter->nsec3chain == rbtdbiter->current) {
9276 rbtdbiter->current = &rbtdbiter->chain;
9277 dns_rbtnodechain_reset(rbtdbiter->current);
9278 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9279 name, origin);
9280 if (result == ISC_R_NOTFOUND)
9281 result = ISC_R_NOMORE;
9282 }
9283
9284 dereference_iter_node(rbtdbiter);
9285
9286 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9287 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
9288 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9289 NULL, &rbtdbiter->node);
9290 }
9291
9292 if (result == ISC_R_SUCCESS)
9293 reference_iter_node(rbtdbiter);
9294
9295 rbtdbiter->result = result;
9296
9297 return (result);
9298 }
9299
9300 static isc_result_t
dbiterator_next(dns_dbiterator_t * iterator)9301 dbiterator_next(dns_dbiterator_t *iterator) {
9302 isc_result_t result;
9303 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9304 dns_name_t *name, *origin;
9305 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9306
9307 REQUIRE(rbtdbiter->node != NULL);
9308
9309 if (rbtdbiter->result != ISC_R_SUCCESS)
9310 return (rbtdbiter->result);
9311
9312 if (rbtdbiter->paused)
9313 resume_iteration(rbtdbiter);
9314
9315 name = dns_fixedname_name(&rbtdbiter->name);
9316 origin = dns_fixedname_name(&rbtdbiter->origin);
9317 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
9318 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9319 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
9320 rbtdbiter->current = &rbtdbiter->nsec3chain;
9321 dns_rbtnodechain_reset(rbtdbiter->current);
9322 result = dns_rbtnodechain_first(rbtdbiter->current,
9323 rbtdb->nsec3, name, origin);
9324 if (result == ISC_R_NOTFOUND)
9325 result = ISC_R_NOMORE;
9326 }
9327
9328 dereference_iter_node(rbtdbiter);
9329
9330 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9331 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
9332 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9333 NULL, &rbtdbiter->node);
9334 }
9335 if (result == ISC_R_SUCCESS)
9336 reference_iter_node(rbtdbiter);
9337
9338 rbtdbiter->result = result;
9339
9340 return (result);
9341 }
9342
9343 static isc_result_t
dbiterator_current(dns_dbiterator_t * iterator,dns_dbnode_t ** nodep,dns_name_t * name)9344 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
9345 dns_name_t *name)
9346 {
9347 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9348 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9349 dns_rbtnode_t *node = rbtdbiter->node;
9350 isc_result_t result;
9351 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
9352 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9353
9354 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
9355 REQUIRE(rbtdbiter->node != NULL);
9356
9357 if (rbtdbiter->paused)
9358 resume_iteration(rbtdbiter);
9359
9360 if (name != NULL) {
9361 if (rbtdbiter->common.relative_names)
9362 origin = NULL;
9363 result = dns_name_concatenate(nodename, origin, name, NULL);
9364 if (result != ISC_R_SUCCESS)
9365 return (result);
9366 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
9367 result = DNS_R_NEWORIGIN;
9368 } else
9369 result = ISC_R_SUCCESS;
9370
9371 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
9372 new_reference(rbtdb, node);
9373 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
9374
9375 *nodep = rbtdbiter->node;
9376
9377 if (iterator->cleaning && result == ISC_R_SUCCESS) {
9378 isc_result_t expire_result;
9379
9380 /*
9381 * If the deletion array is full, flush it before trying
9382 * to expire the current node. The current node can't
9383 * fully deleted while the iteration cursor is still on it.
9384 */
9385 if (rbtdbiter->delete == DELETION_BATCH_MAX)
9386 flush_deletions(rbtdbiter);
9387
9388 expire_result = expirenode(iterator->db, *nodep, 0);
9389
9390 /*
9391 * expirenode() currently always returns success.
9392 */
9393 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
9394 unsigned int refs;
9395
9396 rbtdbiter->deletions[rbtdbiter->delete++] = node;
9397 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
9398 dns_rbtnode_refincrement(node, &refs);
9399 INSIST(refs != 0);
9400 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
9401 }
9402 }
9403
9404 return (result);
9405 }
9406
9407 static isc_result_t
dbiterator_pause(dns_dbiterator_t * iterator)9408 dbiterator_pause(dns_dbiterator_t *iterator) {
9409 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9410 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9411
9412 if (rbtdbiter->result != ISC_R_SUCCESS &&
9413 rbtdbiter->result != ISC_R_NOMORE)
9414 return (rbtdbiter->result);
9415
9416 if (rbtdbiter->paused)
9417 return (ISC_R_SUCCESS);
9418
9419 rbtdbiter->paused = ISC_TRUE;
9420
9421 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
9422 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
9423 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9424 rbtdbiter->tree_locked = isc_rwlocktype_none;
9425 }
9426
9427 flush_deletions(rbtdbiter);
9428
9429 return (ISC_R_SUCCESS);
9430 }
9431
9432 static isc_result_t
dbiterator_origin(dns_dbiterator_t * iterator,dns_name_t * name)9433 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
9434 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9435 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9436
9437 if (rbtdbiter->result != ISC_R_SUCCESS)
9438 return (rbtdbiter->result);
9439
9440 return (dns_name_copy(origin, name, NULL));
9441 }
9442
9443 /*%
9444 * Additional cache routines.
9445 */
9446 static isc_result_t
rdataset_getadditional(dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype,dns_acache_t * acache,dns_zone_t ** zonep,dns_db_t ** dbp,dns_dbversion_t ** versionp,dns_dbnode_t ** nodep,dns_name_t * fname,dns_message_t * msg,isc_stdtime_t now)9447 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
9448 dns_rdatatype_t qtype, dns_acache_t *acache,
9449 dns_zone_t **zonep, dns_db_t **dbp,
9450 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
9451 dns_name_t *fname, dns_message_t *msg,
9452 isc_stdtime_t now)
9453 {
9454 dns_rbtdb_t *rbtdb = rdataset->private1;
9455 dns_rbtnode_t *rbtnode = rdataset->private2;
9456 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9457 unsigned int current_count = rdataset->privateuint4;
9458 unsigned int count;
9459 rdatasetheader_t *header;
9460 nodelock_t *nodelock;
9461 unsigned int total_count;
9462 acachectl_t *acarray;
9463 dns_acacheentry_t *entry;
9464 isc_result_t result;
9465
9466 UNUSED(qtype); /* we do not use this value at least for now */
9467 UNUSED(acache);
9468
9469 header = (struct rdatasetheader *)(raw - sizeof(*header));
9470
9471 total_count = raw[0] * 256 + raw[1];
9472 INSIST(total_count > current_count);
9473 count = total_count - current_count - 1;
9474
9475 acarray = NULL;
9476
9477 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9478 NODE_LOCK(nodelock, isc_rwlocktype_read);
9479
9480 switch (type) {
9481 case dns_rdatasetadditional_fromauth:
9482 acarray = header->additional_auth;
9483 break;
9484 case dns_rdatasetadditional_fromcache:
9485 acarray = NULL;
9486 break;
9487 case dns_rdatasetadditional_fromglue:
9488 acarray = header->additional_glue;
9489 break;
9490 default:
9491 INSIST(0);
9492 }
9493
9494 if (acarray == NULL) {
9495 if (type != dns_rdatasetadditional_fromcache)
9496 dns_acache_countquerymiss(acache);
9497 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9498 return (ISC_R_NOTFOUND);
9499 }
9500
9501 if (acarray[count].entry == NULL) {
9502 dns_acache_countquerymiss(acache);
9503 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9504 return (ISC_R_NOTFOUND);
9505 }
9506
9507 entry = NULL;
9508 dns_acache_attachentry(acarray[count].entry, &entry);
9509
9510 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
9511
9512 result = dns_acache_getentry(entry, zonep, dbp, versionp,
9513 nodep, fname, msg, now);
9514
9515 dns_acache_detachentry(&entry);
9516
9517 return (result);
9518 }
9519
9520 static void
acache_callback(dns_acacheentry_t * entry,void ** arg)9521 acache_callback(dns_acacheentry_t *entry, void **arg) {
9522 dns_rbtdb_t *rbtdb;
9523 dns_rbtnode_t *rbtnode;
9524 nodelock_t *nodelock;
9525 acachectl_t *acarray = NULL;
9526 acache_cbarg_t *cbarg;
9527 unsigned int count;
9528
9529 REQUIRE(arg != NULL);
9530 cbarg = *arg;
9531
9532 /*
9533 * The caller must hold the entry lock.
9534 */
9535
9536 rbtdb = (dns_rbtdb_t *)cbarg->db;
9537 rbtnode = (dns_rbtnode_t *)cbarg->node;
9538
9539 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9540 NODE_LOCK(nodelock, isc_rwlocktype_write);
9541
9542 switch (cbarg->type) {
9543 case dns_rdatasetadditional_fromauth:
9544 acarray = cbarg->header->additional_auth;
9545 break;
9546 case dns_rdatasetadditional_fromglue:
9547 acarray = cbarg->header->additional_glue;
9548 break;
9549 default:
9550 INSIST(0);
9551 }
9552
9553 count = cbarg->count;
9554 if (acarray != NULL && acarray[count].entry == entry) {
9555 acarray[count].entry = NULL;
9556 INSIST(acarray[count].cbarg == cbarg);
9557 acarray[count].cbarg = NULL;
9558 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
9559 dns_acache_detachentry(&entry);
9560 }
9561
9562 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9563
9564 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
9565 dns_db_detach((dns_db_t **)(void*)&rbtdb);
9566
9567 *arg = NULL;
9568 }
9569
9570 static void
acache_cancelentry(isc_mem_t * mctx,dns_acacheentry_t * entry,acache_cbarg_t ** cbargp)9571 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
9572 acache_cbarg_t **cbargp)
9573 {
9574 acache_cbarg_t *cbarg;
9575
9576 REQUIRE(mctx != NULL);
9577 REQUIRE(entry != NULL);
9578 REQUIRE(cbargp != NULL && *cbargp != NULL);
9579
9580 cbarg = *cbargp;
9581
9582 if (dns_acache_cancelentry(entry)) {
9583 dns_db_detachnode(cbarg->db, &cbarg->node);
9584 dns_db_detach(&cbarg->db);
9585 }
9586
9587 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
9588
9589 *cbargp = NULL;
9590 }
9591
9592 static isc_result_t
rdataset_setadditional(dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype,dns_acache_t * acache,dns_zone_t * zone,dns_db_t * db,dns_dbversion_t * version,dns_dbnode_t * node,dns_name_t * fname)9593 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
9594 dns_rdatatype_t qtype, dns_acache_t *acache,
9595 dns_zone_t *zone, dns_db_t *db,
9596 dns_dbversion_t *version, dns_dbnode_t *node,
9597 dns_name_t *fname)
9598 {
9599 dns_rbtdb_t *rbtdb = rdataset->private1;
9600 dns_rbtnode_t *rbtnode = rdataset->private2;
9601 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9602 unsigned int current_count = rdataset->privateuint4;
9603 rdatasetheader_t *header;
9604 unsigned int total_count, count;
9605 nodelock_t *nodelock;
9606 isc_result_t result;
9607 acachectl_t *acarray;
9608 dns_acacheentry_t *newentry, *oldentry = NULL;
9609 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
9610
9611 UNUSED(qtype);
9612
9613 if (type == dns_rdatasetadditional_fromcache)
9614 return (ISC_R_SUCCESS);
9615
9616 header = (struct rdatasetheader *)(raw - sizeof(*header));
9617
9618 total_count = raw[0] * 256 + raw[1];
9619 INSIST(total_count > current_count);
9620 count = total_count - current_count - 1; /* should be private data */
9621
9622 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9623 if (newcbarg == NULL)
9624 return (ISC_R_NOMEMORY);
9625 newcbarg->type = type;
9626 newcbarg->count = count;
9627 newcbarg->header = header;
9628 newcbarg->db = NULL;
9629 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9630 newcbarg->node = NULL;
9631 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9632 &newcbarg->node);
9633 newentry = NULL;
9634 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9635 acache_callback, newcbarg, &newentry);
9636 if (result != ISC_R_SUCCESS)
9637 goto fail;
9638
9639 /* Set cache data in the new entry. */
9640 result = dns_acache_setentry(acache, newentry, zone, db,
9641 version, node, fname);
9642 if (result != ISC_R_SUCCESS)
9643 goto fail;
9644
9645 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9646 NODE_LOCK(nodelock, isc_rwlocktype_write);
9647
9648 acarray = NULL;
9649 switch (type) {
9650 case dns_rdatasetadditional_fromauth:
9651 acarray = header->additional_auth;
9652 break;
9653 case dns_rdatasetadditional_fromglue:
9654 acarray = header->additional_glue;
9655 break;
9656 default:
9657 INSIST(0);
9658 }
9659
9660 if (acarray == NULL) {
9661 unsigned int i;
9662
9663 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9664 sizeof(acachectl_t));
9665
9666 if (acarray == NULL) {
9667 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9668 goto fail;
9669 }
9670
9671 for (i = 0; i < total_count; i++) {
9672 acarray[i].entry = NULL;
9673 acarray[i].cbarg = NULL;
9674 }
9675 }
9676 switch (type) {
9677 case dns_rdatasetadditional_fromauth:
9678 header->additional_auth = acarray;
9679 break;
9680 case dns_rdatasetadditional_fromglue:
9681 header->additional_glue = acarray;
9682 break;
9683 default:
9684 INSIST(0);
9685 }
9686
9687 if (acarray[count].entry != NULL) {
9688 /*
9689 * Swap the entry. Delay cleaning-up the old entry since
9690 * it would require a node lock.
9691 */
9692 oldentry = acarray[count].entry;
9693 INSIST(acarray[count].cbarg != NULL);
9694 oldcbarg = acarray[count].cbarg;
9695 }
9696 acarray[count].entry = newentry;
9697 acarray[count].cbarg = newcbarg;
9698
9699 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9700
9701 if (oldentry != NULL) {
9702 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9703 dns_acache_detachentry(&oldentry);
9704 }
9705
9706 return (ISC_R_SUCCESS);
9707
9708 fail:
9709 if (newcbarg != NULL) {
9710 if (newentry != NULL) {
9711 acache_cancelentry(rbtdb->common.mctx, newentry,
9712 &newcbarg);
9713 dns_acache_detachentry(&newentry);
9714 } else {
9715 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9716 dns_db_detach(&newcbarg->db);
9717 isc_mem_put(rbtdb->common.mctx, newcbarg,
9718 sizeof(*newcbarg));
9719 }
9720 }
9721
9722 return (result);
9723 }
9724
9725 static isc_result_t
rdataset_putadditional(dns_acache_t * acache,dns_rdataset_t * rdataset,dns_rdatasetadditional_t type,dns_rdatatype_t qtype)9726 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9727 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9728 {
9729 dns_rbtdb_t *rbtdb = rdataset->private1;
9730 dns_rbtnode_t *rbtnode = rdataset->private2;
9731 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9732 unsigned int current_count = rdataset->privateuint4;
9733 rdatasetheader_t *header;
9734 nodelock_t *nodelock;
9735 unsigned int total_count, count;
9736 acachectl_t *acarray;
9737 dns_acacheentry_t *entry;
9738 acache_cbarg_t *cbarg;
9739
9740 UNUSED(qtype); /* we do not use this value at least for now */
9741 UNUSED(acache);
9742
9743 if (type == dns_rdatasetadditional_fromcache)
9744 return (ISC_R_SUCCESS);
9745
9746 header = (struct rdatasetheader *)(raw - sizeof(*header));
9747
9748 total_count = raw[0] * 256 + raw[1];
9749 INSIST(total_count > current_count);
9750 count = total_count - current_count - 1;
9751
9752 acarray = NULL;
9753 entry = NULL;
9754
9755 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9756 NODE_LOCK(nodelock, isc_rwlocktype_write);
9757
9758 switch (type) {
9759 case dns_rdatasetadditional_fromauth:
9760 acarray = header->additional_auth;
9761 break;
9762 case dns_rdatasetadditional_fromglue:
9763 acarray = header->additional_glue;
9764 break;
9765 default:
9766 INSIST(0);
9767 }
9768
9769 if (acarray == NULL) {
9770 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9771 return (ISC_R_NOTFOUND);
9772 }
9773
9774 entry = acarray[count].entry;
9775 if (entry == NULL) {
9776 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9777 return (ISC_R_NOTFOUND);
9778 }
9779
9780 acarray[count].entry = NULL;
9781 cbarg = acarray[count].cbarg;
9782 acarray[count].cbarg = NULL;
9783
9784 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9785
9786 if (entry != NULL) {
9787 if (cbarg != NULL)
9788 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9789 dns_acache_detachentry(&entry);
9790 }
9791
9792 return (ISC_R_SUCCESS);
9793 }
9794
9795 /*%
9796 * Routines for LRU-based cache management.
9797 */
9798
9799 /*%
9800 * See if a given cache entry that is being reused needs to be updated
9801 * in the LRU-list. From the LRU management point of view, this function is
9802 * expected to return true for almost all cases. When used with threads,
9803 * however, this may cause a non-negligible performance penalty because a
9804 * writer lock will have to be acquired before updating the list.
9805 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9806 * function returns true if the entry has not been updated for some period of
9807 * time. We differentiate the NS or glue address case and the others since
9808 * experiments have shown that the former tends to be accessed relatively
9809 * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9810 * may cause external queries at a higher level zone, involving more
9811 * transactions).
9812 *
9813 * Caller must hold the node (read or write) lock.
9814 */
9815 static inline isc_boolean_t
need_headerupdate(rdatasetheader_t * header,isc_stdtime_t now)9816 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9817 if ((header->attributes &
9818 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9819 return (ISC_FALSE);
9820
9821 #if DNS_RBTDB_LIMITLRUUPDATE
9822 if (header->type == dns_rdatatype_ns ||
9823 (header->trust == dns_trust_glue &&
9824 (header->type == dns_rdatatype_a ||
9825 header->type == dns_rdatatype_aaaa))) {
9826 /*
9827 * Glue records are updated if at least 60 seconds have passed
9828 * since the previous update time.
9829 */
9830 return (header->last_used + 60 <= now);
9831 }
9832
9833 /* Other records are updated if 5 minutes have passed. */
9834 return (header->last_used + 300 <= now);
9835 #else
9836 UNUSED(now);
9837
9838 return (ISC_TRUE);
9839 #endif
9840 }
9841
9842 /*%
9843 * Update the timestamp of a given cache entry and move it to the head
9844 * of the corresponding LRU list.
9845 *
9846 * Caller must hold the node (write) lock.
9847 *
9848 * Note that the we do NOT touch the heap here, as the TTL has not changed.
9849 */
9850 static void
update_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_stdtime_t now)9851 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9852 isc_stdtime_t now)
9853 {
9854 INSIST(IS_CACHE(rbtdb));
9855
9856 /* To be checked: can we really assume this? XXXMLG */
9857 INSIST(ISC_LINK_LINKED(header, link));
9858
9859 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9860 header->last_used = now;
9861 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9862 }
9863
9864 /*%
9865 * Purge some expired and/or stale (i.e. unused for some period) cache entries
9866 * under an overmem condition. To recover from this condition quickly, up to
9867 * 2 entries will be purged. This process is triggered while adding a new
9868 * entry, and we specifically avoid purging entries in the same LRU bucket as
9869 * the one to which the new entry will belong. Otherwise, we might purge
9870 * entries of the same name of different RR types while adding RRsets from a
9871 * single response (consider the case where we're adding A and AAAA glue records
9872 * of the same NS name).
9873 */
9874 static void
overmem_purge(dns_rbtdb_t * rbtdb,unsigned int locknum_start,isc_stdtime_t now,isc_boolean_t tree_locked)9875 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9876 isc_stdtime_t now, isc_boolean_t tree_locked)
9877 {
9878 rdatasetheader_t *header, *header_prev;
9879 unsigned int locknum;
9880 int purgecount = 2;
9881
9882 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9883 locknum != locknum_start && purgecount > 0;
9884 locknum = (locknum + 1) % rbtdb->node_lock_count) {
9885 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9886 isc_rwlocktype_write);
9887
9888 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9889 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
9890 expire_header(rbtdb, header, tree_locked,
9891 expire_ttl);
9892 purgecount--;
9893 }
9894
9895 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9896 header != NULL && purgecount > 0;
9897 header = header_prev) {
9898 header_prev = ISC_LIST_PREV(header, link);
9899 /*
9900 * Unlink the entry at this point to avoid checking it
9901 * again even if it's currently used someone else and
9902 * cannot be purged at this moment. This entry won't be
9903 * referenced any more (so unlinking is safe) since the
9904 * TTL was reset to 0.
9905 */
9906 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9907 link);
9908 expire_header(rbtdb, header, tree_locked,
9909 expire_lru);
9910 purgecount--;
9911 }
9912
9913 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9914 isc_rwlocktype_write);
9915 }
9916 }
9917
9918 static void
expire_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_boolean_t tree_locked,expire_t reason)9919 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9920 isc_boolean_t tree_locked, expire_t reason)
9921 {
9922 set_ttl(rbtdb, header, 0);
9923 mark_stale_header(rbtdb, header);
9924
9925 /*
9926 * Caller must hold the node (write) lock.
9927 */
9928
9929 if (dns_rbtnode_refcurrent(header->node) == 0) {
9930 /*
9931 * If no one else is using the node, we can clean it up now.
9932 * We first need to gain a new reference to the node to meet a
9933 * requirement of decrement_reference().
9934 */
9935 new_reference(rbtdb, header->node);
9936 decrement_reference(rbtdb, header->node, 0,
9937 isc_rwlocktype_write,
9938 tree_locked ? isc_rwlocktype_write :
9939 isc_rwlocktype_none, ISC_FALSE);
9940
9941 if (rbtdb->cachestats == NULL)
9942 return;
9943
9944 switch (reason) {
9945 case expire_ttl:
9946 isc_stats_increment(rbtdb->cachestats,
9947 dns_cachestatscounter_deletettl);
9948 break;
9949 case expire_lru:
9950 isc_stats_increment(rbtdb->cachestats,
9951 dns_cachestatscounter_deletelru);
9952 break;
9953 default:
9954 break;
9955 }
9956
9957 }
9958 }
9959