1 /* Copyright © 2012 Brandon L Black <blblack@gmail.com> and Jay Reitz <jreitz@gmail.com>
2 *
3 * This file is part of gdnsd.
4 *
5 * gdnsd is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * gdnsd is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with gdnsd. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #ifndef GDNSD_LTREE_H
21 #define GDNSD_LTREE_H
22
23 /*
24
25 ltree == "Label Tree", a representation of DNS data as a tree by domain
26 labels, e.g:
27 .
28 / \
29 com net
30 / \
31 / \
32 example foo
33 / \
34 www ns1
35
36 A whole ltree represents a whole zone as a linked tree of per-label nodes
37 starting at the root of the zone (the root zone of DNS in the example above).
38 The actual root node of the tree has no label, as the labels are only useful
39 in seaching the children of a node. The root node itself is tracked and
40 searched through another data structure, the "zones tree" in ztree.h.
41
42 Each node in the ltree (ltree_node_t) contains a resizable hash table of
43 child nodes, as well as the data for its own local rrsets and a flags field for
44 identifying important properties like delegation points.
45
46 The zonefile parser (zscan.rl) constructs the label tree by making
47 ltree_add_rec_* calls into the ltree.c code. After a zonefile has been
48 parsed and its raw data added to its ltree, the ltree code does multiple
49 phases of post-processing where it walks the entire tree, doing many data
50 validation checks and setting up inter-node references (such as NS->A glue,
51 MX->A additionals, etc).
52
53 At runtime, the dnspacket.c code searches the ltree database directly, using
54 its own local search function that understands the ltree structure.
55
56 The child node hash tables within each node are doubled in size every time
57 the load factor reaches 1.0 (and rehashed all over again into the new table).
58 Collisions are handled by linked lists of nodes. The rrsets of a node are
59 represented by a linked list, and the rdata items within an rrset are
60 represented as a resizeable array of objects.
61
62 There have been several design iterations, both in checked-in code and
63 private testing. Past experiments that failed: A flat hash table of all
64 domainnames with lots of string-chopping to find the parents of failed lookups
65 (and some parent/deleg pointers in the nodes). Various uses of Judy Arrays of
66 various types at various levels of the structure. Ditto for crit-bit trees
67 (using the agl code which came from the djb code). Ditto for "khash.h" from
68 AttractiveChaos. Ditto for many other trie/tree/hash structures, and other
69 variations on the current setup (open addressing with various jump schemes,
70 etc). I'm actually using a clever open-addressing variant over in ltarena.c
71 for the temporary hashtable that de-duplicates domainname storage within the
72 ltree, but it doesn't really apply here.
73
74 I really want to find a better, faster, and more space-efficient advanced
75 data structure to use, but I just haven't found anything that beats the current
76 "naive (but highly optimized to the problem space) hashing + linked lists"
77 solution all-around on real-world data. Recently I dropped a couple of space-
78 efficiency hacks from the current implementation to gain speed (the space waste
79 was quite minimal). One of the biggest space efficiency gains in the current
80 setup though is the use of the "ltarena" pool allocator, which saves on some
81 of the wasted alignment of strings, and saves all the pointless resize/free-tracking
82 that malloc would normally use (all ltarena objects are static and persistent until
83 daemon exit time).
84
85 */
86
87 #include "dnswire.h"
88
89 // [zl]tree.h have a mutual dependency due to type definitions:
90 struct _ltree_node_struct;
91 typedef struct _ltree_node_struct ltree_node_t;
92 #include "ztree.h"
93
94 #include <gdnsd/compiler.h>
95 #include <gdnsd/plugapi.h>
96 #include <gdnsd/misc.h>
97
98 #include <inttypes.h>
99 #include <stdbool.h>
100
101 struct _ltree_rdata_ns_struct;
102 struct _ltree_rdata_ptr_struct;
103 struct _ltree_rdata_mx_struct;
104 struct _ltree_rdata_srv_struct;
105 struct _ltree_rdata_naptr_struct;
106 struct _ltree_rdata_rfc3597_struct;
107
108 union _ltree_rrset_union;
109 struct _ltree_rrset_addr_struct;
110 struct _ltree_rrset_soa_struct;
111 struct _ltree_rrset_cname_struct;
112 struct _ltree_rrset_dync_struct;
113 struct _ltree_rrset_ns_struct;
114 struct _ltree_rrset_ptr_struct;
115 struct _ltree_rrset_mx_struct;
116 struct _ltree_rrset_srv_struct;
117 struct _ltree_rrset_naptr_struct;
118 struct _ltree_rrset_txt_struct;
119 struct _ltree_rrset_rfc3597_struct;
120
121 typedef struct _ltree_rdata_ns_struct ltree_rdata_ns_t;
122 typedef struct _ltree_rdata_ptr_struct ltree_rdata_ptr_t;
123 typedef struct _ltree_rdata_mx_struct ltree_rdata_mx_t;
124 typedef struct _ltree_rdata_srv_struct ltree_rdata_srv_t;
125 typedef struct _ltree_rdata_naptr_struct ltree_rdata_naptr_t;
126 typedef uint8_t* * ltree_rdata_txt_t;
127 typedef struct _ltree_rdata_rfc3597_struct ltree_rdata_rfc3597_t;
128
129 typedef union _ltree_rrset_union ltree_rrset_t;
130 typedef struct _ltree_rrset_gen_struct ltree_rrset_gen_t;
131 typedef struct _ltree_rrset_addr_struct ltree_rrset_addr_t;
132 typedef struct _ltree_rrset_soa_struct ltree_rrset_soa_t;
133 typedef struct _ltree_rrset_cname_struct ltree_rrset_cname_t;
134 typedef struct _ltree_rrset_dync_struct ltree_rrset_dync_t;
135 typedef struct _ltree_rrset_ns_struct ltree_rrset_ns_t;
136 typedef struct _ltree_rrset_ptr_struct ltree_rrset_ptr_t;
137 typedef struct _ltree_rrset_mx_struct ltree_rrset_mx_t;
138 typedef struct _ltree_rrset_srv_struct ltree_rrset_srv_t;
139 typedef struct _ltree_rrset_naptr_struct ltree_rrset_naptr_t;
140 typedef struct _ltree_rrset_txt_struct ltree_rrset_txt_t;
141 typedef struct _ltree_rrset_rfc3597_struct ltree_rrset_rfc3597_t;
142
143 // Used to set/get the "glue" status of the "ad" pointer
144 // in ltree_rdata_ns_t, which is stored in the LSB.
145 #define AD_IS_GLUE(x) (!!(((uintptr_t)(x)) & 1UL))
146 #define AD_SET_GLUE(x) ((x) = (void*)(((uintptr_t)(x)) | 1UL))
147 #define AD_GET_PTR(x) ((const ltree_rrset_addr_t*)((uintptr_t)(x) & ~((uintptr_t)1UL)))
148
149 struct _ltree_rdata_ns_struct {
150 const uint8_t* dname;
151 ltree_rrset_addr_t* ad;
152 };
153
154 struct _ltree_rdata_ptr_struct {
155 const uint8_t* dname;
156 };
157
158 struct _ltree_rdata_mx_struct {
159 const uint8_t* dname;
160 ltree_rrset_addr_t* ad;
161 uint16_t pref; // net-order
162 };
163
164 struct _ltree_rdata_srv_struct {
165 const uint8_t* dname;
166 ltree_rrset_addr_t* ad;
167 uint16_t priority; // net-order
168 uint16_t weight; // net-order
169 uint16_t port; // net-order
170 };
171
172 #define NAPTR_TEXTS_FLAGS 0
173 #define NAPTR_TEXTS_SERVICES 1
174 #define NAPTR_TEXTS_REGEXP 2
175 struct _ltree_rdata_naptr_struct {
176 const uint8_t* dname;
177 ltree_rrset_addr_t* ad;
178 uint8_t* texts[3]; // flags, services, regexp
179 uint16_t order; // net-order
180 uint16_t pref; // net-order
181 };
182
183 struct _ltree_rdata_rfc3597_struct {
184 uint8_t* rd;
185 uint16_t rdlen;
186 };
187
188 // rrset structs
189
190 struct _ltree_rrset_gen_struct {
191 ltree_rrset_t* next;
192 uint16_t type; // host-order
193 uint16_t count; // host-order
194 uint32_t ttl; // net-order
195 };
196
197 #if SIZEOF_UINTPTR_T == 8
198 # define LTREE_V4A_SIZE 4
199 #else
200 # define LTREE_V4A_SIZE 3
201 #endif
202
203 // The rules for interpreting the structure:
204 // if(!count_v6 && gen.count <= LTREE_V4A_SIZE) {
205 // if(!gen.count)
206 // use .dyn, this is a DYNA
207 // else
208 // use v4a for direct IPv4 address data
209 // }
210 // else {
211 // use addrs.v[46] for address arrays
212 // }
213 struct _ltree_rrset_addr_struct {
214 ltree_rrset_gen_t gen;
215 uint16_t count_v6;
216 uint16_t limit_v4;
217 uint16_t limit_v6;
218 // 16 "free" bits here
219 union {
220 struct {
221 uint32_t* v4;
222 uint8_t* v6;
223 } addrs;
224 uint32_t v4a[LTREE_V4A_SIZE];
225 struct {
226 gdnsd_resolve_cb_t func;
227 unsigned resource;
228 uint32_t ttl_min; // host-order!
229 } dyn;
230 };
231 };
232
233 struct _ltree_rrset_soa_struct {
234 ltree_rrset_gen_t gen;
235 const uint8_t* email;
236 const uint8_t* master;
237 uint32_t times[5];
238 uint32_t neg_ttl; // cache of htons(min(ntohs(gen.ttl), ntohs(times[4])))
239 };
240
241 struct _ltree_rrset_cname_struct {
242 ltree_rrset_gen_t gen;
243 const uint8_t* dname;
244 };
245
246 struct _ltree_rrset_dync_struct {
247 ltree_rrset_gen_t gen;
248 const uint8_t* origin;
249 gdnsd_resolve_cb_t func;
250 unsigned resource;
251 uint32_t ttl_min; // host-order!
252 uint16_t limit_v4;
253 uint16_t limit_v6;
254 };
255
256 struct _ltree_rrset_ns_struct {
257 ltree_rrset_gen_t gen;
258 ltree_rdata_ns_t* rdata;
259 };
260
261 struct _ltree_rrset_ptr_struct {
262 ltree_rrset_gen_t gen;
263 ltree_rdata_ptr_t* rdata;
264 };
265
266 struct _ltree_rrset_mx_struct {
267 ltree_rrset_gen_t gen;
268 ltree_rdata_mx_t* rdata;
269 };
270
271 struct _ltree_rrset_srv_struct {
272 ltree_rrset_gen_t gen;
273 ltree_rdata_srv_t* rdata;
274 };
275
276 struct _ltree_rrset_naptr_struct {
277 ltree_rrset_gen_t gen;
278 ltree_rdata_naptr_t* rdata;
279 };
280
281 struct _ltree_rrset_txt_struct {
282 ltree_rrset_gen_t gen;
283 ltree_rdata_txt_t* rdata;
284 };
285
286 struct _ltree_rrset_rfc3597_struct {
287 ltree_rrset_gen_t gen;
288 ltree_rdata_rfc3597_t* rdata;
289 };
290
291 // This is never allocated, it's just used
292 // for pointer types to cast between generic
293 // rrset_t and the specific rrset_t's
294 union _ltree_rrset_union {
295 ltree_rrset_gen_t gen;
296 ltree_rrset_addr_t addr;
297 ltree_rrset_soa_t soa;
298 ltree_rrset_cname_t cname;
299 ltree_rrset_dync_t dync;
300 ltree_rrset_ns_t ns;
301 ltree_rrset_ptr_t ptr;
302 ltree_rrset_mx_t mx;
303 ltree_rrset_srv_t srv;
304 ltree_rrset_naptr_t naptr;
305 ltree_rrset_txt_t txt;
306 ltree_rrset_rfc3597_t rfc3597;
307 };
308
309 // For ltree_node_t.flags
310 #define LTNFLAG_DELEG 0x1 // This is the exact start of a delegated zone.
311 // These nodes *must* have an NS rrset (that's how they're
312 // detected in the first place), and otherwise can only have
313 // addr rrsets, and child nodes which contain only addr rrsets
314 // (for NS glue)
315 #define LTNFLAG_GUSED 0x2 // For nodes at or below DELEG points which contain addresses, this
316 // is set when the glue is used, and later checked for "glue unused"
317 // warnings. Also re-used in the same manner for out-of-zone glue,
318 // which is stored under a special child node of the zone root.
319
320 struct _ltree_node_struct {
321 uint32_t flags;
322 // During the ltree_add_rec_* (parsing) phase of ltree.c, an accurate count
323 // is maintained in child_hash_mask, and the effective mask is computed from
324 // the count (next power of 2, -1). After all records are added the raw count
325 // becomes useless, and this value is converted to a directly stored mask for
326 // use during post-processing and by the runtime code in dnspacket.c
327 uint32_t child_hash_mask;
328 const uint8_t* label;
329 ltree_node_t* next; // next node in this child_table hash slot
330 ltree_node_t* * child_table; // The table of children.
331 ltree_rrset_t* rrsets; // The list of rrsets
332 };
333
334 // ztree/zone code uses these to create and destroy per-zone ltrees:
335 F_NONNULL
336 void ltree_init_zone(zone_t* zone);
337 F_WUNUSED F_NONNULL
338 bool ltree_postproc_zone(zone_t* zone);
339 F_NONNULL
340 void ltree_destroy(ltree_node_t* node);
341
342 // Adding data to the ltree (called from parser)
343 F_WUNUSED F_NONNULL
344 bool ltree_add_rec_soa(const zone_t* zone, const uint8_t* dname, const uint8_t* master, const uint8_t* email, unsigned ttl, const unsigned serial, const unsigned refresh, const unsigned retry, const unsigned expire, unsigned ncache);
345 F_WUNUSED F_NONNULL
346 bool ltree_add_rec_a(const zone_t* zone, const uint8_t* dname, uint32_t addr, unsigned ttl, const unsigned limit_v4, const bool ooz);
347 F_WUNUSED F_NONNULL
348 bool ltree_add_rec_aaaa(const zone_t* zone, const uint8_t* dname, const uint8_t* addr, unsigned ttl, const unsigned limit_v6, const bool ooz);
349 F_WUNUSED F_NONNULL
350 bool ltree_add_rec_dynaddr(const zone_t* zone, const uint8_t* dname, const char* rhs, unsigned ttl, unsigned ttl_min, const unsigned limit_v4, const unsigned limit_v6, const bool ooz);
351 F_WUNUSED F_NONNULL
352 bool ltree_add_rec_cname(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
353 F_WUNUSED F_NONNULL
354 bool ltree_add_rec_dync(const zone_t* zone, const uint8_t* dname, const char* rhs, const uint8_t* origin, unsigned ttl, unsigned ttl_min, const unsigned limit_v4, const unsigned limit_v6);
355 F_WUNUSED F_NONNULL
356 bool ltree_add_rec_ptr(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
357 F_WUNUSED F_NONNULL
358 bool ltree_add_rec_ns(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
359 F_WUNUSED F_NONNULL
360 bool ltree_add_rec_mx(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned pref);
361 F_WUNUSED F_NONNULL
362 bool ltree_add_rec_srv(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned priority, const unsigned weight, const unsigned port);
363 F_WUNUSED F_NONNULL
364 bool ltree_add_rec_naptr(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned order, const unsigned pref, const unsigned num_texts, uint8_t** texts);
365 F_WUNUSED F_NONNULL
366 bool ltree_add_rec_txt(const zone_t* zone, const uint8_t* dname, const unsigned num_texts, uint8_t** texts, unsigned ttl);
367 F_WUNUSED F_NONNULLX(1)
368 bool ltree_add_rec_rfc3597(const zone_t* zone, const uint8_t* dname, const unsigned rrtype, unsigned ttl, const unsigned rdlen, uint8_t* rd);
369
370 // Load zonefiles (called from main, invokes parser)
371 void ltree_load_zones(void);
372
373 typedef enum {
374 DNAME_NOAUTH = 0,
375 DNAME_AUTH = 1,
376 DNAME_DELEG = 2
377 } ltree_dname_status_t;
378
379 // this hash wrapper is for labels encoded as one length-byte followed
380 // by N characters. Thus the label "www" is "\003www" (4 bytes)
381 F_UNUSED F_PURE F_WUNUSED F_NONNULL F_UNUSED
ltree_hash(const uint8_t * input,const uint32_t hash_mask)382 static uint32_t ltree_hash(const uint8_t* input, const uint32_t hash_mask) {
383 const unsigned len = *input++;
384 return gdnsd_lookup2(input, len) & hash_mask;
385 }
386
387 // "lstack" must be allocated to 127 pointers
388 // "dname" must be valid
389 // retval is label count (not including zero-width root label)
390 F_UNUSED F_WUNUSED F_NONNULL
dname_to_lstack(const uint8_t * dname,const uint8_t ** lstack)391 static unsigned dname_to_lstack(const uint8_t* dname, const uint8_t** lstack) {
392 dmn_assert(dname_status(dname) == DNAME_VALID);
393
394 dname++; // skip overall len byte
395 unsigned lcount = 0;
396 unsigned llen; // current label len
397 while((llen = *dname)) {
398 dmn_assert(lcount < 127);
399 lstack[lcount++] = dname++;
400 dname += llen;
401 }
402
403 return lcount;
404 }
405
406 #endif // GDNSD_LTREE_H
407