1 /* Copyright © 2012 Brandon L Black <blblack@gmail.com> and Jay Reitz <jreitz@gmail.com>
2  *
3  * This file is part of gdnsd.
4  *
5  * gdnsd is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * gdnsd is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with gdnsd.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #ifndef GDNSD_LTREE_H
21 #define GDNSD_LTREE_H
22 
23 /*
24 
25   ltree == "Label Tree", a representation of DNS data as a tree by domain
26 labels, e.g:
27                         .
28                        / \
29                      com  net
30                     /   \
31                    /     \
32                example   foo
33                 /   \
34               www   ns1
35 
36   A whole ltree represents a whole zone as a linked tree of per-label nodes
37 starting at the root of the zone (the root zone of DNS in the example above).
38 The actual root node of the tree has no label, as the labels are only useful
39 in seaching the children of a node.  The root node itself is tracked and
40 searched through another data structure, the "zones tree" in ztree.h.
41 
42   Each node in the ltree (ltree_node_t) contains a resizable hash table of
43 child nodes, as well as the data for its own local rrsets and a flags field for
44 identifying important properties like delegation points.
45 
46   The zonefile parser (zscan.rl) constructs the label tree by making
47 ltree_add_rec_* calls into the ltree.c code.  After a zonefile has been
48 parsed and its raw data added to its ltree, the ltree code does multiple
49 phases of post-processing where it walks the entire tree, doing many data
50 validation checks and setting up inter-node references (such as NS->A glue,
51 MX->A additionals, etc).
52 
53   At runtime, the dnspacket.c code searches the ltree database directly, using
54 its own local search function that understands the ltree structure.
55 
56   The child node hash tables within each node are doubled in size every time
57 the load factor reaches 1.0 (and rehashed all over again into the new table).
58 Collisions are handled by linked lists of nodes.  The rrsets of a node are
59 represented by a linked list, and the rdata items within an rrset are
60 represented as a resizeable array of objects.
61 
62   There have been several design iterations, both in checked-in code and
63 private testing.  Past experiments that failed: A flat hash table of all
64 domainnames with lots of string-chopping to find the parents of failed lookups
65 (and some parent/deleg pointers in the nodes).  Various uses of Judy Arrays of
66 various types at various levels of the structure.  Ditto for crit-bit trees
67 (using the agl code which came from the djb code).  Ditto for "khash.h" from
68 AttractiveChaos.  Ditto for many other trie/tree/hash structures, and other
69 variations on the current setup (open addressing with various jump schemes,
70 etc).  I'm actually using a clever open-addressing variant over in ltarena.c
71 for the temporary hashtable that de-duplicates domainname storage within the
72 ltree, but it doesn't really apply here.
73 
74   I really want to find a better, faster, and more space-efficient advanced
75 data structure to use, but I just haven't found anything that beats the current
76 "naive (but highly optimized to the problem space) hashing + linked lists"
77 solution all-around on real-world data.  Recently I dropped a couple of space-
78 efficiency hacks from the current implementation to gain speed (the space waste
79 was quite minimal).  One of the biggest space efficiency gains in the current
80 setup though is the use of the "ltarena" pool allocator, which saves on some
81 of the wasted alignment of strings, and saves all the pointless resize/free-tracking
82 that malloc would normally use (all ltarena objects are static and persistent until
83 daemon exit time).
84 
85 */
86 
87 #include "dnswire.h"
88 
89 // [zl]tree.h have a mutual dependency due to type definitions:
90 struct _ltree_node_struct;
91 typedef struct _ltree_node_struct ltree_node_t;
92 #include "ztree.h"
93 
94 #include <gdnsd/compiler.h>
95 #include <gdnsd/plugapi.h>
96 #include <gdnsd/misc.h>
97 
98 #include <inttypes.h>
99 #include <stdbool.h>
100 
101 struct _ltree_rdata_ns_struct;
102 struct _ltree_rdata_ptr_struct;
103 struct _ltree_rdata_mx_struct;
104 struct _ltree_rdata_srv_struct;
105 struct _ltree_rdata_naptr_struct;
106 struct _ltree_rdata_rfc3597_struct;
107 
108 union  _ltree_rrset_union;
109 struct _ltree_rrset_addr_struct;
110 struct _ltree_rrset_soa_struct;
111 struct _ltree_rrset_cname_struct;
112 struct _ltree_rrset_dync_struct;
113 struct _ltree_rrset_ns_struct;
114 struct _ltree_rrset_ptr_struct;
115 struct _ltree_rrset_mx_struct;
116 struct _ltree_rrset_srv_struct;
117 struct _ltree_rrset_naptr_struct;
118 struct _ltree_rrset_txt_struct;
119 struct _ltree_rrset_rfc3597_struct;
120 
121 typedef struct _ltree_rdata_ns_struct ltree_rdata_ns_t;
122 typedef struct _ltree_rdata_ptr_struct ltree_rdata_ptr_t;
123 typedef struct _ltree_rdata_mx_struct ltree_rdata_mx_t;
124 typedef struct _ltree_rdata_srv_struct ltree_rdata_srv_t;
125 typedef struct _ltree_rdata_naptr_struct ltree_rdata_naptr_t;
126 typedef uint8_t* * ltree_rdata_txt_t;
127 typedef struct _ltree_rdata_rfc3597_struct ltree_rdata_rfc3597_t;
128 
129 typedef union  _ltree_rrset_union ltree_rrset_t;
130 typedef struct _ltree_rrset_gen_struct ltree_rrset_gen_t;
131 typedef struct _ltree_rrset_addr_struct ltree_rrset_addr_t;
132 typedef struct _ltree_rrset_soa_struct ltree_rrset_soa_t;
133 typedef struct _ltree_rrset_cname_struct ltree_rrset_cname_t;
134 typedef struct _ltree_rrset_dync_struct ltree_rrset_dync_t;
135 typedef struct _ltree_rrset_ns_struct ltree_rrset_ns_t;
136 typedef struct _ltree_rrset_ptr_struct ltree_rrset_ptr_t;
137 typedef struct _ltree_rrset_mx_struct ltree_rrset_mx_t;
138 typedef struct _ltree_rrset_srv_struct ltree_rrset_srv_t;
139 typedef struct _ltree_rrset_naptr_struct ltree_rrset_naptr_t;
140 typedef struct _ltree_rrset_txt_struct ltree_rrset_txt_t;
141 typedef struct _ltree_rrset_rfc3597_struct ltree_rrset_rfc3597_t;
142 
143 // Used to set/get the "glue" status of the "ad" pointer
144 //  in ltree_rdata_ns_t, which is stored in the LSB.
145 #define AD_IS_GLUE(x) (!!(((uintptr_t)(x)) & 1UL))
146 #define AD_SET_GLUE(x) ((x) = (void*)(((uintptr_t)(x)) | 1UL))
147 #define AD_GET_PTR(x) ((const ltree_rrset_addr_t*)((uintptr_t)(x) & ~((uintptr_t)1UL)))
148 
149 struct _ltree_rdata_ns_struct {
150     const uint8_t* dname;
151     ltree_rrset_addr_t* ad;
152 };
153 
154 struct _ltree_rdata_ptr_struct {
155     const uint8_t* dname;
156 };
157 
158 struct _ltree_rdata_mx_struct {
159     const uint8_t* dname;
160     ltree_rrset_addr_t* ad;
161     uint16_t pref; // net-order
162 };
163 
164 struct _ltree_rdata_srv_struct {
165     const uint8_t* dname;
166     ltree_rrset_addr_t* ad;
167     uint16_t priority; // net-order
168     uint16_t weight; // net-order
169     uint16_t port; // net-order
170 };
171 
172 #define NAPTR_TEXTS_FLAGS 0
173 #define NAPTR_TEXTS_SERVICES 1
174 #define NAPTR_TEXTS_REGEXP 2
175 struct _ltree_rdata_naptr_struct {
176     const uint8_t* dname;
177     ltree_rrset_addr_t* ad;
178     uint8_t* texts[3]; // flags, services, regexp
179     uint16_t order; // net-order
180     uint16_t pref; // net-order
181 };
182 
183 struct _ltree_rdata_rfc3597_struct {
184     uint8_t* rd;
185     uint16_t rdlen;
186 };
187 
188 // rrset structs
189 
190 struct _ltree_rrset_gen_struct {
191     ltree_rrset_t* next;
192     uint16_t type; // host-order
193     uint16_t count; // host-order
194     uint32_t ttl; // net-order
195 };
196 
197 #if SIZEOF_UINTPTR_T == 8
198 #    define LTREE_V4A_SIZE 4
199 #else
200 #    define LTREE_V4A_SIZE 3
201 #endif
202 
203 // The rules for interpreting the structure:
204 //   if(!count_v6 && gen.count <= LTREE_V4A_SIZE) {
205 //       if(!gen.count)
206 //           use .dyn, this is a DYNA
207 //       else
208 //           use v4a for direct IPv4 address data
209 //   }
210 //   else {
211 //      use addrs.v[46] for address arrays
212 //   }
213 struct _ltree_rrset_addr_struct {
214     ltree_rrset_gen_t gen;
215     uint16_t count_v6;
216     uint16_t limit_v4;
217     uint16_t limit_v6;
218     // 16 "free" bits here
219     union {
220         struct {
221             uint32_t* v4;
222             uint8_t* v6;
223         } addrs;
224         uint32_t v4a[LTREE_V4A_SIZE];
225         struct {
226             gdnsd_resolve_cb_t func;
227             unsigned resource;
228             uint32_t ttl_min; // host-order!
229         } dyn;
230     };
231 };
232 
233 struct _ltree_rrset_soa_struct {
234     ltree_rrset_gen_t gen;
235     const uint8_t* email;
236     const uint8_t* master;
237     uint32_t times[5];
238     uint32_t neg_ttl; // cache of htons(min(ntohs(gen.ttl), ntohs(times[4])))
239 };
240 
241 struct _ltree_rrset_cname_struct {
242     ltree_rrset_gen_t gen;
243     const uint8_t* dname;
244 };
245 
246 struct _ltree_rrset_dync_struct {
247     ltree_rrset_gen_t gen;
248     const uint8_t* origin;
249     gdnsd_resolve_cb_t func;
250     unsigned resource;
251     uint32_t ttl_min; // host-order!
252     uint16_t limit_v4;
253     uint16_t limit_v6;
254 };
255 
256 struct _ltree_rrset_ns_struct {
257     ltree_rrset_gen_t gen;
258     ltree_rdata_ns_t* rdata;
259 };
260 
261 struct _ltree_rrset_ptr_struct {
262     ltree_rrset_gen_t gen;
263     ltree_rdata_ptr_t* rdata;
264 };
265 
266 struct _ltree_rrset_mx_struct {
267     ltree_rrset_gen_t gen;
268     ltree_rdata_mx_t* rdata;
269 };
270 
271 struct _ltree_rrset_srv_struct {
272     ltree_rrset_gen_t gen;
273     ltree_rdata_srv_t* rdata;
274 };
275 
276 struct _ltree_rrset_naptr_struct {
277     ltree_rrset_gen_t gen;
278     ltree_rdata_naptr_t* rdata;
279 };
280 
281 struct _ltree_rrset_txt_struct {
282     ltree_rrset_gen_t gen;
283     ltree_rdata_txt_t* rdata;
284 };
285 
286 struct _ltree_rrset_rfc3597_struct {
287     ltree_rrset_gen_t gen;
288     ltree_rdata_rfc3597_t* rdata;
289 };
290 
291 // This is never allocated, it's just used
292 //  for pointer types to cast between generic
293 //  rrset_t and the specific rrset_t's
294 union _ltree_rrset_union {
295     ltree_rrset_gen_t gen;
296     ltree_rrset_addr_t addr;
297     ltree_rrset_soa_t soa;
298     ltree_rrset_cname_t cname;
299     ltree_rrset_dync_t dync;
300     ltree_rrset_ns_t ns;
301     ltree_rrset_ptr_t ptr;
302     ltree_rrset_mx_t mx;
303     ltree_rrset_srv_t srv;
304     ltree_rrset_naptr_t naptr;
305     ltree_rrset_txt_t txt;
306     ltree_rrset_rfc3597_t rfc3597;
307 };
308 
309 // For ltree_node_t.flags
310 #define LTNFLAG_DELEG 0x1 // This is the exact start of a delegated zone.
311                           // These nodes *must* have an NS rrset (that's how they're
312                           //  detected in the first place), and otherwise can only have
313                           //  addr rrsets, and child nodes which contain only addr rrsets
314                           //  (for NS glue)
315 #define LTNFLAG_GUSED 0x2 // For nodes at or below DELEG points which contain addresses, this
316                           //  is set when the glue is used, and later checked for "glue unused"
317                           //  warnings.  Also re-used in the same manner for out-of-zone glue,
318                           //  which is stored under a special child node of the zone root.
319 
320 struct _ltree_node_struct {
321     uint32_t flags;
322     // During the ltree_add_rec_* (parsing) phase of ltree.c, an accurate count
323     //  is maintained in child_hash_mask, and the effective mask is computed from
324     //  the count (next power of 2, -1).  After all records are added the raw count
325     //  becomes useless, and this value is converted to a directly stored mask for
326     //  use during post-processing and by the runtime code in dnspacket.c
327     uint32_t child_hash_mask;
328     const uint8_t* label;
329     ltree_node_t* next;         // next node in this child_table hash slot
330     ltree_node_t* * child_table; // The table of children.
331     ltree_rrset_t* rrsets;     // The list of rrsets
332 };
333 
334 // ztree/zone code uses these to create and destroy per-zone ltrees:
335 F_NONNULL
336 void ltree_init_zone(zone_t* zone);
337 F_WUNUSED F_NONNULL
338 bool ltree_postproc_zone(zone_t* zone);
339 F_NONNULL
340 void ltree_destroy(ltree_node_t* node);
341 
342 // Adding data to the ltree (called from parser)
343 F_WUNUSED F_NONNULL
344 bool ltree_add_rec_soa(const zone_t* zone, const uint8_t* dname, const uint8_t* master, const uint8_t* email, unsigned ttl, const unsigned serial, const unsigned refresh, const unsigned retry, const unsigned expire, unsigned ncache);
345 F_WUNUSED F_NONNULL
346 bool ltree_add_rec_a(const zone_t* zone, const uint8_t* dname, uint32_t addr, unsigned ttl, const unsigned limit_v4, const bool ooz);
347 F_WUNUSED F_NONNULL
348 bool ltree_add_rec_aaaa(const zone_t* zone, const uint8_t* dname, const uint8_t* addr, unsigned ttl, const unsigned limit_v6, const bool ooz);
349 F_WUNUSED F_NONNULL
350 bool ltree_add_rec_dynaddr(const zone_t* zone, const uint8_t* dname, const char* rhs, unsigned ttl, unsigned ttl_min, const unsigned limit_v4, const unsigned limit_v6, const bool ooz);
351 F_WUNUSED F_NONNULL
352 bool ltree_add_rec_cname(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
353 F_WUNUSED F_NONNULL
354 bool ltree_add_rec_dync(const zone_t* zone, const uint8_t* dname, const char* rhs, const uint8_t* origin, unsigned ttl, unsigned ttl_min, const unsigned limit_v4, const unsigned limit_v6);
355 F_WUNUSED F_NONNULL
356 bool ltree_add_rec_ptr(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
357 F_WUNUSED F_NONNULL
358 bool ltree_add_rec_ns(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl);
359 F_WUNUSED F_NONNULL
360 bool ltree_add_rec_mx(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned pref);
361 F_WUNUSED F_NONNULL
362 bool ltree_add_rec_srv(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned priority, const unsigned weight, const unsigned port);
363 F_WUNUSED F_NONNULL
364 bool ltree_add_rec_naptr(const zone_t* zone, const uint8_t* dname, const uint8_t* rhs, unsigned ttl, const unsigned order, const unsigned pref, const unsigned num_texts, uint8_t** texts);
365 F_WUNUSED F_NONNULL
366 bool ltree_add_rec_txt(const zone_t* zone, const uint8_t* dname, const unsigned num_texts, uint8_t** texts, unsigned ttl);
367 F_WUNUSED F_NONNULLX(1)
368 bool ltree_add_rec_rfc3597(const zone_t* zone, const uint8_t* dname, const unsigned rrtype, unsigned ttl, const unsigned rdlen, uint8_t* rd);
369 
370 // Load zonefiles (called from main, invokes parser)
371 void ltree_load_zones(void);
372 
373 typedef enum {
374     DNAME_NOAUTH = 0,
375     DNAME_AUTH = 1,
376     DNAME_DELEG = 2
377 } ltree_dname_status_t;
378 
379 // this hash wrapper is for labels encoded as one length-byte followed
380 //  by N characters.  Thus the label "www" is "\003www" (4 bytes)
381 F_UNUSED F_PURE F_WUNUSED F_NONNULL F_UNUSED
ltree_hash(const uint8_t * input,const uint32_t hash_mask)382 static uint32_t ltree_hash(const uint8_t* input, const uint32_t hash_mask) {
383    const unsigned len = *input++;
384    return gdnsd_lookup2(input, len) & hash_mask;
385 }
386 
387 // "lstack" must be allocated to 127 pointers
388 // "dname" must be valid
389 // retval is label count (not including zero-width root label)
390 F_UNUSED F_WUNUSED F_NONNULL
dname_to_lstack(const uint8_t * dname,const uint8_t ** lstack)391 static unsigned dname_to_lstack(const uint8_t* dname, const uint8_t** lstack) {
392     dmn_assert(dname_status(dname) == DNAME_VALID);
393 
394     dname++; // skip overall len byte
395     unsigned lcount = 0;
396     unsigned llen; // current label len
397     while((llen = *dname)) {
398         dmn_assert(lcount < 127);
399         lstack[lcount++] = dname++;
400         dname += llen;
401     }
402 
403     return lcount;
404 }
405 
406 #endif // GDNSD_LTREE_H
407