1 /* Copyright © 2012 Brandon L Black <blblack@gmail.com> and Jay Reitz <jreitz@gmail.com>
2  *
3  * This file is part of gdnsd.
4  *
5  * gdnsd is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * gdnsd is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with gdnsd.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #ifndef GDNSD_DNAME_H
21 #define GDNSD_DNAME_H
22 
23 #include <gdnsd/compiler.h>
24 #include <gdnsd/alloc.h>
25 #include <gdnsd/dmn.h>
26 #include <gdnsd/misc.h>
27 
28 #include <inttypes.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 
33 /*
34  * Notes about Domain Names in general:
35  * All domainnames are composed from labels.
36  *
37  * In the wire format:
38  *  (compression is not considered here)
39  *  The maximum length of a label is 63 bytes.
40  *  Each label is prefixed by a length byte with a value
41  *   of 1-63 inclusive.
42  *  The domainname as a whole is terminated by a NUL byte (which is
43  *   technically a label of length zero).
44  *  The domainname for the root of the DNS is simply a NUL byte
45  *   with no preceding labels.
46  *  The maximum length of a domainname is 255 bytes in wire-encoded
47  *   form, including the final NUL.
48  *
49  * In ASCII format without considering escaping:
50  *  The maximum label length is still 63 bytes
51  *  Labels have no explicit length prefix, and instead are
52  *   separated by a single period.
53  *  A final trailing dot is often used to signify that the
54  *   name is fully qualified (as opposed to a name which is
55  *   intended to have the local domain appended automatically).
56  *  In some cases the final trailing dot is basically ignored
57  *   and names are considered fully-qualified regardless.
58  *  The maximum overall length is ultimately determined by the
59  *   wire format, not this format, but works out to 254 in practice:
60  *   63 . 63 . 63 . 63   -> illegal (257 in wire form)
61  *   63 . 63 . 63 . 62   -> illegal (256 in wire form)
62  *   63 . 63 . 63 . 61   -> 253 (255 in wire form)
63  *   63 . 63 . 63 . 61 . -> 254 (255 in wire form)
64  *    1 .  1 .. 1 .  1   -> 253 (255 in wire form) (127 1-byte labels)
65  *    1 .  1 .. 1 .  1 . -> 254 (255 in wire form) (127 1-byte labels)
66  *
67  * Escaping in the ASCII format:
68  *  RFC1035 specifies an escaping method for zonefiles in general, which
69  *   allows embedding strange characters (including dots) within labels.
70  *  The escapes come in two forms: "\X" and "\DDD".  In the \X case, the
71  *   X character is taken to have no special meaning (e.g. "\." is a dot
72  *   as part of label data, instead of as a label separator).  In the
73  *   \DDD case, DDD are interpreted as a 3-digit base 10 number from the
74  *   range 0-255, and the escape represents a byte of that same value
75  *   (again with no special meaning).
76  *  The maximum width of an escape (representing a single "real" byte)
77  *   is 4 (the "\DDD" form).
78  *  The maximum width of an ASCII label in escaped form is 252 (63 * 4).
79  *  When considering whole domainnames, one would multiply out the label
80  *   bytes, but not the separator dots.  Therefore those names which pack
81  *   the maximal overall length into the minimum set of labels see the
82  *   most escape-expansion.  Reusing the maximal pair above:
83  *   63 . 63 . 63 . 61   -> 253 (255 in wire form)
84  *     ((63 + 63 + 63 + 61) * 4) + 3 = 1003
85  *   63 . 63 . 63 . 61 . -> 254 (255 in wire form)
86  *     ((63 + 63 + 63 + 61) * 4) + 4 = 1004
87  *
88  * Keep in mind these are data lengths only.  If your ASCII strings are
89  *  NUL-terminated, that's an extra byte.
90  *
91  * So to recap:
92  *  ASCII with escapes:
93  *   Max label storage (NUL-terminated): 252 (253)
94  *   Max name storage (NUL-terminated): 1004 (1005)
95  *  ASCII without escapes:
96  *   Max label storage (NUL-terminated): 63 (64)
97  *   Max name storage (NUL-terminated): 254 (255)
98  *  Wire:
99  *   Max label len: 63
100  *   Max overall len: 255
101  *
102  * BUT:
103  *  The fact that an ASCII representation of a full name
104  *   fits within 1004 (1005 for ASCIIZ) bytes does *not*
105  *   imply that it is legal (that it encodes to a wire
106  *   format name of legal length).  This is obvious because
107  *   the 1004-byte ASCII name might contain no escapes.
108  * For that matter, even if escapes are not allowed and the
109  *   individual labels are checked for the 63 limit, fitting
110  *   within 254 (255 for ASCIIZ) doesn't imply legality
111  *   either.  Example:
112  *   63 . 63 . 63 . 62 -> 254 (256, illegal, in wire form)
113  *
114  */
115 
116 /*
117  * Notes on the "dname" format:
118  *
119  * "dname"-formatted data is the common representation of
120  *  a domainname throughout the gdnsd source code.
121  *
122  * The first byte is always overall len of the rest of the string.
123  * The rest of the string is a wire-format domainname without
124  *  compression pointers.
125  * A full dname should be \x00 terminated as it would be in wire form.
126  * Partial dnames (in the process of being constructed, e.g. a dname
127  *  for a relative name that had no trailing dot in ASCII form, before
128  *  the origin is appended) are signalled by replacing the trailing \0
129  *  with a trailing \xff.  These are not valid for passing as "dname"
130  *  data to the core code, but are useful during intermediate stages
131  *  of construction.
132  * The maximum required storage size is 256 bytes for a maximally long
133  *  name.  e.g. ("uint8_t dname[256];" or "uint8_t* dname = xmalloc(256);").
134  * As a general rule, all generic dname storage a plugin creates should
135  *  be allocated for the full 256 bytes if you are going to construct
136  *  them using the helper functions here.  However, keep in mind that
137  *  constant dname arguments passed down from the core code could be
138  *  (and often are) allocated to their exact size. (e.g. 2 bytes for the
139  *  first example below).
140  * Examples:
141  *  "." -> \x01\x00
142  *  "com." -> \x05\x03com\x00
143  *  "a.com." -> \x07\x01a\x03com\x00
144  *  "www" -> \x05\x03www\xff (not yet fully qualified)
145  */
146 
// Result classification reported by several of the dname operations below:
//  DNAME_VALID   - the operation succeeded and the result is fully
//                  qualified (ends in \0).
//  DNAME_PARTIAL - the operation succeeded, but the result is not yet fully
//                  qualified (ends in \xff).  This may be acceptable to your
//                  own code as an intermediate state, but it is not
//                  considered a valid dname for normal use with the core
//                  code.
//  DNAME_INVALID - the operation failed (invalid input); the output contents
//                  are undefined.
typedef enum {
    DNAME_VALID   = 0,
    DNAME_PARTIAL = 1,
    DNAME_INVALID = 2,
} gdnsd_dname_status_t;
158 
159 #pragma GCC visibility push(default)
160 
// Unescape the string "in" into the storage at "out", using
//  DNS zonefile escaping rules.
// The return value is the output length, which will be <= the input length.
// The input length ("len") must be > 0.
// Note: you are responsible for allocating output storage of at least "len"
//  bytes at "out".
// This function cannot fail given correct inputs.  If it is given
//  invalid/dangling escapes, it signals the error by returning a zero length.
// Note the use of "restrict": "out" and "in" must not overlap.
170 F_NONNULL
171 unsigned gdnsd_dns_unescape(char* restrict out, const char* restrict in, const unsigned len);
172 
// Parse a human-readable string ("instr", a char buffer of "len" bytes) into a dname.
// Escapes will be unescaped and any uppercase ASCII characters will be normalized to lowercase.
// If there is no trailing dot, the result will be left in the PARTIAL state.
// It is assumed you have allocated a full 256 bytes at "dname" for storage of the
//  result.  Any less could result in random crashy bugs.
// Note the use of "restrict": "dname" and "instr" must not overlap.
179 F_NONNULL
180 gdnsd_dname_status_t gdnsd_dname_from_string(uint8_t* restrict dname, const char* restrict instr, const unsigned len);
181 
182 // Turns a dname back into a printable string
183 // "dname" must be DNAME_VALID or DNAME_PARTIAL
184 // "str" must be preallocated to at least 1024 bytes,
185 // retval is the length of the string stored to "str", including the terminal NUL
186 F_NONNULL
187 unsigned gdnsd_dname_to_string(const uint8_t* restrict dname, char* restrict str);
188 
// Concatenate one dname onto the end of another.  Invalid inputs are not allowed.
//  You are responsible for ensuring this is the case before calling, or crashes
//  could ensue.  As before, "dn1" must have a full 256 bytes of storage.
// If the concatenation would result in an illegally-long dname, the return
//  value will be INVALID.  Otherwise the return value should mirror
//  the status of the name being appended, "dn2" (typically the origin): VALID vs PARTIAL.
// Note the use of "restrict": "dn1" and "dn2" must not overlap.
196 F_NONNULL
197 gdnsd_dname_status_t gdnsd_dname_cat(uint8_t* restrict dn1, const uint8_t* restrict dn2);
198 
199 // Terminate a DNAME_PARTIAL name, converting it to DNAME_VALID.  Idempotent
200 //  and thus harmless on names which are already DNAME_VALID.  Invalid input
201 //  could cause crashes.
202 F_NONNULL F_UNUSED
gdnsd_dname_terminate(uint8_t * dname)203 static void gdnsd_dname_terminate(uint8_t* dname) {
204     dmn_assert(*dname);
205     dname[*dname] = 0;
206 }
207 
208 // Check the status/validity of a dname.  The dname will be carefully parsed,
209 //  and should handle any random input (although most random inputs will give
210 //  DNAME_INVALID) assuming dname has at least a 256 byte memory allocation.
211 // If dname is allocated to less than 256 bytes, then crashes are possible with
212 //  input that looks like a name longer than the allocation.
213 F_NONNULL F_PURE
214 gdnsd_dname_status_t gdnsd_dname_status(const uint8_t* dname);
215 
216 // gdnsd_dname_hash was a library function, and must remain so for un-rebuilt
217 // 3rd party plugins for now.  To cleanup in next major version bump...
218 F_PURE F_NONNULL
219 unsigned gdnsd_dname_hash(const uint8_t* input);
220 
221 #pragma GCC visibility pop
222 
223 // This static version of the above and #define lets core code and rebuilt
224 // plugins use the static version instead, which can be inlined.
225 F_PURE F_NONNULL F_UNUSED
gdnsd_dname_hash_static(const uint8_t * input)226 static unsigned gdnsd_dname_hash_static(const uint8_t *input) {
227     const uint32_t len = *input++ - 1U;
228     return gdnsd_lookup2(input, len);
229 }
230 #define gdnsd_dname_hash gdnsd_dname_hash_static
231 
232 // Check the status of a known-good dname.  It is assumed that the dname was
233 //  constructed correctly by other code, and merely differentiates quickly
234 //  between the partial and fully-qualfied cases.  If the input is invalid,
235 //  it could crash.
236 F_NONNULL F_UNUSED
gdnsd_dname_is_partial(const uint8_t * dname)237 static bool gdnsd_dname_is_partial(const uint8_t* dname) {
238     dmn_assert(*dname);
239     return dname[*dname] == 255;
240 }
241 
242 // Trim a dname's storage to the minimum required size.  Assumes storage was
243 //  originally allocated with xmalloc() or equivalent.  Note that after trimming
244 //  you cannot perform operations like dname_cat() on this directly.
245 F_WUNUSED F_NONNULL F_UNUSED
gdnsd_dname_trim(uint8_t * dname)246 static uint8_t* gdnsd_dname_trim(uint8_t* dname) {
247     dmn_assert(*dname);
248     return xrealloc(dname, *dname + 1U);
249 }
250 
251 // Copy "source" dname to the storage at dest, which must be allocated
252 //  large enough (256 max).
253 F_NONNULL F_UNUSED
gdnsd_dname_copy(uint8_t * dest,const uint8_t * source)254 static void gdnsd_dname_copy(uint8_t* dest, const uint8_t* source) {
255     const unsigned len = *source;
256     dmn_assert(len); dmn_assert(len < 256U);
257     memcpy(dest, source, len + 1U);
258 }
259 
260 // Allocate new storage (via xmalloc()), clone the input dname into it, and return.
261 // The second argument "exact" determines whether the new copy will be allocated
262 //  to 256 bytes or to the exact amount necessary to hold the data.
263 F_WUNUSED F_NONNULL F_UNUSED
gdnsd_dname_dup(const uint8_t * dname,bool exact)264 static uint8_t* gdnsd_dname_dup(const uint8_t* dname, bool exact) {
265     dmn_assert(*dname);
266     uint8_t* out = xmalloc(exact ? (*dname + 1U) : 256U);
267     gdnsd_dname_copy(out, dname);
268     return out;
269 }
270 
271 // Returns memcmp()-like return values, primary sort is
272 //  on overall length (<0 means dn1 is shorter than dn2).
273 // dn1 and dn2 must be DNAME_VALID or DNAME_PARTIAL
274 // When lengths are equal the return values will make
275 //  for a stable sort assuming no duplicates, but are
276 //  somewhat meaningless.
277 // Equality (0) will only be returned on a complete match,
278 //  including the difference between VALID and PARTIAL.
279 F_NONNULL F_PURE F_UNUSED
gdnsd_dname_cmp(const uint8_t * dn1,const uint8_t * dn2)280 static int gdnsd_dname_cmp(const uint8_t* dn1, const uint8_t* dn2) {
281     dmn_assert(gdnsd_dname_status(dn1) != DNAME_INVALID);
282     dmn_assert(gdnsd_dname_status(dn2) != DNAME_INVALID);
283     const uint8_t len1 = *dn1++;
284     const uint8_t len2 = *dn2++;
285     int rv = len1 - len2;
286     if(!rv)
287         rv = memcmp(dn1, dn2, len1);
288     return rv;
289 }
290 
291 // As above but for labels (no dname_status() assertion)
292 F_NONNULL F_PURE F_UNUSED
static int gdnsd_label_cmp(const uint8_t* label1, const uint8_t* label2) {
    // Primary key is the leading length byte: <0 means label1 is shorter.
    const int len_diff = (int)label1[0] - (int)label2[0];
    if(len_diff)
        return len_diff;
    // Same length: fall back to memcmp() over the bytes after the length byte.
    return memcmp(label1 + 1, label2 + 1, label1[0]);
}
301 
302 // returns true if dname is within zone
303 // returns true if they are identical
304 // dname and zone must be DNAME_VALID (fully-qualified).
305 F_NONNULL F_PURE F_UNUSED
gdnsd_dname_isinzone(const uint8_t * parent,const uint8_t * child)306 static bool gdnsd_dname_isinzone(const uint8_t* parent, const uint8_t* child) {
307     dmn_assert(gdnsd_dname_status(parent) == DNAME_VALID);
308     dmn_assert(gdnsd_dname_status(child) == DNAME_VALID);
309 
310     bool rv = false;
311     const unsigned plen = *parent++;
312     const unsigned clen = *child++;
313     dmn_assert(plen); // implied by DNAME_VALID check above
314     dmn_assert(clen); // implied by DNAME_VALID check above
315 
316     if(plen <= clen) { // if child shorter than parent, cannot be isinzone
317         // child_pstart is the hypothetical location of
318         //   the trailing "parent" in "child" if isinzone
319         const uint8_t* child_pstart = child + (clen - plen);
320         if(!memcmp(child_pstart, parent, plen)) { // basic trailing ~match
321             // There are corner cases that can fool the quick memcmp check
322             //   into a false positive.  Basically, picture www.xfoo.com vs
323             //   foo.com, where 'x' happens to be the integer value 3 (the
324             //   length of "foo") within child's label. This is more
325             //   realistic for long labels where the length byte could be
326             //   in the ASCII range for numerals, so we must iterate
327             //   child's actual labels and make sure that one of them falls
328             //   exactly on child_pstart.
329             while(*child) { // not reached the terminal \0
330                 if(child == child_pstart) { // definite match
331                     rv = true;
332                     break;
333                 }
334                 // jump to next start-of-label
335                 const unsigned llen = *child++;
336                 child += llen;
337             }
338             // the above misses the case of both parent and child being the
339             //   root zone of the DNS, and this catches it.
340             if(plen == 1)
341                 rv = true;
342         }
343     }
344 
345     return rv;
346 }
347 
348 // both arguments must be DNAME_VALID, and dname must be known
349 //   to be a child of (or equal to) parent (e.g. via gdnsd_dname_isinzone()).
350 // chops the zone part off the end of dname, re-rooting it as a valid name.
351 // this is used for the LHS of in-zone records that are fully qualified
352 //   during zonefile scanning, since insertion is rooted at the top of the
353 //   zone.
354 F_NONNULL F_UNUSED
gdnsd_dname_drop_zone(uint8_t * dname,const uint8_t * zroot)355 static void gdnsd_dname_drop_zone(uint8_t* dname, const uint8_t* zroot) {
356     dmn_assert(gdnsd_dname_status(dname) == DNAME_VALID);
357     dmn_assert(gdnsd_dname_status(zroot) == DNAME_VALID);
358     dmn_assert((*dname) >= (*zroot));
359     const unsigned newterm = (*dname) - ((*zroot) - 1U);
360     dmn_assert(dname[newterm] == zroot[1]);
361     dname[0] = newterm;
362     dname[newterm] = 0;
363 }
364 
365 // Returns true if dname is a wildcard name (first label is a lone "*").
366 // Argument must be DNAME_VALID or DNAME_PARTIAL
367 F_NONNULL F_PURE F_UNUSED
gdnsd_dname_iswild(const uint8_t * dname)368 static bool gdnsd_dname_iswild(const uint8_t* dname) {
369     dmn_assert(gdnsd_dname_status(dname) != DNAME_INVALID);
370     if(dname[1] == 1 && dname[2] == '*')
371         return true;
372     return false;
373 }
374 
375 typedef gdnsd_dname_status_t dname_status_t;
376 #define dns_unescape gdnsd_dns_unescape
377 #define dname_from_string gdnsd_dname_from_string
378 #define dname_cat gdnsd_dname_cat
379 #define dname_terminate gdnsd_dname_terminate
380 #define dname_status gdnsd_dname_status
381 #define dname_is_partial gdnsd_dname_is_partial
382 #define dname_trim gdnsd_dname_trim
383 #define dname_copy gdnsd_dname_copy
384 #define dname_dup gdnsd_dname_dup
385 #define dname_cmp gdnsd_dname_cmp
386 #define dname_isinzone gdnsd_dname_isinzone
387 #define dname_iswild gdnsd_dname_iswild
388 #define dname_hash gdnsd_dname_hash
389 
390 #endif // GDNSD_DNAME_H
391