1 /* Copyright © 2012 Brandon L Black <blblack@gmail.com>
2  *
3  * This file is part of gdnsd.
4  *
5  * gdnsd is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * gdnsd is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with gdnsd.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include <config.h>
21 #include "zsrc_rfc1035.h"
22 
23 #include "zscan_rfc1035.h"
24 #include "conf.h"
25 #include "ztree.h"
26 #include "main.h"
27 
28 #include <gdnsd/alloc.h>
29 #include <gdnsd/misc.h>
30 #include <gdnsd/log.h>
31 #include <gdnsd/paths.h>
32 
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <dirent.h>
38 #include <stdlib.h>
39 #include <time.h>
40 
41 // IFF gcfg->zones_strict_startup is true, this flag will be temporarily set
42 //   to true during the initial scan, then set back to false, making zonefile
43 //   parsing errors fatal for the initial scan.
44 static bool fail_fatally = false;
45 
46 // This is initially 0.0 for the first scan, but is later configured from
47 //   zones_rfc1035_quiesce for runtime operations.
48 static double full_quiesce = 0.0;
49 
50 #ifdef USE_INOTIFY
51 
52 #include <sys/inotify.h>
53 
54 // this doesn't appear in glibc headers until 2.13
55 #ifndef IN_EXCL_UNLINK
56 #define IN_EXCL_UNLINK 0x04000000
57 #endif
58 
59 // size of our read(2) buffer for the inotify fd.
60 // must be able to handle sizeof(struct inotify_event)
61 //  + the max len of a filename in the zones directory
62 // read(2) will return EINVAL if this ends up being too small...
63 static const unsigned inotify_bufsize = 4096;
64 
65 // The inotify mask for the zones dir watcher
66 #define INL_MASK ( IN_ONLYDIR | IN_EXCL_UNLINK \
67      | IN_MOVED_TO | IN_MOVED_FROM | IN_CREATE | IN_DELETE \
68      | IN_MODIFY | IN_CLOSE_WRITE | IN_MOVE_SELF | IN_DELETE_SELF )
69 
70 // runtime inotify bits
71 typedef struct {
72     int main_fd;
73     int watch_desc;
74     ev_io* io_watcher;
75     ev_timer* fallback_watcher;
76 } inot_data;
77 static inot_data inot;
78 
79 #endif
80 
81 static char* rfc1035_dir = NULL;
82 
// POSIX states that inode+dev uniquely identifies a file on
//   a given system.  Therefore those + mtime should uniquely
//   identify a set of file contents for a given pathname over
//   time through ops like create/destroy/modify/rename/remount/etc...
// The special value of all members being zero indicates a
//   non-existent (e.g. deleted) file.  The same value is used
//   to indicate an invalid zonefile (e.g. the pathname is
//   a subdirectory, a socket, a softlink, etc...)
// The file size is added to the set as well for additional insurance
//   against the possibility of missed updates due to timestamp
//   accuracy issues.
typedef struct {
    uint64_t m; // extended mtime as returned by get_extended_mtime(); see ztree.h
    ino_t i;    // st.st_ino
    dev_t d;    // st.st_dev
    off_t s;    // st.st_size
} statcmp_t;
100 
// Returns true iff both stat snapshots agree on every member
//   (mtime, inode, device and size).
static bool statcmp_eq(statcmp_t* a, statcmp_t* b) {
    const bool same_mtime = (a->m == b->m);
    const bool same_inode = (a->i == b->i);
    const bool same_dev   = (a->d == b->d);
    const bool same_size  = (a->s == b->s);
    return same_mtime && same_inode && same_dev && same_size;
}
109 
110 // check for 0/0/0/0, indicating deleted or invalid (e.g. socket)
statcmp_nx(statcmp_t * a)111 static bool statcmp_nx(statcmp_t* a) {
112     return !(a->m | (uint64_t)a->i | (uint64_t)a->d | (uint64_t)a->s);
113 }
114 
// represents a zone file
// a freshly-allocated zfile_t has pending_event == NULL, in which case
//   the contents of "pending" are irrelevant.
// when change detection sees a statcmp diff between "loaded" and the
//   filesystem, it's going to set pending_event and save the fs info
//   to "pending" and start a quiescence timer
// when "pending" and the raw FS have stabilized, then the zone is actually
//   reloaded, "loaded" is set to the "pending" values, and pending_event
//   is freed and reset to NULL.
typedef struct {
    unsigned hash;       // hash of "fn"
    unsigned generation; // generation counter for deletion checks
    char* full_fn;       // "etc/zones/example.com"
    const char* fn;      // ptr to "example.com" in above storage
    zone_t* zone;        // zone data
    ev_timer* pending_event; // pending quiescence timer, NULL if no pending change
    statcmp_t pending;   // lstat() info on pending update
    statcmp_t loaded;    // lstat() info on loaded data
} zfile_t;
133 
// hash of all extant zonefiles
// Open-addressed table of zfile_t pointers.  A slot is either NULL
//   (never used), ZFILE_DELETED (tombstone) or a real zfile_t*.
static zfile_t** zfhash = NULL;
static unsigned zfhash_count = 0; // number of real entries (tombstones are not counted)
static unsigned zfhash_alloc = 0; // number of slots; starts at 16 and doubles, so a power of two
static unsigned generation = 0; // deletion checks use this...

// ZFILE_DELETED is a deleted hash entry that can be reclaimed
static void* const ZFILE_DELETED = (void*)(uintptr_t)0x1;
// SLOT_REAL means not NULL and also not a reclaimable deleted entry
// NOTE(review): the macro argument is not parenthesized, so only pass
//   simple expressions (identifiers, array elements) to it.
#define SLOT_REAL(x) ((uintptr_t)x & ~((uintptr_t)0x1))
144 
145 F_NONNULL
zf_delete(zfile_t * zf)146 static void zf_delete(zfile_t* zf) {
147     if(zf->zone)
148         zone_delete(zf->zone);
149     if(zf->full_fn)
150         free(zf->full_fn);
151     if(zf->pending_event)
152         free(zf->pending_event);
153     free(zf);
154 }
155 
156 F_NONNULL
statcmp_set(const char * full_fn,statcmp_t * out)157 static void statcmp_set(const char* full_fn, statcmp_t* out) {
158     struct stat st;
159     int lstat_rv = lstat(full_fn, &st);
160     if(likely(!lstat_rv && S_ISREG(st.st_mode))) {
161         out->m = get_extended_mtime(&st);
162         out->i = st.st_ino;
163         out->d = st.st_dev;
164         out->s = st.st_size;
165     }
166     else {
167         out->m = 0;
168         out->i = 0;
169         out->d = 0;
170         out->s = 0;
171     }
172 }
173 
174 // grow hash by doubling, while also
175 //   clearing out deletion placeholders
176 F_NONNULL
zfhash_grow(void)177 static void zfhash_grow(void) {
178     if(unlikely(!zfhash_alloc)) {
179         // initial call on empty hash
180         dmn_assert(!zfhash);
181         dmn_assert(!zfhash_count);
182         zfhash_alloc = 16;
183         zfhash = xcalloc(16, sizeof(*zfhash));
184         return;
185     }
186 
187     const unsigned new_alloc = zfhash_alloc << 1; // double
188     const unsigned new_hash_mask = new_alloc - 1;
189     zfile_t** new_hash = xcalloc(new_alloc, sizeof(*new_hash));
190 
191     for(unsigned i = 0; i < zfhash_alloc; i++) {
192         zfile_t* zf = zfhash[i];
193         if(SLOT_REAL(zf)) {
194             unsigned jmpby = 1;
195             unsigned slot = zf->hash & new_hash_mask;
196             while(new_hash[slot]) {
197                 slot += jmpby++;
198                 slot &= new_hash_mask;
199             }
200             new_hash[slot] = zf;
201         }
202     }
203 
204     free(zfhash);
205     zfhash = new_hash;
206     zfhash_alloc = new_alloc;
207 }
208 
209 // assumes this filename does not exist in hash already,
210 // called must use zfhash_find() first!
211 F_NONNULL
zfhash_add(zfile_t * zf)212 static void zfhash_add(zfile_t* zf) {
213     dmn_assert(zf->fn);
214     dmn_assert(zf->full_fn);
215 
216     // Max 25% load
217     if(unlikely(zfhash_count >= (zfhash_alloc >> 2)))
218         zfhash_grow();
219 
220     const unsigned hash_mask = zfhash_alloc - 1;
221     unsigned slot = zf->hash & hash_mask;
222     unsigned jmpby = 1;
223     while(SLOT_REAL(zfhash[slot])) {
224         slot += jmpby++;
225         slot &= hash_mask;
226     }
227     zfhash[slot] = zf;
228     zfhash_count++;
229 }
230 
231 F_NONNULL
zfhash_del(zfile_t * zf)232 static void zfhash_del(zfile_t* zf) {
233     dmn_assert(zf->fn);
234     dmn_assert(zf->full_fn);
235 
236     const unsigned hash_mask = zfhash_alloc - 1;
237     unsigned slot = zf->hash & hash_mask;
238     unsigned jmpby = 1;
239     while(zfhash[slot] != zf) {
240         slot += jmpby++;
241         slot &= hash_mask;
242     }
243 
244     zfhash[slot] = ZFILE_DELETED;
245     zfhash_count--;
246 
247     zf_delete(zf);
248 }
249 
250 F_NONNULL F_PURE
zfhash_find(const char * zfn)251 static zfile_t* zfhash_find(const char* zfn) {
252     if(likely(zfhash_alloc)) {
253         const unsigned zfn_hash = gdnsd_lookup2((const uint8_t*)zfn, strlen(zfn));
254         const unsigned hash_mask = zfhash_alloc - 1;
255         unsigned slot = zfn_hash & hash_mask;
256         unsigned jmpby = 1;
257         zfile_t* cand;
258         while((cand = zfhash[slot])) {
259             if(SLOT_REAL(cand) && cand->hash == zfn_hash && !strcmp(cand->fn, zfn))
260                 return cand;
261             slot += jmpby++;
262             slot &= hash_mask;
263         }
264     }
265 
266     return NULL;
267 }
268 
269 F_NONNULL
make_zone_name(const char * zf_name)270 static char* make_zone_name(const char* zf_name) {
271     unsigned zf_name_len = strlen(zf_name);
272     char* out = NULL;
273 
274     if(zf_name_len > 1004) {
275         log_err("rfc1035: Zone file name '%s' is illegal", zf_name);
276     }
277     else {
278         // check for root zone...
279         if(unlikely(zf_name_len == 9 && !strncmp(zf_name, "ROOT_ZONE", 9))) {
280             out = xmalloc(2);
281             out[0] = '.';
282             out[1] = 0;
283         }
284         else {
285             // convert all '@' to '/' for RFC2137 reverse delegation zones
286             out = xmalloc(zf_name_len + 1);
287             for(unsigned i = 0; i <= zf_name_len; i++) {
288                 if(unlikely(zf_name[i] == '@'))
289                     out[i] = '/';
290                 else
291                     out[i] = zf_name[i];
292             }
293         }
294     }
295 
296     return out;
297 }
298 
299 F_NONNULL
zone_from_zf(zfile_t * zf,bool * retry_me)300 static zone_t* zone_from_zf(zfile_t* zf, bool* retry_me) {
301     dmn_assert(!*retry_me);
302 
303     char* name = make_zone_name(zf->fn);
304     if(!name)
305         return NULL;
306 
307     char* src = gdnsd_str_combine("rfc1035:", zf->fn, NULL);
308     zone_t* z = zone_new(name, src);
309     free(src);
310     free(name);
311 
312     if(z) {
313         zscan_rfc1035_status_t zrv = zscan_rfc1035(z, zf->full_fn);
314         if(zrv != ZSCAN_RFC1035_SUCCESS || zone_finalize(z)) {
315             if(zrv == ZSCAN_RFC1035_FAILED_FILE)
316                 *retry_me = true;
317             zone_delete(z);
318             z = NULL;
319         }
320     }
321 
322     return z;
323 }
324 
// Timer callback for a zonefile's quiescence timer (timer->data is the
//   owning zfile_t, and timer is that zfile_t's pending_event).
// Re-lstat()s the file and compares against the "pending" snapshot:
//   - changed again: store the new snapshot and re-arm the timer
//   - stable and non-existent: drop the zone from the runtime (if one
//     was loaded) and remove the zfile_t from the hash
//   - stable and existent: parse the file, then either install the new
//     zone, re-arm the timer (file changed mid-parse, or a file-level
//     failure), or give up until the next notification (content failure)
F_NONNULL
static void quiesce_check(struct ev_loop* loop, ev_timer* timer, int revents V_UNUSED) {
    dmn_assert(revents == EV_TIMER);

    zfile_t* zf = timer->data;
    dmn_assert(zf->pending_event == timer);

    // check lstat() again for a new change during quiesce period
    statcmp_t newstat;
    statcmp_set(zf->full_fn, &newstat);

    // if it stayed stable...
    if(statcmp_eq(&newstat, &zf->pending)) {
        // stable delete
        if(statcmp_nx(&newstat)) {
            if(zf->zone) {
                log_debug("rfc1035: zonefile '%s' quiesce timer: acting on deletion, removing zone data from runtime...", zf->fn);
                dmn_assert(!statcmp_nx(&zf->loaded));
                ztree_update(zf->zone, NULL);
            }
            else {
                log_debug("rfc1035: zonefile '%s' quiesce timer: processing delete without runtime effects (add->remove before quiescence ended?)", zf->fn);
            }
            // this frees zf, including the timer we were called with;
            //   neither may be touched after this point
            zfhash_del(zf);
        }
        // quiesced state isn't deleted, we need to load data
        else {
            bool retry_me = false;
            zone_t* z = zone_from_zf(zf, &retry_me);
            // re-check that file didn't change while loading
            statcmp_t post_check;
            statcmp_set(zf->full_fn, &post_check);
            if(!statcmp_eq(&zf->pending, &post_check)) {
                log_debug("rfc1035: zonefile '%s' quiesce timer: lstat() changed during zonefile parsing, restarting timer for %.3g seconds...", zf->fn, full_quiesce);
                // whatever was parsed is discarded; wait for the new state to settle
                if(z)
                     zone_delete(z);
                memcpy(&zf->pending, &post_check, sizeof(zf->pending));
                ev_timer_set(timer, full_quiesce, 0.);
                ev_timer_start(loop, timer);
            }
            else {
                if(z) {
                    log_debug("rfc1035: zonefile '%s' quiesce timer: new zone data being added/updated for runtime...", zf->fn);
                    // "pending" becomes "loaded", and the new zone replaces the old one (if any)
                    memcpy(&zf->loaded, &zf->pending, sizeof(zf->loaded));
                    z->mtime = zf->loaded.m;
                    ztree_update(zf->zone, z);
                    if(zf->zone)
                        zone_delete(zf->zone);
                    zf->zone = z;
                    // no change pending anymore
                    free(zf->pending_event);
                    zf->pending_event = NULL;
                }
                else {
                    // fail_fatally is only set during the initial scan (strict startup)
                    if(fail_fatally)
                        log_fatal("rfc1035: Cannot load zonefile '%s', failing", zf->fn);
                    if(retry_me) {
                        log_debug("rfc1035: zonefile '%s' quiesce timer: zone loading failed due to file-level issues (permissions? locks?), will retry in %.3g seconds...", zf->fn, full_quiesce);
                        ev_timer_set(timer, full_quiesce, 0.);
                        ev_timer_start(loop, timer);
                    }
                    else {
                        log_debug("rfc1035: zonefile '%s' quiesce timer: zone parsing failed due to content issues, awaiting further fresh FS notification before trying again...", zf->fn);
                        // "loaded" is left untouched here, only the pending timer is dropped
                        free(zf->pending_event);
                        zf->pending_event = NULL;
                    }
                }
            }
        }
    }
    else {
        log_debug("rfc1035: Change detected for already-pending zonefile '%s' via quiesce_check(), delaying %.3g secs for further changes...", zf->fn, full_quiesce);
        memcpy(&zf->pending, &newstat, sizeof(zf->pending));
        ev_timer_set(timer, full_quiesce, 0.);
        ev_timer_start(loop, timer);
    }
}
401 
// Examines one filename within the zones directory and, if warranted,
//   starts (or restarts) the quiescence timer that eventually leads to
//   quiesce_check() loading, reloading or deleting the zone.
// zfn: filename relative to the zones directory
// loop: event loop the quiescence timer runs on
// initial_quiesce_time: delay used when a new timer is created; an
//   already-pending timer is always restarted with full_quiesce instead
// verify_statcmp:
//   false -> inotify sort of case: we have a positive indication of change, and
//     should definitely consider an existing file to have changed even if its
//     stat() data wasn't modified.
//   true -> scan_dir sort of case: no positive indication yet, must filter existing
//     files based on whether stat() data changed before taking any action.
F_NONNULL
static void process_zonefile(const char* zfn, struct ev_loop* loop, const double initial_quiesce_time, const bool verify_statcmp) {
    // full_fn is newly allocated; fn is used as the bare-filename key,
    //   presumably pointing into full_fn's storage (see zfile_t)
    const char* fn;
    char* full_fn = gdnsd_str_combine(rfc1035_dir, zfn, &fn);

    statcmp_t newstat;
    statcmp_set(full_fn, &newstat);
    zfile_t* current_zft = zfhash_find(fn);

    if(!statcmp_nx(&newstat) && !current_zft) {
        // file was found, but previously unknown to the zfhash
        // (the new entry takes ownership of full_fn)
        current_zft = xcalloc(1, sizeof(*current_zft));
        current_zft->full_fn = full_fn;
        current_zft->fn = fn;
        current_zft->hash = gdnsd_lookup2((const uint8_t*)fn, strlen(fn));
        zfhash_add(current_zft);
    }
    else {
        // else we don't need this new copy of the full fn,
        //   it's already there in the current_zft
        // ("fn" must not be used past this point)
        dmn_assert(!current_zft || !strcmp(current_zft->full_fn, full_fn));
        free(full_fn);
    }

    // we take no action if both the file in question did
    //   not exist in the zfhash and also does not currently
    //   exist on-disk.
    if(!current_zft)
        return;

    // setting current_zft->generation for every file picked up
    //   by scandir() is what keeps check_missing() from thinking
    //   this zfile_t*'s target was deleted from the filesystem.
    current_zft->generation = generation;
    if(current_zft->pending_event) { // we already had a pending change
        if(!verify_statcmp || !statcmp_eq(&newstat, &current_zft->pending)) { // but it changed again!
            log_debug("rfc1035: Change detected for already-pending zonefile '%s' via process_zonefile(), delaying %.3g secs for further changes...", current_zft->fn, full_quiesce);
            memcpy(&current_zft->pending, &newstat, sizeof(current_zft->pending));
            // restart the existing timer from scratch
            ev_timer_stop(loop, current_zft->pending_event);
            ev_timer_set(current_zft->pending_event, full_quiesce, 0.);
            ev_timer_start(loop, current_zft->pending_event);
        }
    }
    else if(!verify_statcmp || !statcmp_eq(&newstat, &current_zft->loaded)) { // notification of change with no event currently pending
        if(statcmp_nx(&current_zft->loaded))
            log_debug("rfc1035: New zonefile '%s', delaying %.3g secs for further changes...", current_zft->fn, initial_quiesce_time);
        else
            log_debug("rfc1035: New change detected for stable zonefile '%s', delaying %.3g secs for further changes...", current_zft->fn, initial_quiesce_time);
        memcpy(&current_zft->pending, &newstat, sizeof(current_zft->pending));
        // one-shot timer; quiesce_check() re-arms or frees it as needed
        current_zft->pending_event = xmalloc(sizeof(*current_zft->pending_event));
        ev_timer_init(current_zft->pending_event, quiesce_check, initial_quiesce_time, 0.);
        current_zft->pending_event->data = current_zft;
        ev_timer_start(loop, current_zft->pending_event);
    }
}
463 
unload_zones(void)464 static void unload_zones(void) {
465     for(unsigned i = 0; i < zfhash_alloc; i++) {
466         zfile_t* zf = zfhash[i];
467         if(SLOT_REAL(zf)) {
468             if(zf->zone)
469                 ztree_update(zf->zone, NULL);
470             zf_delete(zf);
471         }
472     }
473 }
474 
scan_dir(struct ev_loop * loop,double initial_quiesce_time)475 static void scan_dir(struct ev_loop* loop, double initial_quiesce_time) {
476     DIR* zdhandle = opendir(rfc1035_dir);
477     if(!zdhandle) {
478         log_err("rfc1035: Cannot open zones directory '%s': %s", rfc1035_dir, dmn_logf_strerror(errno));
479     }
480     else {
481         struct dirent* result = NULL;
482         do {
483             errno = 0;
484             // cppcheck-suppress readdirCalled
485             result = readdir(zdhandle);
486             if(likely(result)) {
487                 if(result->d_name[0] != '.')
488                     process_zonefile(result->d_name, loop, initial_quiesce_time, true);
489             }
490             else if(errno) {
491                 log_fatal("rfc1035: readdir(%s) failed: %s", rfc1035_dir, dmn_logf_errno());
492             }
493         } while(result);
494         if(closedir(zdhandle))
495             log_err("rfc1035: closedir(%s) failed: %s", rfc1035_dir, dmn_logf_strerror(errno));
496     }
497 }
498 
499 // This is the complement to the periodic scandir(), which
500 //  detects deletion events.  Its job is to run immediately
501 //  after the scandir loop and find zfhash entries that lack
502 //  the current "generation" counter value, indicating they
503 //  were not seen during scandir(), and feed them back into
504 //  process_zonefile() to be picked up as deletions.
505 F_NONNULL
check_missing(struct ev_loop * loop)506 static void check_missing(struct ev_loop* loop) {
507     dmn_assert(generation);
508 
509     for(unsigned i = 0; i < zfhash_alloc; i++) {
510         zfile_t* zf = zfhash[i];
511         if(SLOT_REAL(zf)) {
512             if(zf->generation != generation) {
513                 log_debug("rfc1035: check_missing() found deletion of zonefile '%s', triggering process_zonefile()", zf->fn);
514                 process_zonefile(zf->fn, loop, full_quiesce, true);
515             }
516         }
517     }
518 }
519 
520 F_NONNULL
do_scandir(struct ev_loop * loop)521 static void do_scandir(struct ev_loop* loop) {
522     generation++;
523     scan_dir(loop, full_quiesce);
524     check_missing(loop);
525 }
526 
527 F_NONNULL
periodic_scan(struct ev_loop * loop,ev_timer * rtimer V_UNUSED,int revents V_UNUSED)528 static void periodic_scan(struct ev_loop* loop, ev_timer* rtimer V_UNUSED, int revents V_UNUSED) {
529     dmn_assert(revents == EV_TIMER);
530     do_scandir(loop);
531 }
532 
533 // ev stuff
534 static ev_timer* reload_timer = NULL;
535 
536 #ifdef USE_INOTIFY
537 
// This is for event debugging only.
// Renders an inotify event mask as a '|'-separated list of flag
//   names (e.g. "IN_ISDIR|IN_CREATE") in a buffer obtained from
//   dmn_fmtbuf_alloc().  Unknown bits are ignored; an empty string
//   results when no known bit is set.
static const char* logf_inmask(uint32_t mask) {
    // flags in the order they are printed
    static const struct {
        uint32_t bit;
        const char* label;
    } known_flags[] = {
        { IN_ISDIR,         "IN_ISDIR" },
        { IN_IGNORED,       "IN_IGNORED" },
        { IN_Q_OVERFLOW,    "IN_Q_OVERFLOW" },
        { IN_UNMOUNT,       "IN_UNMOUNT" },
        { IN_ACCESS,        "IN_ACCESS" },
        { IN_ATTRIB,        "IN_ATTRIB" },
        { IN_CLOSE_WRITE,   "IN_CLOSE_WRITE" },
        { IN_CLOSE_NOWRITE, "IN_CLOSE_NOWRITE" },
        { IN_CREATE,        "IN_CREATE" },
        { IN_DELETE,        "IN_DELETE" },
        { IN_DELETE_SELF,   "IN_DELETE_SELF" },
        { IN_MODIFY,        "IN_MODIFY" },
        { IN_MOVE_SELF,     "IN_MOVE_SELF" },
        { IN_MOVED_FROM,    "IN_MOVED_FROM" },
        { IN_MOVED_TO,      "IN_MOVED_TO" },
        { IN_OPEN,          "IN_OPEN" },
    };

    char* output = dmn_fmtbuf_alloc(256);
    output[0] = 0;

    for(unsigned idx = 0; idx < sizeof(known_flags) / sizeof(known_flags[0]); idx++) {
        if(mask & known_flags[idx].bit) {
            // separator goes in front of every name but the first
            if(output[0])
                strcat(output, "|");
            strcat(output, known_flags[idx].label);
        }
    }

    return output;
}
570 
571 F_NONNULL
572 static void inot_reader(struct ev_loop* loop, ev_io* w, int revents);
573 
inotify_setup(const bool initial)574 static bool inotify_setup(const bool initial) {
575     bool rv = false; // success
576 
577     if(initial && !gdnsd_linux_min_version(2, 6, 36)) {
578         // note that catching ENOSYS below does not obviate this check.
579         // inotify_init1() may exist in older kernels, but we also need
580         // to ensure IN_EXCL_UNLINK compatibility, and that we're past
581         // some earlier implementations of inotify which had some bad bugs.
582         log_info("rfc1035: Insufficient kernel (<2.6.36) for inotify support");
583         rv = true; // failure
584     }
585     else {
586         inot.main_fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
587         if(inot.main_fd < 0) {
588             // initial ENOSYS is reported here as well for 2.6.36+ hosts that
589             //   don't implement the syscall for whatever architecture.
590             log_err("rfc1035: inotify_init1(IN_NONBLOCK) failed: %s", dmn_logf_errno());
591             rv = true; // failure
592         }
593         else {
594             inot.watch_desc = inotify_add_watch(inot.main_fd, rfc1035_dir, INL_MASK);
595             if(inot.watch_desc < 0) {
596                 log_err("rfc1035: inotify_add_watch(%s) failed: %s", rfc1035_dir, dmn_logf_errno());
597                 close(inot.main_fd);
598                 rv = true; // failure
599             }
600             else {
601                 ev_io_init(inot.io_watcher, inot_reader, inot.main_fd, EV_READ);
602             }
603         }
604     }
605 
606     return rv;
607 }
608 
609 // This only gets set to false if the first attempt to set up
610 //   inotify is successful.  When we have an initial failure,
611 //   which could be for lack of OS and/or FS support, we stick
612 //   to compatibility-mode directory scanning exclusively and
613 //   never re-attempt inotify operations.
614 // However, if the initial inotify setup succeeds, and then we
615 //   later have a runtime inotify failure, we merely fallback to
616 //   directory scanning temporarily until inotify can be cleanly
617 //   recovered without lost events.
618 static bool inotify_initial_failure = true;
619 
inotify_initial_setup(void)620 static void inotify_initial_setup(void) {
621     // Set up the actual inotify bits...
622     memset(&inot, 0, sizeof(inot));
623     inot.io_watcher = xmalloc(sizeof(*inot.io_watcher));
624     inot.fallback_watcher = xmalloc(sizeof(*inot.fallback_watcher));
625     inotify_initial_failure = inotify_setup(true);
626     if(inotify_initial_failure)
627         log_info("rfc1035: disabling inotify-based zonefile change detection on this host permanently (initial failure)");
628     else
629         log_info("rfc1035: will use inotify for zone change detection");
630 }
631 
632 F_NONNULL
initial_run(struct ev_loop * loop)633 static void initial_run(struct ev_loop* loop) {
634     if(!inotify_initial_failure) {
635         dmn_assert(inot.io_watcher);
636         ev_io_start(loop, inot.io_watcher);
637     }
638     else {
639         reload_timer = xcalloc(1, sizeof(*reload_timer));
640         ev_timer_init(reload_timer, periodic_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
641         ev_timer_start(loop, reload_timer);
642     }
643 }
644 
645 F_NONNULL
inotify_fallback_scan(struct ev_loop * loop,ev_timer * rtimer,int revents)646 static void inotify_fallback_scan(struct ev_loop* loop, ev_timer* rtimer, int revents) {
647     dmn_assert(revents == EV_TIMER);
648     dmn_assert(!inotify_initial_failure);
649 
650     bool setup_failure = inotify_setup(false);
651     periodic_scan(loop, rtimer, revents);
652     if(!setup_failure) {
653         log_warn("rfc1035: inotify recovered");
654         ev_timer_stop(loop, rtimer);
655         ev_io_start(loop, inot.io_watcher);
656     }
657 }
658 
659 F_NONNULL
handle_inotify_failure(struct ev_loop * loop)660 static void handle_inotify_failure(struct ev_loop* loop) {
661     dmn_assert(!inotify_initial_failure);
662 
663     log_warn("rfc1035: inotify failed, using fallback scandir() method until recovery");
664 
665     // clean up old watcher setup
666     ev_io_stop(loop, inot.io_watcher);
667     inotify_rm_watch(inot.main_fd, inot.watch_desc);
668     close(inot.main_fd);
669 
670     // insert periodic timer for fallback/retry scanning
671     ev_timer_init(inot.fallback_watcher, inotify_fallback_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
672     ev_timer_start(loop, inot.fallback_watcher);
673 }
674 
675 // retval: true -> halt inotify loop
676 // This will not perform correctly in all cases.  This code can easily be
677 //   tricked into attempting to load partially-written zonefiles if the
678 //   zonefile management tools do silly things like overwriting zonefiles in
679 //   place and/or moving open files around while they're being written to.
680 F_NONNULLX(1)
inot_process_event(struct ev_loop * loop,const char * fname,uint32_t emask)681 static bool inot_process_event(struct ev_loop* loop, const char* fname, uint32_t emask) {
682     dmn_assert(!inotify_initial_failure);
683 
684     bool rv = false;
685 
686     if(!fname) { // directory-level event for top-level zones dir
687         dmn_assert(emask & IN_ISDIR);
688         log_debug("rfc1035: inotified for directory event: %s", logf_inmask(emask));
689         if(emask & (IN_Q_OVERFLOW|IN_IGNORED|IN_UNMOUNT|IN_DELETE_SELF|IN_MOVE_SELF)) {
690             log_err("rfc1035: inotify watcher stopping due to directory-level event %s", logf_inmask(emask));
691             handle_inotify_failure(loop);
692             rv = true;
693         }
694         // Other directory-level events (e.g. IN_MODIFY) are ignored.
695         // We'll see their fallout as e.g. IN_MOVED_X operations on the contained filenames.
696     }
697     else if(fname[0] != '.' && !(emask & IN_ISDIR)) { // skip dotfiles and subdirs
698         log_debug("rfc1035: inotified for zonefile: %s event: %s", fname, logf_inmask(emask));
699         // Of the events we listen for, only IN_MODIFY is a clear indicator of
700         //   an ongoing in-place write and thus triggers a full_quiesce.
701         // IN_CLOSE_WRITE will also only be the result of an in-place write,
702         //   but in that case the writing is now already done.
703         // The only ambiguous case is IN_CREATE, which could be due to atomic
704         //   link(2) or due to open(,O_CREAT).  We assume the former and do
705         //   not fully quiesce since the latter isn't going to cause an
706         //   incidental wipe of any current zone data regardless.
707         const double q_timer = (emask & IN_MODIFY) ? full_quiesce : 0.0;
708         process_zonefile(fname, loop, q_timer, false);
709     }
710 
711     return rv;
712 }
713 
// I/O callback for the inotify fd: drains all available events and
//   hands each one to inot_process_event().
// Stops early when inot_process_event() asks for it.  Read errors
//   other than would-block, EOF and malformed event data all trigger
//   handle_inotify_failure().
static void inot_reader(struct ev_loop* loop, ev_io* w, int revents V_UNUSED) {
    dmn_assert(revents == EV_READ);
    dmn_assert(!inotify_initial_failure);

    uint8_t evtbuf[inotify_bufsize];

    while(1) {
        ssize_t read_rv = read(w->fd, evtbuf, inotify_bufsize);
        if(read_rv < 1) {
            // would-block simply means the queue is drained for now
            if(!read_rv || !ERRNO_WOULDBLOCK) {
                if(read_rv)
                    log_err("rfc1035: read() of inotify file descriptor failed: %s", dmn_logf_errno());
                else
                    log_err("rfc1035: Got EOF on inotify file descriptor!");
                handle_inotify_failure(loop);
            }
            return;
        }
        const size_t bytes = (size_t)read_rv;

        size_t offset = 0;
        while(offset < bytes) {
            if((bytes - offset) < sizeof(struct inotify_event)) {
                log_err("rfc1035: inotify sent truncated/garbage data");
                handle_inotify_failure(loop);
                return;
            }
            // The byte buffer has no alignment guarantee suitable for
            //   struct inotify_event, so the fixed header is copied out
            //   rather than accessed in place through a casted pointer.
            struct inotify_event evt_hdr;
            memcpy(&evt_hdr, &evtbuf[offset], sizeof(evt_hdr));
            offset += sizeof(evt_hdr);
            if((bytes - offset) < evt_hdr.len) {
                log_err("rfc1035: inotify sent truncated/garbage data");
                handle_inotify_failure(loop);
                return;
            }
            // the (optional) name bytes directly follow the fixed header
            const char* evt_name = evt_hdr.len ? (const char*)&evtbuf[offset] : NULL;
            offset += evt_hdr.len;
            if(inot_process_event(loop, evt_name, evt_hdr.mask))
                return;
        }
    }
}
754 
755 #else // no compile-time support for inotify
756 
// Without compile-time inotify support there is nothing to set up.
static void inotify_initial_setup(void) { }
758 
759 F_NONNULL
initial_run(struct ev_loop * loop)760 static void initial_run(struct ev_loop* loop) {
761     reload_timer = xcalloc(1, sizeof(*reload_timer));
762     ev_timer_init(reload_timer, periodic_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
763     ev_timer_start(loop, reload_timer);
764 }
765 
766 #endif // not-inotify
767 
768 /*************************/
769 /*** Public interfaces ***/
770 /*************************/
771 
// Performs the initial, synchronous load of all zonefiles.
// Resolves the zones directory, optionally prepares inotify, then scans
//   the directory with a zero quiescence time on a temporary event loop
//   and runs that loop until all the resulting timers have been handled.
// With zones_strict_startup set, any zonefile load failure during this
//   scan is fatal.  The runtime quiescence time is configured at the end.
void zsrc_rfc1035_load_zones(const bool check_only V_UNUSED) {
    dmn_assert(!rfc1035_dir);

    rfc1035_dir = gdnsd_resolve_path_cfg("zones/", NULL);

    if(gcfg->zones_rfc1035_auto)
        inotify_initial_setup(); // no-op if no compile-time support

    // load failures are fatal only for the duration of the initial scan
    if(gcfg->zones_strict_startup)
        fail_fatally = true;

    struct ev_loop* boot_loop = ev_loop_new(EVFLAG_AUTO);
    scan_dir(boot_loop, 0.0);
    ev_run(boot_loop, 0);
    ev_loop_destroy(boot_loop);
    free(reload_timer);

    fail_fatally = false;
    gdnsd_atexit_debug(unload_zones);

    log_info("rfc1035: Loaded %u zonefiles from '%s'", zfhash_count, rfc1035_dir);

    // The non-zero quiesce timer is only configured *after* the initial
    //   load above has completed.  Previously the code tried to avoid
    //   races during startup, but really there is no reliable way to avoid
    //   all possible forms of startup race, and in any case inotify or the
    //   next manual directory scan would pick up the change shortly
    //   afterwards and correct it, so there's no point delaying startup
    //   for the corner-case.
    const bool testsuite_nodelay = (getenv("GDNSD_TESTSUITE_NODELAY") != NULL);
    if(!testsuite_nodelay) // testsuite can bypass restrictions!
        full_quiesce = gcfg->zones_rfc1035_quiesce;
    log_info("rfc1035: quiescence time is %.3g seconds", full_quiesce);
}
801 
802 // we track the loop here for the async sigusr1 request
803 static struct ev_loop* zones_loop = NULL;
804 static ev_async* sigusr1_waker = NULL;
805 
806 // called within our thread/loop to take sigusr1 action
807 F_NONNULL
sigusr1_cb(struct ev_loop * loop,ev_async * w V_UNUSED,int revents V_UNUSED)808 static void sigusr1_cb(struct ev_loop* loop, ev_async* w V_UNUSED, int revents V_UNUSED) {
809     log_info("rfc1035: received SIGUSR1 notification, scanning for changes...");
810     do_scandir(loop);
811 }
812 
813 // called from main thread to feed ev_async
zsrc_rfc1035_sigusr1(void)814 void zsrc_rfc1035_sigusr1(void) {
815     dmn_assert(zones_loop); dmn_assert(sigusr1_waker);
816     ev_async_send(zones_loop, sigusr1_waker);
817 }
818 
zsrc_rfc1035_runtime_init(struct ev_loop * loop)819 void zsrc_rfc1035_runtime_init(struct ev_loop* loop) {
820     zones_loop = loop;
821     sigusr1_waker = xmalloc(sizeof(*sigusr1_waker));
822     ev_async_init(sigusr1_waker, sigusr1_cb);
823     ev_async_start(loop, sigusr1_waker);
824 
825     if(gcfg->zones_rfc1035_auto)
826         initial_run(zones_loop);
827 }
828