1 /* Copyright © 2012 Brandon L Black <blblack@gmail.com>
2 *
3 * This file is part of gdnsd.
4 *
5 * gdnsd is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * gdnsd is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with gdnsd. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include <config.h>
21 #include "zsrc_rfc1035.h"
22
23 #include "zscan_rfc1035.h"
24 #include "conf.h"
25 #include "ztree.h"
26 #include "main.h"
27
28 #include <gdnsd/alloc.h>
29 #include <gdnsd/misc.h>
30 #include <gdnsd/log.h>
31 #include <gdnsd/paths.h>
32
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <dirent.h>
38 #include <stdlib.h>
39 #include <time.h>
40
// Raised to true only while the initial zone scan runs, and only when
// gcfg->zones_strict_startup is set.  While it is true, a zonefile that
// cannot be loaded is a fatal error; it is put back to false afterwards.
static bool fail_fatally = false;

// Quiescence delay, in seconds, used for runtime change handling.  It is
// 0.0 for the first scan and is later taken from zones_rfc1035_quiesce.
static double full_quiesce = 0.0;
49
#ifdef USE_INOTIFY

#include <sys/inotify.h>

// glibc headers before 2.13 do not define this flag
#ifndef IN_EXCL_UNLINK
#define IN_EXCL_UNLINK 0x04000000
#endif

// Size, in bytes, of the buffer passed to read(2) on the inotify fd.
// It has to hold sizeof(struct inotify_event) plus the longest filename
// that can appear in the zones directory; read(2) fails with EINVAL when
// the buffer is too small.
static const unsigned inotify_bufsize = 4096;

// Event mask used for the watch on the zones directory
#define INL_MASK ( \
      IN_ONLYDIR     | IN_EXCL_UNLINK \
    | IN_CREATE      | IN_DELETE \
    | IN_MOVED_FROM  | IN_MOVED_TO \
    | IN_MODIFY      | IN_CLOSE_WRITE \
    | IN_DELETE_SELF | IN_MOVE_SELF )

// Runtime inotify state
typedef struct {
    int main_fd;                // fd returned by inotify_init1()
    int watch_desc;             // watch descriptor from inotify_add_watch()
    ev_io* io_watcher;          // read watcher on main_fd
    ev_timer* fallback_watcher; // periodic timer used after a runtime inotify failure
} inot_data;
static inot_data inot;

#endif
80
81 static char* rfc1035_dir = NULL;
82
83 // POSIX states that inode+dev uniquely identifies a file on
84 // a given system. Therefore those + mtime should uniquely
85 // identify a set of file contents for a given pathname over
86 // time through ops like create/destroy/modify/rename/remount/etc...
87 // The special value of all members being zero indicates a
88 // non-existent (e.g. deleted) file. The same value is used
89 // to indicate an invalid zonefile (e.g. the pathname is
90 // a subdirectory, a socket, a softlink, etc...)
91 // The file size is added to the set as well for additional insurance
92 // against the possibility of missed updates due to timestamp
93 // accuracy issues.
94 typedef struct {
95 uint64_t m; // see ztree.h
96 ino_t i; // st.st_inode
97 dev_t d; // st.st_dev
98 off_t s; // st.st_size
99 } statcmp_t;
100
// Returns true iff all four members (mtime, inode, device, size) of *a
// are equal to the corresponding members of *b.
static bool statcmp_eq(statcmp_t* a, statcmp_t* b) {
    return a->m == b->m
        && a->i == b->i
        && a->d == b->d
        && a->s == b->s;
}
109
110 // check for 0/0/0/0, indicating deleted or invalid (e.g. socket)
statcmp_nx(statcmp_t * a)111 static bool statcmp_nx(statcmp_t* a) {
112 return !(a->m | (uint64_t)a->i | (uint64_t)a->d | (uint64_t)a->s);
113 }
114
115 // represents a zone file
116 // on initial load, pending_event is NULL, and thus "pending" is irrelevant.
117 // when change detection sees a statcmp diff between "loaded" and the
118 // filesystem, it's going to set pending_event and save the fs info
119 // to "pending" and start a quiescence timer
120 // when "pending" and the raw FS have stabilized, then the zone is actually
121 // reloaded and "loaded" is set to "pending" values and the update_pending
122 // flag is cleared.
123 typedef struct {
124 unsigned hash; // hash of "fn"
125 unsigned generation; // generation counter for deletion checks
126 char* full_fn; // "etc/zones/example.com"
127 const char* fn; // ptr to "example.com" in above storage
128 zone_t* zone; // zone data
129 ev_timer* pending_event; // pending quiescence timer, NULL if no pending change
130 statcmp_t pending; // lstat() info on pending update
131 statcmp_t loaded; // lstat() info on loaded data
132 } zfile_t;
133
134 // hash of all extant zonefiles
135 static zfile_t** zfhash = NULL;
136 static unsigned zfhash_count = 0;
137 static unsigned zfhash_alloc = 0;
138 static unsigned generation = 0; // deletion checks use this...
139
// ZFILE_DELETED is a tombstone stored in a hash slot whose entry was
// deleted; such a slot can be reclaimed by a later insertion.
static void* const ZFILE_DELETED = (void*)(uintptr_t)0x1;
// SLOT_REAL(x) is non-zero iff x is a real entry, i.e. neither NULL nor
// the reclaimable ZFILE_DELETED tombstone.  The argument is parenthesized
// so that any pointer expression (not only a plain identifier or
// subscript) is converted as a whole before masking.
#define SLOT_REAL(x) ((uintptr_t)(x) & ~((uintptr_t)0x1))
144
145 F_NONNULL
zf_delete(zfile_t * zf)146 static void zf_delete(zfile_t* zf) {
147 if(zf->zone)
148 zone_delete(zf->zone);
149 if(zf->full_fn)
150 free(zf->full_fn);
151 if(zf->pending_event)
152 free(zf->pending_event);
153 free(zf);
154 }
155
156 F_NONNULL
statcmp_set(const char * full_fn,statcmp_t * out)157 static void statcmp_set(const char* full_fn, statcmp_t* out) {
158 struct stat st;
159 int lstat_rv = lstat(full_fn, &st);
160 if(likely(!lstat_rv && S_ISREG(st.st_mode))) {
161 out->m = get_extended_mtime(&st);
162 out->i = st.st_ino;
163 out->d = st.st_dev;
164 out->s = st.st_size;
165 }
166 else {
167 out->m = 0;
168 out->i = 0;
169 out->d = 0;
170 out->s = 0;
171 }
172 }
173
174 // grow hash by doubling, while also
175 // clearing out deletion placeholders
176 F_NONNULL
zfhash_grow(void)177 static void zfhash_grow(void) {
178 if(unlikely(!zfhash_alloc)) {
179 // initial call on empty hash
180 dmn_assert(!zfhash);
181 dmn_assert(!zfhash_count);
182 zfhash_alloc = 16;
183 zfhash = xcalloc(16, sizeof(*zfhash));
184 return;
185 }
186
187 const unsigned new_alloc = zfhash_alloc << 1; // double
188 const unsigned new_hash_mask = new_alloc - 1;
189 zfile_t** new_hash = xcalloc(new_alloc, sizeof(*new_hash));
190
191 for(unsigned i = 0; i < zfhash_alloc; i++) {
192 zfile_t* zf = zfhash[i];
193 if(SLOT_REAL(zf)) {
194 unsigned jmpby = 1;
195 unsigned slot = zf->hash & new_hash_mask;
196 while(new_hash[slot]) {
197 slot += jmpby++;
198 slot &= new_hash_mask;
199 }
200 new_hash[slot] = zf;
201 }
202 }
203
204 free(zfhash);
205 zfhash = new_hash;
206 zfhash_alloc = new_alloc;
207 }
208
209 // assumes this filename does not exist in hash already,
210 // called must use zfhash_find() first!
211 F_NONNULL
zfhash_add(zfile_t * zf)212 static void zfhash_add(zfile_t* zf) {
213 dmn_assert(zf->fn);
214 dmn_assert(zf->full_fn);
215
216 // Max 25% load
217 if(unlikely(zfhash_count >= (zfhash_alloc >> 2)))
218 zfhash_grow();
219
220 const unsigned hash_mask = zfhash_alloc - 1;
221 unsigned slot = zf->hash & hash_mask;
222 unsigned jmpby = 1;
223 while(SLOT_REAL(zfhash[slot])) {
224 slot += jmpby++;
225 slot &= hash_mask;
226 }
227 zfhash[slot] = zf;
228 zfhash_count++;
229 }
230
231 F_NONNULL
zfhash_del(zfile_t * zf)232 static void zfhash_del(zfile_t* zf) {
233 dmn_assert(zf->fn);
234 dmn_assert(zf->full_fn);
235
236 const unsigned hash_mask = zfhash_alloc - 1;
237 unsigned slot = zf->hash & hash_mask;
238 unsigned jmpby = 1;
239 while(zfhash[slot] != zf) {
240 slot += jmpby++;
241 slot &= hash_mask;
242 }
243
244 zfhash[slot] = ZFILE_DELETED;
245 zfhash_count--;
246
247 zf_delete(zf);
248 }
249
250 F_NONNULL F_PURE
zfhash_find(const char * zfn)251 static zfile_t* zfhash_find(const char* zfn) {
252 if(likely(zfhash_alloc)) {
253 const unsigned zfn_hash = gdnsd_lookup2((const uint8_t*)zfn, strlen(zfn));
254 const unsigned hash_mask = zfhash_alloc - 1;
255 unsigned slot = zfn_hash & hash_mask;
256 unsigned jmpby = 1;
257 zfile_t* cand;
258 while((cand = zfhash[slot])) {
259 if(SLOT_REAL(cand) && cand->hash == zfn_hash && !strcmp(cand->fn, zfn))
260 return cand;
261 slot += jmpby++;
262 slot &= hash_mask;
263 }
264 }
265
266 return NULL;
267 }
268
269 F_NONNULL
make_zone_name(const char * zf_name)270 static char* make_zone_name(const char* zf_name) {
271 unsigned zf_name_len = strlen(zf_name);
272 char* out = NULL;
273
274 if(zf_name_len > 1004) {
275 log_err("rfc1035: Zone file name '%s' is illegal", zf_name);
276 }
277 else {
278 // check for root zone...
279 if(unlikely(zf_name_len == 9 && !strncmp(zf_name, "ROOT_ZONE", 9))) {
280 out = xmalloc(2);
281 out[0] = '.';
282 out[1] = 0;
283 }
284 else {
285 // convert all '@' to '/' for RFC2137 reverse delegation zones
286 out = xmalloc(zf_name_len + 1);
287 for(unsigned i = 0; i <= zf_name_len; i++) {
288 if(unlikely(zf_name[i] == '@'))
289 out[i] = '/';
290 else
291 out[i] = zf_name[i];
292 }
293 }
294 }
295
296 return out;
297 }
298
299 F_NONNULL
zone_from_zf(zfile_t * zf,bool * retry_me)300 static zone_t* zone_from_zf(zfile_t* zf, bool* retry_me) {
301 dmn_assert(!*retry_me);
302
303 char* name = make_zone_name(zf->fn);
304 if(!name)
305 return NULL;
306
307 char* src = gdnsd_str_combine("rfc1035:", zf->fn, NULL);
308 zone_t* z = zone_new(name, src);
309 free(src);
310 free(name);
311
312 if(z) {
313 zscan_rfc1035_status_t zrv = zscan_rfc1035(z, zf->full_fn);
314 if(zrv != ZSCAN_RFC1035_SUCCESS || zone_finalize(z)) {
315 if(zrv == ZSCAN_RFC1035_FAILED_FILE)
316 *retry_me = true;
317 zone_delete(z);
318 z = NULL;
319 }
320 }
321
322 return z;
323 }
324
325 F_NONNULL
quiesce_check(struct ev_loop * loop,ev_timer * timer,int revents V_UNUSED)326 static void quiesce_check(struct ev_loop* loop, ev_timer* timer, int revents V_UNUSED) {
327 dmn_assert(revents == EV_TIMER);
328
329 zfile_t* zf = timer->data;
330 dmn_assert(zf->pending_event == timer);
331
332 // check lstat() again for a new change during quiesce period
333 statcmp_t newstat;
334 statcmp_set(zf->full_fn, &newstat);
335
336 // if it stayed stable...
337 if(statcmp_eq(&newstat, &zf->pending)) {
338 // stable delete
339 if(statcmp_nx(&newstat)) {
340 if(zf->zone) {
341 log_debug("rfc1035: zonefile '%s' quiesce timer: acting on deletion, removing zone data from runtime...", zf->fn);
342 dmn_assert(!statcmp_nx(&zf->loaded));
343 ztree_update(zf->zone, NULL);
344 }
345 else {
346 log_debug("rfc1035: zonefile '%s' quiesce timer: processing delete without runtime effects (add->remove before quiescence ended?)", zf->fn);
347 }
348 zfhash_del(zf);
349 }
350 // quiesced state isn't deleted, we need to load data
351 else {
352 bool retry_me = false;
353 zone_t* z = zone_from_zf(zf, &retry_me);
354 // re-check that file didn't change while loading
355 statcmp_t post_check;
356 statcmp_set(zf->full_fn, &post_check);
357 if(!statcmp_eq(&zf->pending, &post_check)) {
358 log_debug("rfc1035: zonefile '%s' quiesce timer: lstat() changed during zonefile parsing, restarting timer for %.3g seconds...", zf->fn, full_quiesce);
359 if(z)
360 zone_delete(z);
361 memcpy(&zf->pending, &post_check, sizeof(zf->pending));
362 ev_timer_set(timer, full_quiesce, 0.);
363 ev_timer_start(loop, timer);
364 }
365 else {
366 if(z) {
367 log_debug("rfc1035: zonefile '%s' quiesce timer: new zone data being added/updated for runtime...", zf->fn);
368 memcpy(&zf->loaded, &zf->pending, sizeof(zf->loaded));
369 z->mtime = zf->loaded.m;
370 ztree_update(zf->zone, z);
371 if(zf->zone)
372 zone_delete(zf->zone);
373 zf->zone = z;
374 free(zf->pending_event);
375 zf->pending_event = NULL;
376 }
377 else {
378 if(fail_fatally)
379 log_fatal("rfc1035: Cannot load zonefile '%s', failing", zf->fn);
380 if(retry_me) {
381 log_debug("rfc1035: zonefile '%s' quiesce timer: zone loading failed due to file-level issues (permissions? locks?), will retry in %.3g seconds...", zf->fn, full_quiesce);
382 ev_timer_set(timer, full_quiesce, 0.);
383 ev_timer_start(loop, timer);
384 }
385 else {
386 log_debug("rfc1035: zonefile '%s' quiesce timer: zone parsing failed due to content issues, awaiting further fresh FS notification before trying again...", zf->fn);
387 free(zf->pending_event);
388 zf->pending_event = NULL;
389 }
390 }
391 }
392 }
393 }
394 else {
395 log_debug("rfc1035: Change detected for already-pending zonefile '%s' via quiesce_check(), delaying %.3g secs for further changes...", zf->fn, full_quiesce);
396 memcpy(&zf->pending, &newstat, sizeof(zf->pending));
397 ev_timer_set(timer, full_quiesce, 0.);
398 ev_timer_start(loop, timer);
399 }
400 }
401
402 // verify_statcmp:
403 // false -> inotify sort of case: we have a positive indication of change, and
404 // should definitely consider an existing file to have changed even if its
405 // stat() data wasn't modified.
406 // true -> scan_dir sort of case: no positive indication yet, must filter existing
407 // files based on whether stat() data changed before taking any action.
408 F_NONNULL
process_zonefile(const char * zfn,struct ev_loop * loop,const double initial_quiesce_time,const bool verify_statcmp)409 static void process_zonefile(const char* zfn, struct ev_loop* loop, const double initial_quiesce_time, const bool verify_statcmp) {
410 const char* fn;
411 char* full_fn = gdnsd_str_combine(rfc1035_dir, zfn, &fn);
412
413 statcmp_t newstat;
414 statcmp_set(full_fn, &newstat);
415 zfile_t* current_zft = zfhash_find(fn);
416
417 if(!statcmp_nx(&newstat) && !current_zft) {
418 // file was found, but previously unknown to the zfhash
419 current_zft = xcalloc(1, sizeof(*current_zft));
420 current_zft->full_fn = full_fn;
421 current_zft->fn = fn;
422 current_zft->hash = gdnsd_lookup2((const uint8_t*)fn, strlen(fn));
423 zfhash_add(current_zft);
424 }
425 else {
426 // else we don't need this new copy of the full fn,
427 // it's already there in the current_zft
428 dmn_assert(!current_zft || !strcmp(current_zft->full_fn, full_fn));
429 free(full_fn);
430 }
431
432 // we take no action if both the file in question did
433 // not exist in the zfhash and also does not currently
434 // exist on-disk.
435 if(!current_zft)
436 return;
437
438 // setting current_zft->generation for every file picked up
439 // by scandir() is what keeps check_missing() from thinking
440 // this zfile_t*'s target was deleted from the filesystem.
441 current_zft->generation = generation;
442 if(current_zft->pending_event) { // we already had a pending change
443 if(!verify_statcmp || !statcmp_eq(&newstat, ¤t_zft->pending)) { // but it changed again!
444 log_debug("rfc1035: Change detected for already-pending zonefile '%s' via process_zonefile(), delaying %.3g secs for further changes...", current_zft->fn, full_quiesce);
445 memcpy(¤t_zft->pending, &newstat, sizeof(current_zft->pending));
446 ev_timer_stop(loop, current_zft->pending_event);
447 ev_timer_set(current_zft->pending_event, full_quiesce, 0.);
448 ev_timer_start(loop, current_zft->pending_event);
449 }
450 }
451 else if(!verify_statcmp || !statcmp_eq(&newstat, ¤t_zft->loaded)) { // notification of change with no event currently pending
452 if(statcmp_nx(¤t_zft->loaded))
453 log_debug("rfc1035: New zonefile '%s', delaying %.3g secs for further changes...", current_zft->fn, initial_quiesce_time);
454 else
455 log_debug("rfc1035: New change detected for stable zonefile '%s', delaying %.3g secs for further changes...", current_zft->fn, initial_quiesce_time);
456 memcpy(¤t_zft->pending, &newstat, sizeof(current_zft->pending));
457 current_zft->pending_event = xmalloc(sizeof(*current_zft->pending_event));
458 ev_timer_init(current_zft->pending_event, quiesce_check, initial_quiesce_time, 0.);
459 current_zft->pending_event->data = current_zft;
460 ev_timer_start(loop, current_zft->pending_event);
461 }
462 }
463
unload_zones(void)464 static void unload_zones(void) {
465 for(unsigned i = 0; i < zfhash_alloc; i++) {
466 zfile_t* zf = zfhash[i];
467 if(SLOT_REAL(zf)) {
468 if(zf->zone)
469 ztree_update(zf->zone, NULL);
470 zf_delete(zf);
471 }
472 }
473 }
474
scan_dir(struct ev_loop * loop,double initial_quiesce_time)475 static void scan_dir(struct ev_loop* loop, double initial_quiesce_time) {
476 DIR* zdhandle = opendir(rfc1035_dir);
477 if(!zdhandle) {
478 log_err("rfc1035: Cannot open zones directory '%s': %s", rfc1035_dir, dmn_logf_strerror(errno));
479 }
480 else {
481 struct dirent* result = NULL;
482 do {
483 errno = 0;
484 // cppcheck-suppress readdirCalled
485 result = readdir(zdhandle);
486 if(likely(result)) {
487 if(result->d_name[0] != '.')
488 process_zonefile(result->d_name, loop, initial_quiesce_time, true);
489 }
490 else if(errno) {
491 log_fatal("rfc1035: readdir(%s) failed: %s", rfc1035_dir, dmn_logf_errno());
492 }
493 } while(result);
494 if(closedir(zdhandle))
495 log_err("rfc1035: closedir(%s) failed: %s", rfc1035_dir, dmn_logf_strerror(errno));
496 }
497 }
498
499 // This is the complement to the periodic scandir(), which
500 // detects deletion events. Its job is to run immediately
501 // after the scandir loop and find zfhash entries that lack
502 // the current "generation" counter value, indicating they
503 // were not seen during scandir(), and feed them back into
504 // process_zonefile() to be picked up as deletions.
505 F_NONNULL
check_missing(struct ev_loop * loop)506 static void check_missing(struct ev_loop* loop) {
507 dmn_assert(generation);
508
509 for(unsigned i = 0; i < zfhash_alloc; i++) {
510 zfile_t* zf = zfhash[i];
511 if(SLOT_REAL(zf)) {
512 if(zf->generation != generation) {
513 log_debug("rfc1035: check_missing() found deletion of zonefile '%s', triggering process_zonefile()", zf->fn);
514 process_zonefile(zf->fn, loop, full_quiesce, true);
515 }
516 }
517 }
518 }
519
520 F_NONNULL
do_scandir(struct ev_loop * loop)521 static void do_scandir(struct ev_loop* loop) {
522 generation++;
523 scan_dir(loop, full_quiesce);
524 check_missing(loop);
525 }
526
527 F_NONNULL
periodic_scan(struct ev_loop * loop,ev_timer * rtimer V_UNUSED,int revents V_UNUSED)528 static void periodic_scan(struct ev_loop* loop, ev_timer* rtimer V_UNUSED, int revents V_UNUSED) {
529 dmn_assert(revents == EV_TIMER);
530 do_scandir(loop);
531 }
532
533 // ev stuff
534 static ev_timer* reload_timer = NULL;
535
536 #ifdef USE_INOTIFY
537
// This is for event debugging only.
// Renders an inotify event mask as the names of the flags set in it,
// joined by '|' (e.g. "IN_ISDIR|IN_CREATE"), or as an empty string when
// none of the known flags is set.  The string lives in a 256-byte buffer
// obtained from dmn_fmtbuf_alloc().
static const char* logf_inmask(uint32_t mask) {
#define INMASK_ENT(_flag) { _flag, #_flag }
    // listed in the order in which the names are emitted
    static const struct {
        uint32_t bit;
        const char* name;
    } flag_names[] = {
        INMASK_ENT(IN_ISDIR),
        INMASK_ENT(IN_IGNORED),
        INMASK_ENT(IN_Q_OVERFLOW),
        INMASK_ENT(IN_UNMOUNT),
        INMASK_ENT(IN_ACCESS),
        INMASK_ENT(IN_ATTRIB),
        INMASK_ENT(IN_CLOSE_WRITE),
        INMASK_ENT(IN_CLOSE_NOWRITE),
        INMASK_ENT(IN_CREATE),
        INMASK_ENT(IN_DELETE),
        INMASK_ENT(IN_DELETE_SELF),
        INMASK_ENT(IN_MODIFY),
        INMASK_ENT(IN_MOVE_SELF),
        INMASK_ENT(IN_MOVED_FROM),
        INMASK_ENT(IN_MOVED_TO),
        INMASK_ENT(IN_OPEN),
    };
#undef INMASK_ENT

    char* output = dmn_fmtbuf_alloc(256);
    output[0] = 0;

    for(unsigned i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) {
        if(mask & flag_names[i].bit) {
            if(output[0])
                strcat(output, "|");
            strcat(output, flag_names[i].name);
        }
    }

    return output;
}
570
571 F_NONNULL
572 static void inot_reader(struct ev_loop* loop, ev_io* w, int revents);
573
inotify_setup(const bool initial)574 static bool inotify_setup(const bool initial) {
575 bool rv = false; // success
576
577 if(initial && !gdnsd_linux_min_version(2, 6, 36)) {
578 // note that catching ENOSYS below does not obviate this check.
579 // inotify_init1() may exist in older kernels, but we also need
580 // to ensure IN_EXCL_UNLINK compatibility, and that we're past
581 // some earlier implementations of inotify which had some bad bugs.
582 log_info("rfc1035: Insufficient kernel (<2.6.36) for inotify support");
583 rv = true; // failure
584 }
585 else {
586 inot.main_fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
587 if(inot.main_fd < 0) {
588 // initial ENOSYS is reported here as well for 2.6.36+ hosts that
589 // don't implement the syscall for whatever architecture.
590 log_err("rfc1035: inotify_init1(IN_NONBLOCK) failed: %s", dmn_logf_errno());
591 rv = true; // failure
592 }
593 else {
594 inot.watch_desc = inotify_add_watch(inot.main_fd, rfc1035_dir, INL_MASK);
595 if(inot.watch_desc < 0) {
596 log_err("rfc1035: inotify_add_watch(%s) failed: %s", rfc1035_dir, dmn_logf_errno());
597 close(inot.main_fd);
598 rv = true; // failure
599 }
600 else {
601 ev_io_init(inot.io_watcher, inot_reader, inot.main_fd, EV_READ);
602 }
603 }
604 }
605
606 return rv;
607 }
608
609 // This only gets set to false if the first attempt to set up
610 // inotify is successful. When we have an initial failure,
611 // which could be for lack of OS and/or FS support, we stick
612 // to compatibility-mode directory scanning exclusively and
613 // never re-attempt inotify operations.
614 // However, if the initial inotify setup succeeds, and then we
615 // later have a runtime inotify failure, we merely fallback to
616 // directory scanning temporarily until inotify can be cleanly
617 // recovered without lost events.
618 static bool inotify_initial_failure = true;
619
inotify_initial_setup(void)620 static void inotify_initial_setup(void) {
621 // Set up the actual inotify bits...
622 memset(&inot, 0, sizeof(inot));
623 inot.io_watcher = xmalloc(sizeof(*inot.io_watcher));
624 inot.fallback_watcher = xmalloc(sizeof(*inot.fallback_watcher));
625 inotify_initial_failure = inotify_setup(true);
626 if(inotify_initial_failure)
627 log_info("rfc1035: disabling inotify-based zonefile change detection on this host permanently (initial failure)");
628 else
629 log_info("rfc1035: will use inotify for zone change detection");
630 }
631
632 F_NONNULL
initial_run(struct ev_loop * loop)633 static void initial_run(struct ev_loop* loop) {
634 if(!inotify_initial_failure) {
635 dmn_assert(inot.io_watcher);
636 ev_io_start(loop, inot.io_watcher);
637 }
638 else {
639 reload_timer = xcalloc(1, sizeof(*reload_timer));
640 ev_timer_init(reload_timer, periodic_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
641 ev_timer_start(loop, reload_timer);
642 }
643 }
644
645 F_NONNULL
inotify_fallback_scan(struct ev_loop * loop,ev_timer * rtimer,int revents)646 static void inotify_fallback_scan(struct ev_loop* loop, ev_timer* rtimer, int revents) {
647 dmn_assert(revents == EV_TIMER);
648 dmn_assert(!inotify_initial_failure);
649
650 bool setup_failure = inotify_setup(false);
651 periodic_scan(loop, rtimer, revents);
652 if(!setup_failure) {
653 log_warn("rfc1035: inotify recovered");
654 ev_timer_stop(loop, rtimer);
655 ev_io_start(loop, inot.io_watcher);
656 }
657 }
658
659 F_NONNULL
handle_inotify_failure(struct ev_loop * loop)660 static void handle_inotify_failure(struct ev_loop* loop) {
661 dmn_assert(!inotify_initial_failure);
662
663 log_warn("rfc1035: inotify failed, using fallback scandir() method until recovery");
664
665 // clean up old watcher setup
666 ev_io_stop(loop, inot.io_watcher);
667 inotify_rm_watch(inot.main_fd, inot.watch_desc);
668 close(inot.main_fd);
669
670 // insert periodic timer for fallback/retry scanning
671 ev_timer_init(inot.fallback_watcher, inotify_fallback_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
672 ev_timer_start(loop, inot.fallback_watcher);
673 }
674
675 // retval: true -> halt inotify loop
676 // This will not perform correctly in all cases. This code can easily be
677 // tricked into attempting to load partially-written zonefiles if the
678 // zonefile management tools do silly things like overwriting zonefiles in
679 // place and/or moving open files around while they're being written to.
680 F_NONNULLX(1)
inot_process_event(struct ev_loop * loop,const char * fname,uint32_t emask)681 static bool inot_process_event(struct ev_loop* loop, const char* fname, uint32_t emask) {
682 dmn_assert(!inotify_initial_failure);
683
684 bool rv = false;
685
686 if(!fname) { // directory-level event for top-level zones dir
687 dmn_assert(emask & IN_ISDIR);
688 log_debug("rfc1035: inotified for directory event: %s", logf_inmask(emask));
689 if(emask & (IN_Q_OVERFLOW|IN_IGNORED|IN_UNMOUNT|IN_DELETE_SELF|IN_MOVE_SELF)) {
690 log_err("rfc1035: inotify watcher stopping due to directory-level event %s", logf_inmask(emask));
691 handle_inotify_failure(loop);
692 rv = true;
693 }
694 // Other directory-level events (e.g. IN_MODIFY) are ignored.
695 // We'll see their fallout as e.g. IN_MOVED_X operations on the contained filenames.
696 }
697 else if(fname[0] != '.' && !(emask & IN_ISDIR)) { // skip dotfiles and subdirs
698 log_debug("rfc1035: inotified for zonefile: %s event: %s", fname, logf_inmask(emask));
699 // Of the events we listen for, only IN_MODIFY is a clear indicator of
700 // an ongoing in-place write and thus triggers a full_quiesce.
701 // IN_CLOSE_WRITE will also only be the result of an in-place write,
702 // but in that case the writing is now already done.
703 // The only ambiguous case is IN_CREATE, which could be due to atomic
704 // link(2) or due to open(,O_CREAT). We assume the former and do
705 // not fully quiesce since the latter isn't going to cause an
706 // incidental wipe of any current zone data regardless.
707 const double q_timer = (emask & IN_MODIFY) ? full_quiesce : 0.0;
708 process_zonefile(fname, loop, q_timer, false);
709 }
710
711 return rv;
712 }
713
// Read callback for the inotify fd.  Drains the fd, reading up to
// inotify_bufsize bytes at a time until read() reports that it would
// block, and hands every event found to inot_process_event().
// EOF, any other read error, or malformed event data switches to the
// scandir fallback via handle_inotify_failure() and ends the callback;
// the callback also ends as soon as inot_process_event() returns true.
static void inot_reader(struct ev_loop* loop, ev_io* w, int revents V_UNUSED) {
    dmn_assert(revents == EV_READ);
    dmn_assert(!inotify_initial_failure);

    uint8_t evtbuf[inotify_bufsize];

    while(1) {
        ssize_t read_rv = read(w->fd, evtbuf, inotify_bufsize);
        if(read_rv < 1) {
            // "would block" simply means the queue has been drained
            if(!read_rv || !ERRNO_WOULDBLOCK) {
                if(read_rv)
                    log_err("rfc1035: read() of inotify file descriptor failed: %s", dmn_logf_errno());
                else
                    log_err("rfc1035: Got EOF on inotify file descriptor!");
                handle_inotify_failure(loop);
            }
            return;
        }
        const size_t bytes = (size_t)read_rv;

        size_t offset = 0;
        while(offset < bytes) {
            if((bytes - offset) < sizeof(struct inotify_event)) {
                log_err("rfc1035: inotify sent truncated/garbage data");
                handle_inotify_failure(loop);
                return;
            }
            // evtbuf is a plain byte array with no particular alignment,
            // so the fixed-size header is copied out with memcpy() rather
            // than being accessed through a casted struct pointer.
            struct inotify_event evt;
            memcpy(&evt, &evtbuf[offset], sizeof(evt));
            offset += sizeof(evt);
            if((bytes - offset) < evt.len) {
                log_err("rfc1035: inotify sent truncated/garbage data");
                handle_inotify_failure(loop);
                return;
            }
            // the name (evt.len bytes, possibly zero) directly follows the header
            const char* fname = evt.len > 0 ? (const char*)&evtbuf[offset] : NULL;
            offset += evt.len;
            if(inot_process_event(loop, fname, evt.mask))
                return;
        }
    }
}
754
755 #else // no compile-time support for inotify
756
// Built without inotify support: there is nothing to set up.
static void inotify_initial_setup(void)
{
}
758
759 F_NONNULL
initial_run(struct ev_loop * loop)760 static void initial_run(struct ev_loop* loop) {
761 reload_timer = xcalloc(1, sizeof(*reload_timer));
762 ev_timer_init(reload_timer, periodic_scan, gcfg->zones_rfc1035_auto_interval, gcfg->zones_rfc1035_auto_interval);
763 ev_timer_start(loop, reload_timer);
764 }
765
766 #endif // not-inotify
767
768 /*************************/
769 /*** Public interfaces ***/
770 /*************************/
771
// Performs the initial load of all zonefiles from the zones directory.
// Must only be called while rfc1035_dir is still unset.  check_only is
// not used by this function.
void zsrc_rfc1035_load_zones(const bool check_only V_UNUSED) {
    dmn_assert(!rfc1035_dir);

    rfc1035_dir = gdnsd_resolve_path_cfg("zones/", NULL);

    // the inotify watch is established before the directory is scanned
    if(gcfg->zones_rfc1035_auto)
        inotify_initial_setup(); // no-op if no compile-time support

    // load failures are fatal during this scan only, if so configured
    if(gcfg->zones_strict_startup)
        fail_fatally = true;

    // The scan starts a zero-delay quiescence timer per zonefile on a
    // throwaway loop; running that loop is what actually loads the zones.
    struct ev_loop* temp_load_loop = ev_loop_new(EVFLAG_AUTO);
    scan_dir(temp_load_loop, 0.0);
    ev_run(temp_load_loop, 0);
    ev_loop_destroy(temp_load_loop);
    // NOTE(review): reload_timer is only assigned in initial_run(), so it
    // looks like this is still NULL here and the free() is a no-op -- confirm
    free(reload_timer);
    fail_fatally = false;

    gdnsd_atexit_debug(unload_zones);

    log_info("rfc1035: Loaded %u zonefiles from '%s'", zfhash_count, rfc1035_dir);

    // The non-zero quiesce timer is configured only *after* the initial
    // load above has completed.  Earlier code tried to avoid races during
    // startup, but there is no reliable way to avoid every possible form
    // of startup race, and inotify or the next manual directory scan would
    // pick up the change shortly afterwards and correct it anyway, so
    // there's no point delaying startup for the corner-case.
    const bool testsuite_nodelay = getenv("GDNSD_TESTSUITE_NODELAY") != NULL;
    if(!testsuite_nodelay) // testsuite can bypass restrictions!
        full_quiesce = gcfg->zones_rfc1035_quiesce;
    log_info("rfc1035: quiescence time is %.3g seconds", full_quiesce);
}
801
802 // we track the loop here for the async sigusr1 request
803 static struct ev_loop* zones_loop = NULL;
804 static ev_async* sigusr1_waker = NULL;
805
806 // called within our thread/loop to take sigusr1 action
807 F_NONNULL
sigusr1_cb(struct ev_loop * loop,ev_async * w V_UNUSED,int revents V_UNUSED)808 static void sigusr1_cb(struct ev_loop* loop, ev_async* w V_UNUSED, int revents V_UNUSED) {
809 log_info("rfc1035: received SIGUSR1 notification, scanning for changes...");
810 do_scandir(loop);
811 }
812
813 // called from main thread to feed ev_async
zsrc_rfc1035_sigusr1(void)814 void zsrc_rfc1035_sigusr1(void) {
815 dmn_assert(zones_loop); dmn_assert(sigusr1_waker);
816 ev_async_send(zones_loop, sigusr1_waker);
817 }
818
zsrc_rfc1035_runtime_init(struct ev_loop * loop)819 void zsrc_rfc1035_runtime_init(struct ev_loop* loop) {
820 zones_loop = loop;
821 sigusr1_waker = xmalloc(sizeof(*sigusr1_waker));
822 ev_async_init(sigusr1_waker, sigusr1_cb);
823 ev_async_start(loop, sigusr1_waker);
824
825 if(gcfg->zones_rfc1035_auto)
826 initial_run(zones_loop);
827 }
828