1 /*------------------------------------------------------------------------------
2  *
3  * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4  * The YADIFA TM software product is provided under the BSD 3-clause license:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *        * Redistributions in binary form must reproduce the above copyright
13  *          notice, this list of conditions and the following disclaimer in the
14  *          documentation and/or other materials provided with the distribution.
15  *        * Neither the name of EURid nor the names of its contributors may be
16  *          used to endorse or promote products derived from this software
17  *          without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  *------------------------------------------------------------------------------
32  *
33  */
34 
35 /** @defgroup
36  *  @ingroup yadifad
37  *  @brief
38  *
39  *
40  *
41  * @{
42  *
43  *----------------------------------------------------------------------------*/
44 
45 #include "server-config.h"
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <netdb.h>
50 #include <netinet/in.h>
51 #include <unistd.h>
52 #include <fcntl.h>
53 
54 #include <dnscore/rfc.h>
55 #include <dnscore/logger.h>
56 #include <dnscore/serial.h>
57 #include <dnscore/format.h>
58 #include <dnscore/service.h>
59 #include <dnscore/async.h>
60 #include <dnscore/packet_reader.h>
61 #include <dnscore/tcp_io_stream.h>
62 
63 #include <dnsdb/zdb.h>
64 #include <dnsdb/zdb_icmtl.h>
65 #include <dnsdb/zdb_record.h>
66 #include <dnsdb/zdb_zone.h>
67 #include <dnsdb/zdb_zone_label.h>
68 #include <dnsdb/zdb_zone_load.h>
69 
70 #include <dnscore/zone_reader_axfr.h>
71 
72 #include <dnscore/ptr_set.h>
73 
74 #include "notify.h"
75 #include "zone.h"
76 #include "database-service.h"
77 #include "server.h"
78 #include "server_error.h"
79 
// Compile-time switches of this file, both forced to 0 here.
#define NOTIFY_DETAILED_LOG 0
#define NOTIFY_CLEANUP_DUMP 0

#define NOTIFY_RECEIVE_TIMEOUT_SECONDS 2

// NOTE: NOTIFY_DETAILED_LOG is always defined above, so this DEBUG-dependent
// default only applies if the definition above is removed.
#ifndef NOTIFY_DETAILED_LOG
#if DEBUG
#define NOTIFY_DETAILED_LOG 1
#else
#define NOTIFY_DETAILED_LOG 0
#endif
#endif

// Compile-time reminder emitted whenever the detailed log is enabled.
#if NOTIFY_DETAILED_LOG
#pragma message("WARNING: NOTIFY_DETAILED_LOG is not set to 0")
#endif
96 
97 
98 #if HAS_CTRL
99 #include "ctrl.h"
100 
101 #endif
102 
// Allocation tags, the values spell the tag name in ASCII ("NOTFYMSG", "MESGDATA")
#define NOTFYMSG_TAG 0x47534d5946544f4e
#define MESGDATA_TAG 0x415441444753454d


/*------------------------------------------------------------------------------
 * GLOBAL VARIABLES */

extern logger_handle *g_server_logger;
#define MODULE_MSG_HANDLE g_server_logger

// Values of the 'type' field found in every notify_message payload
#define NOTIFY_MESSAGE_TYPE_NOTIFY  1
#define NOTIFY_MESSAGE_TYPE_ANSWER  2
#define NOTIFY_MESSAGE_TYPE_DOMAIN  3
#define NOTIFY_MESSAGE_TYPE_CLEAR   4

// Timeout (seconds) and initial number of tries stored in a message_query_summary
#define MESSAGE_QUERY_TIMEOUT 3
#define MESSAGE_QUERY_TRIES   3

// The same timeout, in microseconds
#define MESSAGE_QUERY_TIMEOUT_US (MESSAGE_QUERY_TIMEOUT * 1000000)

// Thread pool used to run notify_masterquery_thread
static struct thread_pool_s *notify_thread_pool = NULL;

// UDP sockets used by notify_send for IPv4 and IPv6, -1 means "not available"
static int send_socket4 = -1;
static int send_socket6 = -1;

static struct service_s notify_handler = UNINITIALIZED_SERVICE;
// Queue receiving the notify_message items (see notify_slaveanswer)
static async_queue_s notify_handler_queue;
// Checked before anything is pushed on notify_handler_queue
static volatile bool notify_service_initialised = FALSE;

typedef struct message_query_summary message_query_summary;

// Allocation tag, spells "MSGQSUMR" in ASCII
#define MSGQSUMR_TAG 0x524d55535147534d
135 
/**
 * Summary of a notify query sent to a host.
 *
 * Summaries are ordered by (id, host, fqdn), see message_query_summary_compare.
 */

struct message_query_summary
{
    host_address *host;             // copy of the destination, made at init and deleted at clear
    message_query_summary *next;    /* this pointer is used to list the items, ie: for deletion */
    // to discard
    s64 expire_epoch_us;            // microseconds, initialised to timeus() + MESSAGE_QUERY_TIMEOUT_US
    // for answers, id has to be kept
    u16 id;
    // for answers, ip/port should be kept but they are already in the host list (sa.sa4,sa.sa6,addrlen)
    // times we send the udp packet before giving up
    s8 tries;                       // initialised to MESSAGE_QUERY_TRIES
    // for signed answers, these have to be kept
    u8 mac_size;    // mesg->tsig.mac_size;
    u8 fqdn[256];                   // copy of the canonised fqdn of the message
    u8 mac[64];     // mesg->tsig.mac;
};
152 
153 static void
message_query_summary_init(message_query_summary * mqs,u16 id,host_address * host,const message_data * mesg)154 message_query_summary_init(message_query_summary *mqs, u16 id, host_address *host, const message_data *mesg)
155 {
156     yassert(mqs != NULL);
157 
158     // key
159     mqs->host = host_address_copy(host);
160     mqs->next = NULL;
161     mqs->expire_epoch_us = timeus() + MESSAGE_QUERY_TIMEOUT_US;
162     mqs->id = id;
163     // payload
164     mqs->tries = MESSAGE_QUERY_TRIES;
165 
166     dnsname_copy(mqs->fqdn, message_get_canonised_fqdn(mesg));
167 
168 #if DNSCORE_HAS_TSIG_SUPPORT
169 
170     mqs->mac_size = message_tsig_mac_get_size(mesg);
171 
172     if(mqs->mac_size > 0)
173     {
174         message_tsig_mac_copy(mesg, mqs->mac);
175     }
176 #endif
177 }
178 
179 static void
message_query_summary_clear(message_query_summary * mqs)180 message_query_summary_clear(message_query_summary *mqs)
181 {
182 #if DEBUG
183     log_debug("notify: clearing query for %{hostaddr}", mqs->host);
184 #endif
185     host_address_delete(mqs->host);
186 #if DEBUG
187     memset(mqs, 0xfe, sizeof(message_query_summary));
188 #endif
189 }
190 
191 static void
message_query_summary_delete(message_query_summary * mqs)192 message_query_summary_delete(message_query_summary *mqs)
193 {
194 #if DEBUG
195     log_debug("notify: deleting query for %{hostaddr}", mqs->host);
196 #endif
197     message_query_summary_clear(mqs);
198     ZFREE_OBJECT(mqs);
199 }
200 
201 static s32
message_query_summary_compare(const void * va,const void * vb)202 message_query_summary_compare(const void* va, const void* vb)
203 {
204     message_query_summary *a = (message_query_summary*)va;
205     message_query_summary *b = (message_query_summary*)vb;
206 
207     s32 d;
208 
209     d = (s32)a->id - (s32)b->id;
210 
211     if(d == 0)
212     {
213         d = host_address_compare(a->host, b->host);
214 
215         if(d == 0)
216         {
217             d = dnsname_compare(a->fqdn, b->fqdn);
218         }
219     }
220 
221     return d;
222 }
223 
typedef struct notify_message notify_message;

// Payload for NOTIFY_MESSAGE_TYPE_DOMAIN: only carries the type
struct notify_message_domain
{
    u8 type;
};

// Payload for NOTIFY_MESSAGE_TYPE_CLEAR: only carries the type
struct notify_message_clear
{
    u8 type;
};

// Payload for NOTIFY_MESSAGE_TYPE_NOTIFY
struct notify_message_notify
{
    u8 type;
    u8 repeat_countdown;
    u8 repeat_period;
    u8 repeat_period_increase;
    u32 epoch;
    host_address *hosts_list;   /* 64 bits aligned */
#if DNSCORE_HAS_TSIG_SUPPORT
    message_tsig tsig;
#endif
    u16 ztype;
    u16 zclass;
};

// Payload for NOTIFY_MESSAGE_TYPE_ANSWER, filled by notify_slaveanswer
struct notify_message_answer
{
    u8   type;
    u8   rcode;             // rcode of the received message
    bool aa;                // authoritative flag of the received message
    u8   r2;                // NOTE(review): not used in the visible code, presumably padding
    host_address *host;     // built from the sender address of the received message
    message_data *message;  /* only used if the message is signed */
};

/**
 * Message pushed to the notify service.
 * All the payloads start with the same 'type' byte, which tells what member
 * of the union is relevant.
 */

struct notify_message
{
    u8 *origin;             // duplicate of the domain (dnsname_zdup)

    union
    {
        u8 type;
        struct notify_message_notify notify;
        struct notify_message_answer answer;
        struct notify_message_domain domain;
        struct notify_message_clear  clear;
    } payload;
};

static bool notify_slaves_convert_domain_to_notify(notify_message *notifymsg);
276 
277 /*------------------------------------------------------------------------------
278  * STATIC PROTOTYPES */
279 
280 /*------------------------------------------------------------------------------
281  * FUNCTIONS */
282 
283 static notify_message*
notify_message_newinstance(const u8 * origin,u8 type)284 notify_message_newinstance(const u8* origin, u8 type)
285 {
286     notify_message *notifymsg;
287     ZALLOC_OBJECT_OR_DIE(notifymsg, notify_message, NOTFYMSG_TAG);
288     notifymsg->origin = dnsname_zdup(origin);
289     notifymsg->payload.type = type;
290 
291 #if DEBUG
292     log_debug("notify_message_newinstance({%{dnsname}@%p, %i}@%p)", notifymsg->origin, notifymsg->origin, notifymsg->payload.type, notifymsg);
293 #endif
294 
295     return notifymsg;
296 }
297 
298 /**
299  *
300  * Queue a message telling a slave has answered to a notify
301  *
302  * @param origin the domain of the zone
303  * @param sa the address of the source
304  * @param rcode rcode part of the query
305  * @param aa aa flag value in the query
306  */
307 
308 static void
notify_slaveanswer(const message_data * mesg)309 notify_slaveanswer(const message_data *mesg)
310 {
311 #if NOTIFY_DETAILED_LOG
312     log_debug("notify_slaveanswer(%{dnsname} %{sockaddr})", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
313 #endif
314 
315     if(dnscore_shuttingdown())
316     {
317         return;
318     }
319 
320     if(notify_service_initialised)
321     {
322 #if DNSCORE_HAS_TSIG_SUPPORT
323         const struct tsig_item *mesg_tsig_key = message_tsig_get_key(mesg);  // pointer to the structure used for TSIG, to be used in relevant cases
324         message_data *clone = NULL;
325         if(mesg_tsig_key != NULL)
326         {
327             clone = message_dup(mesg);
328             if(clone == NULL)
329             {
330                 return; // BUFFER_WOULD_OVERFLOW;
331             }
332         }
333 #endif
334 
335         const u8 *origin = message_get_canonised_fqdn(mesg);
336         const socketaddress *sa = message_get_sender(mesg);
337         u8 rcode = message_get_rcode(mesg);
338         bool aa = message_isauthoritative(mesg);
339 
340         notify_message *notifymsg = notify_message_newinstance(origin, NOTIFY_MESSAGE_TYPE_ANSWER);
341 
342         notifymsg->payload.answer.rcode = rcode;
343         notifymsg->payload.answer.aa = aa;
344 
345         ZALLOC_OBJECT_OR_DIE(notifymsg->payload.answer.host, host_address, HOSTADDR_TAG);
346         host_address_set_with_sockaddr(notifymsg->payload.answer.host, sa);
347 
348 #if DNSCORE_HAS_TSIG_SUPPORT
349 
350         // if there is a TSIG ...
351 
352         if(message_tsig_get_key(mesg) != NULL)
353         {
354             notifymsg->payload.answer.message = clone;
355             notifymsg->payload.answer.host->tsig = mesg_tsig_key;
356         }
357         else
358         {
359             notifymsg->payload.answer.message = NULL;
360             notifymsg->payload.answer.host->tsig = NULL;
361         }
362 #endif
363 
364         async_message_s *async = async_message_alloc();
365         async->id = 0;
366         async->args = notifymsg;
367         async->handler = NULL;
368         async->handler_args = NULL;
369         async_message_call(&notify_handler_queue, async);
370     }
371     else
372     {
373         log_err("notify: service not initialised");
374     }
375 }
376 
377 static bool
notify_masterquery_read_soa(const u8 * origin,packet_unpack_reader_data * reader,u32 * serial)378 notify_masterquery_read_soa(const u8 *origin, packet_unpack_reader_data *reader, u32 *serial)
379 {
380     ya_result return_value;
381 
382     u8 tmp[MAX_DOMAIN_LENGTH];
383 
384     /* read and expect an SOA */
385 
386     if(ISOK(packet_reader_read_fqdn(reader, tmp, sizeof(tmp))))
387     {
388         if(dnsname_equals(tmp, origin))
389         {
390             struct type_class_ttl_rdlen tctr;
391 
392             if(packet_reader_read(reader, &tctr, 10) == 10) // exact
393             {
394                 if((tctr.qtype == TYPE_SOA) && (tctr.qclass == CLASS_IN))
395                 {
396                     if(ISOK(return_value = packet_reader_skip_fqdn(reader)))
397                     {
398                         if(ISOK(return_value = packet_reader_skip_fqdn(reader)))
399                         {
400                             if(packet_reader_read(reader, tmp, 4) == 4) // exact
401                             {
402                                 *serial = ntohl(GET_U32_AT_P(tmp));
403 
404                                 return TRUE;
405                             }
406                         }
407                     }
408                 }
409             }
410         }
411     }
412 
413     return FALSE;
414 }
415 
// Allocation tag, spells "NTFYMQTA" in ASCII
#define NTFYMQTA_TAG 0x4154514d5946544e

/**
 * Arguments of notify_masterquery_thread.
 * The thread frees both the structure and the origin.
 */

struct notify_masterquery_thread_args
{
    u8 *origin;         // duplicate of the domain (dnsname_zdup)
    u32 serial;         // serial of the master, only meaningful if serial_set is TRUE
    bool serial_set;    // TRUE iff serial has been obtained
};

typedef struct notify_masterquery_thread_args notify_masterquery_thread_args;
426 
427 static void *
notify_masterquery_thread(void * args_)428 notify_masterquery_thread(void *args_)
429 {
430     notify_masterquery_thread_args *args = (notify_masterquery_thread_args*)args_;
431 
432     /* get the zone descriptor for that domain */
433 
434     zone_desc_s *zone_desc = zone_acquirebydnsname(args->origin);
435 
436     ya_result return_value;
437 
438     if(zone_desc == NULL)
439     {
440         log_err("notify: slave: %{dnsname}: zone not configured", args->origin);
441         dnsname_zfree(args->origin);
442         ZFREE_OBJECT(args);
443 
444         return NULL;
445     }
446 
447     mutex_lock(&zone_desc->lock);
448     zone_clear_status(zone_desc, ZONE_STATUS_NOTIFIED);
449     mutex_unlock(&zone_desc->lock);
450 
451     log_debug("notify: slave: %{dnsname}: processing notify from master", args->origin);
452 
453     /* do an SOA query to the master to retrieve the serial (wait) */
454 
455     if(!args->serial_set)
456     {
457         log_debug("notify: slave: %{dnsname}: querying the master at %{hostaddr} for SOA", args->origin, zone_desc->masters);
458 
459         zone_lock(zone_desc, ZONE_LOCK_READONLY);
460         host_address *zone_desc_masters = host_address_copy_list(zone_desc->masters);
461         zone_unlock(zone_desc, ZONE_LOCK_READONLY);
462 
463         return_value = message_query_serial(args->origin, zone_desc_masters, &args->serial);
464 
465         host_address_delete_list(zone_desc_masters);
466 
467         if(ISOK(return_value)) // multi-master
468         {
469             args->serial_set = TRUE;
470 
471             log_debug("notify: slave: %{dnsname}: the master at %{hostaddr} has serial %u", args->origin, zone_desc->masters, args->serial);
472         }
473         else
474         {
475             /* we didn't got the serial */
476 
477             log_debug("notify: slave: %{dnsname}: SOA query to the master at %{hostaddr} failed: %r", args->origin, zone_desc->masters, return_value);
478 
479             // this will fall-back to doing an XFR
480         }
481     }
482     else
483     {
484         log_debug("notify: slave: %{dnsname}: the master at %{hostaddr} has serial %u", args->origin, zone_desc->masters, args->serial);
485     }
486 
487     u32 current_serial;
488 
489     /* get the zone of the domain */
490 
491     zdb_zone *dbzone = zdb_acquire_zone_read_from_fqdn(g_config->database, args->origin);
492 
493     if(dbzone != NULL)
494     {
495         /* lock it for the XFR (it's a writer, so no other writer allowed) */
496 
497         log_debug("notify: slave: %{dnsname}: trying to lock for a transfer", args->origin);
498 
499         if(zdb_zone_trylock(dbzone, ZDB_ZONE_MUTEX_XFR))
500         {
501             /* get the current serial of the zone */
502 
503             if(ISOK(zdb_zone_getserial(dbzone, &current_serial))) // zone is locked
504             {
505                 log_debug("notify: slave: %{dnsname}: current serial is %u", args->origin, current_serial);
506 
507                /*
508                 * If the serial on the "master" is lower,
509                 * nothing has to be done except a note on the log.
510                 *
511                 * If we didn't got the serial of course, we can only ask to the master.
512                 */
513 
514                 if(args->serial_set)
515                 {
516                     if(serial_lt(args->serial, current_serial))
517                     {
518                         /* do nothing at all */
519 
520                         log_debug("notify: slave: %{dnsname}: serial on this slave is higher (%u) than on the notification from master (%u)", zone_origin(zone_desc), current_serial, args->serial);
521                     }
522                     else if(serial_gt(args->serial, current_serial))
523                     {
524                         /* download (and apply) the incremental change  */
525 
526                         log_info("notify: slave: %{dnsname}: scheduling an IXFR from %u", zone_origin(zone_desc), current_serial);
527 
528                         database_zone_ixfr_query(zone_origin(zone_desc));
529                     }
530                     else
531                     {
532                         /* nothing to do but mark the zone as being refreshed */
533 
534                         log_info("notify: slave: %{dnsname}: serial matches the masters' (%u)", zone_origin(zone_desc), current_serial);
535 
536                         zdb_zone_clear_invalid(dbzone);
537                         zone_desc->refresh.refreshed_time = zone_desc->refresh.retried_time = time(NULL);
538 
539                         zdb_zone_release_unlock(dbzone, ZDB_ZONE_MUTEX_XFR);                         /* MUST be unlocked here because ... */
540                         database_zone_refresh_maintenance(g_config->database, zone_origin(zone_desc), 0); /* ... this will try to lock */
541 
542                         dnsname_zfree(args->origin);
543                         ZFREE_OBJECT(args);
544 
545                         log_debug("notify: slave: %{dnsname}: master notify processing done", zone_origin(zone_desc));
546 
547                         zone_release(zone_desc);
548 
549                         return NULL;
550                     }
551                 }
552                 else
553                 {
554                     log_warn("notify: slave: %{dnsname}: the serial of the master has not been obtained, trying an incremental transfer", zone_origin(zone_desc));
555 
556                     database_zone_ixfr_query(zone_origin(zone_desc));
557                 }
558             }
559             else // no soa at apex ... zone needs to be downloaded ...
560             {
561                 // the zone is a placeholder
562 
563                 if((zone_get_status(zone_desc) & (ZONE_STATUS_LOAD|ZONE_STATUS_LOADING|ZONE_STATUS_DOWNLOADED)) == 0)
564                 {
565                     log_debug("notify: slave: %{dnsname}: downloading a new copy of the zone", args->origin);
566                     database_zone_axfr_query(zone_origin(zone_desc));
567                 }
568                 else
569                 {
570                     log_debug("notify: slave: %{dnsname}: still busy loading the zone", args->origin);
571                 }
572             }
573 
574             zdb_zone_release_unlock(dbzone, ZDB_ZONE_MUTEX_XFR);
575         }
576         else // could not lock with ZDB_ZONE_MUTEX_XFR
577         {
578            /*
579             * The zone has been locked already ? give up ...
580             */
581 
582             mutex_lock(&dbzone->lock_mutex);
583             u8 dbzone_lock_owner = dbzone->lock_owner;
584             mutex_unlock(&dbzone->lock_mutex);
585 
586             log_info("notify: slave: %{dnsname}: already locked (%x)", args->origin, dbzone_lock_owner);
587 
588             zdb_zone_release(dbzone);
589 
590             database_zone_refresh_maintenance(g_config->database, args->origin, time(NULL) + 5);
591         }
592     }
593     else
594     {
595         /*
596          * Ask for an AXFR of the zone
597          */
598 
599         log_info("notify: slave: %{dnsname}: scheduling an AXFR", zone_origin(zone_desc));
600 
601         database_zone_axfr_query(zone_origin(zone_desc));
602     }   /* AXFR */
603 
604     dnsname_zfree(args->origin);
605     ZFREE_OBJECT(args);
606 
607     log_debug("notify: slave: %{dnsname}: master notify processing done", zone_origin(zone_desc));
608 
609     zone_release(zone_desc);
610 
611     return NULL;
612 }
613 
614 /**
615  * The purely network part of the sending of a notify udp packet
616  *
617  * @param ha        destination, TSIG supported
618  * @param msgdata   a message to be used for message construction
619  * @param id        the message id
620  * @param origin    origin
621  * @param ntype     type
622  * @param nclass    class
623  * @return
624  */
625 
626 static ya_result
notify_send(host_address * ha,message_data * mesg,u16 id,const u8 * origin,u16 ntype,u16 nclass)627 notify_send(host_address* ha, message_data *mesg, u16 id, const u8 *origin, u16 ntype, u16 nclass)
628 {
629 #if DEBUG
630     log_debug("notify: send(%{hostaddr}, %p, %hx, %{dnsname}, %{dnstype}, %{dnsclass})", ha, mesg, id, origin, &ntype, &nclass);
631 #endif
632 
633     socketaddress sa;
634 
635     ya_result return_code;
636 
637     message_make_notify(mesg, id, origin, ntype, nclass);
638 
639 #if DNSCORE_HAS_TSIG_SUPPORT
640     if(ha->tsig != NULL)
641     {
642         if(FAIL(return_code = message_sign_query(mesg, ha->tsig)))
643         {
644             log_err("notify: %{dnsname}: unable to sign message for %{sockaddr} with key %{dnsname}: %r", origin, &sa, ha->tsig->name, return_code);
645 
646             return return_code;
647         }
648     }
649 #endif
650 
651     if(ISOK(return_code = host_address2sockaddr(ha, &sa)))
652     {
653 
654 #if DNSCORE_HAS_TSIG_SUPPORT
655         if(ha->tsig == NULL)
656         {
657 #endif
658 
659 #if !DEBUG
660             log_debug("notify: %{dnsname}: notifying %{sockaddr}", origin, &sa.sa);
661 #else
662             log_info("notify: %{dnsname}: notifying %{sockaddr} with %{dnstype} %{dnsclass} (debug)", origin, &sa.sa, &ntype, &nclass);
663 #endif
664 
665 #if DNSCORE_HAS_TSIG_SUPPORT
666         }
667         else
668         {
669 #if !DEBUG
670             log_debug("notify: %{dnsname}: notifying %{sockaddr} (key=%{dnsname})", origin, &sa.sa, ha->tsig->name);
671 #else
672             log_info("notify: %{dnsname}: notifying %{sockaddr} (key=%{dnsname}) with (%{dnstype} %{dnsclass}) (debug)", origin, &sa.sa, ha->tsig->name, &ntype, &nclass);
673 #endif
674         }
675 #endif
676 
677         int s = -1;
678         int addrlen;
679 
680         switch(sa.sa.sa_family)
681         {
682             case AF_INET:
683             {
684                 s = send_socket4;
685                 addrlen = sizeof(sa.sa4);
686                 break;
687             }
688             case AF_INET6:
689             {
690                 s = send_socket6;
691                 addrlen = sizeof(sa.sa6);
692                 break;
693             }
694         }
695 
696         if(s >= 0)
697         {
698             // s >= 0 => addrlen is initialised
699 #if DEBUG
700             log_debug("notify: sendto(%d, %p, %d, %d, %{sockaddr}, %d)", s, message_get_buffer_const(mesg), message_get_size(mesg), 0, (struct sockaddr*)&sa.sa, addrlen);
701             log_memdump_ex(g_server_logger, MSG_DEBUG5, message_get_buffer_const(mesg), message_get_size(mesg), 16, OSPRINT_DUMP_HEXTEXT);
702 #endif
703             if(ISOK(return_code = sendto(s, message_get_buffer_const(mesg), message_get_size(mesg), 0, &sa.sa, addrlen)))
704             {
705                 log_debug("notify: %{dnsname}: sent %i bytes to %{sockaddr}", origin, message_get_size(mesg), &sa.sa);
706             }
707             else
708             {
709                 int err = errno;
710 
711                 if(err != ENOTSOCK)
712                 {
713                     log_err("notify: %{dnsname}: failed to send notify to %{sockaddr}: %r", origin, &sa.sa, MAKE_ERRNO_ERROR(err));
714                 }
715             }
716         }
717         else
718         {
719             return_code = MAKE_ERRNO_ERROR(ENOTSOCK); // wrong socket
720 
721             // if we cannot get the reply, no point trying to send the query
722 
723             log_err("notify: %{dnsname}: no listening interface can receive from %{sockaddr}", origin, &sa.sa);
724         }
725     }
726     else
727     {
728         log_err("notify: %{dnsname}: unable to convert '%{hostaddr}' to an address", origin, ha);
729     }
730 
731     return return_code;
732 }
733 
734 /**
735  *
736  * Uses a thread to handle the notify from the master (notify_masterquery_thread)
737  *
738  * The message is a NOTIFY SOA IN
739  * The reader points into the buffer of the message and is exactly after the Q section.
740  *
741  *
742  * @param database the database
743  * @param mesg the message
744  * @param reader packet reader into the above message, positioned right after the Q section
745  *
746  * @return an error code
747  */
748 
749 static ya_result
notify_process_masterquery_in_enqueue(const message_data * mesg,packet_unpack_reader_data * reader)750 notify_process_masterquery_in_enqueue(const message_data *mesg, packet_unpack_reader_data *reader)
751 {
752     ya_result return_value;
753 
754     u32 serial = 0; // to silence gcc : this was not a bug
755     bool serial_set = FALSE;
756 
757     if(message_get_answer_count_ne(mesg) != 0)
758     {
759         serial_set = notify_masterquery_read_soa(message_get_canonised_fqdn(mesg), reader, &serial);
760     }
761 
762     notify_masterquery_thread_args *args;
763 
764     ZALLOC_OBJECT_OR_DIE( args, notify_masterquery_thread_args, NTFYMQTA_TAG);
765 
766     args->origin = dnsname_zdup(message_get_canonised_fqdn(mesg));
767     args->serial = serial;
768     args->serial_set = serial_set;
769 
770     return_value = thread_pool_enqueue_call(notify_thread_pool, notify_masterquery_thread, args, NULL, "notify: slave");
771 
772     return return_value;
773 }
774 
775 static ya_result
notify_process_masterquery_in(message_data * mesg,packet_unpack_reader_data * reader)776 notify_process_masterquery_in(message_data *mesg, packet_unpack_reader_data *reader)
777 {
778     zone_desc_s *zone_desc;
779     ya_result return_value = SUCCESS;
780 
781     zone_desc = zone_acquirebydnsname(message_get_canonised_fqdn(mesg));
782 
783     if(zone_desc != NULL)
784     {
785         message_set_authoritative_answer(mesg);
786 
787         if(zone_desc->type == ZT_SLAVE)
788         {
789             if(message_has_tsig(mesg))
790             {
791                 log_info("notify: slave: %{dnsname}: %{sockaddr} sent a notification query, class %{dnsclass}, key %{dnsname}", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg), message_get_query_class_ptr(mesg), message_tsig_get_name(mesg));
792             }
793             else
794             {
795                 log_info("notify: slave: %{dnsname}: %{sockaddr} sent a notification query, class %{dnsclass}", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg), message_get_query_class_ptr(mesg));
796             }
797 
798 #if ZDB_HAS_ACL_SUPPORT
799             if(ACL_REJECTED(acl_check_access_filter(mesg, &zone_desc->ac.allow_notify)))
800             {
801                 /* notauth */
802 
803                 if(message_has_tsig(mesg))
804                 {
805                     log_notice("notify: slave: %{dnsname}: %{sockaddr} key %{dnsname}: not authorised", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg), message_tsig_get_name(mesg));
806                 }
807                 else
808                 {
809                     log_notice("notify: slave: %{dnsname}: %{sockaddr}: not authorised", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
810                 }
811 
812                 message_set_status(mesg, FP_NOTIFY_REJECTED);
813                 message_update_answer_status(mesg);
814 
815                 zone_release(zone_desc);
816 
817                 return ACL_NOTIFY_REJECTED;
818             }
819 #endif
820             if(!zone_isfrozen(zone_desc))
821             {
822                 mutex_lock(&zone_desc->lock);
823                 u32 zone_status_notified = zone_get_set_status(zone_desc, ZONE_STATUS_NOTIFIED);
824                 mutex_unlock(&zone_desc->lock);
825 
826                 if(zone_status_notified == 0)
827                 {
828                     return_value = notify_process_masterquery_in_enqueue(mesg, reader); // thread-safe
829                 }
830                 // else it's already enqueued for notification
831             }
832             else
833             {
834                 log_info("notify: slave: %{dnsname}: %{sockaddr}: zone is frozen", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
835             }
836         }   /* type = SLAVE */
837         else
838         {
839             /* type = MASTER ? */
840 
841             // note: a slave can also be a master ... do not cut this
842 
843             log_info("notify: %{dnsname}: %{sockaddr}: host sent a notification query for master zone ", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
844 
845             message_set_status(mesg, FP_SLAVE_NOTIFIES_MASTER);
846 
847             return_value = NOTIFY_QUERY_TO_MASTER;
848         }
849     }
850     else
851     {
852         log_notice("notify: %{dnsname}: %{sockaddr}: host sent a notification query for an unknown zone", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
853 
854         message_set_status(mesg, FP_NOTIFY_UNKNOWN_ZONE);
855 
856         return_value = NOTIFY_QUERY_TO_UNKNOWN;
857     }
858 
859     message_update_answer_status(mesg);
860 
861     zone_release(zone_desc);
862 
863     return return_value;
864 }
865 
866 
867 
868 /** @brief Handle a notify from the master (or another slave)
869  *
870  *  @param database : the database
871  *  @param mesg     : the input message
872  *
873  *  @retval OK
874  *  @retval NOK
875  */
876 
877 ya_result
notify_process(message_data * mesg)878 notify_process(message_data *mesg)
879 {
880     /* rfc1996
881      * 3.7:
882      *  A NOTIFY request has QDCOUNT>0, ANCOUNT>=0, AUCOUNT>=0,
883      *  ADCOUNT>=0.  If ANCOUNT>0, then the answer section represents an
884      *  unsecure hint at the new RRset for this <QNAME,QCLASS,QTYPE>
885      */
886 
887     if(!message_isquery(mesg))
888     {
889         /*
890          * It's an answer from a slave (we are the master)
891          * It works if we are the master for the zone AND we sent a notify.
892          * Else we discard.
893          */
894 
895         log_debug1("notify: %{dnsname}: %{sockaddr}: processing notification reply", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
896 
897         notify_slaveanswer(mesg);  // thread-safe
898 
899         return SUCCESS;
900     }
901     else
902     {
903         /*
904          * It's a notification by the "master" ... (or in the case of an AXFR/CTRL a request to be notified of all dynamic zones)
905          * It works if we are a slave for the zone.
906          * Else we discard.
907          */
908 
909         ya_result return_value;
910 
911         log_debug1("notify: %{dnsname}: %{sockaddr}: processing notification", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
912 
913         message_set_answer(mesg);
914 
915         packet_unpack_reader_data pr;
916         packet_reader_init_from_message(&pr, mesg);
917 
918         u8 tmp[MAX_DOMAIN_LENGTH];
919 
920         if(ISOK(return_value = packet_reader_read_fqdn(&pr, tmp, sizeof(tmp))))
921         {
922             u16 qtype;
923 
924             if(ISOK(return_value = packet_reader_read_u16(&pr, &qtype)))
925             {
926                 u16 qclass;
927 
928                 if(ISOK(return_value = packet_reader_read_u16(&pr, &qclass)))
929                 {
930                     switch(qclass)
931                     {
932                         case CLASS_IN:
933                         {
934                             /*
935                              * Master sent an notify for the IN class
936                              */
937 
938                             notify_process_masterquery_in(mesg, &pr);
939 
940                             break;
941                         }
942 
943 
944                         default:
945                         {
946                             message_make_error(mesg, FP_NOT_SUPP_CLASS);
947                             break;
948                         }
949                     }
950                 }
951             }
952         }
953 
954 #if DNSCORE_HAS_TSIG_SUPPORT
955         if(message_has_tsig(mesg))  /* NOTE: the TSIG information is in mseg */
956         {
957             tsig_sign_answer(mesg);
958         }
959 #endif
960 
961         return return_value;
962     }
963 }
964 
965 static void
notify_message_free(notify_message * notifymsg)966 notify_message_free(notify_message *notifymsg)
967 {
968     if(notifymsg == NULL)
969     {
970         return;
971     }
972 
973 #if DEBUG
974     log_debug("notify_message_free({%{dnsname}@%p, %i}@%p)", notifymsg->origin, notifymsg->origin, notifymsg->payload.type, notifymsg);
975 #endif
976 
977     if(notifymsg->origin != NULL)
978     {
979         dnsname_zfree(notifymsg->origin);
980         notifymsg->origin = NULL;
981     }
982 
983     switch(notifymsg->payload.type)
984     {
985         case NOTIFY_MESSAGE_TYPE_NOTIFY:
986         {
987             host_address_delete_list(notifymsg->payload.notify.hosts_list);
988             break;
989         }
990         case NOTIFY_MESSAGE_TYPE_ANSWER:
991         {
992 #if DEBUG
993             log_debug("notify_message_free(%p) host_address_delete(%p)", notifymsg, notifymsg->payload.answer.host);
994             debug_log_stacktrace(g_server_logger, MSG_DEBUG7, "notify_message_free:host_address_delete");
995 #endif
996             host_address_delete(notifymsg->payload.answer.host);
997             if(notifymsg->payload.answer.message != NULL)
998             {
999                 message_free(notifymsg->payload.answer.message); // message_data => message_free
1000             }
1001             break;
1002         }
1003         case NOTIFY_MESSAGE_TYPE_DOMAIN:
1004         {
1005             break;
1006         }
1007         case NOTIFY_MESSAGE_TYPE_CLEAR:
1008         {
1009             break;
1010         }
1011         default:
1012         {
1013             log_debug("notify_message_free(%p) invalid notify message type %x", notifymsg, notifymsg->payload.type);
1014             debug_log_stacktrace(g_server_logger, MSG_DEBUG7, "notify_message_free:host_address_delete");
1015 
1016             break;
1017         }
1018     }
1019 #if DEBUG
1020     memset(notifymsg, 0xff, sizeof(notify_message));
1021 #endif
1022     ZFREE_OBJECT(notifymsg);
1023 }
1024 
1025 static int
notify_process_dnsname_compare(const void * node_a,const void * node_b)1026 notify_process_dnsname_compare(const void *node_a, const void *node_b)
1027 {
1028     const u8 *m_a = (const u8*)node_a;
1029     const u8 *m_b = (const u8*)node_b;
1030 
1031     return dnsname_compare(m_a, m_b);
1032 }
1033 
1034 static void
notify_ipv4_receiver_service(struct service_worker_s * worker)1035 notify_ipv4_receiver_service(struct service_worker_s *worker)
1036 {
1037     log_info("notify: notification service IPv4 receiver started (socket %i)", send_socket4);
1038 
1039     message_data *mesg = message_new_instance();
1040     tcp_set_recvtimeout(send_socket4, NOTIFY_RECEIVE_TIMEOUT_SECONDS, 0); /* half a second for UDP is a lot ... */
1041 
1042     while(service_should_run(worker))
1043     {
1044         ya_result ret;
1045         message_recv_udp_reset(mesg);
1046         message_reset_control_size(mesg);
1047         if(message_recv_udp(mesg, send_socket4) > 0)
1048         {
1049             // process slave answer
1050             if(ISOK(ret = message_process_lenient(mesg)))
1051             {
1052 #if NOTIFY_DETAILED_LOG
1053                 log_debug("notify_ipv4_receiver_service(%{dnsname} %{sockaddr})", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
1054 #endif
1055                 notify_slaveanswer(mesg);
1056             }
1057             else
1058             {
1059                 log_err("notify_ipv4_receiver_service: processing message: %r", ret);
1060             }
1061         }
1062         else
1063         {
1064             ret = ERRNO_ERROR;
1065             if((ret == MAKE_ERRNO_ERROR(EAGAIN)) || (ret == MAKE_ERRNO_ERROR(EINTR)))
1066             {
1067 #if NOTIFY_DETAILED_LOG
1068                 log_debug("notify_ipv4_receiver_service: %r", ret);
1069 #endif
1070             }
1071             else
1072             {
1073                 log_err("notify_ipv4_receiver_service: %r", ret);
1074                 sleep(1);
1075             }
1076         }
1077     }
1078 
1079     message_free(mesg);
1080 
1081     log_info("notify: notification service IPv4 receiver stopped");
1082 }
1083 
1084 static void
notify_ipv6_receiver_service(struct service_worker_s * worker)1085 notify_ipv6_receiver_service(struct service_worker_s *worker)
1086 {
1087     log_info("notify: notification service IPv6 receiver started (socket %i)", send_socket6);
1088 
1089     message_data *mesg = message_new_instance();
1090     tcp_set_recvtimeout(send_socket6, NOTIFY_RECEIVE_TIMEOUT_SECONDS, 0); /* half a second for UDP is a lot ... */
1091 
1092     while(service_should_run(worker))
1093     {
1094         ya_result ret;
1095         message_recv_udp_reset(mesg);
1096         if(message_recv_udp(mesg, send_socket6) > 0)
1097         {
1098             // process slave answer
1099 
1100             if(ISOK(ret = message_process_lenient(mesg)))
1101             {
1102 #if NOTIFY_DETAILED_LOG
1103                 log_debug("notify_ipv6_receiver_service(%{dnsname} %{sockaddr})", message_get_canonised_fqdn(mesg), message_get_sender_sa(mesg));
1104 #endif
1105                 notify_slaveanswer(mesg);
1106             }
1107             else
1108             {
1109                 log_err("notify_ipv6_receiver_service: processing message: %r", ret);
1110             }
1111         }
1112         else
1113         {
1114             ret = ERRNO_ERROR;
1115             if((ret == MAKE_ERRNO_ERROR(EAGAIN)) || (ret == MAKE_ERRNO_ERROR(EINTR)))
1116             {
1117 #if NOTIFY_DETAILED_LOG
1118                 log_debug("notify_ipv6_receiver_service: %r", ret);
1119 #endif
1120             }
1121             else
1122             {
1123                 log_err("notify_ipv6_receiver_service: %r", ret);
1124                 sleep(1);
1125             }
1126         }
1127     }
1128 
1129     message_free(mesg);
1130 
1131     log_info("notify: notification service IPv6 receiver stopped");
1132 }
1133 
1134 struct notify_service_context
1135 {
1136     ptr_set notifications_being_sent;
1137     ptr_set notify_queries_not_answered_yet;
1138     ptr_vector todelete;
1139     random_ctx rnd;
1140     message_data *mesg;
1141     s64 last_current_queries_cleanup_epoch_us;
1142     s64 service_loop_begin_us;
1143 };
1144 
1145 static void
notify_service_context_init(struct notify_service_context * ctx)1146 notify_service_context_init(struct notify_service_context *ctx)
1147 {
1148 #if NOTIFY_DETAILED_LOG
1149     log_debug("notify_service_context_init(%p)", ctx);
1150 #endif
1151 
1152     ctx->mesg = message_new_instance();
1153     ctx->rnd = thread_pool_get_random_ctx();
1154     ctx->notifications_being_sent.root = NULL;
1155     ctx->notifications_being_sent.compare = notify_process_dnsname_compare;
1156     ctx->notify_queries_not_answered_yet.root = NULL;
1157     ctx->notify_queries_not_answered_yet.compare = message_query_summary_compare;
1158     ctx->last_current_queries_cleanup_epoch_us = 0;
1159     ptr_vector_init_empty(&ctx->todelete);
1160     ctx->service_loop_begin_us = timeus();
1161 }
1162 
1163 static void
notify_service_context_manage_pending_notifications(struct notify_service_context * ctx)1164 notify_service_context_manage_pending_notifications(struct notify_service_context *ctx)
1165 {
1166     // cleanup start
1167 
1168 #if NOTIFY_DETAILED_LOG
1169     log_debug("notify_service_context_manage_pending_notifications(%p)", ctx);
1170 #endif
1171 
1172     // what happens in here should not interfere with the rest of the function
1173 
1174     s64 tus = ctx->service_loop_begin_us;
1175 
1176     if(!ptr_set_isempty(&ctx->notify_queries_not_answered_yet) && (tus >= ctx->last_current_queries_cleanup_epoch_us))
1177     {
1178         /* create a list of expired message_query_summary */
1179 
1180 #if NOTIFY_CLEANUP_DUMP
1181         log_debug("notify: cleaning up expired notifications");
1182 #endif
1183 
1184         message_query_summary head;
1185         head.next = NULL;
1186         message_query_summary *current_queries_to_clear = &head;
1187         ctx->last_current_queries_cleanup_epoch_us = tus;
1188 
1189         if(ptr_set_isempty(&ctx->notify_queries_not_answered_yet))
1190         {
1191             /* find them using an iterator */
1192 
1193             ptr_set_iterator current_queries_iter;
1194             ptr_set_iterator_init(&ctx->notify_queries_not_answered_yet, &current_queries_iter);
1195             while(ptr_set_iterator_hasnext(&current_queries_iter))
1196             {
1197                 ptr_node *node = ptr_set_iterator_next_node(&current_queries_iter);
1198                 message_query_summary *mqs = (message_query_summary*)node->value;
1199 
1200 #if NOTIFY_DETAILED_LOG
1201                 log_debug("notify: domain=%{dnsname} slave=%{hostaddr} expires=%llT tries=%i",
1202                         mqs->fqdn, mqs->host, mqs->expire_epoch_us, mqs->tries);
1203 #endif
1204                 if(ctx->last_current_queries_cleanup_epoch_us > mqs->expire_epoch_us)
1205                 {
1206 #if NOTIFY_DETAILED_LOG
1207                     log_debug("notify: domain=%{dnsname} slave=%{hostaddr} expires=%llT tries=%i: current try expired",
1208                               mqs->fqdn, mqs->host, mqs->expire_epoch_us, mqs->tries);
1209 #endif
1210 
1211 #if DEBUG
1212                     double expired_since = ctx->last_current_queries_cleanup_epoch_us - mqs->expire_epoch_us;
1213                     expired_since /= ONE_SECOND_US_F;
1214 #endif
1215                     if(--mqs->tries <= 0)
1216                     {
1217 #if NOTIFY_DETAILED_LOG
1218                         log_debug("notify: domain=%{dnsname} slave=%{hostaddr} expires=%llT tries=%i: expired",
1219                                   mqs->fqdn, mqs->host, mqs->expire_epoch_us, mqs->tries);
1220 #endif
1221                         bool give_up = TRUE;
1222                         zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, mqs->fqdn); // RC++
1223                         if(zone != NULL)
1224                         {
1225                             if((zdb_zone_get_status(zone) & ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN) != 0)
1226                             {
1227 #if DEBUG
1228                                 log_debug("notify: query (%hx) %{dnsname} to %{hostaddr} expired %f seconds ago but was re-armed",
1229                                           mqs->id, mqs->fqdn, mqs->host, expired_since);
1230 #endif
1231                                 give_up = FALSE;
1232                                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN);
1233                                 mqs->expire_epoch_us = tus + MESSAGE_QUERY_TIMEOUT_US;
1234                                 mqs->tries = MESSAGE_QUERY_TRIES;
1235                                 notify_send(mqs->host, ctx->mesg, mqs->id, mqs->fqdn, TYPE_SOA, CLASS_IN);
1236                             }
1237                             else
1238                             {
1239 #if NOTIFY_DETAILED_LOG
1240                                 log_debug("notify: domain=%{dnsname} slave=%{hostaddr} expires=%llT tries=%i: notification status cleared",
1241                                           mqs->fqdn, mqs->host, mqs->expire_epoch_us, mqs->tries);
1242 #endif
1243                                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
1244                             }
1245 
1246                             zdb_zone_release(zone);
1247                         }
1248 
1249                         if(give_up)
1250                         {
1251 #if DEBUG
1252                             log_debug("notify: query (%hx) %{dnsname} to %{hostaddr} expired %f seconds ago, giving up",
1253                                       mqs->id, mqs->fqdn, mqs->host, expired_since);
1254 #endif
1255                             current_queries_to_clear->next = mqs;
1256                             current_queries_to_clear = mqs;
1257                         }
1258                     }
1259                     else
1260                     {
1261 #if NOTIFY_DETAILED_LOG
1262                         log_debug("notify: domain=%{dnsname} slave=%{hostaddr} expires=%llT tries=%i: will try again",
1263                                   mqs->fqdn, mqs->host, mqs->expire_epoch_us, mqs->tries);
1264 #endif
1265 
1266 #if DEBUG
1267                         log_debug("notify: query (%hx) %{dnsname} to %{hostaddr} expired %f seconds ago retrying (%i times remaining)",
1268                                   mqs->id, mqs->fqdn, mqs->host, expired_since, mqs->tries);
1269 #endif
1270                         mqs->expire_epoch_us = tus + MESSAGE_QUERY_TIMEOUT_US;
1271 
1272                         // send the message again
1273 
1274                         notify_send(mqs->host, ctx->mesg, mqs->id, mqs->fqdn, TYPE_SOA, CLASS_IN);
1275                     }
1276                 }
1277 #if DEBUG
1278                 else
1279                 {
1280                     log_debug("notify: query (%hx) %{dnsname} to %{hostaddr} still in flight", mqs->id, mqs->fqdn, mqs->host);
1281                 }
1282 #endif
1283             }
1284 
1285             /* once the tree has been scanned, destroy every node listed */
1286 
1287             current_queries_to_clear = head.next;
1288             if(current_queries_to_clear != NULL)
1289             {
1290                 do
1291                 {
1292                     message_query_summary* mqs = current_queries_to_clear;
1293 #if DEBUG
1294                     log_debug("notify: clearing query (%hx) %{dnsname} to %{hostaddr}", mqs->id, mqs->fqdn, mqs->host);
1295 #endif
1296                     current_queries_to_clear = current_queries_to_clear->next;
1297                     ptr_set_delete(&ctx->notify_queries_not_answered_yet, mqs);
1298 
1299                     zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, mqs->fqdn); // RC++
1300                     if(zone != NULL)
1301                     {
1302                         zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
1303                         zdb_zone_release(zone);
1304                     }
1305 
1306                     message_query_summary_delete(mqs);
1307                 }
1308                 while(current_queries_to_clear != NULL);
1309             }
1310 #if DEBUG
1311             else
1312             {
1313 #if NOTIFY_CLEANUP_DUMP
1314                 log_debug("notify: no queries to clear");
1315 #endif
1316             }
1317 #endif
1318 
1319         } // if !ptr_set_isempty(&current_queries)
1320 #if DEBUG
1321         else
1322         {
1323 #if NOTIFY_CLEANUP_DUMP
1324             log_debug("notify: no unanswered queries");
1325 #endif
1326         }
1327 #endif
1328     }
1329 #if DEBUG
1330     else
1331     {
1332         if(ptr_set_isempty(&ctx->notify_queries_not_answered_yet))
1333         {
1334 #if NOTIFY_CLEANUP_DUMP
1335             log_debug("notify: no notification queries needs to be answered");
1336 #endif
1337         }
1338 
1339         if(tus < ctx->last_current_queries_cleanup_epoch_us)
1340         {
1341 #if NOTIFY_CLEANUP_DUMP
1342             float dt = (ctx->last_current_queries_cleanup_epoch_us - tus) / 1000LL;
1343             dt /= 1000.0f;
1344             log_debug("notify: still %.3fus before cleaning up times out", dt);
1345 #endif
1346         }
1347 
1348 #if NOTIFY_CLEANUP_DUMP
1349         if(ctx->last_current_queries_cleanup_epoch_us > 0)
1350         {
1351             log_debug("notify: no timeout to handle (expect next at %llT)", ctx->last_current_queries_cleanup_epoch_us);
1352         }
1353         else
1354         {
1355             log_debug("notify: no timeout to handle");
1356         }
1357 #endif
1358     }
1359 #endif
1360     if((ctx->last_current_queries_cleanup_epoch_us < tus) && (ctx->last_current_queries_cleanup_epoch_us > 0))
1361     {
1362         ctx->last_current_queries_cleanup_epoch_us += MESSAGE_QUERY_TIMEOUT_US;
1363     }
1364 
1365     // cleanup end
1366 }
1367 
1368 static void
notify_service_context_process_next_message(struct notify_service_context * ctx,notify_message * notifymsg)1369 notify_service_context_process_next_message(struct notify_service_context *ctx, notify_message *notifymsg)
1370 {
1371     switch(notifymsg->payload.type)
1372     {
1373         case NOTIFY_MESSAGE_TYPE_CLEAR:
1374         {
1375             ptr_node *node = ptr_set_find(&ctx->notifications_being_sent, notifymsg->origin);
1376             if(node != NULL)
1377             {
1378 #if NOTIFY_DETAILED_LOG
1379                 log_debug("notify_service_context_process_next_message(%{dnsname} : clear)", notifymsg->origin);
1380 #endif
1381                 notify_message *zone_message = (notify_message*)node->value;
1382                 if(zone_message != NULL)
1383                 {
1384 #if !DEBUG
1385                     log_debug("notify: %{dnsname}: removing slaves notifications", notifymsg->origin);
1386 #else
1387                     log_info("notify: %{dnsname}: removing slaves notifications (%p) (debug)", notifymsg->origin, notifymsg);
1388 #endif
1389                     zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, notifymsg->origin); // RC++
1390                     if(zone != NULL)
1391                     {
1392 #if DEBUG
1393                         log_debug("notify: %{dnsname}: clearing notification status for zone", notifymsg->origin);
1394 #endif
1395                         zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY|ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN);
1396                         zdb_zone_release(zone);
1397                     }
1398                     else
1399                     {
1400                         log_err("notify: %{dnsname}: could not un-mark zone as queue for notification: zone not found ?", notifymsg->origin);
1401                     }
1402 
1403                     ptr_set_delete(&ctx->notifications_being_sent, notifymsg->origin);
1404                     notify_message_free(zone_message);
1405                 }
1406                 else
1407                 {
1408 #if NOTIFY_DETAILED_LOG
1409                     log_debug("notify_service_context_process_next_message(%{dnsname} : nothing to clear)", notifymsg->origin);
1410 #endif
1411                 }
1412             }
1413             notify_message_free(notifymsg);
1414 
1415             break;
1416         }
1417         case NOTIFY_MESSAGE_TYPE_DOMAIN:
1418         {
1419 #if !DEBUG
1420             log_debug("notify: %{dnsname}: notifying slaves by domain", notifymsg->origin);
1421 #else
1422             log_info("notify: %{dnsname}: notifying slaves by domain (%p) (debug)", notifymsg->origin, notifymsg);
1423 #endif
1424             if(!notify_slaves_convert_domain_to_notify(notifymsg))
1425             {
1426 #if !DEBUG
1427                 log_debug("notify: %{dnsname}: failed to notify slaves by domain", notifymsg->origin);
1428 #else
1429                 log_info("notify: %{dnsname}: failed to notify slaves by domain (%p) (debug)", notifymsg->origin, notifymsg);
1430 #endif
1431                 zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, notifymsg->origin); // RC++
1432                 if(zone != NULL)
1433                 {
1434 #if DEBUG
1435                     log_debug("notify: %{dnsname}: clearing notification status for zone", notifymsg->origin);
1436 #endif
1437                     zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY|ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN);
1438                     zdb_zone_release(zone);
1439                 }
1440                 else
1441                 {
1442                     log_err("notify: %{dnsname}: could not un-mark zone as queue for notification: zone not found ?", notifymsg->origin);
1443                 }
1444 
1445                 // failed
1446                 notify_message_free(notifymsg);
1447                 break;
1448             }
1449         }
1450             FALLTHROUGH // fall through
1451         case NOTIFY_MESSAGE_TYPE_NOTIFY:
1452         {
1453 #if !DEBUG
1454             log_debug("notify: %{dnsname}: notifying slaves", notifymsg->origin);
1455 #else
1456             log_info("notify: %{dnsname}: notifying slaves with %{dnstype} %{dnsclass} (debug)", notifymsg->origin, &notifymsg->payload.notify.ztype, &notifymsg->payload.notify.zclass);
1457 #endif
1458             host_address **ha_prev = &notifymsg->payload.notify.hosts_list;
1459             host_address *ha = *ha_prev;
1460 
1461             while(ha != NULL) // resolve all domain names in the list, replace them with the resolved address
1462             {
1463                 if(ha->version == HOST_ADDRESS_DNAME)
1464                 {
1465                     /* resolve */
1466                     char name[MAX_DOMAIN_LENGTH + 1];
1467 
1468                     dnsname_to_cstr(name, ha->ip.dname.dname);
1469 
1470                     socketaddress sa;
1471 
1472                     ya_result ret = gethostaddr(name, DNS_DEFAULT_PORT, &sa.sa, 0);
1473 
1474                     if(ISOK(ret))
1475                     {
1476 #if DEBUG
1477                         log_info("notify: %{dnsname}: notifying slave %{hostaddr} (debug)", notifymsg->origin, ha);
1478 #endif
1479                         host_address ha;
1480                         host_address_set_with_sockaddr(&ha, &sa);
1481                         host_address_append_host_address(notifymsg->payload.notify.hosts_list, &ha);
1482                     }
1483                     else
1484                     {
1485                         log_warn("notify: %{dnsname}: unable to resolve %{dnsname}: %r", notifymsg->origin, ha->ip.dname.dname, ret);
1486                     }
1487 
1488                     *ha_prev = ha->next;
1489 
1490                     host_address_delete(ha);
1491                 }
1492                 else
1493                 {
1494                     ha_prev = &ha->next;
1495                 }
1496 
1497                 ha = *ha_prev;
1498             }
1499 
1500             /*
1501              * The current queue has been resolved.
1502              */
1503 
1504             /**
1505              * The list has to replace the current one for message->origin (because it's starting again)
1506              */
1507 #if DEBUG
1508             log_debug("notify: queuing notifications for %{dnsname}", notifymsg->origin);
1509 #endif
1510             ptr_node *node = ptr_set_insert(&ctx->notifications_being_sent, notifymsg->origin);
1511 
1512             if(node->value != NULL)
1513             {
1514 #if DEBUG
1515                 log_info("notify: %{dnsname}: notifying slave %{hostaddr}: replacing previous message (debug)", notifymsg->origin, ha);
1516 #endif
1517                 notify_message* old_message = (notify_message*)node->value; // get the old value
1518                 node->key = notifymsg->origin;                              // (same key but the old pointer is about to be deleted)
1519                 node->value = notifymsg;                                    // set the new value
1520                 notify_message_free(old_message);                           // destroy the old value.  notify_zones does not contains it anymore
1521             }
1522             else
1523             {
1524                 node->value = notifymsg;
1525             }
1526 
1527             // ready to send
1528 
1529             zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, notifymsg->origin); // RC++
1530             if(zone != NULL)
1531             {
1532 #if DEBUG
1533                 log_debug("notify: %{dnsname}: clearing notification status for zone", notifymsg->origin);
1534 #endif
1535                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
1536                 zdb_zone_release(zone);
1537             }
1538             else
1539             {
1540                 log_err("notify: %{dnsname}: could not un-mark zone as queue for notification: zone not found ?", notifymsg->origin);
1541             }
1542 
1543             notifymsg->payload.notify.epoch = time(NULL);
1544 
1545             break;
1546         }
1547         case NOTIFY_MESSAGE_TYPE_ANSWER:
1548         {
1549             log_debug("notify: %{dnsname}: answer from slave at %{hostaddr}", notifymsg->origin, notifymsg->payload.answer.host);
1550 
1551             ptr_node *node = ptr_set_find(&ctx->notifications_being_sent, notifymsg->origin);
1552 
1553             if(node != NULL)
1554             {
1555                 notify_message *notify_zones_notifymsg = (notify_message*)node->value;
1556 
1557                 if(notify_zones_notifymsg != NULL)
1558                 {
1559                     /*
1560                      * Look for the entry and remove it
1561                      */
1562 
1563                     /* notifymsg->payload.answer.tsig ... */
1564 
1565                     /* all's good so remove the notify query from the list */
1566 
1567                     if(host_address_list_contains_host(notify_zones_notifymsg->payload.notify.hosts_list, notifymsg->payload.answer.host))
1568                     {
1569                         host_address *ha;
1570 #if DNSCORE_HAS_TSIG_SUPPORT
1571                         ha = notifymsg->payload.answer.host;
1572 
1573                         message_query_summary tmp;
1574 
1575                         if(ha->tsig != NULL)
1576                         {
1577                             u16 id = message_get_id(notifymsg->payload.answer.message);
1578                             message_query_summary_init(&tmp, id, ha, notifymsg->payload.answer.message);
1579                             // try to find the exact match
1580                             ptr_node *node = ptr_set_find(&ctx->notify_queries_not_answered_yet, &tmp);
1581                             message_query_summary_clear(&tmp);
1582                             if(node == NULL)
1583                             {
1584                                 /* most likely a timeout */
1585 
1586                                 log_notice("notify: %{dnsname}: %{hostaddr}: unexpected answer: could not find a matching query for notification answer with id %04hx",
1587                                         notifymsg->origin, notifymsg->payload.answer.host, id);
1588                                 // delete notifymsg
1589                                 notify_message_free(notifymsg);
1590 
1591                                 break;
1592                             }
1593 
1594                             message_query_summary *mqs = (message_query_summary*)node->value;
1595 
1596                             if(mqs != NULL)
1597                             {
1598                                 // verify the signature
1599 
1600                                 ya_result return_value;
1601 
1602                                 if(FAIL(return_value = tsig_verify_answer(notifymsg->payload.answer.message, mqs->mac, mqs->mac_size)))
1603                                 {
1604                                     // if everything is good, then proceed
1605 
1606                                     log_notice("notify: %{dnsname}: %{hostaddr}: TSIG signature verification failed: %r",
1607                                             notifymsg->origin, notifymsg->payload.answer.host, return_value);
1608                                     // delete notifymsg
1609                                     notify_message_free(notifymsg);
1610                                     break;
1611                                 }
1612 
1613                                 message_free(notifymsg->payload.answer.message); // message_data => message_free
1614                                 notifymsg->payload.answer.message = NULL;
1615                                 ptr_set_delete(&ctx->notify_queries_not_answered_yet, mqs);
1616                                 message_query_summary_delete(mqs);
1617                             }
1618                             else // this should never happen
1619                             {
1620                                 log_err("notify: %{dnsname}: %{hostaddr}: invalid internal state", notifymsg->origin, notifymsg->payload.answer.host);
1621                                 ptr_set_delete(&ctx->notify_queries_not_answered_yet, &tmp);
1622                             }
1623                         } /* end of TSIG verification, with success*/
1624 #endif
1625                         ha = host_address_remove_host_address(&notify_zones_notifymsg->payload.notify.hosts_list, notifymsg->payload.answer.host);
1626                         host_address_delete(ha);
1627 
1628                         if(notifymsg->payload.answer.rcode == RCODE_OK)
1629                         {
1630                             if(notifymsg->payload.answer.aa)    /// @note 20190712 edf -- this 8383
1631                             {
1632                                 log_debug("notify: %{dnsname}: answer from slave at %{hostaddr} confirmed", notifymsg->origin, notifymsg->payload.answer.host);
1633 
1634                                 zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, notifymsg->origin); // RC++
1635                                 if(zone != NULL)
1636                                 {
1637 #if DEBUG
1638                                     log_debug("notify: %{dnsname}: clearing notification status for zone", notifymsg->origin);
1639 #endif
1640                                     if((zdb_zone_get_status(zone) & ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN) != 0)
1641                                     {
1642                                         // resend
1643                                     }
1644 
1645                                     zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY|ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN);
1646 
1647                                     zdb_zone_release(zone);
1648                                 }
1649                                 else
1650                                 {
1651                                     log_err("notify: %{dnsname}: could not un-mark zone as queue for notification: zone not found ?", notifymsg->origin);
1652                                 }
1653                             }
1654 
1655                             else
1656                             {
1657                                 log_notice("notify: %{dnsname}: %{hostaddr}: no AA in answer",
1658                                         notifymsg->origin, notifymsg->payload.answer.host);
1659                             }
1660                         }
1661                         else
1662                         {
1663                             log_warn("notify: %{dnsname}: %{hostaddr}: answered with %r",
1664                                     notifymsg->origin, notifymsg->payload.answer.host, MAKE_DNSMSG_ERROR(notifymsg->payload.answer.rcode));
1665                         }
1666                     }
1667                     else
1668                     {
1669                         log_notice("notify: %{dnsname}: %{hostaddr}: unexpected answer: host is not on the currently notified list", notifymsg->origin, notifymsg->payload.answer.host);
1670                     }
1671 
1672                     if(notify_zones_notifymsg->payload.notify.hosts_list == NULL)
1673                     {
1674                         ptr_set_delete(&ctx->notifications_being_sent, notify_zones_notifymsg->origin);
1675                         notify_message_free(notify_zones_notifymsg);
1676                     }
1677                 }
1678                 else // msg = NULL
1679                 {
1680                     log_notice("notify: %{dnsname}: %{hostaddr}: unexpected answer", notifymsg->origin, notifymsg->payload.answer.host);
1681                     ptr_set_delete(&ctx->notifications_being_sent, notifymsg->origin);
1682                 }
1683             }
1684             else
1685             {
1686                 log_debug("notify: %{dnsname}: %{hostaddr}: unexpected answer: no pending notifications for the zone", notifymsg->origin, notifymsg->payload.answer.host);
1687             }
1688 
1689             // delete notifymsg
1690             notify_message_free(notifymsg);
1691 
1692             break;
1693         }
1694 #if DEBUG
1695         default:
1696         {
1697             log_err("notify: unknown notifymsg type %i", notifymsg->payload.type);
1698             break;
1699         }
1700 #endif
1701     } /* switch notifymsg type */
1702 }
1703 
1704 static void
notify_service_context_send_notifications(struct notify_service_context * ctx)1705 notify_service_context_send_notifications(struct notify_service_context *ctx)
1706 {
1707 #if DEBUG
1708     if(ptr_set_isempty(&ctx->notifications_being_sent))
1709     {
1710         log_debug("notify: no notification to send");
1711         return;
1712     }
1713     else
1714     {
1715         log_debug("notify: sending notifications");
1716     }
1717 #endif
1718 
1719     time_t now = time(NULL);
1720     int total_sent = 0;
1721 
1722     ptr_set_iterator notifications_being_sent_iter;
1723     ptr_set_iterator_init(&ctx->notifications_being_sent, &notifications_being_sent_iter);
1724 
1725     while(ptr_set_iterator_hasnext(&notifications_being_sent_iter))
1726     {
1727         ptr_node *notify_zone_node = ptr_set_iterator_next_node(&notifications_being_sent_iter);
1728         notify_message *notifymsg = notify_zone_node->value;
1729 
1730         if(notifymsg->payload.notify.epoch > now)
1731         {
1732 #if DEBUG
1733             log_debug("notify: notify_send(<slaves>, %p, <id>, %{dnsname}, %{dnstype}, %{dnsclass}) should happend after %T",
1734                       ctx->mesg, notifymsg->origin, &notifymsg->payload.notify.ztype, &notifymsg->payload.notify.zclass,
1735                       notifymsg->payload.notify.epoch);
1736 #endif
1737             continue;
1738         }
1739 
1740         if(dnscore_shuttingdown())
1741         {
1742             ptr_vector_append(&ctx->todelete, notifymsg);
1743             continue;
1744         }
1745 
1746         bool had_failures = FALSE;
1747 
1748         for(host_address *ha = notifymsg->payload.notify.hosts_list; ha != NULL; ) // for all slaves to be notified
1749         {
1750             /*
1751              * Send an UDP packet to the ha
1752              */
1753 
1754             u16 id = random_next(ctx->rnd);
1755 #if DEBUG
1756             log_debug("notify: notify_send(%{hostaddr}, %p, %hx, %{dnsname}, %{dnstype}, %{dnsclass}) repeat=%i, repeat-increase=%i",
1757                       ha, ctx->mesg, id, notifymsg->origin, &notifymsg->payload.notify.ztype, &notifymsg->payload.notify.zclass,
1758                       (int)notifymsg->payload.notify.repeat_countdown,
1759                       (int)notifymsg->payload.notify.repeat_period_increase
1760                       );
1761 #endif
1762 
1763             ya_result ret = notify_send(ha, ctx->mesg, id, notifymsg->origin, notifymsg->payload.notify.ztype, notifymsg->payload.notify.zclass);
1764 
1765             host_address *ha_next = ha->next;
1766 
1767             if(ISOK(ret))
1768             {
1769                 ++total_sent;
1770 
1771                 message_query_summary* mqs;
1772                 ZALLOC_OBJECT_OR_DIE( mqs, message_query_summary, MSGQSUMR_TAG);
1773                 message_query_summary_init(mqs, id, ha, ctx->mesg);
1774 
1775                 ptr_node *node = ptr_set_insert(&ctx->notify_queries_not_answered_yet, mqs);
1776 
1777                 if(node->value != NULL)
1778                 {
1779                     // destroy this mqs
1780 #if DEBUG
1781                     log_debug("notify: node %{hostaddr}[%04x] already exists, replacing", mqs->host, mqs->id);
1782 #endif
1783                     message_query_summary_delete(node->value);
1784                     node->key = mqs;
1785                 }
1786 #if DEBUG
1787                 else
1788                 {
1789                     log_debug("notify: node %{hostaddr}[%04x] added to current queries", mqs->host, mqs->id);
1790                 }
1791 #endif
1792                 node->value = mqs;
1793             }
1794             else // remove it
1795             {
1796                 log_warn("notify: %{dnsname} could not send notification to %{hostaddr}", notifymsg->origin, ha);
1797 
1798                 host_address *rem_ha = host_address_remove_host_address(&notifymsg->payload.notify.hosts_list, ha);
1799 
1800                 if(rem_ha != NULL)
1801                 {
1802                     host_address_delete(rem_ha);
1803                 }
1804 
1805                 had_failures = TRUE;
1806             }
1807 
1808             ha = ha_next;
1809         }
1810 
1811         if(had_failures)
1812         {
1813 #if DEBUG
1814             log_debug("notify: notify_send(<slaves>, %p, <id>, %{dnsname}, %{dnstype}, %{dnsclass}) did not fully succeed",
1815                       ctx->mesg, notifymsg->origin, &notifymsg->payload.notify.ztype, &notifymsg->payload.notify.zclass);
1816 #endif
1817 
1818             zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, notifymsg->origin); // RC++
1819             if(zone == NULL)
1820             {
1821                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
1822                 zdb_zone_release(zone);
1823             }
1824 
1825             // try later
1826         }
1827         else
1828         {
1829 #if DEBUG
1830             log_debug("notify: notify_send(<slaves>, %p, <id>, %{dnsname}, %{dnstype}, %{dnsclass}) all notifications sent",
1831                       ctx->mesg, notifymsg->origin, &notifymsg->payload.notify.ztype, &notifymsg->payload.notify.zclass);
1832 #endif
1833         }
1834 
1835         /* decrease the countdown or remove it from the collection */
1836 
1837         if(notifymsg->payload.notify.repeat_countdown > 0)
1838         {
1839             --notifymsg->payload.notify.repeat_countdown;
1840 
1841             /* ensure there is no overload */
1842 
1843             u16 rp = notifymsg->payload.notify.repeat_period + notifymsg->payload.notify.repeat_period_increase;
1844 
1845             if(rp > 255) /* minutes, 8 bits */
1846             {
1847                 rp = 255;
1848             }
1849 
1850             notifymsg->payload.notify.repeat_period = (u8)rp;
1851 
1852             notifymsg->payload.notify.epoch = now + notifymsg->payload.notify.repeat_period * 60; // repeat_period is minutes
1853         }
1854         else
1855         {
1856             ptr_vector_append(&ctx->todelete, notifymsg);
1857         }
1858     }
1859 
1860     notify_message **notifymsgp = (notify_message**)ctx->todelete.data;
1861 
1862     for(s32 idx = 0; idx <= ctx->todelete.offset; idx++)
1863     {
1864         notify_message *notifymsg = notifymsgp[idx];
1865         ptr_set_delete(&ctx->notifications_being_sent, notifymsg->origin);
1866         notify_message_free(notifymsg);
1867     }
1868 
1869     ptr_vector_clear(&ctx->todelete);
1870 
1871 #if DEBUG
1872     if(total_sent > 0)
1873     {
1874         log_debug("notify: %i notifications sent", total_sent);
1875     }
1876 #endif
1877 }
1878 
1879 static void
notify_service_context_wait(struct notify_service_context * ctx)1880 notify_service_context_wait(struct notify_service_context *ctx)
1881 {
1882     for(;;)
1883     {
1884         s64 service_loop_end_us = timeus();
1885 
1886         if(service_loop_end_us < ctx->service_loop_begin_us)
1887         {
1888             service_loop_end_us = ctx->service_loop_begin_us;
1889         }
1890 
1891         s64 remaining = ONE_SECOND_US - (service_loop_end_us - ctx->service_loop_begin_us);
1892 
1893         if(remaining <= 0)
1894         {
1895             break;
1896         }
1897 
1898         usleep(service_loop_end_us - ctx->service_loop_begin_us);
1899     }
1900 }
1901 
1902 static void
notify_service_context_finalize(struct notify_service_context * ctx)1903 notify_service_context_finalize(struct notify_service_context *ctx)
1904 {
1905     ptr_set_iterator iter;
1906 
1907     u32 total_count;
1908     u32 count;
1909 
1910     total_count = 0;
1911     count = 0;
1912     ptr_set_iterator_init(&ctx->notifications_being_sent, &iter);
1913     while(ptr_set_iterator_hasnext(&iter))
1914     {
1915         ptr_node *node = ptr_set_iterator_next_node(&iter);
1916         //host_address *ha = (host_address*)node->key;
1917 
1918         notify_message* message = (notify_message*)node->value;  // get the old value
1919         if(message != NULL)
1920         {
1921             notify_message_free(message);                           // destroy the message
1922             node->key = NULL;                                       // (same key but the old pointer is about to be deleted)
1923             node->value = NULL;                                      // set the new value
1924             count++;
1925         }
1926         total_count++;
1927     }
1928     log_debug("notify: cleared %u messages", count);
1929     if(count != total_count)
1930     {
1931         log_notice("notify: %u messages were empty", total_count - count);
1932     }
1933     ptr_set_destroy(&ctx->notifications_being_sent);
1934 
1935     total_count = 0;
1936     count = 0;
1937     ptr_set_iterator_init(&ctx->notify_queries_not_answered_yet, &iter);
1938     while(ptr_set_iterator_hasnext(&iter))
1939     {
1940         ptr_node *node = ptr_set_iterator_next_node(&iter);
1941         message_query_summary* mqs = (message_query_summary*)node->value;
1942         if(mqs != NULL)
1943         {
1944             message_query_summary_delete(mqs);
1945             count++;
1946         }
1947         total_count++;
1948     }
1949     log_debug("notify: cleared %u summaries", count);
1950     if(count != total_count)
1951     {
1952         log_notice("notify: %u summaries were empty", total_count - count);
1953     }
1954     ptr_set_destroy(&ctx->notify_queries_not_answered_yet);
1955 
1956     ptr_vector_destroy(&ctx->todelete);
1957 
1958     if(ctx->mesg != NULL)
1959     {
1960         message_free(ctx->mesg); // message_data
1961         ctx->mesg = NULL;
1962     }
1963 }
1964 
1965 static int
notify_service(struct service_worker_s * worker)1966 notify_service(struct service_worker_s *worker)
1967 {
1968     /*
1969      * Resolve the names and replace them by their IP
1970      *
1971      * Remove the sender
1972      *
1973      * Remove myself
1974      *
1975      * Store (merge?) the queue for the current serial, replace an existing one.
1976      *
1977      * Update the serial on the queue for each answer ?
1978      *
1979      *
1980      */
1981 
1982     if(worker->worker_index == 1)
1983     {
1984         notify_ipv4_receiver_service(worker);
1985         return SUCCESS;
1986     }
1987 
1988     if(worker->worker_index == 2)
1989     {
1990         notify_ipv6_receiver_service(worker);
1991         return SUCCESS;
1992     }
1993 
1994     log_info("notify: notification service started");
1995 
1996     struct notify_service_context ctx;
1997 
1998     notify_service_context_init(&ctx);
1999 
2000     /*
2001      */
2002 
2003     log_debug("notify: notification service main loop reached");
2004 
2005     while(service_should_run(worker) || !async_queue_empty(&notify_handler_queue))
2006     {
2007         ctx.service_loop_begin_us = timeus();
2008 
2009         notify_service_context_manage_pending_notifications(&ctx);
2010 
2011 #if DEBUG
2012         if(dnscore_shuttingdown())
2013         {
2014             log_info("notify: dnscore is shutting down. should_run: %i, queue_empty: %i (debug)",
2015                  service_should_run(worker),
2016                  async_queue_empty(&notify_handler_queue));
2017         }
2018 #endif
2019 
2020         s64 loop_start = timeus();
2021         s64 loop_now = loop_start;
2022         s64 loop_count = 0;
2023         bool long_accumulation = FALSE;
2024 #if NOTIFY_CLEANUP_DUMP
2025         bool no_message_in_queue = FALSE;
2026 #endif
2027         bool is_shutting_down = !service_should_run(worker);
2028 
2029         // the loop will always enter at least once once
2030 
2031         do
2032         {
2033             /* current_queries tree cleanup */
2034 
2035             async_message_s *async = async_message_next(&notify_handler_queue);
2036 
2037             if(async == NULL)   /*if no message is in the queue, proceed to next step */
2038             {
2039 #if NOTIFY_CLEANUP_DUMP
2040                 no_message_in_queue = TRUE;
2041 #endif
2042                 break;
2043             }
2044 
2045             notify_message *notifymsg = (notify_message*)async->args;
2046 
2047             if(notifymsg == NULL) /*if no message is in the queue, proceed to next step (probably irrelevant) */
2048             {
2049                 async_message_release(async);
2050 #if NOTIFY_CLEANUP_DUMP
2051                 no_message_in_queue = TRUE;
2052 #endif
2053                 break;
2054             }
2055 
2056             // if dnscore is shutting down, release the message
2057 
2058             if(is_shutting_down || dnscore_shuttingdown())
2059             {
2060 #if DEBUG
2061                 log_info("notify: releasing messages (debug)");
2062 #endif
2063                 notify_message_free(notifymsg);
2064                 async_message_release(async);
2065                 is_shutting_down = TRUE;
2066                 loop_start = loop_now;
2067                 continue;
2068             }
2069 
2070             ++loop_count;
2071 
2072             notify_service_context_process_next_message(&ctx, notifymsg);
2073 
2074             async_message_release(async);
2075         } // for(;;)
2076         while((long_accumulation = ( ((loop_now = timeus()) - loop_start) >= ONE_SECOND_US)));
2077 
2078         /*
2079          * For all entries in the queue, send a notify to the ones that need to be repeated
2080          */
2081 
2082         if(long_accumulation)
2083         {
2084             log_debug("notify: notification service accumulated queries for %fms (%lli queries)", ((1.0 * (loop_now - loop_start)) / 1000.0), loop_count);
2085         }
2086 
2087 #if NOTIFY_CLEANUP_DUMP
2088         if(no_message_in_queue)
2089         {
2090             log_debug1("notify: notification service has no more messages queued");
2091         }
2092 #endif
2093 
2094         if(is_shutting_down)
2095         {
2096             log_debug("notify: notification service will shutdown");
2097             break;
2098         }
2099 
2100         notify_service_context_send_notifications(&ctx);
2101         notify_service_context_wait(&ctx);
2102     }
2103 
2104     service_set_stopping(worker);
2105 
2106     notify_service_context_finalize(&ctx);
2107 
2108     log_info("notify: notification service stopped");
2109 
2110     return 0;
2111 }
2112 
2113 /**
2114  * Sends a notify to all the slave for a given domain name:
2115  *
2116  * _ Get the zone
2117  * _ Create an empty list
2118  * _ If notify-auto, add all the IPs of all the NS at the apex of the zone to the list.
2119  * _ Add all the also-notify IPs to the list
2120  * _ Queue the list to the notify service
2121  *
2122  * @param origin
2123  */
2124 
2125 void
notify_slaves(const u8 * origin)2126 notify_slaves(const u8 *origin)
2127 {
2128     if(dnscore_shuttingdown())
2129     {
2130         return;
2131     }
2132 
2133     if(!notify_service_initialised)
2134     {
2135         log_warn("notify: %{dnsname}: notification service has not been initialised", origin);
2136 
2137         return;
2138     }
2139 
2140     zdb *db = g_config->database;
2141     zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(db, origin); // RC++
2142 
2143     if((zone == NULL) || zdb_zone_invalid(zone))
2144     {
2145         if(zone != NULL)
2146         {
2147             zdb_zone_release(zone);
2148         }
2149 
2150         log_warn("notify: %{dnsname}: notify called on an invalid zone", origin);
2151 
2152         return;
2153     }
2154 
2155     // zdb_zone_set_status returns the status before the parameter is added
2156 
2157     if((zdb_zone_set_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY) & ZDB_ZONE_STATUS_WILL_NOTIFY) != 0)
2158     {
2159         // zone was already marked for notification
2160 
2161         log_debug("notify: %{dnsname}: already marked for notification", origin);
2162 
2163         zdb_zone_set_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY_AGAIN);
2164 
2165         zdb_zone_release(zone); // the release should only be done now as 'origin' may be passed from the zone
2166 
2167         return;
2168     }
2169 
2170     log_debug("notify: %{dnsname}: slaves notifications will be sent", origin);
2171 
2172     notify_message *notifymsg = notify_message_newinstance(origin, NOTIFY_MESSAGE_TYPE_DOMAIN);
2173 
2174     zdb_zone_release(zone); // RC--
2175 
2176     async_message_s *async = async_message_alloc();
2177     async->id = 0;
2178     async->args = notifymsg;
2179     async->handler = NULL;
2180     async->handler_args = NULL;
2181     async_message_call(&notify_handler_queue, async);
2182 }
2183 
2184 static ya_result
notify_slaves_alarm(void * args_,bool cancel)2185 notify_slaves_alarm(void *args_, bool cancel)
2186 {
2187     u8 *origin = (u8*)args_;
2188 
2189     if(!dnscore_shuttingdown())
2190     {
2191         if(notify_service_initialised && !cancel)
2192         {
2193             log_debug("notify: %{dnsname}: delayed retry", origin);
2194 
2195             notify_message *notifymsg = notify_message_newinstance(origin, NOTIFY_MESSAGE_TYPE_DOMAIN);
2196 
2197             async_message_s *async = async_message_alloc();
2198             async->id = 0;
2199             async->args = notifymsg;
2200             async->handler = NULL;
2201             async->handler_args = NULL;
2202             async_message_call(&notify_handler_queue, async);
2203         }
2204         else
2205         {
2206             zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(g_config->database, origin); // RC++
2207 
2208             if(zone != NULL)
2209             {
2210                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
2211                 zdb_zone_release(zone);
2212             }
2213             else
2214             {
2215                 log_err("notify: %{dnsname}: alarm-cancel: could not un-mark zone as queue for notification: zone not found ?", origin);
2216             }
2217         }
2218     }
2219 
2220     dnsname_zfree(origin);
2221 
2222     return SUCCESS;
2223 }
2224 
2225 /**
2226  *
2227  * @param origin
2228  */
2229 
2230 static bool
notify_slaves_convert_domain_to_notify(notify_message * message)2231 notify_slaves_convert_domain_to_notify(notify_message *message)
2232 {
2233     if(message->payload.type == NOTIFY_MESSAGE_TYPE_NOTIFY)
2234     {
2235         return TRUE;
2236     }
2237 
2238     if(!notify_service_initialised)
2239     {
2240         return FALSE;
2241     }
2242 
2243     if(message->payload.type != NOTIFY_MESSAGE_TYPE_DOMAIN)
2244     {
2245         return FALSE;
2246     }
2247 
2248     /*
2249      * Build a list of IPs to contact
2250      * The master in the SOA must not be in this list
2251      * The current server must not be in this list
2252      *
2253      * Once the list is done, launch a thread that will periodically retry anybody in this list until the list is empty
2254      *
2255      * The list should be mutexed
2256      * The list should be in a by-origin collection
2257      * The list should be rebuild for each new notification (because the zone could have changed)
2258      */
2259 
2260     zdb *db = g_config->database;
2261 
2262     zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(db, message->origin); // RC++
2263 
2264     if((zone == NULL) || zdb_zone_invalid(zone) || !notify_service_initialised)
2265     {
2266         if(zone != NULL)
2267         {
2268             zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
2269             zdb_zone_release(zone);
2270         }
2271 
2272         if(notify_service_initialised)
2273         {
2274             log_debug("notify: %{dnsname}: zone temporarily unavailable", message->origin);
2275         }
2276 
2277         return FALSE;
2278     }
2279 
2280     zone_desc_s *zone_desc = zone_acquirebydnsname(message->origin);
2281     if(zone_desc == NULL)
2282     {
2283         zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
2284         zdb_zone_release(zone);
2285         log_err("notify: %{dnsname}: zone not configured", message->origin);
2286         return FALSE;
2287     }
2288 
2289     host_address list;
2290 #if DEBUG
2291     memset(&list, 0xff, sizeof(list));
2292 #endif
2293     list.next = NULL;
2294     list.version = HOST_ADDRESS_NONE;
2295 
2296     bool lock_failed = FALSE;
2297 
2298     /* no need to set TSIG */
2299 
2300     if(zone_ismaster(zone_desc) && zone_is_auto_notify(zone_desc))
2301     {
2302         if(zdb_zone_trylock_wait(zone, ONE_SECOND_US, ZDB_ZONE_MUTEX_SIMPLEREADER))
2303         //if(zdb_zone_trylock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER))
2304         {
2305             // get the SOA
2306             zdb_packed_ttlrdata *soa = zdb_record_find(&zone->apex->resource_record_set, TYPE_SOA); // zone is locked
2307             // get the NS
2308             zdb_packed_ttlrdata *ns = zdb_record_find(&zone->apex->resource_record_set, TYPE_NS); // zone is locked
2309             // get the IPs for each NS but the one in the SOA
2310 
2311             u8 *soa_mname = ZDB_PACKEDRECORD_PTR_RDATAPTR(soa);
2312             u32 soa_mname_size = dnsname_len(soa_mname);
2313 
2314             for(zdb_packed_ttlrdata *nsp = ns; nsp != NULL; nsp = nsp->next)
2315             {
2316                 u32 ns_dname_size = ZDB_PACKEDRECORD_PTR_RDATASIZE(nsp);
2317                 u8 *ns_dname = ZDB_PACKEDRECORD_PTR_RDATAPTR(nsp);
2318 
2319                 if(ns_dname_size == soa_mname_size)
2320                 {
2321                     if(memcmp(ns_dname, soa_mname, soa_mname_size) == 0) // scan-build false positive: soa_mname cannot be NULL
2322                     {
2323                         continue;
2324                     }
2325                 }
2326 
2327                 // valid candidate : append IP addresses
2328 
2329                 if(zdb_append_ip_records(db, ns_dname, &list) <= 0) // zone is locked
2330                 {
2331                     // If no IP has been found, they will have to be resolved using the system ... later
2332 
2333                     host_address_append_dname(&list, ns_dname, NU16(DNS_DEFAULT_PORT));
2334                 }
2335             }
2336 
2337             zdb_zone_release_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER);
2338         }
2339         else
2340         {
2341             log_debug("notify: %{dnsname}: zone already locked", message->origin);
2342 
2343             lock_failed = TRUE;
2344             zdb_zone_release(zone);
2345         }
2346     }
2347     else
2348     {
2349         zdb_zone_release(zone);
2350     }
2351 
2352     // at this point I have the list of every IP I could find along with names I cannot resolve.
2353     // note that we don't need to care about the changes in the database : it would mean a new
2354     // notify and this one would be discarded
2355 
2356     if(!lock_failed && ISOK(zone_try_lock_wait(zone_desc, ONE_SECOND_US, ZONE_LOCK_READONLY)))
2357     {
2358         log_debug("notify: %{dnsname}: preparing notification", message->origin);
2359 
2360         const host_address *also_notifies = zone_desc->notifies;
2361 
2362         while(also_notifies != NULL)
2363         {
2364             host_address_append_host_address(&list, also_notifies); //copy made
2365 
2366             also_notifies = also_notifies->next;
2367         }
2368 
2369         // It's separate from the DB push the lot thread from the pool
2370 
2371         if(list.next != NULL)
2372         {
2373             message->payload.type = NOTIFY_MESSAGE_TYPE_NOTIFY;
2374             message->payload.notify.hosts_list = list.next;
2375             message->payload.notify.repeat_countdown = zone_desc->notify.retry_count; /* 10 times */
2376             message->payload.notify.repeat_period = zone_desc->notify.retry_period; /* 1 minute */
2377             message->payload.notify.repeat_period_increase = zone_desc->notify.retry_period_increase; /* 1 minute */
2378             message->payload.notify.ztype = TYPE_SOA;
2379             message->payload.notify.zclass = CLASS_IN;
2380         }
2381         else
2382         {
2383             log_debug("notify: %{dnsname}: preparing notification: host list empty", message->origin);
2384         }
2385 
2386         zone_unlock(zone_desc, ZONE_LOCK_READONLY);
2387     }
2388     else
2389     {
2390         // could not lock the zone right away : delay a bit
2391         log_debug("notify: %{dnsname}: delaying notification", message->origin);
2392 
2393         zdb_zone *zone = zdb_acquire_zone_read_from_fqdn(db, message->origin); // RC++
2394         if(zone != NULL)
2395         {
2396             if(!zdb_zone_invalid(zone))
2397             {
2398                 alarm_event_node *event = alarm_event_new(
2399                         time(NULL),
2400                         ALARM_KEY_ZONE_NOTIFY_SLAVES,
2401                         notify_slaves_alarm,
2402                         dnsname_zdup(message->origin),
2403                         ALARM_DUP_REMOVE_LATEST,
2404                         "notify slaves");
2405 
2406                 alarm_set(zone->alarm_handle, event);
2407             }
2408             else
2409             {
2410                 // if the message is ignored, will-notify status must be cleared
2411                 zdb_zone_clear_status(zone, ZDB_ZONE_STATUS_WILL_NOTIFY);
2412 
2413                 log_warn("notify: %{dnsname}: (temporarily) invalid zone, notify slaves request will remain ignored", message->origin);
2414             }
2415 
2416             zdb_zone_release(zone);
2417         }
2418         else
2419         {
2420             // could not get the zone anymore
2421 
2422             log_warn("notify: %{dnsname}: could not acquire the zone, notify slaves request will remain ignored", message->origin);
2423         }
2424     }
2425 
2426     zone_release(zone_desc);
2427 
2428     return message->payload.type == NOTIFY_MESSAGE_TYPE_NOTIFY;
2429 }
2430 
2431 /**
2432  * Stops all notification for zone with origin
2433  *
2434  * @param origin
2435  */
2436 
2437 void
notify_clear(const u8 * origin)2438 notify_clear(const u8 *origin)
2439 {
2440     notify_message *notifymsg = notify_message_newinstance(origin, NOTIFY_MESSAGE_TYPE_CLEAR);
2441 
2442     async_message_s *async = async_message_alloc();
2443     async->id = 0;
2444     async->args = notifymsg;
2445     async->handler = NULL;
2446     async->handler_args = NULL;
2447     async_message_call(&notify_handler_queue, async);
2448 }
2449 
2450 ya_result
notify_service_init()2451 notify_service_init()
2452 {
2453     int err = SUCCESS;
2454     if(!notify_service_initialised)
2455     {
2456         int workers = 2;
2457 
2458         if((send_socket4 = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
2459         {
2460             log_err("notify: no usable IPv4 socket bound");
2461             return ERRNO_ERROR;
2462         }
2463 
2464         fd_setcloseonexec(send_socket4);
2465 
2466         if((send_socket6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
2467         {
2468             log_warn("notify: no usable IPv6 socket bound");
2469         }
2470         else
2471         {
2472             fd_setcloseonexec(send_socket6);
2473             ++workers;
2474         }
2475 
2476         if(notify_thread_pool == NULL)
2477         {
2478             if((notify_thread_pool = thread_pool_init_ex(10, 4096, "notify-tp")) == NULL)
2479             {
2480                 close_ex(send_socket4);
2481                 send_socket4 = -1;
2482                 close_ex(send_socket6);
2483                 send_socket6 = -1;
2484                 return THREAD_CREATION_ERROR;
2485             }
2486         }
2487 
2488         if(ISOK(err = service_init_ex(&notify_handler, notify_service, "yadifad-notify", workers )))
2489         {
2490             async_queue_init(&notify_handler_queue, 10000000, 1, 1, "notify"); // note: it's implemented as a linked list
2491 
2492             notify_service_initialised = TRUE;
2493         }
2494     }
2495 
2496     return err;
2497 }
2498 
2499 /**
2500  * Starts the notify service thread
2501  */
2502 
2503 ya_result
notify_service_start()2504 notify_service_start()
2505 {
2506     int err = SERVICE_NOT_INITIALISED;
2507 
2508     if(notify_service_initialised)
2509     {
2510         if(service_stopped(&notify_handler))
2511         {
2512             err = service_start(&notify_handler);
2513         }
2514     }
2515 
2516     return err;
2517 }
2518 
2519 void
notify_wait_servicing()2520 notify_wait_servicing()
2521 {
2522     if(notify_service_initialised)
2523     {
2524         if(!service_stopped(&notify_handler))
2525         {
2526             service_wait_servicing(&notify_handler);
2527         }
2528     }
2529 }
2530 
2531 /**
2532  * Stops the notify service thread
2533  */
2534 
2535 ya_result
notify_service_stop()2536 notify_service_stop()
2537 {
2538     int err = SERVICE_NOT_INITIALISED;
2539 
2540     if(notify_service_initialised)
2541     {
2542         if(!service_stopped(&notify_handler))
2543         {
2544             err = service_stop(&notify_handler);
2545             service_wait(&notify_handler);
2546         }
2547     }
2548 
2549     return err;
2550 }
2551 
2552 ya_result
notify_service_finalize()2553 notify_service_finalize()
2554 {
2555     int err = SUCCESS;
2556 
2557     if(notify_service_initialised)
2558     {
2559         notify_service_initialised = FALSE;
2560 
2561         if(send_socket4 >= 0)
2562         {
2563             shutdown(send_socket4, SHUT_RDWR);
2564         }
2565 
2566         if(send_socket6 >= 0)
2567         {
2568             shutdown(send_socket6, SHUT_RDWR);
2569         }
2570 
2571         err = notify_service_stop();
2572 
2573         service_finalize(&notify_handler);
2574 
2575         /* once the tree has been scanned, destroy every node listed */
2576 
2577         while(!async_queue_empty(&notify_handler_queue))
2578         {
2579             async_message_s *async = async_message_next(&notify_handler_queue);
2580 
2581             if(async == NULL)   /* if no message is in the queue, proceed to next step */
2582             {
2583                 break;
2584             }
2585 
2586             notify_message *msg = (notify_message*)async->args;
2587 
2588             /* if no message is in the queue, proceed to next step (probably irrelevant) */
2589             notify_message_free(msg);
2590 
2591             async_message_release(async);
2592         }
2593 
2594         async_queue_finalize(&notify_handler_queue);
2595 
2596         if(notify_thread_pool != NULL)
2597         {
2598             thread_pool_destroy(notify_thread_pool);
2599             notify_thread_pool = NULL;
2600         }
2601 
2602         if(send_socket4 >= 0)
2603         {
2604             close_ex(send_socket4);
2605             send_socket4 = -1;
2606         }
2607 
2608         if(send_socket6 >= 0)
2609         {
2610             close_ex(send_socket6);
2611             send_socket6 = -1;
2612         }
2613     }
2614 
2615     return err;
2616 }
2617 
2618 /** @} */
2619