/*
 * server.c -- nsd(8) network input/output
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <sys/types.h>
#include <sys/param.h>
#include <limits.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <netinet/in.h>
#ifdef USE_TCP_FASTOPEN
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <netdb.h>
#include <poll.h>
#ifdef HAVE_SYS_RANDOM_H
#include <sys/random.h>
#endif
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL_RAND_H
#include <openssl/rand.h>
#endif
#ifdef HAVE_OPENSSL_SSL_H
#include <openssl/ssl.h>
#endif
#ifdef HAVE_OPENSSL_ERR_H
#include <openssl/err.h>
#endif
#ifdef HAVE_OPENSSL_OCSP_H
#include <openssl/ocsp.h>
#endif
#ifndef USE_MINI_EVENT
#  ifdef HAVE_EVENT_H
#    include <event.h>
#  else
#    include <event2/event.h>
#    include "event2/event_struct.h"
#    include "event2/event_compat.h"
#  endif
#else
#  include "mini_event.h"
#endif

#include "axfr.h"
#include "namedb.h"
#include "netio.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
#include "xfrd-disk.h"
#include "difffile.h"
#include "nsec3.h"
#include "ipc.h"
#include "udb.h"
#include "remote.h"
#include "lookup3.h"
#include "rrl.h"
#include "ixfr.h"
#ifdef USE_DNSTAP
#include "dnstap/dnstap_collector.h"
#endif
#include "verify.h"
#include "util/proxy_protocol.h"

#define RELOAD_SYNC_TIMEOUT 25 /* seconds */

#ifdef USE_DNSTAP
/*
 * log_addr() - print the contents of a sockaddr_in/sockaddr_in6 structure,
 * just as it is done in Unbound via the same
 * log_addr(VERB_LEVEL, const char*, sockaddr_storage*)
 */
static void
log_addr(const char* descr,
#ifdef INET6
	struct sockaddr_storage* addr
#else
	struct sockaddr_in* addr
#endif
	)
{
	char str_buf[64];
	if(verbosity < 6)
		return;
	if(
#ifdef INET6
		addr->ss_family == AF_INET
#else
		addr->sin_family == AF_INET
#endif
		) {
		struct sockaddr_in* s = (struct sockaddr_in*)addr;
		inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
#ifdef INET6
	} else {
		struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
		inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
#endif
	}
}
#endif /* USE_DNSTAP */

#ifdef USE_TCP_FASTOPEN
#define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
#define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
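/* In the Linux tcp_fastopen sysctl bitmap, bit 0x1 enables client support
 * and bit 0x2 enables server support; the mask above tests the server bit. */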
#endif

/* header state for the PROXYv2 header (for TCP) */
enum pp2_header_state {
	/* no header encountered yet */
	pp2_header_none = 0,
	/* read the static part of the header */
	pp2_header_init,
	/* read the full header */
	pp2_header_done
};
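/* The state advances pp2_header_none -> pp2_header_init -> pp2_header_done
 * as bytes of the PROXYv2 header arrive on the (possibly fragmented) TCP
 * stream; only after pp2_header_done is the payload treated as DNS data. */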

/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd *nsd;
	struct nsd_socket *socket;
	struct event event;
	/* if set, PROXYv2 is expected on this connection */
	int pp2_enabled;
};

struct tcp_accept_handler_data {
	struct nsd *nsd;
	struct nsd_socket *socket;
	int event_added;
	struct event event;
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int tls_accept;
#endif
	/* if set, PROXYv2 is expected on this connection */
	int pp2_enabled;
};

/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connections drops below the maximum
 * number of TCP connections.
 */
static size_t tcp_accept_handler_count;
static struct tcp_accept_handler_data *tcp_accept_handlers;

static struct event slowaccept_event;
static int slowaccept;

#ifdef HAVE_SSL
static unsigned char *ocspdata = NULL;
static long ocspdata_len = 0;
#endif

#ifdef NONBLOCKING_IS_BROKEN
/* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
   read multiple times from a socket when reported ready by select. */
# define NUM_RECV_PER_SELECT (1)
#else /* !NONBLOCKING_IS_BROKEN */
# define NUM_RECV_PER_SELECT (100)
#endif /* NONBLOCKING_IS_BROKEN */
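/* Each ready UDP socket is drained with up to NUM_RECV_PER_SELECT datagrams
 * per event callback (batched with recvmmsg() where the platform provides
 * it), which amortizes the event-loop overhead over many packets. */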

#ifndef HAVE_MMSGHDR
struct mmsghdr {
	struct msghdr msg_hdr;
	unsigned int msg_len;
};
#endif

static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
static struct iovec iovecs[NUM_RECV_PER_SELECT];
static struct query *queries[NUM_RECV_PER_SELECT];
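/* These three arrays are parallel: msgs[i].msg_hdr.msg_iov is set up (at
 * server start, elsewhere in this file) to point at iovecs[i], which in
 * turn points into the packet buffer of queries[i]; the arrays are reused
 * for every batch of received datagrams. */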

/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O. This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by errno
 * being set to EAGAIN) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler. When the socket becomes readable/writable again we
 * continue from the same position.
 */
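/* In sketch form (a simplified illustration, not the actual handler code):
 *
 *	n = read(fd, buf + data->bytes_transmitted,
 *		total_length - data->bytes_transmitted);
 *	if(n == -1 && errno == EAGAIN)
 *		return;  (position is kept; resume on the next readable event)
 *	data->bytes_transmitted += n;
 */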
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure. This region is destroyed
	 * when the connection is closed.
	 */
	region_type* region;

	/*
	 * The global nsd structure.
	 */
	struct nsd* nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type* query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet. The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t bytes_transmitted;

	/* If the query is restarted and needs a reset */
	int query_needs_reset;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int query_count;

	/*
	 * The timeout in msec for this tcp connection
	 */
	int tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the accept socket, used to find the proper service (local)
	 * address this connection is bound to. */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

	/* if set, PROXYv2 is expected on this connection */
	int pp2_enabled;

	/* header state for the PROXYv2 header (for TCP) */
	enum pp2_header_state pp2_header_state;

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
/* global that is the list of active tcp channels */
static struct tcp_handler_data *tcp_active_list = NULL;
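/* The list is doubly linked through the prev/next fields above; handlers
 * are added on accept and unlinked on close, so that the remaining tcp
 * channels can be walked and serviced, e.g. at shutdown or overload. */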

/*
 * Handle incoming queries on the UDP server sockets.
 */
static void handle_udp(int fd, short event, void* arg);

/*
 * Handle incoming connections on the TCP sockets. These handlers
 * usually wait for the NETIO_EVENT_READ event (indicating an incoming
 * connection) but are disabled when the number of current TCP
 * connections is equal to the maximum number of TCP connections.
 * Disabling is done by changing the handler to wait for the
 * NETIO_EVENT_NONE type. This is done using the function
 * configure_tcp_accept_handlers.
 */
static void handle_tcp_accept(int fd, short event, void* arg);

/*
 * Handle incoming queries on a TCP connection. The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete query is received.
 */
static void handle_tcp_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TCP connection. The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete response is sent.
 */
static void handle_tcp_writing(int fd, short event, void* arg);

#ifdef HAVE_SSL
/* Create SSL object and associate fd */
static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd);
/*
 * Handle TLS handshake. May be called multiple times if incomplete.
 */
static int tls_handshake(struct tcp_handler_data* data, int fd, int writing);

/*
 * Handle incoming queries on a TLS over TCP connection. The TLS
 * connections are configured to be non-blocking and the handler may
 * be called multiple times before a complete query is received.
 */
static void handle_tls_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TLS over TCP connection. The TLS
 * connections are configured to be non-blocking and the handler may
 * be called multiple times before a complete response is sent.
 */
static void handle_tls_writing(int fd, short event, void* arg);
#endif

/*
 * Send the quit command to all children (non-blocking), then close the pipes.
 */
static void send_children_quit(struct nsd* nsd);
/* same, for shutdown time, waits for children to exit to avoid restart issues */
static void send_children_quit_and_wait(struct nsd* nsd);

/* set children's flags to send NSD_STATS to them */
#ifdef BIND8_STATS
static void set_children_stats(struct nsd* nsd);
#endif /* BIND8_STATS */

/*
 * Change the event types the HANDLERS are interested in to EVENT_TYPES.
 */
static void configure_handler_event_types(short event_types);

static uint16_t *compressed_dname_offsets = 0;
static uint32_t compression_table_capacity = 0;
static uint32_t compression_table_size = 0;
static domain_type* compressed_dnames[MAXRRSPP];

#ifdef USE_TCP_FASTOPEN
/* Checks to see if the kernel value must be manually changed in order for
   TCP Fast Open to support server mode */
static void report_tcp_fastopen_config() {

	int tcp_fastopen_fp;
	uint8_t tcp_fastopen_value;

	if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) {
		log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
		return;
	}
	if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) {
		log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
		close(tcp_fastopen_fp);
		return;
	}
	if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) {
		log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n");
		log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n");
		log_msg(LOG_WARNING, "To enable TFO use the command:\n");
		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n");
		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n");
		log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n");
	}
	close(tcp_fastopen_fp);
}
#endif

/*
 * Remove the specified pid from the list of child pids. Returns -1 if
 * the pid is not in the list, child_num otherwise. The field is set to 0.
 */
static int
delete_child_pid(struct nsd *nsd, pid_t pid)
{
	size_t i;
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid == pid) {
			nsd->children[i].pid = 0;
			if(!nsd->children[i].need_to_exit) {
				if(nsd->children[i].child_fd != -1)
					close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
				if(nsd->children[i].handler)
					nsd->children[i].handler->fd = -1;
			}
			return i;
		}
	}
	return -1;
}

/*
 * Restart child servers if necessary.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
#ifdef MEMCLEAN /* OS collects memory pages */
				region_destroy(region);
#endif

				if (pledge("stdio rpath inet", NULL) == -1) {
					log_msg(LOG_ERR, "pledge");
					exit(1);
				}

				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				nsd->this_child->child_num = i;
				/* remove signal flags inherited from parent;
				   the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}

#ifdef BIND8_STATS
static void set_bind8_alarm(struct nsd* nsd)
{
	/* resync so that the next alarm is on the next whole period */
	if(nsd->st_period > 0) /* % by 0 gives divbyzero error */
		alarm(nsd->st_period - (time(NULL) % nsd->st_period));
}
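/* For example, with st_period == 3600 and time(NULL) % 3600 == 1234, the
 * alarm is scheduled in 2366 seconds, exactly at the next period boundary. */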
#endif

/* set zone stat ids for zones initially read in */
static void
zonestatid_tree_set(struct nsd* nsd)
{
	struct radnode* n;
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		zone_type* zone = (zone_type*)n->elem;
		zone->zonestatid = getzonestatid(nsd->options, zone->opts);
	}
}

#ifdef USE_ZONE_STATS
void
server_zonestat_alloc(struct nsd* nsd)
{
	size_t num = (nsd->options->zonestatnames->count==0?1:
			nsd->options->zonestatnames->count);
	size_t sz = sizeof(struct nsdst)*num;
	char tmpfile[256];
	uint8_t z = 0;

	/* file names */
	nsd->zonestatfname[0] = 0;
	nsd->zonestatfname[1] = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);

	/* file descriptors */
	nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[0] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[1] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		close(nsd->zonestatfd[0]);
		unlink(nsd->zonestatfname[0]);
		exit(1);
	}

#ifdef HAVE_MMAP
	if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[0], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[0], strerror(errno));
		exit(1);
	}
	if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[1], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[1], strerror(errno));
		exit(1);
	}
	nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[0], 0);
	if(nsd->zonestat[0] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[1], 0);
	if(nsd->zonestat[1] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->zonestat[0], 0, sz);
	memset(nsd->zonestat[1], 0, sz);
	nsd->zonestatsize[0] = num;
	nsd->zonestatsize[1] = num;
	nsd->zonestatdesired = num;
	nsd->zonestatsizenow = num;
	nsd->zonestatnow = nsd->zonestat[0];
#endif /* HAVE_MMAP */
}

void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !MREMAP_MAYMOVE */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP_MAYMOVE */
#endif /* HAVE_MMAP */
}

/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}

/* switch over to the other array for the new children, which briefly
 * coexist with the old children; we want to avoid both sets writing to
 * the same statistics arrays. */
void
server_zonestat_switch(struct nsd* nsd)
{
	if(nsd->zonestatnow == nsd->zonestat[0]) {
		nsd->zonestatnow = nsd->zonestat[1];
		nsd->zonestatsizenow = nsd->zonestatsize[1];
	} else {
		nsd->zonestatnow = nsd->zonestat[0];
		nsd->zonestatsizenow = nsd->zonestatsize[0];
	}
}
#endif /* USE_ZONE_STATS */

#ifdef BIND8_STATS
void
server_stat_alloc(struct nsd* nsd)
{
	char tmpfile[256];
	size_t sz = sizeof(struct nsdst) * nsd->child_count * 2;
	uint8_t z = 0;

	/* file name */
	nsd->statfname = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.stat",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->statfname = region_strdup(nsd->region, tmpfile);

	/* file descriptor */
	nsd->statfd = open(nsd->statfname, O_CREAT|O_RDWR, 0600);
	if(nsd->statfd == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->statfname,
			strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}

#ifdef HAVE_MMAP
	if(lseek(nsd->statfd, (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->statfname,
			strerror(errno));
		goto fail_exit;
	}
	if(write(nsd->statfd, &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->statfname, strerror(errno));
		goto fail_exit;
	}
	nsd->stat_map = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->statfd, 0);
	if(nsd->stat_map == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
fail_exit:
		close(nsd->statfd);
		unlink(nsd->statfname);
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->stat_map, 0, sz);
	nsd->stats_per_child[0] = nsd->stat_map;
	nsd->stats_per_child[1] = &nsd->stat_map[nsd->child_count];
	nsd->stat_current = 0;
	nsd->st = &nsd->stats_per_child[nsd->stat_current][0];
#endif /* HAVE_MMAP */
}
#endif /* BIND8_STATS */

#ifdef BIND8_STATS
void
server_stat_free(struct nsd* nsd)
{
	unlink(nsd->statfname);
}
#endif /* BIND8_STATS */

static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}

static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}

static int
set_cloexec(struct nsd_socket *sock)
{
	assert(sock != NULL);

	if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
		const char *socktype =
			sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
		log_msg(LOG_ERR, "fcntl(..., FD_CLOEXEC) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

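/* The set_* socket option helpers below share one return convention:
 * 1 on success, 0 when the option is not supported on this platform (or
 * deliberately skipped), and -1 on error. */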
static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux. This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}

static int
set_reuseaddr(struct nsd_socket *sock)
{
#ifdef SO_REUSEADDR
	int on = 1;
	if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
		strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEADDR */
	return 0;
}

static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if (0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}

static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}

static int
set_nonblock(struct nsd_socket *sock)
{
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
		log_msg(LOG_ERR, "fcntl(..., O_NONBLOCK) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

#ifdef INET6
static int
set_ipv6_v6only(struct nsd_socket *sock)
{
#ifdef IPV6_V6ONLY
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_V6ONLY */

	return 0;
}
#endif /* INET6 */

#ifdef INET6
static int
set_ipv6_use_min_mtu(struct nsd_socket *sock)
{
#if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)
#if defined(IPV6_USE_MIN_MTU)
	/* There is no fragmentation of IPv6 datagrams during forwarding in the
	 * network. Therefore we do not send UDP datagrams larger than the
	 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be
	 * larger if the network stack supports IPV6_USE_MIN_MTU.
	 */
	int opt = IPV6_USE_MIN_MTU;
	int optval = 1;
	static const char optname[] = "IPV6_USE_MIN_MTU";
#elif defined(IPV6_MTU)
	/* On Linux, PMTUD is disabled by default for datagrams, so set the MTU
	 * to the IPv6 minimum MTU to get the same effect.
	 */
	int opt = IPV6_MTU;
	int optval = IPV6_MIN_MTU;
	static const char optname[] = "IPV6_MTU";
#endif
	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
		optname, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_USE_MIN_MTU || IPV6_MTU */

	return 0;
}
#endif /* INET6 */

static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
	int ret = 0;

#if defined(IP_MTU_DISCOVER)
	int opt = IP_MTU_DISCOVER;
	int optval;
# if defined(IP_PMTUDISC_OMIT)
	/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
	 * information and send packets with DF=0. Fragmentation is allowed if
	 * and only if the packet size exceeds the outgoing interface MTU or
	 * the packet encounters a smaller MTU link in the network. This
	 * mitigates DNS fragmentation attacks by preventing forged PMTU
	 * information. FreeBSD already has the same semantics without
	 * setting the option.
	 */
	optval = IP_PMTUDISC_OMIT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
	/* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / is undefined. */
	optval = IP_PMTUDISC_DONT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
	ret = -1;
#elif defined(IP_DONTFRAG)
	int off = 0;
	if (0 == setsockopt(
		sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
		strerror(errno));
	ret = -1;
#else
	(void)sock;
#endif

	return ret;
}

static int
set_ip_freebind(struct nsd_socket *sock)
{
#ifdef IP_FREEBIND
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
	{
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IP_FREEBIND */

	return 0;
}

static int
set_ip_transparent(struct nsd_socket *sock)
{
	/*
	The scandalous preprocessor blob here calls for some explanation :)
	POSIX does not specify an option to bind non-local IPs, so
	platforms developed several implementation-specific options,
	all set in the same way, but with different names.
	For additional complexity, some platforms manage this setting
	differently for different address families (IPv4 vs IPv6).
	The scandalous preprocessor blob below abstracts such variability
	in a way that leaves the C code as lean and clear as possible.
	*/

#if defined(IP_TRANSPARENT)
#	define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT"
// as of 2020-01, Linux does not support this on IPv6 programmatically
#elif defined(SO_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY"
#elif defined(IP_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
	(void)sock;
#else
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#	define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
#	endif

	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	const int is_ip6 = (sock->addr.ai_family == AF_INET6);

	if(0 == setsockopt(
		sock->s,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
		&on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME,
		socktype, strerror(errno));
	return -1;
#endif

	return 0;
}

static int
set_tcp_maxseg(struct nsd_socket *sock, int mss)
{
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
	if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
		strerror(errno));
	return -1;
#else
	(void)sock; (void)mss;
	log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
#endif
	return 0;
}

#ifdef USE_TCP_FASTOPEN
static int
set_tcp_fastopen(struct nsd_socket *sock)
{
	/* qlen specifies how many outstanding TFO requests to allow. Limit is
	 * a defense against IP spoofing attacks as suggested in RFC7413.
	 */
	int qlen;

#ifdef __APPLE__
	/* The macOS implementation only supports qlen of 1 via this call. The
	 * actual value is configured by the net.inet.tcp.fastopen_backlog
	 * kernel parameter.
	 */
	qlen = 1;
#else
	/* 5 is recommended on Linux. */
	qlen = 5;
#endif
	if (0 == setsockopt(
		sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
	{
		return 1;
	}

	if (errno == EPERM) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
				 "; this could likely be because sysctl "
				 "net.inet.tcp.fastopen.enabled, "
				 "net.inet.tcp.fastopen.server_enable, or "
				 "net.ipv4.tcp_fastopen is disabled",
			strerror(errno));
	/* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
	 * disabled, except when verbosity is enabled for debugging
	 */
	} else if(errno != ENOPROTOOPT || verbosity >= 3) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
			strerror(errno));
	}

	return (errno == ENOPROTOOPT ? 0 : -1);
}
#endif /* USE_TCP_FASTOPEN */

static int
set_bindtodevice(struct nsd_socket *sock)
{
#if defined(SO_BINDTODEVICE)
	if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
		sock->device, strlen(sock->device)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
			"SO_BINDTODEVICE", sock->device, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

static int
set_setfib(struct nsd_socket *sock)
{
#if defined(SO_SETFIB)
	if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
		(const void *)&sock->fib, sizeof(sock->fib)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
			"SO_SETFIB", sock->fib, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

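/* open_udp_socket() and open_tcp_socket() below follow the same convention:
 * they return 1 on success, 0 when an optional socket (for example an IPv6
 * socket on a host without IPv6 support) is skipped, and -1 on error. */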
static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
	int rcv = 1*1024*1024, snd = 1*1024*1024;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	if(nsd->options->receive_buffer_size > 0)
		rcv = nsd->options->receive_buffer_size;
	if(set_rcvbuf(sock, rcv) == -1)
		return -1;

	if(nsd->options->send_buffer_size > 0)
		snd = nsd->options->send_buffer_size;
	if(set_sndbuf(sock, snd) == -1)
		return -1;
#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if(set_ipv6_v6only(sock) == -1 ||
		   set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	} else
#endif /* INET6 */
	if(sock->addr.ai_family == AF_INET) {
		if(set_ipv4_no_pmtu_disc(sock) == -1)
			return -1;
	}

	/* Set socket to non-blocking. Otherwise, on operating systems
	 * with thundering herd problems, the UDP recv could block
	 * after select returns readable.
	 */
	set_nonblock(sock);

	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind udp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

	return 1;
}

static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
	report_tcp_fastopen_config();
#endif

	(void)reuseport_works;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif /* INET6 */
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	(void)set_reuseaddr(sock);

#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if (set_ipv6_v6only(sock) == -1 ||
		    set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	}
#endif

	if(nsd->tcp_mss > 0)
		set_tcp_maxseg(sock, nsd->tcp_mss);
	/* (StevensUNP p463), if TCP listening socket is blocking, then
	   it may block in accept, even if select() says readable. */
	(void)set_nonblock(sock);
	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

#ifdef USE_TCP_FASTOPEN
	(void)set_tcp_fastopen(sock);
#endif

	if(listen(sock->s, TCP_BACKLOG) == -1) {
		log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
		return -1;
	}

	return 1;
}

/*
 * Initialize the server, reuseport, create and bind the sockets.
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
	int reuseport = 1; /* Determine if REUSEPORT works. */

	/* open server interface ports */
	for(i = 0; i < nsd->ifs; i++) {
		if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
		   open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
		{
			return -1;
		}
	}

	if(nsd->reuseport && reuseport) {
		size_t ifs = nsd->ifs * nsd->reuseport;

		/* increase the size of the interface arrays, there are going
		 * to be separate interface file descriptors for every server
		 * instance */
		region_remove_cleanup(nsd->region, free, nsd->udp);
		region_remove_cleanup(nsd->region, free, nsd->tcp);

		nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
		nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
		region_add_cleanup(nsd->region, free, nsd->udp);
		region_add_cleanup(nsd->region, free, nsd->tcp);
		if(ifs > nsd->ifs) {
			memset(&nsd->udp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->udp));
			memset(&nsd->tcp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->tcp));
		}

		for(i = nsd->ifs; i < ifs; i++) {
			nsd->udp[i] = nsd->udp[i%nsd->ifs];
			nsd->udp[i].s = -1;
			if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
				return -1;
			}
			/* Turn off REUSEPORT for TCP by copying the socket
			 * file descriptor.
			 * This means we should not close TCP used by
			 * other servers in reuseport enabled mode, in
			 * server_child().
			 */
			nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
		}

		nsd->ifs = ifs;
	} else {
		nsd->reuseport = 0;
	}

	/* open server interface ports for verifiers */
	for(i = 0; i < nsd->verify_ifs; i++) {
		if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 ||
		   open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1)
		{
			return -1;
		}
	}

	return 0;
}
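/* With reuseport enabled, server_init ends up with ifs = original_ifs *
 * reuseport sockets: for example, 2 interfaces and reuseport 4 yield 8 UDP
 * sockets (one per server process), while the extra TCP entries are copies
 * that share the original file descriptors. */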

/*
 * Prepare the server for take off.
 *
 */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
	/* set secret modifier for hashing (rate limits) */
#ifdef HAVE_GETRANDOM
	uint32_t v;
	if(getrandom(&v, sizeof(v), 0) == -1) {
		log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
		exit(1);
	}
	hash_set_raninit(v);
#elif defined(HAVE_ARC4RANDOM)
	hash_set_raninit(arc4random());
#else
	uint32_t v = getpid() ^ time(NULL);
	srandom((unsigned long)v);
# ifdef HAVE_SSL
	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
		hash_set_raninit(v);
	else
# endif
		hash_set_raninit(random());
#endif
	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
		nsd->options->rrl_ratelimit,
		nsd->options->rrl_whitelist_ratelimit,
		nsd->options->rrl_slip,
		nsd->options->rrl_ipv4_prefix_length,
		nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->options)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database: %s", strerror(errno));
		unlink(nsd->task[0]->fname);
		unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
#ifdef BIND8_STATS
		server_stat_free(nsd);
#endif
		xfrd_del_tempdir(nsd);
		return -1;
	}
	/* check if zone files can be read */
	/* NULL for taskudb because we send soainfo in a moment, batched up,
	 * for all zones */
	namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
	zonestatid_tree_set(nsd);

	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Initialize times... */
	time(&nsd->st->boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}

/*
 * Fork the required number of servers.
 */
static int
server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	size_t i;

	/* Start all child servers initially. */
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].pid = 0;
	}

	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
}

static void
server_close_socket(struct nsd_socket *sock)
{
	if(sock->s != -1) {
		close(sock->s);
		sock->s = -1;
	}
}

void
server_close_all_sockets(struct nsd_socket sockets[], size_t n)
{
	size_t i;

	/* Close all the sockets... */
	for (i = 0; i < n; ++i) {
		server_close_socket(&sockets[i]);
	}
}

/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 */
void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	tsig_finalize();
	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
#ifdef HAVE_SSL
	if (nsd->tls_ctx)
		SSL_CTX_free(nsd->tls_ctx);
#endif

#ifdef MEMCLEAN /* OS collects memory pages */
#ifdef RATELIMIT
	rrl_mmap_deinit_keep_mmap();
#endif
#ifdef USE_DNSTAP
	dt_collector_destroy(nsd->dt_collector, nsd);
#endif
	udb_base_free_keep_mmap(nsd->task[0]);
	udb_base_free_keep_mmap(nsd->task[1]);
	namedb_free_ixfr(nsd->db);
	namedb_close(nsd->db);
	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);
#endif
	log_finalize();
	exit(0);
}

void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
#ifdef BIND8_STATS
		server_stat_free(nsd);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
#ifdef BIND8_STATS
		server_stat_free(nsd);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}


void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
		setproctitle("xfrd");
#endif
#ifdef USE_LOG_PROCESS_ROLE
		log_set_process_role("xfrd");
#endif
#ifdef HAVE_CPUSET_T
		if(nsd->use_cpu_affinity) {
			set_cpu_affinity(nsd->xfrd_cpuset);
		}
#endif

		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
#ifdef HAVE_SETPROCTITLE
		setproctitle("main");
#endif
#ifdef USE_LOG_PROCESS_ROLE
		log_set_process_role("main");
#endif
		break;
	}
	/* server-parent only: the forking parent becomes xfrd and does not
	 * return from xfrd_init, so only the child (which continues as the
	 * server "main" process) reaches this point */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}

/** add all soainfo to taskdb */
static void
add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
{
	struct radnode* n;
	udb_ptr task_last; /* last task, mytask is empty so NULL */
	/* add all SOA INFO to mytask */
	udb_ptr_init(&task_last, taskudb);
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
	}
	udb_ptr_unlink(&task_last, taskudb);
}
1787
1788 void
server_send_soa_xfrd(struct nsd * nsd,int shortsoa)1789 server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
1790 {
1791 /* normally this exchanges the SOA from nsd->xfrd and the expire back.
1792 * parent fills one taskdb with soas, xfrd fills other with expires.
1793 * then they exchange and process.
1794 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
1795 * may be in use by reload. Fill SOA in taskdb and give to xfrd.
1796 * expire notifications can be sent back via a normal reload later
1797 * (xfrd will wait for current running reload to finish if any).
1798 */
1799 sig_atomic_t cmd = 0;
1800 pid_t mypid;
1801 int xfrd_sock = nsd->xfrd_listener->fd;
1802 struct udb_base* taskudb = nsd->task[nsd->mytask];
1803 udb_ptr t;
1804 if(!shortsoa) {
1805 if(nsd->signal_hint_shutdown) {
1806 shutdown:
1807 log_msg(LOG_WARNING, "signal received, shutting down...");
1808 server_close_all_sockets(nsd->udp, nsd->ifs);
1809 server_close_all_sockets(nsd->tcp, nsd->ifs);
1810 daemon_remote_close(nsd->rc);
1811 /* Unlink it if possible... */
1812 unlinkpid(nsd->pidfile);
1813 unlink(nsd->task[0]->fname);
1814 unlink(nsd->task[1]->fname);
1815 #ifdef USE_ZONE_STATS
1816 unlink(nsd->zonestatfname[0]);
1817 unlink(nsd->zonestatfname[1]);
1818 #endif
1819 #ifdef BIND8_STATS
1820 server_stat_free(nsd);
1821 #endif
1822 server_shutdown(nsd);
1823 /* ENOTREACH */
1824 exit(0);
1825 }
1826 }
1827 if(shortsoa) {
1828 /* put SOA in xfrd task because mytask may be in use */
1829 taskudb = nsd->task[1-nsd->mytask];
1830 }
1831
1832 add_all_soa_to_task(nsd, taskudb);
1833 if(!shortsoa) {
1834 /* wait for xfrd to signal task is ready, RELOAD signal */
1835 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
1836 cmd != NSD_RELOAD) {
1837 log_msg(LOG_ERR, "did not get start signal from xfrd");
1838 exit(1);
1839 }
1840 if(nsd->signal_hint_shutdown) {
1841 goto shutdown;
1842 }
1843 }
1844 /* give xfrd our task, signal it with RELOAD_DONE */
1845 task_process_sync(taskudb);
1846 cmd = NSD_RELOAD_DONE;
1847 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
1848 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
1849 (int)nsd->pid, strerror(errno));
1850 }
1851 mypid = getpid();
1852 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
1853 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
1854 strerror(errno));
1855 }
1856
1857 if(!shortsoa) {
1858 /* process the xfrd task's work items (expiry data) */
1859 nsd->mytask = 1 - nsd->mytask;
1860 taskudb = nsd->task[nsd->mytask];
1861 task_remap(taskudb);
1862 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
1863 while(!udb_ptr_is_null(&t)) {
1864 task_process_expire(nsd->db, TASKLIST(&t));
1865 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
1866 }
1867 udb_ptr_unlink(&t, taskudb);
1868 task_clear(taskudb);
1869
1870 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */
1871 cmd = NSD_RELOAD_DONE;
1872 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
1873 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
1874 (int)nsd->pid, strerror(errno));
1875 }
1876 }
1877 }
1878
1879 #ifdef HAVE_SSL
1880 static void
1881 log_crypto_from_err(const char* str, unsigned long err)
1882 {
1883 /* error:[error code]:[library name]:[function name]:[reason string] */
1884 char buf[128];
1885 unsigned long e;
1886 ERR_error_string_n(err, buf, sizeof(buf));
1887 log_msg(LOG_ERR, "%s crypto %s", str, buf);
1888 while( (e=ERR_get_error()) ) {
1889 ERR_error_string_n(e, buf, sizeof(buf));
1890 log_msg(LOG_ERR, "and additionally crypto %s", buf);
1891 }
1892 }
1893
1894 void
1895 log_crypto_err(const char* str)
1896 {
1897 log_crypto_from_err(str, ERR_get_error());
1898 }
1899
1900 /** true if the ssl handshake error has to be squelched from the logs */
1901 static int
1902 squelch_err_ssl_handshake(unsigned long err)
1903 {
1904 if(verbosity >= 3)
1905 return 0; /* only squelch on low verbosity */
1906 /* this is very specific, we could filter on ERR_GET_REASON()
1907 * (the third element in ERR_PACK) */
1908 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
1909 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
1910 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
1911 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
1912 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
1913 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
1914 #endif
1915 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
1916 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
1917 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
1918 # ifdef SSL_R_VERSION_TOO_LOW
1919 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
1920 # endif
1921 #endif
1922 )
1923 return 1;
1924 return 0;
1925 }
1926
1927 void
1928 perform_openssl_init(void)
1929 {
1930 /* init SSL library */
1931 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
1932 ERR_load_crypto_strings();
1933 #endif
1934 #if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS)
1935 ERR_load_SSL_strings();
1936 #endif
1937 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
1938 OpenSSL_add_all_algorithms();
1939 #else
1940 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
1941 | OPENSSL_INIT_ADD_ALL_DIGESTS
1942 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
1943 #endif
1944 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
1945 (void)SSL_library_init();
1946 #else
1947 OPENSSL_init_ssl(0, NULL);
1948 #endif
1949
1950 if(!RAND_status()) {
1951 /* try to seed it */
1952 unsigned char buf[256];
1953 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
1954 size_t i;
1955 v = seed;
1956 for(i=0; i<256/sizeof(v); i++) {
1957 memmove(buf+i*sizeof(v), &v, sizeof(v));
1958 v = v*seed + (unsigned int)i;
1959 }
1960 RAND_seed(buf, 256);
1961 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
1962 }
1963 }
1964
1965 static int
1966 get_ocsp(char *filename, unsigned char **ocsp)
1967 {
1968 BIO *bio;
1969 OCSP_RESPONSE *response;
1970 int len = -1;
1971 unsigned char *p, *buf;
1972 assert(filename);
1973
1974 if ((bio = BIO_new_file(filename, "r")) == NULL) {
1975 log_crypto_err("get_ocsp: BIO_new_file failed");
1976 return -1;
1977 }
1978
1979 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
1980 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
1981 BIO_free(bio);
1982 return -1;
1983 }
1984
1985 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
1986 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
1987 OCSP_RESPONSE_free(response);
1988 BIO_free(bio);
1989 return -1;
1990 }
1991
1992 if ((buf = malloc((size_t) len)) == NULL) {
1993 log_msg(LOG_ERR, "get_ocsp: malloc failed");
1994 OCSP_RESPONSE_free(response);
1995 BIO_free(bio);
1996 return -1;
1997 }
1998
1999 p = buf;
2000 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
2001 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
2002 free(buf);
2003 OCSP_RESPONSE_free(response);
2004 BIO_free(bio);
2005 return -1;
2006 }
2007
2008 OCSP_RESPONSE_free(response);
2009 BIO_free(bio);
2010
2011 *ocsp = buf;
2012 return len;
2013 }
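
/* get_ocsp() above follows OpenSSL's two-pass i2d_* DER encoding
 * pattern: a NULL output pointer yields the encoded length, a second
 * call writes the bytes and advances the output pointer. A minimal
 * sketch of the same pattern (illustrative only, not compiled): */
#if 0
static int
encode_ocsp_der(OCSP_RESPONSE* response, unsigned char** out)
{
	unsigned char *buf, *p;
	int len;
	/* pass 1: measure */
	if((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0)
		return -1;
	if((buf = malloc((size_t)len)) == NULL)
		return -1;
	/* pass 2: write; i2d advances p past the encoded bytes */
	p = buf;
	if(i2d_OCSP_RESPONSE(response, &p) != len) {
		free(buf);
		return -1;
	}
	*out = buf;
	return len;
}
#endif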
2014
2015 /* further setup ssl ctx after the keys are loaded */
2016 static void
2017 listen_sslctx_setup_2(void* ctxt)
2018 {
2019 SSL_CTX* ctx = (SSL_CTX*)ctxt;
2020 (void)ctx;
2021 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO
2022 if(!SSL_CTX_set_ecdh_auto(ctx,1)) {
2023 /* ENOTREACH */
2024 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE");
2025 }
2026 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME)
2027 if(1) {
2028 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1);
2029 if (!ecdh) {
2030 log_crypto_err("could not find p256, not enabling ECDHE");
2031 } else {
2032 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) {
2033 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE");
2034 }
2035 EC_KEY_free (ecdh);
2036 }
2037 }
2038 #endif
2039 }
2040
2041 static int
2042 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg))
2043 {
2044 if(ocspdata) {
2045 unsigned char *p;
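/* hand OpenSSL its own copy of the response; on success the library
 * takes ownership of the buffer and frees it later */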
2046 if ((p=malloc(ocspdata_len)) == NULL) {
2047 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure");
2048 return SSL_TLSEXT_ERR_NOACK;
2049 }
2050 memcpy(p, ocspdata, ocspdata_len);
2051 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) {
2052 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp");
2053 free(p);
2054 return SSL_TLSEXT_ERR_NOACK;
2055 }
2056 return SSL_TLSEXT_ERR_OK;
2057 } else {
2058 return SSL_TLSEXT_ERR_NOACK;
2059 }
2060 }
2061
2062 SSL_CTX*
2063 server_tls_ctx_setup(char* key, char* pem, char* verifypem)
2064 {
2065 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
2066 if(!ctx) {
2067 log_crypto_err("could not SSL_CTX_new");
2068 return NULL;
2069 }
2070 /* no SSLv2, SSLv3 because they have known defects */
2071 #if SSL_OP_NO_SSLv2 != 0
2072 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
2073 log_crypto_err("could not set SSL_OP_NO_SSLv2");
2074 SSL_CTX_free(ctx);
2075 return NULL;
2076 }
2077 #endif
2078 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
2079 != SSL_OP_NO_SSLv3){
2080 log_crypto_err("could not set SSL_OP_NO_SSLv3");
2081 SSL_CTX_free(ctx);
2082 return NULL;
2083 }
2084 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
2085 /* if we have tls 1.1 disable 1.0 */
2086 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
2087 != SSL_OP_NO_TLSv1){
2088 log_crypto_err("could not set SSL_OP_NO_TLSv1");
2089 SSL_CTX_free(ctx);
2090 return NULL;
2091 }
2092 #endif
2093 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
2094 /* if we have tls 1.2 disable 1.1 */
2095 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
2096 != SSL_OP_NO_TLSv1_1){
2097 log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
2098 SSL_CTX_free(ctx);
2099 return NULL;
2100 }
2101 #endif
2102 #if defined(SSL_OP_NO_RENEGOTIATION)
2103 /* disable client renegotiation */
2104 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
2105 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
2106 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
2107 SSL_CTX_free(ctx);
2108 return NULL;
2109 }
2110 #endif
2111 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
2112 /* if a system-wide crypto policy is configured, defer to it */
2113 if (access("/etc/crypto-policies/config", F_OK) != 0) {
2114 /* no policy: set a cipher list with no known vulnerabilities */
2115 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
2116 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
2117 }
2118 #endif
2119 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
2120 SSL_OP_CIPHER_SERVER_PREFERENCE) !=
2121 SSL_OP_CIPHER_SERVER_PREFERENCE) {
2122 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
2123 SSL_CTX_free(ctx);
2124 return NULL;
2125 }
2126 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
2127 SSL_CTX_set_security_level(ctx, 0);
2128 #endif
2129 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
2130 log_msg(LOG_ERR, "error for cert file: %s", pem);
2131 log_crypto_err("error in SSL_CTX use_certificate_chain_file");
2132 SSL_CTX_free(ctx);
2133 return NULL;
2134 }
2135 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
2136 log_msg(LOG_ERR, "error for private key file: %s", key);
2137 log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
2138 SSL_CTX_free(ctx);
2139 return NULL;
2140 }
2141 if(!SSL_CTX_check_private_key(ctx)) {
2142 log_msg(LOG_ERR, "error for key file: %s", key);
2143 log_crypto_err("Error in SSL_CTX check_private_key");
2144 SSL_CTX_free(ctx);
2145 return NULL;
2146 }
2147 listen_sslctx_setup_2(ctx);
2148 if(verifypem && verifypem[0]) {
2149 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
2150 log_crypto_err("Error in SSL_CTX verify locations");
2151 SSL_CTX_free(ctx);
2152 return NULL;
2153 }
2154 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
2155 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
2156 }
2157 return ctx;
2158 }
2159
2160 SSL_CTX*
2161 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
2162 {
2163 char *key, *pem;
2164 SSL_CTX *ctx;
2165
2166 key = nsd->options->tls_service_key;
2167 pem = nsd->options->tls_service_pem;
2168 if(!key || key[0] == 0) {
2169 log_msg(LOG_ERR, "error: no tls-service-key file specified");
2170 return NULL;
2171 }
2172 if(!pem || pem[0] == 0) {
2173 log_msg(LOG_ERR, "error: no tls-service-pem file specified");
2174 return NULL;
2175 }
2176
2177 /* NOTE: This mimics the existing code in Unbound 1.5.1 by supporting SSL but
2178 * draft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2 */
2179 ctx = server_tls_ctx_setup(key, pem, verifypem);
2180 if(!ctx) {
2181 log_msg(LOG_ERR, "could not setup server TLS context");
2182 return NULL;
2183 }
2184 if(ocspfile && ocspfile[0]) {
2185 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
2186 log_crypto_err("Error reading OCSPfile");
2187 SSL_CTX_free(ctx);
2188 return NULL;
2189 } else {
2190 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
2191 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
2192 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
2193 SSL_CTX_free(ctx);
2194 return NULL;
2195 }
2196 }
2197 }
2198 return ctx;
2199 }
2200
2201 /* check whether the address uses the dedicated TLS port, so the TCP accept handler is created for TLS */
2202 int
2203 using_tls_port(struct sockaddr* addr, const char* tls_port)
2204 {
2205 in_port_t port = 0;
2206
2207 if (addr->sa_family == AF_INET)
2208 port = ((struct sockaddr_in*)addr)->sin_port;
2209 #ifdef HAVE_STRUCT_SOCKADDR_IN6
2210 else
2211 port = ((struct sockaddr_in6*)addr)->sin6_port;
2212 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */
2213 if (atoi(tls_port) == ntohs(port))
2214 return 1;
2215
2216 return 0;
2217 }
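
/* Usage sketch for using_tls_port(); the values are hypothetical and
 * the fragment is not compiled. */
#if 0
	struct sockaddr_in sa;
	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = htons(853); /* standard DNS-over-TLS port */
	/* matches when the configured tls-port option is "853" */
	assert(using_tls_port((struct sockaddr*)&sa, "853") == 1);
#endif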
2218 #endif
2219
2220 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
2221 ssize_t
2222 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
2223 {
2224 uint8_t* buf = (uint8_t*) p;
2225 ssize_t total = 0;
2226 struct pollfd fd;
2227 memset(&fd, 0, sizeof(fd));
2228 fd.fd = s;
2229 fd.events = POLLIN;
2230
2231 while( total < sz) {
2232 ssize_t ret;
2233 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000);
2234 if(ret == -1) {
2235 if(errno == EAGAIN)
2236 /* blocking read */
2237 continue;
2238 if(errno == EINTR) {
2239 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
2240 return -1;
2241 /* other signals can be handled later */
2242 continue;
2243 }
2244 /* some error */
2245 return -1;
2246 }
2247 if(ret == 0) {
2248 /* operation timed out */
2249 return -2;
2250 }
2251 ret = read(s, buf+total, sz-total);
2252 if(ret == -1) {
2253 if(errno == EAGAIN)
2254 /* blocking read */
2255 continue;
2256 if(errno == EINTR) {
2257 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
2258 return -1;
2259 /* other signals can be handled later */
2260 continue;
2261 }
2262 /* some error */
2263 return -1;
2264 }
2265 if(ret == 0) {
2266 /* closed connection! */
2267 return 0;
2268 }
2269 total += ret;
2270 }
2271 return total;
2272 }
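
/* Typical block_read() usage, as at the call sites in this file:
 * timeout -1 blocks indefinitely, 0 polls once, and a positive value
 * waits that many seconds. A hypothetical caller (not compiled): */
#if 0
	sig_atomic_t cmd;
	ssize_t r = block_read(nsd, fd, &cmd, sizeof(cmd), 5);
	if(r == sizeof(cmd))
		handle_command(cmd); /* hypothetical handler */
	else if(r == -2)
		log_msg(LOG_ERR, "read timed out");
	else
		log_msg(LOG_ERR, "read failed or connection closed");
#endif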
2273
2274 static void
2275 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
2276 {
2277 sig_atomic_t cmd = NSD_QUIT_SYNC;
2278 udb_ptr t, next;
2279 udb_base* u = nsd->task[nsd->mytask];
2280 udb_ptr_init(&next, u);
2281 udb_ptr_new(&t, u, udb_base_get_userdata(u));
2282 udb_base_set_userdata(u, 0);
2283 while(!udb_ptr_is_null(&t)) {
2284 /* store next in list so this one can be deleted or reused */
2285 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
2286 udb_rptr_zero(&TASKLIST(&t)->next, u);
2287
2288 /* process task t */
2289 /* append results for task t and update last_task */
2290 task_process_in_reload(nsd, u, last_task, &t);
2291
2292 /* go to next */
2293 udb_ptr_set_ptr(&t, u, &next);
2294
2295 /* if the parent has quit, we must quit too, poll the fd for cmds */
2296 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
2297 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
2298 if(cmd == NSD_QUIT) {
2299 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
2300 /* unlink files of remainder of tasks */
2301 while(!udb_ptr_is_null(&t)) {
2302 if(TASKLIST(&t)->task_type == task_apply_xfr) {
2303 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
2304 }
2305 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
2306 }
2307 udb_ptr_unlink(&t, u);
2308 udb_ptr_unlink(&next, u);
2309 exit(0);
2310 }
2311 }
2312
2313 }
2314 udb_ptr_unlink(&t, u);
2315 udb_ptr_unlink(&next, u);
2316 }
2317
2318 void server_verify(struct nsd *nsd, int cmdsocket);
2319
2320 struct quit_sync_event_data {
2321 struct event_base* base;
2322 size_t read;
2323 union {
2324 uint8_t buf[sizeof(sig_atomic_t)];
2325 sig_atomic_t cmd;
2326 } to_read;
2327 };
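
/* The buf/cmd union lets server_reload_handle_quit_sync_ack() below
 * accumulate a sig_atomic_t bytewise across partial reads. A minimal
 * sketch of the accumulation step (illustrative, not compiled): */
#if 0
	struct quit_sync_event_data d = { 0 };
	ssize_t r = read(fd, d.to_read.buf + d.read,
		sizeof(d.to_read.cmd) - d.read);
	if(r > 0) {
		d.read += r;
		if(d.read == sizeof(d.to_read.cmd))
			use_command(d.to_read.cmd); /* hypothetical */
	}
#endif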
2328
2329 static void server_reload_handle_sigchld(int sig, short event,
2330 void* ATTR_UNUSED(arg))
2331 {
2332 assert(sig == SIGCHLD);
2333 assert(event & EV_SIGNAL);
2334
2335 /* reap the exited old-serve child(s) */
2336 while(waitpid(-1, NULL, WNOHANG) > 0) {
2337 /* pass */
2338 }
2339 }
2340
2341 static void server_reload_handle_quit_sync_ack(int cmdsocket, short event,
2342 void* arg)
2343 {
2344 struct quit_sync_event_data* cb_data =
2345 (struct quit_sync_event_data*)arg;
2346 ssize_t r;
2347
2348 if(event & EV_TIMEOUT) {
2349 sig_atomic_t cmd = NSD_QUIT_SYNC;
2350
2351 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
2352 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) {
2353 log_msg(LOG_ERR, "problems sending command from "
2354 "reload to old-main: %s", strerror(errno));
2355 }
2356 /* Wait for cmdsocket to become readable or for next timeout,
2357 * (this works because event is added EV_TIMEOUT|EV_PERSIST).
2358 */
2359 return;
2360 }
2361 assert(event & EV_READ);
2362 assert(cb_data->read < sizeof(cb_data->to_read.cmd));
2363
2364 r = read(cmdsocket, cb_data->to_read.buf + cb_data->read,
2365 sizeof(cb_data->to_read.cmd) - cb_data->read);
2366 if(r == 0) {
2367 log_msg(LOG_ERR, "reload: old-main quit during quit sync");
2368 cb_data->to_read.cmd = NSD_RELOAD;
2369
2370 } else if(r == -1) {
2371 if(errno == EAGAIN || errno == EINTR)
2372 return;
2373
2374 log_msg(LOG_ERR, "reload: could not wait for parent to quit: "
2375 "%s", strerror(errno));
2376 cb_data->to_read.cmd = NSD_RELOAD;
2377
2378 } else if (cb_data->read + r < sizeof(cb_data->to_read.cmd)) {
2379 /* More to read */
2380 cb_data->read += r;
2381 return;
2382
2383 } else {
2384 assert(cb_data->read + r == sizeof(cb_data->to_read.cmd));
2385 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d",
2386 (int)cb_data->to_read.cmd));
2387 }
2388 /* Done */
2389 event_base_loopexit(cb_data->base, NULL);
2390 }
2391
2392 /*
2393 * Reload the database, stop the parent, re-fork the children and
2394 * continue as server_main.
2395 */
2396 static void
2397 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
2398 int cmdsocket)
2399 {
2400 pid_t mypid;
2401 sig_atomic_t cmd;
2402 udb_ptr last_task;
2403 struct sigaction old_sigchld, ign_sigchld;
2404 struct radnode* node;
2405 zone_type* zone;
2406 enum soainfo_hint hint;
2407 struct quit_sync_event_data cb_data;
2408 struct event signal_event, cmd_event;
2409 struct timeval reload_sync_timeout;
2410
2411 /* ignore SIGCHLD from the previous server_main that used this pid */
2412 memset(&ign_sigchld, 0, sizeof(ign_sigchld));
2413 ign_sigchld.sa_handler = SIG_IGN;
2414 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);
2415
2416 #ifdef HAVE_CPUSET_T
2417 if(nsd->use_cpu_affinity) {
2418 set_cpu_affinity(nsd->cpuset);
2419 }
2420 #endif
2421
2422 /* see what tasks we got from xfrd */
2423 task_remap(nsd->task[nsd->mytask]);
2424 udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
2425 reload_process_tasks(nsd, &last_task, cmdsocket);
2426
2427 #ifndef NDEBUG
2428 if(nsd_debug_level >= 1)
2429 region_log_stats(nsd->db->region);
2430 #endif /* NDEBUG */
2431 initialize_dname_compression_tables(nsd);
2432
2433 #ifdef BIND8_STATS
2434 /* Restart dumping stats if required. */
2435 time(&nsd->st->boot);
2436 set_bind8_alarm(nsd);
2437 /* Switch to a different set of stat array for new server processes,
2438 * because they can briefly coexist with the old processes. They
2439 * have their own stat structure. */
2440 nsd->stat_current = (nsd->stat_current==0?1:0);
2441 #endif
2442 #ifdef USE_ZONE_STATS
2443 server_zonestat_realloc(nsd); /* realloc for new children */
2444 server_zonestat_switch(nsd);
2445 #endif
2446
2447 if(nsd->options->verify_enable) {
2448 #ifdef RATELIMIT
2449 /* allocate resources for rate limiting. use a slot that is guaranteed
2450 not mapped to a file so no persistent data is overwritten */
2451 rrl_init(nsd->child_count + 1);
2452 #endif
2453
2454 /* spin up the server and execute verifiers for each zone */
2455 server_verify(nsd, cmdsocket);
2456 #ifdef RATELIMIT
2457 /* deallocate rate limiting resources */
2458 rrl_deinit(nsd->child_count + 1);
2459 #endif
2460 }
2461
2462 for(node = radix_first(nsd->db->zonetree);
2463 node != NULL;
2464 node = radix_next(node))
2465 {
2466 zone = (zone_type *)node->elem;
2467 if(zone->is_updated) {
2468 if(zone->is_bad) {
2469 nsd->mode = NSD_RELOAD_FAILED;
2470 hint = soainfo_bad;
2471 } else {
2472 hint = soainfo_ok;
2473 }
2474 /* update(s), verified or not, possibly with subsequent
2475 skipped update(s). skipped update(s) are picked up
2476 by failed update check in xfrd */
2477 task_new_soainfo(nsd->task[nsd->mytask], &last_task,
2478 zone, hint);
2479 } else if(zone->is_skipped) {
2480 /* corrupt or inconsistent update without preceding
2481 update(s), communicate soainfo_gone */
2482 task_new_soainfo(nsd->task[nsd->mytask], &last_task,
2483 zone, soainfo_gone);
2484 }
2485 zone->is_updated = 0;
2486 zone->is_skipped = 0;
2487 }
2488
2489 if(nsd->mode == NSD_RELOAD_FAILED) {
2490 exit(NSD_RELOAD_FAILED);
2491 }
2492
2493 /* listen for the signals of failed children again */
2494 sigaction(SIGCHLD, &old_sigchld, NULL);
2495 #ifdef USE_DNSTAP
2496 if (nsd->dt_collector) {
2497 int *swap_fd_send;
2498 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes"));
2499 /* Swap fd_send with fd_swap so the old serve children and new
2500 * serve children will not write to the same pipe ends simultaneously */
2501 swap_fd_send = nsd->dt_collector_fd_send;
2502 nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap;
2503 nsd->dt_collector_fd_swap = swap_fd_send;
2504
2505 }
2506 #endif
2507 /* Start new child processes */
2508 if (server_start_children(nsd, server_region, netio, &nsd->
2509 xfrd_listener->fd) != 0) {
2510 send_children_quit(nsd);
2511 exit(1);
2512 }
2513
2514 /* if the old-main has quit, we must quit too, poll the fd for cmds */
2515 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
2516 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
2517 if(cmd == NSD_QUIT) {
2518 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
2519 send_children_quit(nsd);
2520 exit(0);
2521 }
2522 }
2523
2524 /* Send quit command to old-main: blocking, wait for receipt.
2525 * The old-main process asks the old-serve processes to quit, however
2526 * if a reload succeeded before, this process is the parent of the
2527 * old-serve processes, so we need to reap the children for it.
2528 */
2529 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
2530 cmd = NSD_QUIT_SYNC;
2531 if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
2532 {
2533 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
2534 strerror(errno));
2535 }
2536
2537 reload_sync_timeout.tv_sec = RELOAD_SYNC_TIMEOUT;
2538 reload_sync_timeout.tv_usec = 0;
2539
2540 cb_data.base = nsd_child_event_base();
2541 cb_data.to_read.cmd = cmd;
2542 cb_data.read = 0;
2543
2544 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST,
2545 server_reload_handle_sigchld, NULL);
2546 if(event_base_set(cb_data.base, &signal_event) != 0
2547 || event_add(&signal_event, NULL) != 0) {
2548 log_msg(LOG_ERR, "NSD quit sync: could not add signal event");
2549 }
2550
2551 event_set(&cmd_event, cmdsocket, EV_READ|EV_TIMEOUT|EV_PERSIST,
2552 server_reload_handle_quit_sync_ack, &cb_data);
2553 if(event_base_set(cb_data.base, &cmd_event) != 0
2554 || event_add(&cmd_event, &reload_sync_timeout) != 0) {
2555 log_msg(LOG_ERR, "NSD quit sync: could not add command event");
2556 }
2557
2558 /* short-lived main loop */
2559 event_base_dispatch(cb_data.base);
2560
2561 /* remove command and signal event handlers */
2562 event_del(&cmd_event);
2563 event_del(&signal_event);
2564 event_base_free(cb_data.base);
2565 cmd = cb_data.to_read.cmd;
2566
2567 if(cmd == NSD_QUIT) {
2568 /* small race condition possible here, parent got quit cmd. */
2569 send_children_quit(nsd);
2570 exit(1);
2571 }
2572 assert(cmd == NSD_RELOAD);
2573 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
2574 task_process_sync(nsd->task[nsd->mytask]);
2575 #ifdef USE_ZONE_STATS
2576 server_zonestat_realloc(nsd); /* realloc for next children */
2577 #endif
2578
2579 /* send soainfo to the xfrd process, signal it that reload is done,
2580 * it picks up the taskudb */
2581 cmd = NSD_RELOAD_DONE;
2582 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
2583 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
2584 strerror(errno));
2585 }
2586 mypid = getpid();
2587 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
2588 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
2589 strerror(errno));
2590 }
2591
2592 /* try to reopen file */
2593 if (nsd->file_rotation_ok)
2594 log_reopen(nsd->log_filename, 1);
2595 /* exit reload, continue as new server_main */
2596 }
2597
2598 /*
2599 * Get the mode depending on the signal hints that have been received.
2600 * Multiple signal hints can be received and will be handled in turn.
2601 */
2602 static sig_atomic_t
2603 server_signal_mode(struct nsd *nsd)
2604 {
2605 if(nsd->signal_hint_quit) {
2606 nsd->signal_hint_quit = 0;
2607 return NSD_QUIT;
2608 }
2609 else if(nsd->signal_hint_shutdown) {
2610 nsd->signal_hint_shutdown = 0;
2611 return NSD_SHUTDOWN;
2612 }
2613 else if(nsd->signal_hint_child) {
2614 nsd->signal_hint_child = 0;
2615 return NSD_REAP_CHILDREN;
2616 }
2617 else if(nsd->signal_hint_reload) {
2618 nsd->signal_hint_reload = 0;
2619 return NSD_RELOAD;
2620 }
2621 else if(nsd->signal_hint_reload_hup) {
2622 nsd->signal_hint_reload_hup = 0;
2623 return NSD_RELOAD_REQ;
2624 }
2625 else if(nsd->signal_hint_stats) {
2626 nsd->signal_hint_stats = 0;
2627 #ifdef BIND8_STATS
2628 set_bind8_alarm(nsd);
2629 #endif
2630 return NSD_STATS;
2631 }
2632 else if(nsd->signal_hint_statsusr) {
2633 nsd->signal_hint_statsusr = 0;
2634 return NSD_STATS;
2635 }
2636 return NSD_RUN;
2637 }
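
/* server_signal_mode() consumes one pending signal hint per call and
 * returns the corresponding mode, or NSD_RUN when none is pending. A
 * minimal sketch of draining all hints (illustrative, not compiled;
 * handle_mode() is hypothetical): */
#if 0
	sig_atomic_t mode;
	while((mode = server_signal_mode(nsd)) != NSD_RUN)
		handle_mode(nsd, mode);
#endif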
2638
2639 /*
2640 * The main server simply waits for signals and child processes to
2641 * terminate. Child processes are restarted as necessary.
2642 */
2643 void
2644 server_main(struct nsd *nsd)
2645 {
2646 region_type *server_region = region_create(xalloc, free);
2647 netio_type *netio = netio_create(server_region);
2648 netio_handler_type reload_listener;
2649 int reload_sockets[2] = {-1, -1};
2650 struct timespec timeout_spec;
2651 int status;
2652 pid_t child_pid;
2653 pid_t reload_pid = -1;
2654 sig_atomic_t mode;
2655
2656 /* Ensure we are the main process */
2657 assert(nsd->server_kind == NSD_SERVER_MAIN);
2658
2659 /* Add listener for the XFRD process */
2660 netio_add_handler(netio, nsd->xfrd_listener);
2661
2662 #ifdef BIND8_STATS
2663 nsd->st = &nsd->stat_map[0];
2664 nsd->st->db_disk = 0;
2665 nsd->st->db_mem = region_get_mem(nsd->db->region);
2666 #endif
2667
2668 /* Start the child processes that handle incoming queries */
2669 if (server_start_children(nsd, server_region, netio,
2670 &nsd->xfrd_listener->fd) != 0) {
2671 send_children_quit(nsd);
2672 exit(1);
2673 }
2674 reload_listener.fd = -1;
2675
2676 /* this_child MUST be 0, because this is the parent process */
2677 assert(nsd->this_child == 0);
2678
2679 /* Run the server until we get a shutdown signal */
2680 while ((mode = nsd->mode) != NSD_SHUTDOWN) {
2681 /* Did we receive a signal that changes our mode? */
2682 if(mode == NSD_RUN) {
2683 nsd->mode = mode = server_signal_mode(nsd);
2684 }
2685
2686 switch (mode) {
2687 case NSD_RUN:
2688 /* see if any child processes terminated */
2689 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
2690 int is_child = delete_child_pid(nsd, child_pid);
2691 if (is_child != -1 && nsd->children[is_child].need_to_exit) {
2692 if(nsd->children[is_child].child_fd == -1)
2693 nsd->children[is_child].has_exited = 1;
2694 parent_check_all_children_exited(nsd);
2695 } else if(is_child != -1) {
2696 log_msg(LOG_WARNING,
2697 "server %d died unexpectedly with status %d, restarting",
2698 (int) child_pid, status);
2699 restart_child_servers(nsd, server_region, netio,
2700 &nsd->xfrd_listener->fd);
2701 } else if (child_pid == reload_pid) {
2702 sig_atomic_t cmd = NSD_RELOAD_FAILED;
2703 pid_t mypid;
2704 log_msg(LOG_WARNING,
2705 "Reload process %d failed with status %d, continuing with old database",
2706 (int) child_pid, status);
2707 #ifdef HAVE_SETPROCTITLE
2708 setproctitle("main");
2709 #endif
2710 #ifdef USE_LOG_PROCESS_ROLE
2711 log_set_process_role("main");
2712 #endif
2713 reload_pid = -1;
2714 if(reload_listener.fd != -1) close(reload_listener.fd);
2715 netio_remove_handler(netio, &reload_listener);
2716 reload_listener.fd = -1;
2717 reload_listener.event_types = NETIO_EVENT_NONE;
2718 task_process_sync(nsd->task[nsd->mytask]);
2719 /* inform xfrd reload attempt ended */
2720 if(!write_socket(nsd->xfrd_listener->fd,
2721 &cmd, sizeof(cmd))) {
2722 log_msg(LOG_ERR, "problems "
2723 "sending SOAEND to xfrd: %s",
2724 strerror(errno));
2725 }
2726 mypid = getpid();
2727 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
2728 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
2729 strerror(errno));
2730 }
2731 #ifdef USE_DNSTAP
2732 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) {
2733 log_msg(LOG_WARNING,
2734 "dnstap-collector %d terminated with status %d",
2735 (int) child_pid, status);
2736 if(nsd->dt_collector) {
2737 dt_collector_close(nsd->dt_collector, nsd);
2738 dt_collector_destroy(nsd->dt_collector, nsd);
2739 nsd->dt_collector = NULL;
2740 }
2741 /* Only respawn a crashed (or exited)
2742 * dnstap-collector when not reloading,
2743 * to not induce a reload during a
2744 * reload (which would seriously
2745 * disrupt nsd procedures and lead to
2746 * unpredictable results)!
2747 *
2748 * This will *leave* a dnstap-collector
2749 * process terminated, but because
2750 * signalling of the reload process to
2751 * the main process to respawn in this
2752 * situation will be cumbersome, and
2753 * because this situation is so
2754 * specific (and therefore hopefully
2755 * extremely rare or non-existent),
2756 * plus the fact that we are left
2757 * with a perfectly functional NSD
2758 * (besides not logging dnstap
2759 * messages), I consider it acceptable
2760 * to leave this unresolved.
2761 */
2762 if(reload_pid == -1 && nsd->options->dnstap_enable) {
2763 nsd->dt_collector = dt_collector_create(nsd);
2764 dt_collector_start(nsd->dt_collector, nsd);
2765 nsd->mode = NSD_RELOAD_REQ;
2766 }
2767 #endif
2768 } else if(status != 0) {
2769 /* only log nonzero status: we also
2770 * reap the old-main process here
2771 * (reload is the process-parent of
2772 * old-main), and older server processes
2773 * that are exiting after a reload */
2774 log_msg(LOG_WARNING,
2775 "process %d terminated with status %d",
2776 (int) child_pid, status);
2777 }
2778 }
2779 if (child_pid == -1) {
2780 if (errno == EINTR) {
2781 continue;
2782 }
2783 if (errno != ECHILD)
2784 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
2785 }
2786 if (nsd->mode != NSD_RUN)
2787 break;
2788
2789 /* timeout to collect processes. In case no sigchild happens. */
2790 timeout_spec.tv_sec = 1;
2791 timeout_spec.tv_nsec = 0;
2792
2793 /* listen on ports, timeout for collecting terminated children */
2794 if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
2795 if (errno != EINTR) {
2796 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
2797 }
2798 }
2799 if(nsd->restart_children) {
2800 restart_child_servers(nsd, server_region, netio,
2801 &nsd->xfrd_listener->fd);
2802 nsd->restart_children = 0;
2803 }
2804 if(nsd->reload_failed) {
2805 sig_atomic_t cmd = NSD_RELOAD_FAILED;
2806 pid_t mypid;
2807 nsd->reload_failed = 0;
2808 log_msg(LOG_WARNING,
2809 "Reload process %d failed, continuing with old database",
2810 (int) reload_pid);
2811 #ifdef HAVE_SETPROCTITLE
2812 setproctitle("main");
2813 #endif
2814 #ifdef USE_LOG_PROCESS_ROLE
2815 log_set_process_role("main");
2816 #endif
2817 reload_pid = -1;
2818 if(reload_listener.fd != -1) close(reload_listener.fd);
2819 netio_remove_handler(netio, &reload_listener);
2820 reload_listener.fd = -1;
2821 reload_listener.event_types = NETIO_EVENT_NONE;
2822 task_process_sync(nsd->task[nsd->mytask]);
2823 /* inform xfrd reload attempt ended */
2824 if(!write_socket(nsd->xfrd_listener->fd,
2825 &cmd, sizeof(cmd))) {
2826 log_msg(LOG_ERR, "problems "
2827 "sending SOAEND to xfrd: %s",
2828 strerror(errno));
2829 }
2830 mypid = getpid();
2831 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
2832 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
2833 strerror(errno));
2834 }
2835 }
2836
2837 break;
2838 case NSD_RELOAD_REQ: {
2839 sig_atomic_t cmd = NSD_RELOAD_REQ;
2840 log_msg(LOG_WARNING, "SIGHUP received, reloading...");
2841 DEBUG(DEBUG_IPC,1, (LOG_INFO,
2842 "main: ipc send reload_req to xfrd"));
2843 if(!write_socket(nsd->xfrd_listener->fd,
2844 &cmd, sizeof(cmd))) {
2845 log_msg(LOG_ERR, "server_main: could not send "
2846 "reload_req to xfrd: %s", strerror(errno));
2847 }
2848 nsd->mode = NSD_RUN;
2849 } break;
2850 case NSD_RELOAD:
2851 /* Continue to run nsd after reload */
2852 nsd->mode = NSD_RUN;
2853 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
2854 if (reload_pid != -1) {
2855 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
2856 (int) reload_pid);
2857 break;
2858 }
2859
2860 /* switch mytask to keep track of who owns the taskdb */
2861 nsd->mytask = 1 - nsd->mytask;
2862 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
2863 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
2864 reload_pid = -1;
2865 break;
2866 }
2867
2868 /* Do actual reload */
2869 reload_pid = fork();
2870 switch (reload_pid) {
2871 case -1:
2872 log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
2873 break;
2874 default:
2875 /* PARENT */
2876 close(reload_sockets[0]);
2877 #ifdef HAVE_SETPROCTITLE
2878 setproctitle("load");
2879 #endif
2880 #ifdef USE_LOG_PROCESS_ROLE
2881 log_set_process_role("load");
2882 #endif
2883 server_reload(nsd, server_region, netio,
2884 reload_sockets[1]);
2885 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
2886 #ifdef HAVE_SETPROCTITLE
2887 setproctitle("main");
2888 #endif
2889 #ifdef USE_LOG_PROCESS_ROLE
2890 log_set_process_role("main");
2891 #endif
2892 close(reload_sockets[1]);
2893 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
2894 /* drop stale xfrd ipc data */
2895 ((struct ipc_handler_conn_data*)nsd->
2896 xfrd_listener->user_data)
2897 ->conn->is_reading = 0;
2898 reload_pid = -1;
2899 reload_listener.fd = -1;
2900 reload_listener.event_types = NETIO_EVENT_NONE;
2901 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
2902 break;
2903 case 0:
2904 /* CHILD */
2905 /* server_main keeps running until NSD_QUIT_SYNC
2906 * is received from reload. */
2907 close(reload_sockets[1]);
2908 #ifdef HAVE_SETPROCTITLE
2909 setproctitle("old-main");
2910 #endif
2911 #ifdef USE_LOG_PROCESS_ROLE
2912 log_set_process_role("old-main");
2913 #endif
2914 reload_listener.fd = reload_sockets[0];
2915 reload_listener.timeout = NULL;
2916 reload_listener.user_data = nsd;
2917 reload_listener.event_types = NETIO_EVENT_READ;
2918 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
2919 netio_add_handler(netio, &reload_listener);
2920 reload_pid = getppid();
2921 break;
2922 }
2923 break;
2924 case NSD_QUIT_SYNC:
2925 /* synchronisation of xfrd, parent and reload */
2926 if(!nsd->quit_sync_done && reload_listener.fd != -1) {
2927 sig_atomic_t cmd = NSD_RELOAD;
2928 /* stop xfrd ipc writes in progress */
2929 DEBUG(DEBUG_IPC,1, (LOG_INFO,
2930 "main: ipc send indication reload"));
2931 if(!write_socket(nsd->xfrd_listener->fd,
2932 &cmd, sizeof(cmd))) {
2933 log_msg(LOG_ERR, "server_main: could not send reload "
2934 "indication to xfrd: %s", strerror(errno));
2935 }
2936 /* wait for ACK from xfrd */
2937 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
2938 nsd->quit_sync_done = 1;
2939 }
2940 nsd->mode = NSD_RUN;
2941 break;
2942 case NSD_QUIT:
2943 /* silent shutdown during reload */
2944 if(reload_listener.fd != -1) {
2945 /* acknowledge the quit, to sync reload that we will really quit now */
2946 sig_atomic_t cmd = NSD_RELOAD;
2947 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
2948 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
2949 log_msg(LOG_ERR, "server_main: "
2950 "could not ack quit: %s", strerror(errno));
2951 }
2952 close(reload_listener.fd);
2953 }
2954 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
2955 /* only quit children after xfrd has acked */
2956 send_children_quit(nsd);
2957
2958 #ifdef MEMCLEAN /* OS collects memory pages */
2959 region_destroy(server_region);
2960 #endif
2961 server_shutdown(nsd);
2962
2963 /* ENOTREACH */
2964 break;
2965 case NSD_SHUTDOWN:
2966 break;
2967 case NSD_REAP_CHILDREN:
2968 /* continue; wait for child in run loop */
2969 nsd->mode = NSD_RUN;
2970 break;
2971 case NSD_STATS:
2972 #ifdef BIND8_STATS
2973 set_children_stats(nsd);
2974 #endif
2975 nsd->mode = NSD_RUN;
2976 break;
2977 default:
2978 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
2979 nsd->mode = NSD_RUN;
2980 break;
2981 }
2982 }
2983 log_msg(LOG_WARNING, "signal received, shutting down...");
2984
2985 /* close opened ports to avoid race with restart of nsd */
2986 server_close_all_sockets(nsd->udp, nsd->ifs);
2987 server_close_all_sockets(nsd->tcp, nsd->ifs);
2988 daemon_remote_close(nsd->rc);
2989 send_children_quit_and_wait(nsd);
2990
2991 /* Unlink it if possible... */
2992 unlinkpid(nsd->pidfile);
2993 unlink(nsd->task[0]->fname);
2994 unlink(nsd->task[1]->fname);
2995 #ifdef USE_ZONE_STATS
2996 unlink(nsd->zonestatfname[0]);
2997 unlink(nsd->zonestatfname[1]);
2998 #endif
2999 #ifdef BIND8_STATS
3000 server_stat_free(nsd);
3001 #endif
3002 #ifdef USE_DNSTAP
3003 dt_collector_close(nsd->dt_collector, nsd);
3004 #endif
3005
3006 if(reload_listener.fd != -1) {
3007 sig_atomic_t cmd = NSD_QUIT;
3008 DEBUG(DEBUG_IPC,1, (LOG_INFO,
3009 "main: ipc send quit to reload-process"));
3010 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
3011 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
3012 strerror(errno));
3013 }
3014 fsync(reload_listener.fd);
3015 close(reload_listener.fd);
3016 /* wait for reload to finish processing */
3017 while(1) {
3018 if(waitpid(reload_pid, NULL, 0) == -1) {
3019 if(errno == EINTR) continue;
3020 if(errno == ECHILD) break;
3021 log_msg(LOG_ERR, "waitpid(reload %d): %s",
3022 (int)reload_pid, strerror(errno));
3023 }
3024 break;
3025 }
3026 }
3027 if(nsd->xfrd_listener->fd != -1) {
3028 /* complete quit, stop xfrd */
3029 sig_atomic_t cmd = NSD_QUIT;
3030 DEBUG(DEBUG_IPC,1, (LOG_INFO,
3031 "main: ipc send quit to xfrd"));
3032 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
3033 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
3034 strerror(errno));
3035 }
3036 fsync(nsd->xfrd_listener->fd);
3037 close(nsd->xfrd_listener->fd);
3038 (void)kill(nsd->pid, SIGTERM);
3039 }
3040
3041 #ifdef MEMCLEAN /* OS collects memory pages */
3042 region_destroy(server_region);
3043 #endif
3044 server_shutdown(nsd);
3045 }
3046
3047 static query_state_type
3048 server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p)
3049 {
3050 return query_process(query, nsd, now_p);
3051 }
3052
3053 static query_state_type
3054 server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p)
3055 {
3056 #ifdef RATELIMIT
3057 if(query_process(query, nsd, now_p) != QUERY_DISCARDED) {
3058 if(query->edns.cookie_status != COOKIE_VALID
3059 && query->edns.cookie_status != COOKIE_VALID_REUSE
3060 && rrl_process_query(query))
3061 return rrl_slip(query);
3062 else return QUERY_PROCESSED;
3063 }
3064 return QUERY_DISCARDED;
3065 #else
3066 return query_process(query, nsd, now_p);
3067 #endif
3068 }
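
/* Under RATELIMIT, rrl_slip() either discards the query or answers
 * with a truncated (TC) reply so a legitimate client retries over TCP.
 * A hypothetical caller handling the outcome (not compiled): */
#if 0
	switch(server_process_query_udp(nsd, query, &now)) {
	case QUERY_DISCARDED:
		break; /* send nothing */
	default:
		send_reply(query); /* hypothetical; full or TC reply */
		break;
	}
#endif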
3069
3070 const char*
3071 nsd_event_vs(void)
3072 {
3073 #ifdef USE_MINI_EVENT
3074 return "";
3075 #else
3076 return event_get_version();
3077 #endif
3078 }
3079
3080 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS)
3081 static const char* ub_ev_backend2str(int b)
3082 {
3083 switch(b) {
3084 case EVBACKEND_SELECT: return "select";
3085 case EVBACKEND_POLL: return "poll";
3086 case EVBACKEND_EPOLL: return "epoll";
3087 case EVBACKEND_KQUEUE: return "kqueue";
3088 case EVBACKEND_DEVPOLL: return "devpoll";
3089 case EVBACKEND_PORT: return "evport";
3090 }
3091 return "unknown";
3092 }
3093 #endif
3094
3095 const char*
3096 nsd_event_method(void)
3097 {
3098 #ifdef USE_MINI_EVENT
3099 return "select";
3100 #else
3101 struct event_base* b = nsd_child_event_base();
3102 const char* m;
3103 # ifdef EV_FEATURE_BACKENDS
3104 m = ub_ev_backend2str(ev_backend((struct ev_loop*)b));
3105 # elif defined(HAVE_EVENT_BASE_GET_METHOD)
3106 m = event_base_get_method(b);
3107 # else
3108 m = "?";
3109 # endif
3110 # ifdef MEMCLEAN
3111 event_base_free(b);
3112 # endif
3113 return m;
3114 #endif
3115 }
3116
3117 struct event_base*
3118 nsd_child_event_base(void)
3119 {
3120 struct event_base* base;
3121 #ifdef USE_MINI_EVENT
3122 static time_t secs;
3123 static struct timeval now;
3124 base = event_init(&secs, &now);
3125 #else
3126 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
3127 /* libev */
3128 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
3129 # else
3130 /* libevent */
3131 # ifdef HAVE_EVENT_BASE_NEW
3132 base = event_base_new();
3133 # else
3134 base = event_init();
3135 # endif
3136 # endif
3137 #endif
3138 return base;
3139 }
3140
3141 static void
3142 add_udp_handler(
3143 struct nsd *nsd,
3144 struct nsd_socket *sock,
3145 struct udp_handler_data *data)
3146 {
3147 struct event *handler = &data->event;
3148
3149 data->nsd = nsd;
3150 data->socket = sock;
3151
3152 if(nsd->options->proxy_protocol_port &&
3153 sockaddr_uses_proxy_protocol_port(nsd->options,
3154 (struct sockaddr *)&sock->addr.ai_addr)) {
3155 data->pp2_enabled = 1;
3156 }
3157
3158 memset(handler, 0, sizeof(*handler));
3159 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data);
3160 if(event_base_set(nsd->event_base, handler) != 0)
3161 log_msg(LOG_ERR, "nsd udp: event_base_set failed");
3162 if(event_add(handler, NULL) != 0)
3163 log_msg(LOG_ERR, "nsd udp: event_add failed");
3164 }
3165
3166 void
3167 add_tcp_handler(
3168 struct nsd *nsd,
3169 struct nsd_socket *sock,
3170 struct tcp_accept_handler_data *data)
3171 {
3172 struct event *handler = &data->event;
3173
3174 data->nsd = nsd;
3175 data->socket = sock;
3176
3177 if(nsd->options->proxy_protocol_port &&
3178 sockaddr_uses_proxy_protocol_port(nsd->options,
3179 (struct sockaddr *)&sock->addr.ai_addr)) {
3180 data->pp2_enabled = 1;
3181 }
3182
3183 #ifdef HAVE_SSL
3184 if (nsd->tls_ctx &&
3185 nsd->options->tls_port &&
3186 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port))
3187 {
3188 data->tls_accept = 1;
3189 if(verbosity >= 2) {
3190 char buf[48];
3191 addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf));
3192 VERBOSITY(2, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf));
3193 }
3194 } else {
3195 data->tls_accept = 0;
3196 }
3197 #endif
3198
3199 memset(handler, 0, sizeof(*handler));
3200 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data);
3201 if(event_base_set(nsd->event_base, handler) != 0)
3202 log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
3203 if(event_add(handler, NULL) != 0)
3204 log_msg(LOG_ERR, "nsd tcp: event_add failed");
3205 data->event_added = 1;
3206 }
3207
3208 /*
3209 * Serve DNS requests to verifiers (short-lived)
3210 */
3211 void server_verify(struct nsd *nsd, int cmdsocket)
3212 {
3213 size_t size = 0;
3214 struct event cmd_event, signal_event, exit_event;
3215 struct zone *zone;
3216
3217 assert(nsd != NULL);
3218
3219 zone = verify_next_zone(nsd, NULL);
3220 if(zone == NULL)
3221 return;
3222
3223 nsd->server_region = region_create(xalloc, free);
3224 nsd->event_base = nsd_child_event_base();
3225
3226 nsd->next_zone_to_verify = zone;
3227 nsd->verifier_count = 0;
3228 nsd->verifier_limit = nsd->options->verifier_count;
3229 size = sizeof(struct verifier) * nsd->verifier_limit;
3230 if(pipe(nsd->verifier_pipe) == -1) {
3231 log_msg(LOG_ERR, "verify: could not create pipe: %s",
3232 strerror(errno));
3233 goto fail_pipe;
3234 }
3235 fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC);
3236 fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC);
3237 nsd->verifiers = region_alloc_zero(nsd->server_region, size);
3238
3239 for(size_t i = 0; i < nsd->verifier_limit; i++) {
3240 nsd->verifiers[i].nsd = nsd;
3241 nsd->verifiers[i].zone = NULL;
3242 nsd->verifiers[i].pid = -1;
3243 nsd->verifiers[i].output_stream.fd = -1;
3244 nsd->verifiers[i].output_stream.priority = LOG_INFO;
3245 nsd->verifiers[i].error_stream.fd = -1;
3246 nsd->verifiers[i].error_stream.priority = LOG_ERR;
3247 }
3248
3249 event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd);
3250 if(event_base_set(nsd->event_base, &cmd_event) != 0 ||
3251 event_add(&cmd_event, NULL) != 0)
3252 {
3253 log_msg(LOG_ERR, "verify: could not add command event");
3254 goto fail;
3255 }
3256
3257 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd);
3258 if(event_base_set(nsd->event_base, &signal_event) != 0 ||
3259 signal_add(&signal_event, NULL) != 0)
3260 {
3261 log_msg(LOG_ERR, "verify: could not add signal event");
3262 goto fail;
3263 }
3264
3265 event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd);
3266 if(event_base_set(nsd->event_base, &exit_event) != 0 ||
3267 event_add(&exit_event, NULL) != 0)
3268 {
3269 log_msg(LOG_ERR, "verify: could not add exit event");
3270 goto fail;
3271 }
3272
3273 memset(msgs, 0, sizeof(msgs));
3274 for (int i = 0; i < NUM_RECV_PER_SELECT; i++) {
3275 queries[i] = query_create(nsd->server_region,
3276 compressed_dname_offsets,
3277 compression_table_size, compressed_dnames);
3278 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
3279 iovecs[i].iov_base = buffer_begin(queries[i]->packet);
3280 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
3281 msgs[i].msg_hdr.msg_iov = &iovecs[i];
3282 msgs[i].msg_hdr.msg_iovlen = 1;
3283 msgs[i].msg_hdr.msg_name = &queries[i]->remote_addr;
3284 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen;
3285 }
3286
3287 for (size_t i = 0; i < nsd->verify_ifs; i++) {
3288 struct udp_handler_data *data;
3289 data = region_alloc_zero(
3290 nsd->server_region, sizeof(*data));
3291 add_udp_handler(nsd, &nsd->verify_udp[i], data);
3292 }
3293
3294 tcp_accept_handler_count = nsd->verify_ifs;
3295 tcp_accept_handlers = region_alloc_array(nsd->server_region,
3296 nsd->verify_ifs, sizeof(*tcp_accept_handlers));
3297
3298 for (size_t i = 0; i < nsd->verify_ifs; i++) {
3299 struct tcp_accept_handler_data *data;
3300 data = &tcp_accept_handlers[i];
3301 memset(data, 0, sizeof(*data));
3302 add_tcp_handler(nsd, &nsd->verify_tcp[i], data);
3303 }
3304
3305 while(nsd->next_zone_to_verify != NULL &&
3306 nsd->verifier_count < nsd->verifier_limit)
3307 {
3308 verify_zone(nsd, nsd->next_zone_to_verify);
3309 nsd->next_zone_to_verify
3310 = verify_next_zone(nsd, nsd->next_zone_to_verify);
3311 }
3312
3313 /* short-lived main loop */
3314 event_base_dispatch(nsd->event_base);
3315
3316 /* remove command and exit event handlers */
3317 event_del(&exit_event);
3318 event_del(&signal_event);
3319 event_del(&cmd_event);
3320
3321 assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT);
3322 assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT);
3323 fail:
3324 close(nsd->verifier_pipe[0]);
3325 close(nsd->verifier_pipe[1]);
3326 fail_pipe:
3327 event_base_free(nsd->event_base);
3328 region_destroy(nsd->server_region);
3329
3330 nsd->event_base = NULL;
3331 nsd->server_region = NULL;
3332 nsd->verifier_limit = 0;
3333 nsd->verifier_pipe[0] = -1;
3334 nsd->verifier_pipe[1] = -1;
3335 nsd->verifiers = NULL;
3336 }
3337
3338 /*
3339 * Serve DNS requests.
3340 */
3341 void
3342 server_child(struct nsd *nsd)
3343 {
3344 size_t i, from, numifs;
3345 region_type *server_region = region_create(xalloc, free);
3346 struct event_base* event_base = nsd_child_event_base();
3347 sig_atomic_t mode;
3348 #ifdef USE_LOG_PROCESS_ROLE
3349 static char child_name[20];
3350 #endif
3351
3352 if(!event_base) {
3353 log_msg(LOG_ERR, "nsd server could not create event base");
3354 exit(1);
3355 }
3356 nsd->event_base = event_base;
3357 nsd->server_region = server_region;
3358
3359 #ifdef RATELIMIT
3360 rrl_init(nsd->this_child->child_num);
3361 #endif
3362
3363 assert(nsd->server_kind != NSD_SERVER_MAIN);
3364
3365 #ifdef HAVE_SETPROCTITLE
3366 setproctitle("server %d", nsd->this_child->child_num + 1);
3367 #endif
3368 #ifdef USE_LOG_PROCESS_ROLE
3369 snprintf(child_name, sizeof(child_name), "srv%d",
3370 nsd->this_child->child_num + 1);
3371 log_set_process_role(child_name);
3372 #endif
3373 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));
3374
3375 #ifdef HAVE_CPUSET_T
3376 if(nsd->use_cpu_affinity) {
3377 set_cpu_affinity(nsd->this_child->cpuset);
3378 }
3379 #endif
3380 #ifdef BIND8_STATS
3381 nsd->st = &nsd->stats_per_child[nsd->stat_current]
3382 [nsd->this_child->child_num];
3383 nsd->st->boot = nsd->stat_map[0].boot;
3384 memcpy(&nsd->stat_proc, nsd->st, sizeof(nsd->stat_proc));
3385 #endif
3386
3387 if (!(nsd->server_kind & NSD_SERVER_TCP)) {
3388 server_close_all_sockets(nsd->tcp, nsd->ifs);
3389 }
3390 if (!(nsd->server_kind & NSD_SERVER_UDP)) {
3391 server_close_all_sockets(nsd->udp, nsd->ifs);
3392 }
3393
3394 if (nsd->this_child->parent_fd != -1) {
3395 struct event *handler;
3396 struct ipc_handler_conn_data* user_data =
3397 (struct ipc_handler_conn_data*)region_alloc(
3398 server_region, sizeof(struct ipc_handler_conn_data));
3399 user_data->nsd = nsd;
3400 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);
3401
3402 handler = (struct event*) region_alloc(
3403 server_region, sizeof(*handler));
3404 memset(handler, 0, sizeof(*handler));
3405 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
3406 EV_READ, child_handle_parent_command, user_data);
3407 if(event_base_set(event_base, handler) != 0)
3408 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
3409 if(event_add(handler, NULL) != 0)
3410 log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
3411 }
3412
3413 if(nsd->reuseport) {
3414 numifs = nsd->ifs / nsd->reuseport;
3415 from = numifs * nsd->this_child->child_num;
3416 if(from+numifs > nsd->ifs) { /* should not happen */
3417 from = 0;
3418 numifs = nsd->ifs;
3419 }
3420 } else {
3421 from = 0;
3422 numifs = nsd->ifs;
3423 }
3424
3425 if (nsd->server_kind & NSD_SERVER_UDP) {
3426 int child = nsd->this_child->child_num;
3427 memset(msgs, 0, sizeof(msgs));
3428 for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
3429 queries[i] = query_create(server_region,
3430 compressed_dname_offsets,
3431 compression_table_size, compressed_dnames);
3432 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
3433 iovecs[i].iov_base = buffer_begin(queries[i]->packet);
3434 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
3435 msgs[i].msg_hdr.msg_iov = &iovecs[i];
3436 msgs[i].msg_hdr.msg_iovlen = 1;
3437 msgs[i].msg_hdr.msg_name = &queries[i]->remote_addr;
3438 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen;
3439 }
3440
3441 for (i = 0; i < nsd->ifs; i++) {
3442 int listen;
3443 struct udp_handler_data *data;
3444
3445 listen = nsd_bitset_isset(nsd->udp[i].servers, child);
3446
3447 if(i >= from && i < (from + numifs) && listen) {
3448 data = region_alloc_zero(
3449 nsd->server_region, sizeof(*data));
3450 add_udp_handler(nsd, &nsd->udp[i], data);
3451 } else {
3452 /* close sockets intended for other servers */
3453 server_close_socket(&nsd->udp[i]);
3454 }
3455 }
3456 }
3457
3458 /*
3459 * Keep track of all the TCP accept handlers so we can enable
3460 * and disable them based on the current number of active TCP
3461 * connections.
3462 */
3463 if (nsd->server_kind & NSD_SERVER_TCP) {
3464 int child = nsd->this_child->child_num;
3465 tcp_accept_handler_count = numifs;
3466 tcp_accept_handlers = region_alloc_array(server_region,
3467 numifs, sizeof(*tcp_accept_handlers));
3468
3469 for (i = 0; i < nsd->ifs; i++) {
3470 int listen;
3471 struct tcp_accept_handler_data *data;
3472
3473 listen = nsd_bitset_isset(nsd->tcp[i].servers, child);
3474
3475 if(i >= from && i < (from + numifs) && listen) {
3476 data = &tcp_accept_handlers[i-from];
3477 memset(data, 0, sizeof(*data));
3478 add_tcp_handler(nsd, &nsd->tcp[i], data);
3479 } else {
3480 /* close sockets intended for other servers */
3481 /*
3482 * uncomment this once tcp servers are no
3483 * longer copied in the tcp fd copy line
3484 * in server_init().
3485 server_close_socket(&nsd->tcp[i]);
3486 */
3487 /* close sockets not meant for this server */
3488 if(!listen)
3489 server_close_socket(&nsd->tcp[i]);
3490 }
3491 }
3492 } else {
3493 tcp_accept_handler_count = 0;
3494 }
3495
3496 /* The main loop... */
3497 while ((mode = nsd->mode) != NSD_QUIT) {
3498 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);
3499
3500 /* Do we need to do the statistics... */
3501 if (mode == NSD_STATS) {
3502 #ifdef BIND8_STATS
3503 int p = nsd->st_period;
3504 nsd->st_period = 1; /* force stats printout */
3505 /* Dump the statistics */
3506 bind8_stats(nsd);
3507 nsd->st_period = p;
3508 #else /* !BIND8_STATS */
3509 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
3510 #endif /* BIND8_STATS */
3511
3512 nsd->mode = NSD_RUN;
3513 }
3514 else if (mode == NSD_REAP_CHILDREN) {
3515 /* got signal, notify parent. parent reaps terminated children. */
3516 if (nsd->this_child->parent_fd != -1) {
3517 sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
3518 if (write(nsd->this_child->parent_fd,
3519 &parent_notify,
3520 sizeof(parent_notify)) == -1)
3521 {
3522 log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
3523 (int) nsd->this_child->pid, strerror(errno));
3524 }
3525 } else /* no parent, so reap 'em */
3526 while (waitpid(-1, NULL, WNOHANG) > 0) ;
3527 nsd->mode = NSD_RUN;
3528 }
3529 else if(mode == NSD_RUN) {
3530 /* Wait for a query... */
3531 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
3532 if (errno != EINTR) {
3533 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
3534 break;
3535 }
3536 }
3537 } else if(mode == NSD_QUIT) {
3538 /* ignore here, quit */
3539 } else {
3540 log_msg(LOG_ERR, "mode bad value %d, back to service.",
3541 (int)mode);
3542 nsd->mode = NSD_RUN;
3543 }
3544 }
3545
3546 service_remaining_tcp(nsd);
3547 #ifdef BIND8_STATS
3548 bind8_stats(nsd);
3549 #endif /* BIND8_STATS */
3550
3551 #ifdef MEMCLEAN /* OS collects memory pages */
3552 #ifdef RATELIMIT
3553 rrl_deinit(nsd->this_child->child_num);
3554 #endif
3555 event_base_free(event_base);
3556 region_destroy(server_region);
3557 #endif
3558 server_shutdown(nsd);
3559 }
3560
3561 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg)
3562 {
3563 int* timed_out = (int*)arg;
3564 assert(event & EV_TIMEOUT); (void)event;
3565 /* wake up the service tcp thread, note event is no longer
3566 * registered */
3567 *timed_out = 1;
3568 }
3569
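/*
 * Service the TCP connections that are still open when this server
 * process is told to stop or reload: mark them so that no new queries
 * are accepted, cap their timeouts at 3 seconds and run a private
 * event loop, with a 3 second watchdog timer, until all connections
 * are finished or a quit/reap command arrives.
 */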
3570 void
3571 service_remaining_tcp(struct nsd* nsd)
3572 {
3573 struct tcp_handler_data* p;
3574 struct event_base* event_base;
3575 /* check if it is needed */
3576 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL)
3577 return;
3578 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections"));
3579 #ifdef USE_DNSTAP
3580 /* remove dnstap collector, we cannot write there because the new
3581 * child process is using the file descriptor, or the child
3582 * process after that. */
3583 dt_collector_destroy(nsd->dt_collector, nsd);
3584 nsd->dt_collector = NULL;
3585 #endif
3586 /* setup event base */
3587 event_base = nsd_child_event_base();
3588 if(!event_base) {
3589 log_msg(LOG_ERR, "nsd remain tcp could not create event base");
3590 return;
3591 }
3592 /* register tcp connections */
3593 for(p = tcp_active_list; p != NULL; p = p->next) {
3594 struct timeval timeout;
3595 int fd = p->event.ev_fd;
3596 #ifdef USE_MINI_EVENT
3597 short event = p->event.ev_flags & (EV_READ|EV_WRITE);
3598 #else
3599 short event = p->event.ev_events & (EV_READ|EV_WRITE);
3600 #endif
3601 void (*fn)(int, short, void*);
3602 #ifdef HAVE_SSL
3603 if(p->tls) {
3604 if((event&EV_READ))
3605 fn = handle_tls_reading;
3606 else fn = handle_tls_writing;
3607 } else {
3608 #endif
3609 if((event&EV_READ))
3610 fn = handle_tcp_reading;
3611 else fn = handle_tcp_writing;
3612 #ifdef HAVE_SSL
3613 }
3614 #endif
3615
3616 p->tcp_no_more_queries = 1;
3617 		/* cap the timeout at 3 seconds (previously 1/10 second) */
3618 if(p->tcp_timeout > 3000)
3619 p->tcp_timeout = 3000;
3620 timeout.tv_sec = p->tcp_timeout / 1000;
3621 timeout.tv_usec = (p->tcp_timeout % 1000)*1000;
3622 event_del(&p->event);
3623 memset(&p->event, 0, sizeof(p->event));
3624 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT,
3625 fn, p);
3626 if(event_base_set(event_base, &p->event) != 0)
3627 log_msg(LOG_ERR, "event base set failed");
3628 if(event_add(&p->event, &timeout) != 0)
3629 log_msg(LOG_ERR, "event add failed");
3630 }
3631
3632 /* handle it */
3633 while(nsd->current_tcp_count > 0) {
3634 mode_t m = server_signal_mode(nsd);
3635 struct event timeout;
3636 struct timeval tv;
3637 int timed_out = 0;
3638 if(m == NSD_QUIT || m == NSD_SHUTDOWN ||
3639 m == NSD_REAP_CHILDREN) {
3640 /* quit */
3641 break;
3642 }
3643 /* timer */
3644 /* have to do something every 3 seconds */
3645 tv.tv_sec = 3;
3646 tv.tv_usec = 0;
3647 memset(&timeout, 0, sizeof(timeout));
3648 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout,
3649 &timed_out);
3650 if(event_base_set(event_base, &timeout) != 0)
3651 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed");
3652 if(event_add(&timeout, &tv) != 0)
3653 log_msg(LOG_ERR, "remaintcp timer: event_add failed");
3654
3655 /* service loop */
3656 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
3657 if (errno != EINTR) {
3658 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
3659 break;
3660 }
3661 }
3662 if(!timed_out) {
3663 event_del(&timeout);
3664 } else {
3665 /* timed out, quit */
3666 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit"));
3667 break;
3668 }
3669 }
3670 #ifdef MEMCLEAN
3671 event_base_free(event_base);
3672 #endif
3673 /* continue to quit after return */
3674 }
3675
3676 /* Implement recvmmsg and sendmmsg if the platform does not provide them. These
3677  * functions are always used, even if nonblocking operations are broken, in which case
3678 * NUM_RECV_PER_SELECT is defined to 1 (one).
3679 */
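/* A sketch, as an illustration only and not code in this file, of how
 * the caller is assumed to pair up the msgs[], iovecs[] and queries[]
 * arrays before nsd_recvmmsg() is invoked; the per-message reset logic
 * in handle_udp() below keeps the entries in this shape between batches:
 *
 *	for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
 *		iovecs[i].iov_base          = buffer_begin(queries[i]->packet);
 *		iovecs[i].iov_len           = buffer_remaining(queries[i]->packet);
 *		msgs[i].msg_hdr.msg_iov     = &iovecs[i];
 *		msgs[i].msg_hdr.msg_iovlen  = 1;
 *		msgs[i].msg_hdr.msg_name    = &queries[i]->remote_addr;
 *		msgs[i].msg_hdr.msg_namelen = (socklen_t)sizeof(queries[i]->remote_addr);
 *	}
 */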
3680 #if defined(HAVE_RECVMMSG)
3681 #define nsd_recvmmsg recvmmsg
3682 #else /* !HAVE_RECVMMSG */
3683
3684 static int
3685 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
3686 int flags, struct timespec *timeout)
3687 {
3688 unsigned int vpos = 0;
3689 ssize_t rcvd;
3690
3691 /* timeout is ignored, ensure caller does not expect it to work */
3692 assert(timeout == NULL); (void)timeout;
3693
3694 while(vpos < vlen) {
3695 rcvd = recvfrom(sockfd,
3696 msgvec[vpos].msg_hdr.msg_iov->iov_base,
3697 msgvec[vpos].msg_hdr.msg_iov->iov_len,
3698 flags,
3699 msgvec[vpos].msg_hdr.msg_name,
3700 &msgvec[vpos].msg_hdr.msg_namelen);
3701 if(rcvd < 0) {
3702 break;
3703 } else {
3704 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX);
3705 msgvec[vpos].msg_len = (unsigned int)rcvd;
3706 vpos++;
3707 }
3708 }
3709
3710 if(vpos) {
3711 /* error will be picked up next time */
3712 return (int)vpos;
3713 } else if(errno == 0) {
3714 return 0;
3715 } else if(errno == EAGAIN) {
3716 return 0;
3717 }
3718
3719 return -1;
3720 }
3721 #endif /* HAVE_RECVMMSG */
3722
3723 #ifdef HAVE_SENDMMSG
3724 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__)
3725 #else /* !HAVE_SENDMMSG */
3726
3727 static int
3728 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags)
3729 {
3730 unsigned int vpos = 0;
3731 ssize_t snd;
3732
3733 while(vpos < vlen) {
3734 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1);
3735 snd = sendto(sockfd,
3736 msgvec[vpos].msg_hdr.msg_iov->iov_base,
3737 msgvec[vpos].msg_hdr.msg_iov->iov_len,
3738 flags,
3739 msgvec[vpos].msg_hdr.msg_name,
3740 msgvec[vpos].msg_hdr.msg_namelen);
3741 if(snd < 0) {
3742 break;
3743 } else {
3744 msgvec[vpos].msg_len = (unsigned int)snd;
3745 vpos++;
3746 }
3747 }
3748
3749 if(vpos) {
3750 return (int)vpos;
3751 } else if(errno == 0) {
3752 return 0;
3753 }
3754
3755 return -1;
3756 }
3757 #endif /* HAVE_SENDMMSG */
3758
3759 static int
3760 port_is_zero(
3761 #ifdef INET6
3762 struct sockaddr_storage *addr
3763 #else
3764 struct sockaddr_in *addr
3765 #endif
3766 )
3767 {
3768 #ifdef INET6
3769 if(addr->ss_family == AF_INET6) {
3770 return (((struct sockaddr_in6 *)addr)->sin6_port) == 0;
3771 } else if(addr->ss_family == AF_INET) {
3772 return (((struct sockaddr_in *)addr)->sin_port) == 0;
3773 }
3774 return 0;
3775 #else
3776 if(addr->sin_family == AF_INET) {
3777 return addr->sin_port == 0;
3778 }
3779 return 0;
3780 #endif
3781 }
3782
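/*
 * For reference, the PROXYv2 wire format parsed below (per the haproxy
 * PROXY protocol specification): a 12 byte signature, one ver_cmd byte
 * (version in the high nibble, command in the low nibble), one
 * fam_prot byte (address family plus transport protocol), a 16 bit
 * network order length of the remaining payload, and then the source
 * and destination addresses and ports for the given family.
 */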
3783 /* Parse the PROXYv2 header from buf and update the query with the proxied
3784  * client address. Returns 1 on success, 0 on failure. */
3785 static int
3786 consume_pp2_header(struct buffer* buf, struct query* q, int stream)
3787 {
3788 size_t size;
3789 struct pp2_header* header;
3790 int err = pp2_read_header(buffer_begin(buf), buffer_remaining(buf));
3791 if(err) {
3792 VERBOSITY(4, (LOG_ERR, "proxy-protocol: could not parse "
3793 "PROXYv2 header: %s", pp_lookup_error(err)));
3794 return 0;
3795 }
3796 header = (struct pp2_header*)buffer_begin(buf);
3797 size = PP2_HEADER_SIZE + read_uint16(&header->len);
3798 if(size > buffer_limit(buf)) {
3799 VERBOSITY(4, (LOG_ERR, "proxy-protocol: not enough buffer "
3800 "size to read PROXYv2 header"));
3801 return 0;
3802 }
3803 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
3804 /* A connection from the proxy itself.
3805 * No need to do anything with addresses. */
3806 goto done;
3807 }
3808 if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
3809 /* Unspecified family and protocol. This could be used for
3810 * health checks by proxies.
3811 * No need to do anything with addresses. */
3812 goto done;
3813 }
3814 /* Read the proxied address */
3815 switch(header->fam_prot) {
3816 case PP2_INET_STREAM:
3817 case PP2_INET_DGRAM:
3818 {
3819 struct sockaddr_in* addr =
3820 (struct sockaddr_in*)&q->client_addr;
3821 addr->sin_family = AF_INET;
3822 memmove(&addr->sin_addr.s_addr,
3823 &header->addr.addr4.src_addr, 4);
3824 memmove(&addr->sin_port, &header->addr.addr4.src_port,
3825 2);
3826 q->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
3827 }
3828 /* Ignore the destination address; it should be us. */
3829 break;
3830 #ifdef INET6
3831 case PP2_INET6_STREAM:
3832 case PP2_INET6_DGRAM:
3833 {
3834 struct sockaddr_in6* addr =
3835 (struct sockaddr_in6*)&q->client_addr;
3836 memset(addr, 0, sizeof(*addr));
3837 addr->sin6_family = AF_INET6;
3838 memmove(&addr->sin6_addr,
3839 header->addr.addr6.src_addr, 16);
3840 memmove(&addr->sin6_port, &header->addr.addr6.src_port,
3841 2);
3842 q->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
3843 }
3844 /* Ignore the destination address; it should be us. */
3845 break;
3846 #endif /* INET6 */
3847 default:
3848 VERBOSITY(2, (LOG_ERR, "proxy-protocol: unsupported "
3849 "family and protocol 0x%x",
3850 (int)header->fam_prot));
3851 return 0;
3852 }
3853 q->is_proxied = 1;
3854 done:
3855 if(!stream) {
3856 /* We are reading a whole packet;
3857 * Move the rest of the data to overwrite the PROXYv2 header */
3858 /* XXX can we do better to avoid memmove? */
3859 memmove(header, ((char*)header)+size, buffer_limit(buf)-size);
3860 buffer_set_limit(buf, buffer_limit(buf)-size);
3861 }
3862 return 1;
3863 }
3864
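/*
 * Handle a readable UDP socket: receive a batch of packets with
 * nsd_recvmmsg(), process each query, compact dropped queries to the
 * tail of the batch by swapping entries, then transmit all answers
 * with nsd_sendmmsg(), temporarily switching the socket to blocking
 * mode when the send buffer is full.
 */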
3865 static void
3866 handle_udp(int fd, short event, void* arg)
3867 {
3868 struct udp_handler_data *data = (struct udp_handler_data *) arg;
3869 int received, sent, recvcount, i;
3870 struct query *q;
3871 uint32_t now = 0;
3872
3873 if (!(event & EV_READ)) {
3874 return;
3875 }
3876 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
3877 /* this printf strangely gave a performance increase on Linux */
3878 /* printf("recvcount %d \n", recvcount); */
3879 if (recvcount == -1) {
3880 if (errno != EAGAIN && errno != EINTR) {
3881 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
3882 STATUP(data->nsd, rxerr);
3883 /* No zone statup */
3884 }
3885 /* Simply no data available */
3886 return;
3887 }
3888 for (i = 0; i < recvcount; i++) {
3889 loopstart:
3890 received = msgs[i].msg_len;
3891 queries[i]->remote_addrlen = msgs[i].msg_hdr.msg_namelen;
3892 queries[i]->client_addrlen = (socklen_t)sizeof(queries[i]->client_addr);
3893 queries[i]->is_proxied = 0;
3894 q = queries[i];
3895 if (received == -1) {
3896 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
3897 #if defined(HAVE_RECVMMSG)
3898 msgs[i].msg_hdr.msg_flags
3899 #else
3900 errno
3901 #endif
3902 ));
3903 STATUP(data->nsd, rxerr);
3904 /* No zone statup */
3905 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
3906 iovecs[i].iov_len = buffer_remaining(q->packet);
3907 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen;
3908 goto swap_drop;
3909 }
3910
3911 /* Account... */
3912 #ifdef BIND8_STATS
3913 if (data->socket->addr.ai_family == AF_INET) {
3914 STATUP(data->nsd, qudp);
3915 } else if (data->socket->addr.ai_family == AF_INET6) {
3916 STATUP(data->nsd, qudp6);
3917 }
3918 #endif
3919
3920 buffer_skip(q->packet, received);
3921 buffer_flip(q->packet);
3922 if(data->pp2_enabled && !consume_pp2_header(q->packet, q, 0)) {
3923 VERBOSITY(2, (LOG_ERR, "proxy-protocol: could not "
3924 "consume PROXYv2 header"));
3925 goto swap_drop;
3926 }
3927 if(!q->is_proxied) {
3928 q->client_addrlen = q->remote_addrlen;
3929 memmove(&q->client_addr, &q->remote_addr,
3930 q->remote_addrlen);
3931 }
3932 #ifdef USE_DNSTAP
3933 /*
3934 		 * send the UDP query, with the server (local) address and the client address, to the dnstap process
3935 */
3936 log_addr("query from client", &q->client_addr);
3937 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
3938 if(verbosity >= 6 && q->is_proxied)
3939 log_addr("query via proxy", &q->remote_addr);
3940 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->client_addr, q->client_addrlen,
3941 q->tcp, q->packet);
3942 #endif /* USE_DNSTAP */
3943
3944 /* Process and answer the query... */
3945 if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) {
3946 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
3947 STATUP(data->nsd, nona);
3948 ZTATUP(data->nsd, q->zone, nona);
3949 }
3950
3951 #ifdef USE_ZONE_STATS
3952 if (data->socket->addr.ai_family == AF_INET) {
3953 ZTATUP(data->nsd, q->zone, qudp);
3954 } else if (data->socket->addr.ai_family == AF_INET6) {
3955 ZTATUP(data->nsd, q->zone, qudp6);
3956 }
3957 #endif
3958
3959 /* Add EDNS0 and TSIG info if necessary. */
3960 query_add_optional(q, data->nsd, &now);
3961
3962 buffer_flip(q->packet);
3963 iovecs[i].iov_len = buffer_remaining(q->packet);
3964 #ifdef BIND8_STATS
3965 /* Account the rcode & TC... */
3966 STATUP2(data->nsd, rcode, RCODE(q->packet));
3967 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
3968 if (TC(q->packet)) {
3969 STATUP(data->nsd, truncated);
3970 ZTATUP(data->nsd, q->zone, truncated);
3971 }
3972 #endif /* BIND8_STATS */
3973 #ifdef USE_DNSTAP
3974 /*
3975 			 * send the UDP response, with the server (local) address and the client address, to the dnstap process
3976 */
3977 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
3978 log_addr("response to client", &q->client_addr);
3979 if(verbosity >= 6 && q->is_proxied)
3980 log_addr("response via proxy", &q->remote_addr);
3981 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr,
3982 &q->client_addr, q->client_addrlen, q->tcp, q->packet,
3983 q->zone);
3984 #endif /* USE_DNSTAP */
3985 } else {
3986 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
3987 iovecs[i].iov_len = buffer_remaining(q->packet);
3988 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen;
3989 swap_drop:
3990 STATUP(data->nsd, dropped);
3991 ZTATUP(data->nsd, q->zone, dropped);
3992 if(i != recvcount-1) {
3993 /* swap with last and decrease recvcount */
3994 struct mmsghdr mtmp = msgs[i];
3995 struct iovec iotmp = iovecs[i];
3996 recvcount--;
3997 msgs[i] = msgs[recvcount];
3998 iovecs[i] = iovecs[recvcount];
3999 queries[i] = queries[recvcount];
4000 msgs[recvcount] = mtmp;
4001 iovecs[recvcount] = iotmp;
4002 queries[recvcount] = q;
4003 msgs[i].msg_hdr.msg_iov = &iovecs[i];
4004 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
4005 goto loopstart;
4006 } else { recvcount --; }
4007 }
4008 }
4009
4010 /* send until all are sent */
4011 i = 0;
4012 while(i<recvcount) {
4013 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
4014 if(sent == -1) {
4015 if(errno == ENOBUFS ||
4016 #ifdef EWOULDBLOCK
4017 errno == EWOULDBLOCK ||
4018 #endif
4019 errno == EAGAIN) {
4020 				/* block to wait until the send buffer is available */
4021 int flag, errstore;
4022 if((flag = fcntl(fd, F_GETFL)) == -1) {
4023 log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno));
4024 flag = 0;
4025 }
4026 flag &= ~O_NONBLOCK;
4027 if(fcntl(fd, F_SETFL, flag) == -1)
4028 log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno));
4029 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
4030 errstore = errno;
4031 flag |= O_NONBLOCK;
4032 if(fcntl(fd, F_SETFL, flag) == -1)
4033 log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno));
4034 if(sent != -1) {
4035 i += sent;
4036 continue;
4037 }
4038 errno = errstore;
4039 }
4040 if(errno == EINVAL) {
4041 /* skip the invalid argument entry,
4042 * send the remaining packets in the list */
4043 if(!(port_is_zero((void*)&queries[i]->remote_addr) &&
4044 verbosity < 3)) {
4045 const char* es = strerror(errno);
4046 char a[64];
4047 addrport2str((void*)&queries[i]->remote_addr, a, sizeof(a));
4048 log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
4049 }
4050 i += 1;
4051 continue;
4052 }
4053 /* don't log transient network full errors, unless
4054 * on higher verbosity */
4055 if(!(errno == ENOBUFS && verbosity < 1) &&
4056 #ifdef EWOULDBLOCK
4057 errno != EWOULDBLOCK &&
4058 #endif
4059 errno != EAGAIN) {
4060 const char* es = strerror(errno);
4061 char a[64];
4062 addrport2str((void*)&queries[i]->remote_addr, a, sizeof(a));
4063 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
4064 }
4065 #ifdef BIND8_STATS
4066 data->nsd->st->txerr += recvcount-i;
4067 #endif /* BIND8_STATS */
4068 break;
4069 }
4070 i += sent;
4071 }
4072 for(i=0; i<recvcount; i++) {
4073 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
4074 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
4075 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen;
4076 }
4077 }
4078
4079 #ifdef HAVE_SSL
4080 /*
4081 * Setup an event for the tcp handler.
4082 */
4083 static void
4084 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *),
4085 int fd, short event)
4086 {
4087 struct timeval timeout;
4088 struct event_base* ev_base;
4089
4090 timeout.tv_sec = data->nsd->tcp_timeout;
4091 timeout.tv_usec = 0L;
4092
4093 ev_base = data->event.ev_base;
4094 event_del(&data->event);
4095 memset(&data->event, 0, sizeof(data->event));
4096 event_set(&data->event, fd, event, fn, data);
4097 if(event_base_set(ev_base, &data->event) != 0)
4098 log_msg(LOG_ERR, "event base set failed");
4099 if(event_add(&data->event, &timeout) != 0)
4100 log_msg(LOG_ERR, "event add failed");
4101 }
4102 #endif /* HAVE_SSL */
4103
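/*
 * Tear down a TCP (or TLS) connection: shut down the TLS session if
 * present, close the socket, unlink the handler from the active list,
 * re-enable the accept handlers if we were at the connection limit,
 * and release the per-connection region.
 */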
4104 static void
4105 cleanup_tcp_handler(struct tcp_handler_data* data)
4106 {
4107 event_del(&data->event);
4108 #ifdef HAVE_SSL
4109 if(data->tls) {
4110 SSL_shutdown(data->tls);
4111 SSL_free(data->tls);
4112 data->tls = NULL;
4113 }
4114 #endif
4115 data->pp2_header_state = pp2_header_none;
4116 close(data->event.ev_fd);
4117 if(data->prev)
4118 data->prev->next = data->next;
4119 else tcp_active_list = data->next;
4120 if(data->next)
4121 data->next->prev = data->prev;
4122
4123 /*
4124 * Enable the TCP accept handlers when the current number of
4125 * TCP connections is about to drop below the maximum number
4126 * of TCP connections.
4127 */
4128 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
4129 configure_handler_event_types(EV_READ|EV_PERSIST);
4130 if(slowaccept) {
4131 event_del(&slowaccept_event);
4132 slowaccept = 0;
4133 }
4134 }
4135 --data->nsd->current_tcp_count;
4136 assert(data->nsd->current_tcp_count >= 0);
4137
4138 region_destroy(data->region);
4139 }
4140
4141 /* Read more data into the buffer for tcp read. Pass the amount of additional
4142  * data required. Returns false if nothing more can be done this event, or
4143  * true if more data has been read into the buffer. */
4144 static int
4145 more_read_buf_tcp(int fd, struct tcp_handler_data* data, void* bufpos,
4146 size_t add_amount, ssize_t* received)
4147 {
4148 *received = read(fd, bufpos, add_amount);
4149 if (*received == -1) {
4150 if (errno == EAGAIN || errno == EINTR) {
4151 /*
4152 * Read would block, wait until more
4153 * data is available.
4154 */
4155 return 0;
4156 } else {
4157 char buf[48];
4158 addr2str(&data->query->remote_addr, buf, sizeof(buf));
4159 #ifdef ECONNRESET
4160 if (verbosity >= 2 || errno != ECONNRESET)
4161 #endif /* ECONNRESET */
4162 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
4163 cleanup_tcp_handler(data);
4164 return 0;
4165 }
4166 } else if (*received == 0) {
4167 /* EOF */
4168 cleanup_tcp_handler(data);
4169 return 0;
4170 }
4171 return 1;
4172 }
4173
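/*
 * Read handler for a TCP connection. The read is staged: the optional
 * PROXYv2 header first, then the two byte message length, then the DNS
 * message itself. Each stage may need several events to complete; once
 * a full query is processed, the handler switches to handle_tcp_writing
 * to send the answer.
 */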
4174 static void
4175 handle_tcp_reading(int fd, short event, void* arg)
4176 {
4177 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
4178 ssize_t received;
4179 struct event_base* ev_base;
4180 struct timeval timeout;
4181 uint32_t now = 0;
4182
4183 if ((event & EV_TIMEOUT)) {
4184 /* Connection timed out. */
4185 cleanup_tcp_handler(data);
4186 return;
4187 }
4188
4189 if ((data->nsd->tcp_query_count > 0 &&
4190 data->query_count >= data->nsd->tcp_query_count) ||
4191 (data->query_count > 0 && data->tcp_no_more_queries))
4192 {
4193 /* No more queries allowed on this tcp connection. */
4194 cleanup_tcp_handler(data);
4195 return;
4196 }
4197
4198 assert((event & EV_READ));
4199
4200 if (data->bytes_transmitted == 0 && data->query_needs_reset) {
4201 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
4202 data->query_needs_reset = 0;
4203 }
4204
4205 if(data->pp2_enabled && data->pp2_header_state != pp2_header_done) {
4206 struct pp2_header* header = NULL;
4207 size_t want_read_size = 0;
4208 size_t current_read_size = 0;
4209 if(data->pp2_header_state == pp2_header_none) {
4210 want_read_size = PP2_HEADER_SIZE;
4211 if(buffer_remaining(data->query->packet) <
4212 want_read_size) {
4213 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header"));
4214 cleanup_tcp_handler(data);
4215 return;
4216 }
4217 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading fixed part of PROXYv2 header (len %lu)", (unsigned long)want_read_size));
4218 current_read_size = want_read_size;
4219 if(data->bytes_transmitted < current_read_size) {
4220 if(!more_read_buf_tcp(fd, data,
4221 (void*)buffer_at(data->query->packet,
4222 data->bytes_transmitted),
4223 current_read_size - data->bytes_transmitted,
4224 &received))
4225 return;
4226 data->bytes_transmitted += received;
4227 buffer_skip(data->query->packet, received);
4228 if(data->bytes_transmitted != current_read_size)
4229 return;
4230 data->pp2_header_state = pp2_header_init;
4231 }
4232 }
4233 if(data->pp2_header_state == pp2_header_init) {
4234 int err;
4235 err = pp2_read_header(buffer_begin(data->query->packet),
4236 buffer_limit(data->query->packet));
4237 if(err) {
4238 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not parse PROXYv2 header: %s", pp_lookup_error(err)));
4239 cleanup_tcp_handler(data);
4240 return;
4241 }
4242 header = (struct pp2_header*)buffer_begin(data->query->packet);
4243 want_read_size = ntohs(header->len);
4244 if(buffer_limit(data->query->packet) <
4245 PP2_HEADER_SIZE + want_read_size) {
4246 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header"));
4247 cleanup_tcp_handler(data);
4248 return;
4249 }
4250 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading variable part of PROXYv2 header (len %lu)", (unsigned long)want_read_size));
4251 current_read_size = PP2_HEADER_SIZE + want_read_size;
4252 if(want_read_size == 0) {
4253 /* nothing more to read; header is complete */
4254 data->pp2_header_state = pp2_header_done;
4255 } else if(data->bytes_transmitted < current_read_size) {
4256 if(!more_read_buf_tcp(fd, data,
4257 (void*)buffer_at(data->query->packet,
4258 data->bytes_transmitted),
4259 current_read_size - data->bytes_transmitted,
4260 &received))
4261 return;
4262 data->bytes_transmitted += received;
4263 buffer_skip(data->query->packet, received);
4264 if(data->bytes_transmitted != current_read_size)
4265 return;
4266 data->pp2_header_state = pp2_header_done;
4267 }
4268 }
4269 if(data->pp2_header_state != pp2_header_done || !header) {
4270 VERBOSITY(6, (LOG_ERR, "proxy-protocol: wrong state for the PROXYv2 header"));
4271
4272 cleanup_tcp_handler(data);
4273 return;
4274 }
4275 buffer_flip(data->query->packet);
4276 if(!consume_pp2_header(data->query->packet, data->query, 1)) {
4277 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not consume PROXYv2 header"));
4278
4279 cleanup_tcp_handler(data);
4280 return;
4281 }
4282 /* Clear and reset the buffer to read the following
4283 * DNS packet(s). */
4284 buffer_clear(data->query->packet);
4285 data->bytes_transmitted = 0;
4286 }
4287
4288 /*
4289 * Check if we received the leading packet length bytes yet.
4290 */
4291 if (data->bytes_transmitted < sizeof(uint16_t)) {
4292 if(!more_read_buf_tcp(fd, data,
4293 (char*) &data->query->tcplen + data->bytes_transmitted,
4294 sizeof(uint16_t) - data->bytes_transmitted, &received))
4295 return;
4296 data->bytes_transmitted += received;
4297 if (data->bytes_transmitted < sizeof(uint16_t)) {
4298 /*
4299 * Not done with the tcplen yet, wait for more
4300 * data to become available.
4301 */
4302 return;
4303 }
4304 assert(data->bytes_transmitted == sizeof(uint16_t));
4305
4306 data->query->tcplen = ntohs(data->query->tcplen);
4307
4308 /*
4309 * Minimum query size is:
4310 *
4311 * Size of the header (12)
4312 * + Root domain name (1)
4313 * + Query class (2)
4314 * + Query type (2)
4315 */
4316 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
4317 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
4318 cleanup_tcp_handler(data);
4319 return;
4320 }
4321
4322 if (data->query->tcplen > data->query->maxlen) {
4323 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
4324 cleanup_tcp_handler(data);
4325 return;
4326 }
4327
4328 buffer_set_limit(data->query->packet, data->query->tcplen);
4329 }
4330
4331 assert(buffer_remaining(data->query->packet) > 0);
4332
4333 /* Read the (remaining) query data. */
4334 if(!more_read_buf_tcp(fd, data, buffer_current(data->query->packet),
4335 buffer_remaining(data->query->packet), &received))
4336 return;
4337 data->bytes_transmitted += received;
4338 buffer_skip(data->query->packet, received);
4339 if (buffer_remaining(data->query->packet) > 0) {
4340 /*
4341 * Message not yet complete, wait for more data to
4342 * become available.
4343 */
4344 return;
4345 }
4346
4347 assert(buffer_position(data->query->packet) == data->query->tcplen);
4348
4349 /* Account... */
4350 #ifdef BIND8_STATS
4351 #ifndef INET6
4352 STATUP(data->nsd, ctcp);
4353 #else
4354 if (data->query->remote_addr.ss_family == AF_INET) {
4355 STATUP(data->nsd, ctcp);
4356 } else if (data->query->remote_addr.ss_family == AF_INET6) {
4357 STATUP(data->nsd, ctcp6);
4358 }
4359 #endif
4360 #endif /* BIND8_STATS */
4361
4362 /* We have a complete query, process it. */
4363
4364 /* tcp-query-count: handle query counter ++ */
4365 data->query_count++;
4366
4367 buffer_flip(data->query->packet);
4368 #ifdef USE_DNSTAP
4369 /*
4370 	 * send the TCP query, with the server (local) address and the client address, to the dnstap process
4371 */
4372 log_addr("query from client", &data->query->client_addr);
4373 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
4374 if(verbosity >= 6 && data->query->is_proxied)
4375 log_addr("query via proxy", &data->query->remote_addr);
4376 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr,
4377 data->query->client_addrlen, data->query->tcp, data->query->packet);
4378 #endif /* USE_DNSTAP */
4379 data->query_state = server_process_query(data->nsd, data->query, &now);
4380 if (data->query_state == QUERY_DISCARDED) {
4381 /* Drop the packet and the entire connection... */
4382 STATUP(data->nsd, dropped);
4383 ZTATUP(data->nsd, data->query->zone, dropped);
4384 cleanup_tcp_handler(data);
4385 return;
4386 }
4387
4388 #ifdef BIND8_STATS
4389 if (RCODE(data->query->packet) == RCODE_OK
4390 && !AA(data->query->packet))
4391 {
4392 STATUP(data->nsd, nona);
4393 ZTATUP(data->nsd, data->query->zone, nona);
4394 }
4395 #endif /* BIND8_STATS */
4396
4397 #ifdef USE_ZONE_STATS
4398 #ifndef INET6
4399 ZTATUP(data->nsd, data->query->zone, ctcp);
4400 #else
4401 if (data->query->remote_addr.ss_family == AF_INET) {
4402 ZTATUP(data->nsd, data->query->zone, ctcp);
4403 } else if (data->query->remote_addr.ss_family == AF_INET6) {
4404 ZTATUP(data->nsd, data->query->zone, ctcp6);
4405 }
4406 #endif
4407 #endif /* USE_ZONE_STATS */
4408
4409 query_add_optional(data->query, data->nsd, &now);
4410
4411 /* Switch to the tcp write handler. */
4412 buffer_flip(data->query->packet);
4413 data->query->tcplen = buffer_remaining(data->query->packet);
4414 #ifdef BIND8_STATS
4415 /* Account the rcode & TC... */
4416 STATUP2(data->nsd, rcode, RCODE(data->query->packet));
4417 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
4418 if (TC(data->query->packet)) {
4419 STATUP(data->nsd, truncated);
4420 ZTATUP(data->nsd, data->query->zone, truncated);
4421 }
4422 #endif /* BIND8_STATS */
4423 #ifdef USE_DNSTAP
4424 /*
4425 	 * send the TCP response, with the earlier found server (local) address and the client address, to the dnstap process
4426 */
4427 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
4428 log_addr("response to client", &data->query->client_addr);
4429 if(verbosity >= 6 && data->query->is_proxied)
4430 log_addr("response via proxy", &data->query->remote_addr);
4431 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr,
4432 data->query->client_addrlen, data->query->tcp, data->query->packet,
4433 data->query->zone);
4434 #endif /* USE_DNSTAP */
4435 data->bytes_transmitted = 0;
4436
4437 timeout.tv_sec = data->tcp_timeout / 1000;
4438 timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
4439
4440 ev_base = data->event.ev_base;
4441 event_del(&data->event);
4442 memset(&data->event, 0, sizeof(data->event));
4443 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
4444 handle_tcp_writing, data);
4445 if(event_base_set(ev_base, &data->event) != 0)
4446 log_msg(LOG_ERR, "event base set tcpr failed");
4447 if(event_add(&data->event, &timeout) != 0)
4448 log_msg(LOG_ERR, "event add tcpr failed");
4449 	/* see if we can write the answer right away (usually we can, EAGAIN if not) */
4450 handle_tcp_writing(fd, EV_WRITE, data);
4451 }
4452
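/*
 * Write handler for a TCP connection. Sends the two byte length
 * followed by the answer, in a single writev() call where available,
 * continues AXFR/IXFR transfers packet by packet, and switches back
 * to handle_tcp_reading when the response is complete.
 */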
4453 static void
4454 handle_tcp_writing(int fd, short event, void* arg)
4455 {
4456 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
4457 ssize_t sent;
4458 struct query *q = data->query;
4459 struct timeval timeout;
4460 struct event_base* ev_base;
4461 uint32_t now = 0;
4462
4463 if ((event & EV_TIMEOUT)) {
4464 /* Connection timed out. */
4465 cleanup_tcp_handler(data);
4466 return;
4467 }
4468
4469 assert((event & EV_WRITE));
4470
4471 if (data->bytes_transmitted < sizeof(q->tcplen)) {
4472 /* Writing the response packet length. */
4473 uint16_t n_tcplen = htons(q->tcplen);
4474 #ifdef HAVE_WRITEV
4475 struct iovec iov[2];
4476 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
4477 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
4478 iov[1].iov_base = buffer_begin(q->packet);
4479 iov[1].iov_len = buffer_limit(q->packet);
4480 sent = writev(fd, iov, 2);
4481 #else /* HAVE_WRITEV */
4482 sent = write(fd,
4483 (const char *) &n_tcplen + data->bytes_transmitted,
4484 sizeof(n_tcplen) - data->bytes_transmitted);
4485 #endif /* HAVE_WRITEV */
4486 if (sent == -1) {
4487 if (errno == EAGAIN || errno == EINTR) {
4488 /*
4489 * Write would block, wait until
4490 * socket becomes writable again.
4491 */
4492 return;
4493 } else {
4494 #ifdef ECONNRESET
4495 if(verbosity >= 2 || errno != ECONNRESET)
4496 #endif /* ECONNRESET */
4497 #ifdef EPIPE
4498 if(verbosity >= 2 || errno != EPIPE)
4499 #endif /* EPIPE 'broken pipe' */
4500 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
4501 cleanup_tcp_handler(data);
4502 return;
4503 }
4504 }
4505
4506 data->bytes_transmitted += sent;
4507 if (data->bytes_transmitted < sizeof(q->tcplen)) {
4508 /*
4509 * Writing not complete, wait until socket
4510 * becomes writable again.
4511 */
4512 return;
4513 }
4514
4515 #ifdef HAVE_WRITEV
4516 sent -= sizeof(n_tcplen);
4517 /* handle potential 'packet done' code */
4518 goto packet_could_be_done;
4519 #endif
4520 }
4521
4522 sent = write(fd,
4523 buffer_current(q->packet),
4524 buffer_remaining(q->packet));
4525 if (sent == -1) {
4526 if (errno == EAGAIN || errno == EINTR) {
4527 /*
4528 * Write would block, wait until
4529 * socket becomes writable again.
4530 */
4531 return;
4532 } else {
4533 #ifdef ECONNRESET
4534 if(verbosity >= 2 || errno != ECONNRESET)
4535 #endif /* ECONNRESET */
4536 #ifdef EPIPE
4537 if(verbosity >= 2 || errno != EPIPE)
4538 #endif /* EPIPE 'broken pipe' */
4539 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
4540 cleanup_tcp_handler(data);
4541 return;
4542 }
4543 }
4544
4545 data->bytes_transmitted += sent;
4546 #ifdef HAVE_WRITEV
4547 packet_could_be_done:
4548 #endif
4549 buffer_skip(q->packet, sent);
4550 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
4551 /*
4552 * Still more data to write when socket becomes
4553 * writable again.
4554 */
4555 return;
4556 }
4557
4558 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));
4559
4560 if (data->query_state == QUERY_IN_AXFR ||
4561 data->query_state == QUERY_IN_IXFR) {
4562 /* Continue processing AXFR and writing back results. */
4563 buffer_clear(q->packet);
4564 if(data->query_state == QUERY_IN_AXFR)
4565 data->query_state = query_axfr(data->nsd, q, 0);
4566 else data->query_state = query_ixfr(data->nsd, q);
4567 if (data->query_state != QUERY_PROCESSED) {
4568 query_add_optional(data->query, data->nsd, &now);
4569
4570 /* Reset data. */
4571 buffer_flip(q->packet);
4572 q->tcplen = buffer_remaining(q->packet);
4573 data->bytes_transmitted = 0;
4574 /* Reset timeout. */
4575 timeout.tv_sec = data->tcp_timeout / 1000;
4576 timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
4577 ev_base = data->event.ev_base;
4578 event_del(&data->event);
4579 memset(&data->event, 0, sizeof(data->event));
4580 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
4581 handle_tcp_writing, data);
4582 if(event_base_set(ev_base, &data->event) != 0)
4583 log_msg(LOG_ERR, "event base set tcpw failed");
4584 if(event_add(&data->event, &timeout) != 0)
4585 log_msg(LOG_ERR, "event add tcpw failed");
4586
4587 /*
4588 * Write data if/when the socket is writable
4589 * again.
4590 */
4591 return;
4592 }
4593 }
4594
4595 /*
4596 * Done sending, wait for the next request to arrive on the
4597 * TCP socket by installing the TCP read handler.
4598 */
4599 if ((data->nsd->tcp_query_count > 0 &&
4600 data->query_count >= data->nsd->tcp_query_count) ||
4601 data->tcp_no_more_queries) {
4602
4603 (void) shutdown(fd, SHUT_WR);
4604 }
4605
4606 data->bytes_transmitted = 0;
4607 data->query_needs_reset = 1;
4608
4609 timeout.tv_sec = data->tcp_timeout / 1000;
4610 timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
4611 ev_base = data->event.ev_base;
4612 event_del(&data->event);
4613 memset(&data->event, 0, sizeof(data->event));
4614 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
4615 handle_tcp_reading, data);
4616 if(event_base_set(ev_base, &data->event) != 0)
4617 log_msg(LOG_ERR, "event base set tcpw failed");
4618 if(event_add(&data->event, &timeout) != 0)
4619 log_msg(LOG_ERR, "event add tcpw failed");
4620 }
4621
4622 #ifdef HAVE_SSL
4623 /** create SSL object and associate fd */
4624 static SSL*
4625 incoming_ssl_fd(SSL_CTX* ctx, int fd)
4626 {
4627 SSL* ssl = SSL_new((SSL_CTX*)ctx);
4628 if(!ssl) {
4629 log_crypto_err("could not SSL_new");
4630 return NULL;
4631 }
4632 SSL_set_accept_state(ssl);
4633 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
4634 if(!SSL_set_fd(ssl, fd)) {
4635 log_crypto_err("could not SSL_set_fd");
4636 SSL_free(ssl);
4637 return NULL;
4638 }
4639 return ssl;
4640 }
4641
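/*
 * The nonblocking TLS handshake below is a small state machine: when
 * OpenSSL reports SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE, the
 * handler is re-registered for the matching event and the handshake
 * resumes on the next callback, until SSL_do_handshake() returns 1.
 */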
4642 /** TLS handshake to upgrade TCP connection */
4643 static int
4644 tls_handshake(struct tcp_handler_data* data, int fd, int writing)
4645 {
4646 int r;
4647 if(data->shake_state == tls_hs_read_event) {
4648 		/* read condition satisfied; switch back to writing */
4649 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
4650 data->shake_state = tls_hs_none;
4651 return 1;
4652 }
4653 if(data->shake_state == tls_hs_write_event) {
4654 		/* write condition satisfied; switch back to reading */
4655 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
4656 data->shake_state = tls_hs_none;
4657 return 1;
4658 }
4659
4660 	/* (continue to) set up the TLS connection */
4661 ERR_clear_error();
4662 r = SSL_do_handshake(data->tls);
4663
4664 if(r != 1) {
4665 int want = SSL_get_error(data->tls, r);
4666 if(want == SSL_ERROR_WANT_READ) {
4667 if(data->shake_state == tls_hs_read) {
4668 /* try again later */
4669 return 1;
4670 }
4671 data->shake_state = tls_hs_read;
4672 /* switch back to reading mode */
4673 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
4674 return 1;
4675 } else if(want == SSL_ERROR_WANT_WRITE) {
4676 if(data->shake_state == tls_hs_write) {
4677 /* try again later */
4678 return 1;
4679 }
4680 data->shake_state = tls_hs_write;
4681 /* switch back to writing mode */
4682 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
4683 return 1;
4684 } else {
4685 if(r == 0)
4686 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
4687 else {
4688 unsigned long err = ERR_get_error();
4689 if(!squelch_err_ssl_handshake(err)) {
4690 char a[64], s[256];
4691 addr2str(&data->query->remote_addr, a, sizeof(a));
4692 snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
4693 log_crypto_from_err(s, err);
4694 }
4695 }
4696 cleanup_tcp_handler(data);
4697 return 0;
4698 }
4699 }
4700
4701 	/* Used to log the successful upgrade for testing; could be removed. */
4702 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
4703 /* set back to the event we need to have when reading (or writing) */
4704 if(data->shake_state == tls_hs_read && writing) {
4705 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
4706 } else if(data->shake_state == tls_hs_write && !writing) {
4707 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
4708 }
4709 data->shake_state = tls_hs_none;
4710 return 1;
4711 }
4712
4713 /* Read more data into the buffer for tls read. Pass the amount of additional
4714  * data required. Returns false if nothing more can be done this event, or
4715  * true if more data has been read into the buffer. */
4716 static int
4717 more_read_buf_tls(int fd, struct tcp_handler_data* data, void* bufpos,
4718 size_t add_amount, ssize_t* received)
4719 {
4720 ERR_clear_error();
4721 if((*received=SSL_read(data->tls, bufpos, add_amount)) <= 0) {
4722 int want = SSL_get_error(data->tls, *received);
4723 if(want == SSL_ERROR_ZERO_RETURN) {
4724 cleanup_tcp_handler(data);
4725 return 0; /* shutdown, closed */
4726 } else if(want == SSL_ERROR_WANT_READ) {
4727 /* wants to be called again */
4728 return 0;
4729 }
4730 else if(want == SSL_ERROR_WANT_WRITE) {
4731 /* switch to writing */
4732 data->shake_state = tls_hs_write_event;
4733 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
4734 return 0;
4735 }
4736 cleanup_tcp_handler(data);
4737 log_crypto_err("could not SSL_read");
4738 return 0;
4739 }
4740 return 1;
4741 }
4742
4743 /** handle TLS reading of incoming query */
4744 static void
4745 handle_tls_reading(int fd, short event, void* arg)
4746 {
4747 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
4748 ssize_t received;
4749 uint32_t now = 0;
4750
4751 if ((event & EV_TIMEOUT)) {
4752 /* Connection timed out. */
4753 cleanup_tcp_handler(data);
4754 return;
4755 }
4756
4757 if ((data->nsd->tcp_query_count > 0 &&
4758 data->query_count >= data->nsd->tcp_query_count) ||
4759 (data->query_count > 0 && data->tcp_no_more_queries))
4760 {
4761 /* No more queries allowed on this tcp connection. */
4762 cleanup_tcp_handler(data);
4763 return;
4764 }
4765
4766 assert((event & EV_READ));
4767
4768 if (data->bytes_transmitted == 0 && data->query_needs_reset) {
4769 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
4770 data->query_needs_reset = 0;
4771 }
4772
4773 if(data->shake_state != tls_hs_none) {
4774 if(!tls_handshake(data, fd, 0))
4775 return;
4776 if(data->shake_state != tls_hs_none)
4777 return;
4778 }
4779
4780 if(data->pp2_enabled && data->pp2_header_state != pp2_header_done) {
4781 struct pp2_header* header = NULL;
4782 size_t want_read_size = 0;
4783 size_t current_read_size = 0;
4784 if(data->pp2_header_state == pp2_header_none) {
4785 want_read_size = PP2_HEADER_SIZE;
4786 if(buffer_remaining(data->query->packet) <
4787 want_read_size) {
4788 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header"));
4789 cleanup_tcp_handler(data);
4790 return;
4791 }
4792 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading fixed part of PROXYv2 header (len %lu)", (unsigned long)want_read_size));
4793 current_read_size = want_read_size;
4794 if(data->bytes_transmitted < current_read_size) {
4795 if(!more_read_buf_tls(fd, data,
4796 buffer_at(data->query->packet,
4797 data->bytes_transmitted),
4798 current_read_size - data->bytes_transmitted,
4799 &received))
4800 return;
4801 data->bytes_transmitted += received;
4802 buffer_skip(data->query->packet, received);
4803 if(data->bytes_transmitted != current_read_size)
4804 return;
4805 data->pp2_header_state = pp2_header_init;
4806 }
4807 }
4808 if(data->pp2_header_state == pp2_header_init) {
4809 int err;
4810 err = pp2_read_header(buffer_begin(data->query->packet),
4811 buffer_limit(data->query->packet));
4812 if(err) {
4813 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not parse PROXYv2 header: %s", pp_lookup_error(err)));
4814 cleanup_tcp_handler(data);
4815 return;
4816 }
4817 header = (struct pp2_header*)buffer_begin(data->query->packet);
4818 want_read_size = ntohs(header->len);
4819 if(buffer_limit(data->query->packet) <
4820 PP2_HEADER_SIZE + want_read_size) {
4821 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header"));
4822 cleanup_tcp_handler(data);
4823 return;
4824 }
4825 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading variable part of PROXYv2 header (len %lu)", (unsigned long)want_read_size));
4826 current_read_size = PP2_HEADER_SIZE + want_read_size;
4827 if(want_read_size == 0) {
4828 /* nothing more to read; header is complete */
4829 data->pp2_header_state = pp2_header_done;
4830 } else if(data->bytes_transmitted < current_read_size) {
4831 if(!more_read_buf_tls(fd, data,
4832 buffer_at(data->query->packet,
4833 data->bytes_transmitted),
4834 current_read_size - data->bytes_transmitted,
4835 &received))
4836 return;
4837 data->bytes_transmitted += received;
4838 buffer_skip(data->query->packet, received);
4839 if(data->bytes_transmitted != current_read_size)
4840 return;
4841 data->pp2_header_state = pp2_header_done;
4842 }
4843 }
4844 if(data->pp2_header_state != pp2_header_done || !header) {
4845 VERBOSITY(6, (LOG_ERR, "proxy-protocol: wrong state for the PROXYv2 header"));
4846 cleanup_tcp_handler(data);
4847 return;
4848 }
4849 buffer_flip(data->query->packet);
4850 if(!consume_pp2_header(data->query->packet, data->query, 1)) {
4851 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not consume PROXYv2 header"));
4852 cleanup_tcp_handler(data);
4853 return;
4854 }
4855 /* Clear and reset the buffer to read the following
4856 * DNS packet(s). */
4857 buffer_clear(data->query->packet);
4858 data->bytes_transmitted = 0;
4859 }
4860 /*
4861 * Check if we received the leading packet length bytes yet.
4862 */
4863 if(data->bytes_transmitted < sizeof(uint16_t)) {
4864 if(!more_read_buf_tls(fd, data,
4865 (char *) &data->query->tcplen + data->bytes_transmitted,
4866 sizeof(uint16_t) - data->bytes_transmitted, &received))
4867 return;
4868 data->bytes_transmitted += received;
4869 if (data->bytes_transmitted < sizeof(uint16_t)) {
4870 /*
4871 * Not done with the tcplen yet, wait for more
4872 * data to become available.
4873 */
4874 return;
4875 }
4876
4877 assert(data->bytes_transmitted == sizeof(uint16_t));
4878
4879 data->query->tcplen = ntohs(data->query->tcplen);
4880
4881 /*
4882 * Minimum query size is:
4883 *
4884 * Size of the header (12)
4885 * + Root domain name (1)
4886 * + Query class (2)
4887 * + Query type (2)
4888 */
4889 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
4890 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
4891 cleanup_tcp_handler(data);
4892 return;
4893 }
4894
4895 if (data->query->tcplen > data->query->maxlen) {
4896 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
4897 cleanup_tcp_handler(data);
4898 return;
4899 }
4900
4901 buffer_set_limit(data->query->packet, data->query->tcplen);
4902 }
4903
4904 assert(buffer_remaining(data->query->packet) > 0);
4905
4906 /* Read the (remaining) query data. */
4907 if(!more_read_buf_tls(fd, data, buffer_current(data->query->packet),
4908 buffer_remaining(data->query->packet), &received))
4909 return;
4910 data->bytes_transmitted += received;
4911 buffer_skip(data->query->packet, received);
4912 if (buffer_remaining(data->query->packet) > 0) {
4913 /*
4914 * Message not yet complete, wait for more data to
4915 * become available.
4916 */
4917 return;
4918 }
4919
4920 assert(buffer_position(data->query->packet) == data->query->tcplen);
4921
4922 /* Account... */
4923 #ifndef INET6
4924 STATUP(data->nsd, ctls);
4925 #else
4926 if (data->query->remote_addr.ss_family == AF_INET) {
4927 STATUP(data->nsd, ctls);
4928 } else if (data->query->remote_addr.ss_family == AF_INET6) {
4929 STATUP(data->nsd, ctls6);
4930 }
4931 #endif
4932
4933 /* We have a complete query, process it. */
4934
4935 /* tcp-query-count: handle query counter ++ */
4936 data->query_count++;
4937
4938 buffer_flip(data->query->packet);
4939 #ifdef USE_DNSTAP
4940 /*
4941 	 * send the TCP (TLS) query, with the server (local) address and the client address, to the dnstap process
4942 */
4943 log_addr("query from client", &data->query->client_addr);
4944 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
4945 if(verbosity >= 6 && data->query->is_proxied)
4946 log_addr("query via proxy", &data->query->remote_addr);
4947 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr,
4948 data->query->client_addrlen, data->query->tcp, data->query->packet);
4949 #endif /* USE_DNSTAP */
4950 data->query_state = server_process_query(data->nsd, data->query, &now);
4951 if (data->query_state == QUERY_DISCARDED) {
4952 /* Drop the packet and the entire connection... */
4953 STATUP(data->nsd, dropped);
4954 ZTATUP(data->nsd, data->query->zone, dropped);
4955 cleanup_tcp_handler(data);
4956 return;
4957 }
4958
4959 #ifdef BIND8_STATS
4960 if (RCODE(data->query->packet) == RCODE_OK
4961 && !AA(data->query->packet))
4962 {
4963 STATUP(data->nsd, nona);
4964 ZTATUP(data->nsd, data->query->zone, nona);
4965 }
4966 #endif /* BIND8_STATS */
4967
4968 #ifdef USE_ZONE_STATS
4969 #ifndef INET6
4970 ZTATUP(data->nsd, data->query->zone, ctls);
4971 #else
4972 if (data->query->remote_addr.ss_family == AF_INET) {
4973 ZTATUP(data->nsd, data->query->zone, ctls);
4974 } else if (data->query->remote_addr.ss_family == AF_INET6) {
4975 ZTATUP(data->nsd, data->query->zone, ctls6);
4976 }
4977 #endif
4978 #endif /* USE_ZONE_STATS */
4979
4980 query_add_optional(data->query, data->nsd, &now);
4981
4982 /* Switch to the tcp write handler. */
4983 buffer_flip(data->query->packet);
4984 data->query->tcplen = buffer_remaining(data->query->packet);
4985 #ifdef BIND8_STATS
4986 /* Account the rcode & TC... */
4987 STATUP2(data->nsd, rcode, RCODE(data->query->packet));
4988 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
4989 if (TC(data->query->packet)) {
4990 STATUP(data->nsd, truncated);
4991 ZTATUP(data->nsd, data->query->zone, truncated);
4992 }
4993 #endif /* BIND8_STATS */
4994 #ifdef USE_DNSTAP
4995 /*
4996 	 * send the TCP (TLS) response, with the earlier found server (local) address and the client address, to the dnstap process
4997 */
4998 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
4999 log_addr("response to client", &data->query->client_addr);
5000 if(verbosity >= 6 && data->query->is_proxied)
5001 log_addr("response via proxy", &data->query->remote_addr);
5002 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr,
5003 data->query->client_addrlen, data->query->tcp, data->query->packet,
5004 data->query->zone);
5005 #endif /* USE_DNSTAP */
5006 data->bytes_transmitted = 0;
5007
5008 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
5009
5010 	/* see if we can write the answer right away (usually we can, EAGAIN if not) */
5011 handle_tls_writing(fd, EV_WRITE, data);
5012 }
5013
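/*
 * TLS has no writev() equivalent, so the first write of a message
 * copies the two byte length and the packet into one shared temporary
 * buffer; the length prefix and the message are then written together,
 * as writev() does for plain TCP.
 */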
5014 /** handle TLS writing of outgoing response */
5015 static void
5016 handle_tls_writing(int fd, short event, void* arg)
5017 {
5018 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
5019 ssize_t sent;
5020 struct query *q = data->query;
5021 /* static variable that holds reassembly buffer used to put the
5022 * TCP length in front of the packet, like writev. */
5023 static buffer_type* global_tls_temp_buffer = NULL;
5024 buffer_type* write_buffer;
5025 uint32_t now = 0;
5026
5027 if ((event & EV_TIMEOUT)) {
5028 /* Connection timed out. */
5029 cleanup_tcp_handler(data);
5030 return;
5031 }
5032
5033 assert((event & EV_WRITE));
5034
5035 if(data->shake_state != tls_hs_none) {
5036 if(!tls_handshake(data, fd, 1))
5037 return;
5038 if(data->shake_state != tls_hs_none)
5039 return;
5040 }
5041
5042 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);
5043
5044 	/* If we are writing the start of a message, we must include the length;
5045 	 * this is done with a copy into write_buffer. */
5046 write_buffer = NULL;
5047 if (data->bytes_transmitted == 0) {
5048 if(!global_tls_temp_buffer) {
5049 /* gets deallocated when nsd shuts down from
5050 * nsd.region */
5051 global_tls_temp_buffer = buffer_create(nsd.region,
5052 QIOBUFSZ + sizeof(q->tcplen));
5053 if (!global_tls_temp_buffer) {
5054 return;
5055 }
5056 }
5057 write_buffer = global_tls_temp_buffer;
5058 buffer_clear(write_buffer);
5059 buffer_write_u16(write_buffer, q->tcplen);
5060 buffer_write(write_buffer, buffer_current(q->packet),
5061 (int)buffer_remaining(q->packet));
5062 buffer_flip(write_buffer);
5063 } else {
5064 write_buffer = q->packet;
5065 }
5066
5067 /* Write the response */
5068 ERR_clear_error();
5069 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
5070 if(sent <= 0) {
5071 int want = SSL_get_error(data->tls, sent);
5072 if(want == SSL_ERROR_ZERO_RETURN) {
5073 cleanup_tcp_handler(data);
5074 /* closed */
5075 } else if(want == SSL_ERROR_WANT_READ) {
5076 /* switch back to reading */
5077 data->shake_state = tls_hs_read_event;
5078 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
5079 } else if(want != SSL_ERROR_WANT_WRITE) {
5080 cleanup_tcp_handler(data);
5081 log_crypto_err("could not SSL_write");
5082 }
5083 return;
5084 }
5085
5086 buffer_skip(write_buffer, sent);
5087 if(buffer_remaining(write_buffer) != 0) {
5088 		/* Not all was sent; if the temporary buffer was used, sync up the real packet buffer with the amount written. */
5089 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
5090 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
5091 }
5092 }
5093
5094 data->bytes_transmitted += sent;
5095 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
5096 /*
5097 * Still more data to write when socket becomes
5098 * writable again.
5099 */
5100 return;
5101 }
5102
5103 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));
5104
5105 if (data->query_state == QUERY_IN_AXFR ||
5106 data->query_state == QUERY_IN_IXFR) {
5107 /* Continue processing AXFR and writing back results. */
5108 buffer_clear(q->packet);
5109 if(data->query_state == QUERY_IN_AXFR)
5110 data->query_state = query_axfr(data->nsd, q, 0);
5111 else data->query_state = query_ixfr(data->nsd, q);
5112 if (data->query_state != QUERY_PROCESSED) {
5113 query_add_optional(data->query, data->nsd, &now);
5114
5115 /* Reset data. */
5116 buffer_flip(q->packet);
5117 q->tcplen = buffer_remaining(q->packet);
5118 data->bytes_transmitted = 0;
5119 /* Reset to writing mode. */
5120 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
5121
5122 /*
5123 * Write data if/when the socket is writable
5124 * again.
5125 */
5126 return;
5127 }
5128 }
5129
5130 /*
5131 * Done sending, wait for the next request to arrive on the
5132 * TCP socket by installing the TCP read handler.
5133 */
5134 if ((data->nsd->tcp_query_count > 0 &&
5135 data->query_count >= data->nsd->tcp_query_count) ||
5136 data->tcp_no_more_queries) {
5137
5138 (void) shutdown(fd, SHUT_WR);
5139 }
5140
5141 data->bytes_transmitted = 0;
5142 data->query_needs_reset = 1;
5143
5144 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
5145 }
5146 #endif
5147
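/*
 * When accept() fails with EMFILE or ENFILE, the accept handlers are
 * disabled and a one-shot timer is armed (see handle_tcp_accept());
 * this callback re-enables accepting once that timer expires.
 */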
5148 static void
5149 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
5150 void* ATTR_UNUSED(arg))
5151 {
5152 if(slowaccept) {
5153 configure_handler_event_types(EV_PERSIST | EV_READ);
5154 slowaccept = 0;
5155 }
5156 }
5157
5158 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
5159 {
5160 #ifndef HAVE_ACCEPT4
5161 int s = accept(fd, addr, addrlen);
5162 if (s != -1) {
5163 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
5164 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
5165 close(s);
5166 s = -1;
5167 			errno=EINTR; /* setting errno to EINTR suppresses the
5168 					error printout in the later code that
5169 					calls perform_accept; the error was already logged here */
5170 }
5171 }
5172 return s;
5173 #else
5174 return accept4(fd, addr, addrlen, SOCK_NONBLOCK);
5175 #endif /* HAVE_ACCEPT4 */
5176 }
5177
5178 /*
5179 * Handle an incoming TCP connection. The connection is accepted and
5180 * a new TCP reader event handler is added. The TCP handler
5181 * is responsible for cleanup when the connection is closed.
5182 */
5183 static void
5184 handle_tcp_accept(int fd, short event, void* arg)
5185 {
5186 struct tcp_accept_handler_data *data
5187 = (struct tcp_accept_handler_data *) arg;
5188 int s;
5189 int reject = 0;
5190 struct tcp_handler_data *tcp_data;
5191 region_type *tcp_region;
5192 #ifdef INET6
5193 struct sockaddr_storage addr;
5194 #else
5195 struct sockaddr_in addr;
5196 #endif
5197 socklen_t addrlen;
5198 struct timeval timeout;
5199
5200 if (!(event & EV_READ)) {
5201 return;
5202 }
5203
5204 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
5205 reject = data->nsd->options->tcp_reject_overflow;
5206 if (!reject) {
5207 return;
5208 }
5209 }
5210
5211 /* Accept it... */
5212 addrlen = sizeof(addr);
5213 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen);
5214 if (s == -1) {
5215 /**
5216 		 * EMFILE and ENFILE signal that the limit of open
5217 * file descriptors has been reached. Pause accept().
5218 * EINTR is a signal interrupt. The others are various OS ways
5219 * of saying that the client has closed the connection.
5220 */
5221 if (errno == EMFILE || errno == ENFILE) {
5222 if (!slowaccept) {
5223 /* disable accept events */
5224 struct timeval tv;
5225 configure_handler_event_types(0);
5226 tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
5227 tv.tv_usec = 0L;
5228 memset(&slowaccept_event, 0,
5229 sizeof(slowaccept_event));
5230 event_set(&slowaccept_event, -1, EV_TIMEOUT,
5231 handle_slowaccept_timeout, NULL);
5232 (void)event_base_set(data->event.ev_base,
5233 &slowaccept_event);
5234 (void)event_add(&slowaccept_event, &tv);
5235 slowaccept = 1;
5236 /* We don't want to spam the logs here */
5237 }
5238 } else if (errno != EINTR
5239 && errno != EWOULDBLOCK
5240 #ifdef ECONNABORTED
5241 && errno != ECONNABORTED
5242 #endif /* ECONNABORTED */
5243 #ifdef EPROTO
5244 && errno != EPROTO
5245 #endif /* EPROTO */
5246 ) {
5247 log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
5248 }
5249 return;
5250 }
5251
	if (reject) {
		shutdown(s, SHUT_RDWR);
		close(s);
		return;
	}

	/*
	 * This region is deallocated when the TCP connection is
	 * closed by the TCP handler.
	 */
	tcp_region = region_create(xalloc, free);
	tcp_data = (struct tcp_handler_data *) region_alloc(
		tcp_region, sizeof(struct tcp_handler_data));
	tcp_data->region = tcp_region;
	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
		compression_table_size, compressed_dnames);
	tcp_data->nsd = data->nsd;
	tcp_data->query_count = 0;
#ifdef HAVE_SSL
	tcp_data->shake_state = tls_hs_none;
	tcp_data->tls = NULL;
#endif
	tcp_data->query_needs_reset = 1;
	tcp_data->pp2_enabled = data->pp2_enabled;
	tcp_data->pp2_header_state = pp2_header_none;
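	/* The stream starts without a PROXYv2 header; the TCP reader
	 * advances pp2_header_state as header bytes are read off the
	 * connection. */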
	tcp_data->prev = NULL;
	tcp_data->next = NULL;

	tcp_data->query_state = QUERY_PROCESSED;
	tcp_data->bytes_transmitted = 0;
	memcpy(&tcp_data->query->remote_addr, &addr, addrlen);
	tcp_data->query->remote_addrlen = addrlen;
	/* Copy remote_addr to client_addr; for streams, accept time
	 * is the simplest place to do that. */
	memcpy(&tcp_data->query->client_addr, &addr, addrlen);
	tcp_data->query->client_addrlen = addrlen;
	tcp_data->query->is_proxied = 0;
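	/* is_proxied stays 0 until a PROXYv2 header is actually parsed
	 * on this stream; only then is client_addr rewritten to the
	 * proxied client's address. */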

	tcp_data->tcp_no_more_queries = 0;
	tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000;
	if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) {
		/* very busy, give smaller timeout */
		tcp_data->tcp_timeout = 200;
	}
	memset(&tcp_data->event, 0, sizeof(tcp_data->event));
	timeout.tv_sec = tcp_data->tcp_timeout / 1000;
	timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000;

#ifdef USE_DNSTAP
	/* save the address of the connection */
	tcp_data->socket = data->socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	if (data->tls_accept) {
		tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s);
		if(!tcp_data->tls) {
			close(s);
			/* nothing else references the region yet; destroy
			 * it so a failed TLS setup does not leak it */
			region_destroy(tcp_region);
			return;
		}
		tcp_data->shake_state = tls_hs_read;
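		/* handle_tls_reading drives the TLS handshake first;
		 * DNS queries on this connection are processed only
		 * after the handshake has completed. */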
		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
			handle_tls_reading, tcp_data);
	} else {
#endif
		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
			handle_tcp_reading, tcp_data);
#ifdef HAVE_SSL
	}
#endif
	if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
		log_msg(LOG_ERR, "cannot set tcp event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}
	if(event_add(&tcp_data->event, &timeout) != 0) {
		log_msg(LOG_ERR, "cannot add tcp to event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}
	if(tcp_active_list) {
		tcp_active_list->prev = tcp_data;
		tcp_data->next = tcp_active_list;
	}
	tcp_active_list = tcp_data;
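	/* The new connection is now the head of the doubly-linked
	 * tcp_active_list, which is walked when remaining TCP
	 * connections must be serviced or forcibly closed (see the
	 * overflow comment below). */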

	/*
	 * Keep track of the total number of TCP handlers installed so
	 * we can stop accepting connections when the maximum number
	 * of simultaneous TCP connections is reached.
	 *
	 * If tcp-reject-overflow is enabled, however, then we do not
	 * change the handler event type; we keep it as-is and accept
	 * overflow TCP connections only so that we can forcibly kill
	 * them off.
	 */
	++data->nsd->current_tcp_count;
	if (!data->nsd->options->tcp_reject_overflow &&
		data->nsd->current_tcp_count == data->nsd->maximum_tcp_count)
	{
		configure_handler_event_types(0);
	}
}

static void
send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
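	/* A command is a single sig_atomic_t value written over each
	 * child's IPC socket; a positive timeout makes the parent
	 * block-read the child's reply as an acknowledgement before
	 * closing the descriptor. */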
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
			if (write(nsd->children[i].child_fd,
				&command,
				sizeof(command)) == -1)
			{
				if(errno != EAGAIN && errno != EINTR)
					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
						(int) command,
						(int) nsd->children[i].pid,
						strerror(errno));
			} else if (timeout > 0) {
				(void)block_read(NULL,
					nsd->children[i].child_fd,
					&command, sizeof(command), timeout);
			}
			fsync(nsd->children[i].child_fd);
			close(nsd->children[i].child_fd);
			nsd->children[i].child_fd = -1;
		}
	}
}

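/*
 * Ask all child servers to exit. Plain NSD_QUIT does not wait for an
 * acknowledgement; the _and_wait variant blocks up to 3 seconds per
 * child for the reply to NSD_QUIT_CHILD.
 */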
static void
send_children_quit(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
	send_children_command(nsd, NSD_QUIT, 0);
}

static void
send_children_quit_and_wait(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
	send_children_command(nsd, NSD_QUIT_CHILD, 3);
}

#ifdef BIND8_STATS
static void
set_children_stats(struct nsd* nsd)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
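	/* Flag each child and enable the write event on its IPC
	 * handler, so the netio loop sends the stats request on its
	 * next pass. */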
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].need_to_send_STATS = 1;
		nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
	}
}
#endif /* BIND8_STATS */

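/*
 * (Re)install or remove the TCP accept handlers on all accept sockets.
 * A zero event_types argument deletes the events, so no new TCP
 * connections are accepted; a nonzero argument deletes, reinitialises
 * and re-adds each handler, since an installed event cannot simply be
 * given new event types in place.
 */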
static void
configure_handler_event_types(short event_types)
{
	size_t i;

	for (i = 0; i < tcp_accept_handler_count; ++i) {
		struct event* handler = &tcp_accept_handlers[i].event;
		if(event_types) {
			/* reassign */
			int fd = handler->ev_fd;
			struct event_base* base = handler->ev_base;
			if(tcp_accept_handlers[i].event_added)
				event_del(handler);
			memset(handler, 0, sizeof(*handler));
			event_set(handler, fd, event_types,
				handle_tcp_accept, &tcp_accept_handlers[i]);
			if(event_base_set(base, handler) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_base");
			if(event_add(handler, NULL) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_add");
			tcp_accept_handlers[i].event_added = 1;
		} else {
			/* remove */
			if(tcp_accept_handlers[i].event_added) {
				event_del(handler);
				tcp_accept_handlers[i].event_added = 0;
			}
		}
	}
}
