1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <limits.h> 15 #include <sys/socket.h> 16 #include <sys/uio.h> 17 #include <sys/wait.h> 18 19 #include <netinet/in.h> 20 #ifdef USE_TCP_FASTOPEN 21 #include <netinet/tcp.h> 22 #endif 23 #include <arpa/inet.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <stddef.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 #include <signal.h> 36 #include <netdb.h> 37 #include <poll.h> 38 #ifdef HAVE_SYS_RANDOM_H 39 #include <sys/random.h> 40 #endif 41 #ifndef SHUT_WR 42 #define SHUT_WR 1 43 #endif 44 #ifdef HAVE_MMAP 45 #include <sys/mman.h> 46 #endif /* HAVE_MMAP */ 47 #ifdef HAVE_OPENSSL_RAND_H 48 #include <openssl/rand.h> 49 #endif 50 #ifdef HAVE_OPENSSL_SSL_H 51 #include <openssl/ssl.h> 52 #endif 53 #ifdef HAVE_OPENSSL_ERR_H 54 #include <openssl/err.h> 55 #endif 56 #ifdef HAVE_OPENSSL_OCSP_H 57 #include <openssl/ocsp.h> 58 #endif 59 #ifndef USE_MINI_EVENT 60 # ifdef HAVE_EVENT_H 61 # include <event.h> 62 # else 63 # include <event2/event.h> 64 # include "event2/event_struct.h" 65 # include "event2/event_compat.h" 66 # endif 67 #else 68 # include "mini_event.h" 69 #endif 70 71 #include "axfr.h" 72 #include "namedb.h" 73 #include "netio.h" 74 #include "xfrd.h" 75 #include "xfrd-tcp.h" 76 #include "xfrd-disk.h" 77 #include "difffile.h" 78 #include "nsec3.h" 79 #include "ipc.h" 80 #include "udb.h" 81 #include "remote.h" 82 #include "lookup3.h" 83 #include "rrl.h" 84 #include "ixfr.h" 85 #ifdef USE_DNSTAP 86 #include "dnstap/dnstap_collector.h" 87 #endif 88 #include "verify.h" 89 #include "util/proxy_protocol.h" 90 91 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 92 93 #ifdef USE_DNSTAP 94 /* 95 * log_addr() - the function to print sockaddr_in/sockaddr_in6 structures content 96 * just like its done in Unbound via the same log_addr(VERB_LEVEL, const char*, sockaddr_storage*) 97 */ 98 static void 99 log_addr(const char* descr, 100 #ifdef INET6 101 struct sockaddr_storage* addr 102 #else 103 struct sockaddr_in* addr 104 #endif 105 ) 106 { 107 char str_buf[64]; 108 if(verbosity < 6) 109 return; 110 if( 111 #ifdef INET6 112 addr->ss_family == AF_INET 113 #else 114 addr->sin_family == AF_INET 115 #endif 116 ) { 117 struct sockaddr_in* s = (struct sockaddr_in*)addr; 118 inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf)); 119 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port))); 120 #ifdef INET6 121 } else { 122 struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr; 123 inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf)); 124 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port))); 125 #endif 126 } 127 } 128 #endif /* USE_DNSTAP */ 129 130 #ifdef USE_TCP_FASTOPEN 131 #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen" 132 #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2 133 #endif 134 135 /* header state for the PROXYv2 header (for TCP) */ 136 enum pp2_header_state { 137 /* no header encounter yet */ 138 pp2_header_none = 0, 139 /* read the static part of the header */ 140 pp2_header_init, 141 /* read the full header */ 142 pp2_header_done 143 }; 144 145 /* 146 * Data for the UDP handlers. 147 */ 148 struct udp_handler_data 149 { 150 struct nsd *nsd; 151 struct nsd_socket *socket; 152 struct event event; 153 /* if set, PROXYv2 is expected on this connection */ 154 int pp2_enabled; 155 }; 156 157 struct tcp_accept_handler_data { 158 struct nsd *nsd; 159 struct nsd_socket *socket; 160 int event_added; 161 struct event event; 162 #ifdef HAVE_SSL 163 /* handler accepts TLS connections on the dedicated port */ 164 int tls_accept; 165 #endif 166 /* if set, PROXYv2 is expected on this connection */ 167 int pp2_enabled; 168 }; 169 170 /* 171 * These globals are used to enable the TCP accept handlers 172 * when the number of TCP connection drops below the maximum 173 * number of TCP connections. 174 */ 175 static size_t tcp_accept_handler_count; 176 static struct tcp_accept_handler_data *tcp_accept_handlers; 177 178 static struct event slowaccept_event; 179 static int slowaccept; 180 181 #ifdef HAVE_SSL 182 static unsigned char *ocspdata = NULL; 183 static long ocspdata_len = 0; 184 #endif 185 186 #ifdef NONBLOCKING_IS_BROKEN 187 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to 188 read multiple times from a socket when reported ready by select. */ 189 # define NUM_RECV_PER_SELECT (1) 190 #else /* !NONBLOCKING_IS_BROKEN */ 191 # define NUM_RECV_PER_SELECT (100) 192 #endif /* NONBLOCKING_IS_BROKEN */ 193 194 #ifndef HAVE_MMSGHDR 195 struct mmsghdr { 196 struct msghdr msg_hdr; 197 unsigned int msg_len; 198 }; 199 #endif 200 201 static struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 202 static struct iovec iovecs[NUM_RECV_PER_SELECT]; 203 static struct query *queries[NUM_RECV_PER_SELECT]; 204 205 /* 206 * Data for the TCP connection handlers. 207 * 208 * The TCP handlers use non-blocking I/O. This is necessary to avoid 209 * blocking the entire server on a slow TCP connection, but does make 210 * reading from and writing to the socket more complicated. 211 * 212 * Basically, whenever a read/write would block (indicated by the 213 * EAGAIN errno variable) we remember the position we were reading 214 * from/writing to and return from the TCP reading/writing event 215 * handler. When the socket becomes readable/writable again we 216 * continue from the same position. 217 */ 218 struct tcp_handler_data 219 { 220 /* 221 * The region used to allocate all TCP connection related 222 * data, including this structure. This region is destroyed 223 * when the connection is closed. 224 */ 225 region_type* region; 226 227 /* 228 * The global nsd structure. 229 */ 230 struct nsd* nsd; 231 232 /* 233 * The current query data for this TCP connection. 234 */ 235 query_type* query; 236 237 /* 238 * The query_state is used to remember if we are performing an 239 * AXFR, if we're done processing, or if we should discard the 240 * query and connection. 241 */ 242 query_state_type query_state; 243 244 /* 245 * The event for the file descriptor and tcp timeout 246 */ 247 struct event event; 248 249 /* 250 * The bytes_transmitted field is used to remember the number 251 * of bytes transmitted when receiving or sending a DNS 252 * packet. The count includes the two additional bytes used 253 * to specify the packet length on a TCP connection. 254 */ 255 size_t bytes_transmitted; 256 257 /* If the query is restarted and needs a reset */ 258 int query_needs_reset; 259 260 /* 261 * The number of queries handled by this specific TCP connection. 262 */ 263 int query_count; 264 265 /* 266 * The timeout in msec for this tcp connection 267 */ 268 int tcp_timeout; 269 270 /* 271 * If the connection is allowed to have further queries on it. 272 */ 273 int tcp_no_more_queries; 274 275 #ifdef USE_DNSTAP 276 /* the socket of the accept socket to find proper service (local) address the socket is bound to. */ 277 struct nsd_socket *socket; 278 #endif /* USE_DNSTAP */ 279 280 /* if set, PROXYv2 is expected on this connection */ 281 int pp2_enabled; 282 283 /* header state for the PROXYv2 header (for TCP) */ 284 enum pp2_header_state pp2_header_state; 285 286 #ifdef HAVE_SSL 287 /* 288 * TLS object. 289 */ 290 SSL* tls; 291 292 /* 293 * TLS handshake state. 294 */ 295 enum { tls_hs_none, tls_hs_read, tls_hs_write, 296 tls_hs_read_event, tls_hs_write_event } shake_state; 297 #endif 298 /* list of connections, for service of remaining tcp channels */ 299 struct tcp_handler_data *prev, *next; 300 }; 301 /* global that is the list of active tcp channels */ 302 static struct tcp_handler_data *tcp_active_list = NULL; 303 304 /* 305 * Handle incoming queries on the UDP server sockets. 306 */ 307 static void handle_udp(int fd, short event, void* arg); 308 309 /* 310 * Handle incoming connections on the TCP sockets. These handlers 311 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 312 * connection) but are disabled when the number of current TCP 313 * connections is equal to the maximum number of TCP connections. 314 * Disabling is done by changing the handler to wait for the 315 * NETIO_EVENT_NONE type. This is done using the function 316 * configure_tcp_accept_handlers. 317 */ 318 static void handle_tcp_accept(int fd, short event, void* arg); 319 320 /* 321 * Handle incoming queries on a TCP connection. The TCP connections 322 * are configured to be non-blocking and the handler may be called 323 * multiple times before a complete query is received. 324 */ 325 static void handle_tcp_reading(int fd, short event, void* arg); 326 327 /* 328 * Handle outgoing responses on a TCP connection. The TCP connections 329 * are configured to be non-blocking and the handler may be called 330 * multiple times before a complete response is sent. 331 */ 332 static void handle_tcp_writing(int fd, short event, void* arg); 333 334 #ifdef HAVE_SSL 335 /* Create SSL object and associate fd */ 336 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd); 337 /* 338 * Handle TLS handshake. May be called multiple times if incomplete. 339 */ 340 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing); 341 342 /* 343 * Handle incoming queries on a TLS over TCP connection. The TLS 344 * connections are configured to be non-blocking and the handler may 345 * be called multiple times before a complete query is received. 346 */ 347 static void handle_tls_reading(int fd, short event, void* arg); 348 349 /* 350 * Handle outgoing responses on a TLS over TCP connection. The TLS 351 * connections are configured to be non-blocking and the handler may 352 * be called multiple times before a complete response is sent. 353 */ 354 static void handle_tls_writing(int fd, short event, void* arg); 355 #endif 356 357 /* 358 * Send all children the quit nonblocking, then close pipe. 359 */ 360 static void send_children_quit(struct nsd* nsd); 361 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 362 static void send_children_quit_and_wait(struct nsd* nsd); 363 364 /* set childrens flags to send NSD_STATS to them */ 365 #ifdef BIND8_STATS 366 static void set_children_stats(struct nsd* nsd); 367 #endif /* BIND8_STATS */ 368 369 /* 370 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 371 */ 372 static void configure_handler_event_types(short event_types); 373 374 static uint16_t *compressed_dname_offsets = 0; 375 static uint32_t compression_table_capacity = 0; 376 static uint32_t compression_table_size = 0; 377 static domain_type* compressed_dnames[MAXRRSPP]; 378 379 #ifdef USE_TCP_FASTOPEN 380 /* Checks to see if the kernel value must be manually changed in order for 381 TCP Fast Open to support server mode */ 382 static void report_tcp_fastopen_config() { 383 384 int tcp_fastopen_fp; 385 uint8_t tcp_fastopen_value; 386 387 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 388 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 389 } 390 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 391 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 392 close(tcp_fastopen_fp); 393 } 394 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 395 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 396 log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n"); 397 log_msg(LOG_WARNING, "To enable TFO use the command:"); 398 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 399 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 400 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 401 close(tcp_fastopen_fp); 402 } 403 close(tcp_fastopen_fp); 404 } 405 #endif 406 407 /* 408 * Remove the specified pid from the list of child pids. Returns -1 if 409 * the pid is not in the list, child_num otherwise. The field is set to 0. 410 */ 411 static int 412 delete_child_pid(struct nsd *nsd, pid_t pid) 413 { 414 size_t i; 415 for (i = 0; i < nsd->child_count; ++i) { 416 if (nsd->children[i].pid == pid) { 417 nsd->children[i].pid = 0; 418 if(!nsd->children[i].need_to_exit) { 419 if(nsd->children[i].child_fd != -1) 420 close(nsd->children[i].child_fd); 421 nsd->children[i].child_fd = -1; 422 if(nsd->children[i].handler) 423 nsd->children[i].handler->fd = -1; 424 } 425 return i; 426 } 427 } 428 return -1; 429 } 430 431 /* 432 * Restart child servers if necessary. 433 */ 434 static int 435 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 436 int* xfrd_sock_p) 437 { 438 struct main_ipc_handler_data *ipc_data; 439 size_t i; 440 int sv[2]; 441 442 /* Fork the child processes... */ 443 for (i = 0; i < nsd->child_count; ++i) { 444 if (nsd->children[i].pid <= 0) { 445 if (nsd->children[i].child_fd != -1) 446 close(nsd->children[i].child_fd); 447 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 448 log_msg(LOG_ERR, "socketpair: %s", 449 strerror(errno)); 450 return -1; 451 } 452 nsd->children[i].child_fd = sv[0]; 453 nsd->children[i].parent_fd = sv[1]; 454 nsd->children[i].pid = fork(); 455 switch (nsd->children[i].pid) { 456 default: /* SERVER MAIN */ 457 close(nsd->children[i].parent_fd); 458 nsd->children[i].parent_fd = -1; 459 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 460 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 461 } 462 if(!nsd->children[i].handler) 463 { 464 ipc_data = (struct main_ipc_handler_data*) region_alloc( 465 region, sizeof(struct main_ipc_handler_data)); 466 ipc_data->nsd = nsd; 467 ipc_data->child = &nsd->children[i]; 468 ipc_data->child_num = i; 469 ipc_data->xfrd_sock = xfrd_sock_p; 470 ipc_data->packet = buffer_create(region, QIOBUFSZ); 471 ipc_data->forward_mode = 0; 472 ipc_data->got_bytes = 0; 473 ipc_data->total_bytes = 0; 474 ipc_data->acl_num = 0; 475 nsd->children[i].handler = (struct netio_handler*) region_alloc( 476 region, sizeof(struct netio_handler)); 477 nsd->children[i].handler->fd = nsd->children[i].child_fd; 478 nsd->children[i].handler->timeout = NULL; 479 nsd->children[i].handler->user_data = ipc_data; 480 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 481 nsd->children[i].handler->event_handler = parent_handle_child_command; 482 netio_add_handler(netio, nsd->children[i].handler); 483 } 484 /* clear any ongoing ipc */ 485 ipc_data = (struct main_ipc_handler_data*) 486 nsd->children[i].handler->user_data; 487 ipc_data->forward_mode = 0; 488 /* restart - update fd */ 489 nsd->children[i].handler->fd = nsd->children[i].child_fd; 490 break; 491 case 0: /* CHILD */ 492 #ifdef MEMCLEAN /* OS collects memory pages */ 493 region_destroy(region); 494 #endif 495 496 if (pledge("stdio rpath inet", NULL) == -1) { 497 log_msg(LOG_ERR, "pledge"); 498 exit(1); 499 } 500 501 nsd->pid = 0; 502 nsd->child_count = 0; 503 nsd->server_kind = nsd->children[i].kind; 504 nsd->this_child = &nsd->children[i]; 505 nsd->this_child->child_num = i; 506 /* remove signal flags inherited from parent 507 the parent will handle them. */ 508 nsd->signal_hint_reload_hup = 0; 509 nsd->signal_hint_reload = 0; 510 nsd->signal_hint_child = 0; 511 nsd->signal_hint_quit = 0; 512 nsd->signal_hint_shutdown = 0; 513 nsd->signal_hint_stats = 0; 514 nsd->signal_hint_statsusr = 0; 515 close(*xfrd_sock_p); 516 close(nsd->this_child->child_fd); 517 nsd->this_child->child_fd = -1; 518 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 519 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 520 } 521 server_child(nsd); 522 /* NOTREACH */ 523 exit(0); 524 case -1: 525 log_msg(LOG_ERR, "fork failed: %s", 526 strerror(errno)); 527 return -1; 528 } 529 } 530 } 531 return 0; 532 } 533 534 #ifdef BIND8_STATS 535 static void set_bind8_alarm(struct nsd* nsd) 536 { 537 /* resync so that the next alarm is on the next whole minute */ 538 if(nsd->st_period > 0) /* % by 0 gives divbyzero error */ 539 alarm(nsd->st_period - (time(NULL) % nsd->st_period)); 540 } 541 #endif 542 543 /* set zone stat ids for zones initially read in */ 544 static void 545 zonestatid_tree_set(struct nsd* nsd) 546 { 547 struct radnode* n; 548 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 549 zone_type* zone = (zone_type*)n->elem; 550 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 551 } 552 } 553 554 #ifdef USE_ZONE_STATS 555 void 556 server_zonestat_alloc(struct nsd* nsd) 557 { 558 size_t num = (nsd->options->zonestatnames->count==0?1: 559 nsd->options->zonestatnames->count); 560 size_t sz = sizeof(struct nsdst)*num; 561 char tmpfile[256]; 562 uint8_t z = 0; 563 564 /* file names */ 565 nsd->zonestatfname[0] = 0; 566 nsd->zonestatfname[1] = 0; 567 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 568 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 569 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 570 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 571 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 572 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 573 574 /* file descriptors */ 575 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 576 if(nsd->zonestatfd[0] == -1) { 577 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 578 strerror(errno)); 579 exit(1); 580 } 581 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 582 if(nsd->zonestatfd[0] == -1) { 583 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 584 strerror(errno)); 585 close(nsd->zonestatfd[0]); 586 unlink(nsd->zonestatfname[0]); 587 exit(1); 588 } 589 590 #ifdef HAVE_MMAP 591 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 592 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 593 strerror(errno)); 594 exit(1); 595 } 596 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 597 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 598 nsd->zonestatfname[0], strerror(errno)); 599 exit(1); 600 } 601 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 602 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 603 strerror(errno)); 604 exit(1); 605 } 606 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 607 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 608 nsd->zonestatfname[1], strerror(errno)); 609 exit(1); 610 } 611 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 612 MAP_SHARED, nsd->zonestatfd[0], 0); 613 if(nsd->zonestat[0] == MAP_FAILED) { 614 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 615 unlink(nsd->zonestatfname[0]); 616 unlink(nsd->zonestatfname[1]); 617 exit(1); 618 } 619 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 620 MAP_SHARED, nsd->zonestatfd[1], 0); 621 if(nsd->zonestat[1] == MAP_FAILED) { 622 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 623 unlink(nsd->zonestatfname[0]); 624 unlink(nsd->zonestatfname[1]); 625 exit(1); 626 } 627 memset(nsd->zonestat[0], 0, sz); 628 memset(nsd->zonestat[1], 0, sz); 629 nsd->zonestatsize[0] = num; 630 nsd->zonestatsize[1] = num; 631 nsd->zonestatdesired = num; 632 nsd->zonestatsizenow = num; 633 nsd->zonestatnow = nsd->zonestat[0]; 634 #endif /* HAVE_MMAP */ 635 } 636 637 void 638 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 639 { 640 #ifdef HAVE_MMAP 641 #ifdef MREMAP_MAYMOVE 642 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 643 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 644 MREMAP_MAYMOVE); 645 if(nsd->zonestat[idx] == MAP_FAILED) { 646 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 647 exit(1); 648 } 649 #else /* !HAVE MREMAP */ 650 if(msync(nsd->zonestat[idx], 651 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 652 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 653 if(munmap(nsd->zonestat[idx], 654 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 655 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 656 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 657 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 658 if(nsd->zonestat[idx] == MAP_FAILED) { 659 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 660 exit(1); 661 } 662 #endif /* MREMAP */ 663 #endif /* HAVE_MMAP */ 664 } 665 666 /* realloc the zonestat array for the one that is not currently in use, 667 * to match the desired new size of the array (if applicable) */ 668 void 669 server_zonestat_realloc(struct nsd* nsd) 670 { 671 #ifdef HAVE_MMAP 672 uint8_t z = 0; 673 size_t sz; 674 int idx = 0; /* index of the zonestat array that is not in use */ 675 if(nsd->zonestatnow == nsd->zonestat[0]) 676 idx = 1; 677 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 678 return; 679 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 680 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 681 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 682 strerror(errno)); 683 exit(1); 684 } 685 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 686 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 687 nsd->zonestatfname[idx], strerror(errno)); 688 exit(1); 689 } 690 zonestat_remap(nsd, idx, sz); 691 /* zero the newly allocated region */ 692 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 693 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 694 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 695 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 696 } 697 nsd->zonestatsize[idx] = nsd->zonestatdesired; 698 #endif /* HAVE_MMAP */ 699 } 700 701 /* switchover to use the other array for the new children, that 702 * briefly coexist with the old children. And we want to avoid them 703 * both writing to the same statistics arrays. */ 704 void 705 server_zonestat_switch(struct nsd* nsd) 706 { 707 if(nsd->zonestatnow == nsd->zonestat[0]) { 708 nsd->zonestatnow = nsd->zonestat[1]; 709 nsd->zonestatsizenow = nsd->zonestatsize[1]; 710 } else { 711 nsd->zonestatnow = nsd->zonestat[0]; 712 nsd->zonestatsizenow = nsd->zonestatsize[0]; 713 } 714 } 715 #endif /* USE_ZONE_STATS */ 716 717 #ifdef BIND8_STATS 718 void 719 server_stat_alloc(struct nsd* nsd) 720 { 721 char tmpfile[256]; 722 size_t sz = sizeof(struct nsdst) * nsd->child_count * 2; 723 uint8_t z = 0; 724 725 /* file name */ 726 nsd->statfname = 0; 727 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.stat", 728 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 729 nsd->statfname = region_strdup(nsd->region, tmpfile); 730 731 /* file descriptor */ 732 nsd->statfd = open(nsd->statfname, O_CREAT|O_RDWR, 0600); 733 if(nsd->statfd == -1) { 734 log_msg(LOG_ERR, "cannot create %s: %s", nsd->statfname, 735 strerror(errno)); 736 unlink(nsd->zonestatfname[0]); 737 unlink(nsd->zonestatfname[1]); 738 exit(1); 739 } 740 741 #ifdef HAVE_MMAP 742 if(lseek(nsd->statfd, (off_t)sz-1, SEEK_SET) == -1) { 743 log_msg(LOG_ERR, "lseek %s: %s", nsd->statfname, 744 strerror(errno)); 745 goto fail_exit; 746 } 747 if(write(nsd->statfd, &z, 1) == -1) { 748 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 749 nsd->statfname, strerror(errno)); 750 goto fail_exit; 751 } 752 nsd->stat_map = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 753 MAP_SHARED, nsd->statfd, 0); 754 if(nsd->stat_map == MAP_FAILED) { 755 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 756 fail_exit: 757 close(nsd->statfd); 758 unlink(nsd->statfname); 759 unlink(nsd->zonestatfname[0]); 760 unlink(nsd->zonestatfname[1]); 761 exit(1); 762 } 763 memset(nsd->stat_map, 0, sz); 764 nsd->stats_per_child[0] = nsd->stat_map; 765 nsd->stats_per_child[1] = &nsd->stat_map[nsd->child_count]; 766 nsd->stat_current = 0; 767 nsd->st = &nsd->stats_per_child[nsd->stat_current][0]; 768 #endif /* HAVE_MMAP */ 769 } 770 #endif /* BIND8_STATS */ 771 772 #ifdef BIND8_STATS 773 void 774 server_stat_free(struct nsd* nsd) 775 { 776 unlink(nsd->statfname); 777 } 778 #endif /* BIND8_STATS */ 779 780 static void 781 cleanup_dname_compression_tables(void *ptr) 782 { 783 free(ptr); 784 compressed_dname_offsets = NULL; 785 compression_table_capacity = 0; 786 } 787 788 static void 789 initialize_dname_compression_tables(struct nsd *nsd) 790 { 791 size_t needed = domain_table_count(nsd->db->domains) + 1; 792 needed += EXTRA_DOMAIN_NUMBERS; 793 if(compression_table_capacity < needed) { 794 if(compressed_dname_offsets) { 795 region_remove_cleanup(nsd->db->region, 796 cleanup_dname_compression_tables, 797 compressed_dname_offsets); 798 free(compressed_dname_offsets); 799 } 800 compressed_dname_offsets = (uint16_t *) xmallocarray( 801 needed, sizeof(uint16_t)); 802 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 803 compressed_dname_offsets); 804 compression_table_capacity = needed; 805 compression_table_size=domain_table_count(nsd->db->domains)+1; 806 } 807 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 808 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 809 } 810 811 static int 812 set_cloexec(struct nsd_socket *sock) 813 { 814 assert(sock != NULL); 815 816 if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) { 817 const char *socktype = 818 sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp"; 819 log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s", 820 socktype, strerror(errno)); 821 return -1; 822 } 823 824 return 1; 825 } 826 827 static int 828 set_reuseport(struct nsd_socket *sock) 829 { 830 #ifdef SO_REUSEPORT 831 int on = 1; 832 #ifdef SO_REUSEPORT_LB 833 /* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like 834 * SO_REUSEPORT on Linux. This is what the users want with the config 835 * option in nsd.conf; if we actually need local address and port reuse 836 * they'll also need to have SO_REUSEPORT set for them, assume it was 837 * _LB they want. 838 */ 839 int opt = SO_REUSEPORT_LB; 840 static const char optname[] = "SO_REUSEPORT_LB"; 841 #else /* !SO_REUSEPORT_LB */ 842 int opt = SO_REUSEPORT; 843 static const char optname[] = "SO_REUSEPORT"; 844 #endif /* SO_REUSEPORT_LB */ 845 846 if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) { 847 return 1; 848 } else if(verbosity >= 3 || errno != ENOPROTOOPT) { 849 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 850 optname, strerror(errno)); 851 } 852 return -1; 853 #else 854 (void)sock; 855 #endif /* SO_REUSEPORT */ 856 857 return 0; 858 } 859 860 static int 861 set_reuseaddr(struct nsd_socket *sock) 862 { 863 #ifdef SO_REUSEADDR 864 int on = 1; 865 if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) { 866 return 1; 867 } 868 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", 869 strerror(errno)); 870 return -1; 871 #endif /* SO_REUSEADDR */ 872 return 0; 873 } 874 875 static int 876 set_rcvbuf(struct nsd_socket *sock, int rcv) 877 { 878 #ifdef SO_RCVBUF 879 #ifdef SO_RCVBUFFORCE 880 if(0 == setsockopt( 881 sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv))) 882 { 883 return 1; 884 } 885 if(errno == EPERM || errno == ENOBUFS) { 886 return 0; 887 } 888 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s", 889 strerror(errno)); 890 return -1; 891 #else /* !SO_RCVBUFFORCE */ 892 if (0 == setsockopt( 893 sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv))) 894 { 895 return 1; 896 } 897 if(errno == ENOSYS || errno == ENOBUFS) { 898 return 0; 899 } 900 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s", 901 strerror(errno)); 902 return -1; 903 #endif /* SO_RCVBUFFORCE */ 904 #endif /* SO_RCVBUF */ 905 906 return 0; 907 } 908 909 static int 910 set_sndbuf(struct nsd_socket *sock, int snd) 911 { 912 #ifdef SO_SNDBUF 913 #ifdef SO_SNDBUFFORCE 914 if(0 == setsockopt( 915 sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd))) 916 { 917 return 1; 918 } 919 if(errno == EPERM || errno == ENOBUFS) { 920 return 0; 921 } 922 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s", 923 strerror(errno)); 924 return -1; 925 #else /* !SO_SNDBUFFORCE */ 926 if(0 == setsockopt( 927 sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd))) 928 { 929 return 1; 930 } 931 if(errno == ENOSYS || errno == ENOBUFS) { 932 return 0; 933 } 934 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s", 935 strerror(errno)); 936 return -1; 937 #endif /* SO_SNDBUFFORCE */ 938 #endif /* SO_SNDBUF */ 939 940 return 0; 941 } 942 943 static int 944 set_nonblock(struct nsd_socket *sock) 945 { 946 const char *socktype = 947 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 948 949 if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) { 950 log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s", 951 socktype, strerror(errno)); 952 return -1; 953 } 954 955 return 1; 956 } 957 958 #ifdef INET6 959 static int 960 set_ipv6_v6only(struct nsd_socket *sock) 961 { 962 #ifdef IPV6_V6ONLY 963 int on = 1; 964 const char *socktype = 965 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 966 967 if(0 == setsockopt( 968 sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on))) 969 { 970 return 1; 971 } 972 973 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s", 974 socktype, strerror(errno)); 975 return -1; 976 #else 977 (void)sock; 978 #endif /* IPV6_V6ONLY */ 979 980 return 0; 981 } 982 #endif /* INET6 */ 983 984 #ifdef INET6 985 static int 986 set_ipv6_use_min_mtu(struct nsd_socket *sock) 987 { 988 #if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) 989 #if defined(IPV6_USE_MIN_MTU) 990 /* There is no fragmentation of IPv6 datagrams during forwarding in the 991 * network. Therefore we do not send UDP datagrams larger than the 992 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be 993 * larger if the network stack supports IPV6_USE_MIN_MTU. 994 */ 995 int opt = IPV6_USE_MIN_MTU; 996 int optval = 1; 997 static const char optname[] = "IPV6_USE_MIN_MTU"; 998 #elif defined(IPV6_MTU) 999 /* On Linux, PMTUD is disabled by default for datagrams so set the MTU 1000 * to the MIN MTU to get the same. 1001 */ 1002 int opt = IPV6_MTU; 1003 int optval = IPV6_MIN_MTU; 1004 static const char optname[] = "IPV6_MTU"; 1005 #endif 1006 if(0 == setsockopt( 1007 sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval))) 1008 { 1009 return 1; 1010 } 1011 1012 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 1013 optname, strerror(errno)); 1014 return -1; 1015 #else 1016 (void)sock; 1017 #endif /* INET6 */ 1018 1019 return 0; 1020 } 1021 #endif /* INET6 */ 1022 1023 static int 1024 set_ipv4_no_pmtu_disc(struct nsd_socket *sock) 1025 { 1026 int ret = 0; 1027 1028 #if defined(IP_MTU_DISCOVER) 1029 int opt = IP_MTU_DISCOVER; 1030 int optval; 1031 # if defined(IP_PMTUDISC_OMIT) 1032 /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU 1033 * information and send packets with DF=0. Fragmentation is allowed if 1034 * and only if the packet size exceeds the outgoing interface MTU or 1035 * the packet encounters smaller MTU link in network. This mitigates 1036 * DNS fragmentation attacks by preventing forged PMTU information. 1037 * FreeBSD already has same semantics without setting the option. 1038 */ 1039 optval = IP_PMTUDISC_OMIT; 1040 if(0 == setsockopt( 1041 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 1042 { 1043 return 1; 1044 } 1045 1046 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 1047 "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno)); 1048 # endif /* IP_PMTUDISC_OMIT */ 1049 # if defined(IP_PMTUDISC_DONT) 1050 /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */ 1051 optval = IP_PMTUDISC_DONT; 1052 if(0 == setsockopt( 1053 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 1054 { 1055 return 1; 1056 } 1057 1058 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 1059 "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno)); 1060 # endif 1061 ret = -1; 1062 #elif defined(IP_DONTFRAG) 1063 int off = 0; 1064 if (0 == setsockopt( 1065 sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off))) 1066 { 1067 return 1; 1068 } 1069 1070 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 1071 strerror(errno)); 1072 ret = -1; 1073 #else 1074 (void)sock; 1075 #endif 1076 1077 return ret; 1078 } 1079 1080 static int 1081 set_ip_freebind(struct nsd_socket *sock) 1082 { 1083 #ifdef IP_FREEBIND 1084 int on = 1; 1085 const char *socktype = 1086 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 1087 if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0) 1088 { 1089 return 1; 1090 } 1091 log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s", 1092 socktype, strerror(errno)); 1093 return -1; 1094 #else 1095 (void)sock; 1096 #endif /* IP_FREEBIND */ 1097 1098 return 0; 1099 } 1100 1101 static int 1102 set_ip_transparent(struct nsd_socket *sock) 1103 { 1104 /* 1105 The scandalous preprocessor blob here calls for some explanation :) 1106 POSIX does not specify an option to bind non-local IPs, so 1107 platforms developed several implementation-specific options, 1108 all set in the same way, but with different names. 1109 For additional complexity, some platform manage this setting 1110 differently for different address families (IPv4 vs IPv6). 1111 This scandalous preprocessor blob below abstracts such variability 1112 in the way which leaves the C code as lean and clear as possible. 1113 */ 1114 1115 #if defined(IP_TRANSPARENT) 1116 # define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT 1117 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1118 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT" 1119 // as of 2020-01, Linux does not support this on IPv6 programmatically 1120 #elif defined(SO_BINDANY) 1121 # define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY 1122 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET 1123 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY" 1124 #elif defined(IP_BINDANY) 1125 # define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY 1126 # define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY 1127 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1128 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6 1129 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY" 1130 #endif 1131 1132 #ifndef NSD_SOCKET_OPTION_TRANSPARENT 1133 (void)sock; 1134 #else 1135 # ifndef NSD_SOCKET_OPTION_TRANSPARENT6 1136 # define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT 1137 # endif 1138 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 1139 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL 1140 # endif 1141 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6 1142 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME 1143 # endif 1144 1145 int on = 1; 1146 const char *socktype = 1147 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 1148 const int is_ip6 = (sock->addr.ai_family == AF_INET6); 1149 1150 if(0 == setsockopt( 1151 sock->s, 1152 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL, 1153 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT, 1154 &on, sizeof(on))) 1155 { 1156 return 1; 1157 } 1158 1159 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 1160 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno)); 1161 return -1; 1162 #endif 1163 1164 return 0; 1165 } 1166 1167 static int 1168 set_tcp_maxseg(struct nsd_socket *sock, int mss) 1169 { 1170 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 1171 if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) { 1172 return 1; 1173 } 1174 log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s", 1175 strerror(errno)); 1176 return -1; 1177 #else 1178 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 1179 #endif 1180 return 0; 1181 } 1182 1183 #ifdef USE_TCP_FASTOPEN 1184 static int 1185 set_tcp_fastopen(struct nsd_socket *sock) 1186 { 1187 /* qlen specifies how many outstanding TFO requests to allow. Limit is 1188 * a defense against IP spoofing attacks as suggested in RFC7413. 1189 */ 1190 int qlen; 1191 1192 #ifdef __APPLE__ 1193 /* macOS X implementation only supports qlen of 1 via this call. The 1194 * actual value is configured by the net.inet.tcp.fastopen_backlog 1195 * kernel parameter. 1196 */ 1197 qlen = 1; 1198 #else 1199 /* 5 is recommended on Linux. */ 1200 qlen = 5; 1201 #endif 1202 if (0 == setsockopt( 1203 sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) 1204 { 1205 return 1; 1206 } 1207 1208 if (errno == EPERM) { 1209 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s " 1210 "; this could likely be because sysctl " 1211 "net.inet.tcp.fastopen.enabled, " 1212 "net.inet.tcp.fastopen.server_enable, or " 1213 "net.ipv4.tcp_fastopen is disabled", 1214 strerror(errno)); 1215 /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support 1216 * disabled, except when verbosity enabled for debugging 1217 */ 1218 } else if(errno != ENOPROTOOPT || verbosity >= 3) { 1219 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s", 1220 strerror(errno)); 1221 } 1222 1223 return (errno == ENOPROTOOPT ? 0 : -1); 1224 } 1225 #endif /* USE_TCP_FASTOPEN */ 1226 1227 static int 1228 set_bindtodevice(struct nsd_socket *sock) 1229 { 1230 #if defined(SO_BINDTODEVICE) 1231 if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE, 1232 sock->device, strlen(sock->device)) == -1) 1233 { 1234 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 1235 "SO_BINDTODEVICE", sock->device, strerror(errno)); 1236 return -1; 1237 } 1238 1239 return 1; 1240 #else 1241 (void)sock; 1242 return 0; 1243 #endif 1244 } 1245 1246 static int 1247 set_setfib(struct nsd_socket *sock) 1248 { 1249 #if defined(SO_SETFIB) 1250 if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB, 1251 (const void *)&sock->fib, sizeof(sock->fib)) == -1) 1252 { 1253 log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s", 1254 "SO_SETFIB", sock->fib, strerror(errno)); 1255 return -1; 1256 } 1257 1258 return 1; 1259 #else 1260 (void)sock; 1261 return 0; 1262 #endif 1263 } 1264 1265 static int 1266 open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1267 { 1268 int rcv = 1*1024*1024, snd = 1*1024*1024; 1269 1270 if(-1 == (sock->s = socket( 1271 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1272 { 1273 #ifdef INET6 1274 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1275 (sock->addr.ai_family == AF_INET6) && 1276 (errno == EAFNOSUPPORT)) 1277 { 1278 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: " 1279 "not supported"); 1280 return 0; 1281 } 1282 #endif 1283 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1284 return -1; 1285 } 1286 1287 set_cloexec(sock); 1288 1289 if(nsd->reuseport && reuseport_works && *reuseport_works) 1290 *reuseport_works = (set_reuseport(sock) == 1); 1291 1292 if(nsd->options->receive_buffer_size > 0) 1293 rcv = nsd->options->receive_buffer_size; 1294 if(set_rcvbuf(sock, rcv) == -1) 1295 return -1; 1296 1297 if(nsd->options->send_buffer_size > 0) 1298 snd = nsd->options->send_buffer_size; 1299 if(set_sndbuf(sock, snd) == -1) 1300 return -1; 1301 #ifdef INET6 1302 if(sock->addr.ai_family == AF_INET6) { 1303 if(set_ipv6_v6only(sock) == -1 || 1304 set_ipv6_use_min_mtu(sock) == -1) 1305 return -1; 1306 } else 1307 #endif /* INET6 */ 1308 if(sock->addr.ai_family == AF_INET) { 1309 if(set_ipv4_no_pmtu_disc(sock) == -1) 1310 return -1; 1311 } 1312 1313 /* Set socket to non-blocking. Otherwise, on operating systems 1314 * with thundering herd problems, the UDP recv could block 1315 * after select returns readable. 1316 */ 1317 set_nonblock(sock); 1318 1319 if(nsd->options->ip_freebind) 1320 (void)set_ip_freebind(sock); 1321 if(nsd->options->ip_transparent) 1322 (void)set_ip_transparent(sock); 1323 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1324 return -1; 1325 if(sock->fib != -1 && set_setfib(sock) == -1) 1326 return -1; 1327 1328 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1329 char buf[256]; 1330 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1331 log_msg(LOG_ERR, "can't bind udp socket %s: %s", 1332 buf, strerror(errno)); 1333 return -1; 1334 } 1335 1336 return 1; 1337 } 1338 1339 static int 1340 open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1341 { 1342 #ifdef USE_TCP_FASTOPEN 1343 report_tcp_fastopen_config(); 1344 #endif 1345 1346 (void)reuseport_works; 1347 1348 if(-1 == (sock->s = socket( 1349 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1350 { 1351 #ifdef INET6 1352 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1353 (sock->addr.ai_family == AF_INET6) && 1354 (errno == EAFNOSUPPORT)) 1355 { 1356 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: " 1357 "not supported"); 1358 return 0; 1359 } 1360 #endif /* INET6 */ 1361 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1362 return -1; 1363 } 1364 1365 set_cloexec(sock); 1366 1367 if(nsd->reuseport && reuseport_works && *reuseport_works) 1368 *reuseport_works = (set_reuseport(sock) == 1); 1369 1370 (void)set_reuseaddr(sock); 1371 1372 #ifdef INET6 1373 if(sock->addr.ai_family == AF_INET6) { 1374 if (set_ipv6_v6only(sock) == -1 || 1375 set_ipv6_use_min_mtu(sock) == -1) 1376 return -1; 1377 } 1378 #endif 1379 1380 if(nsd->tcp_mss > 0) 1381 set_tcp_maxseg(sock, nsd->tcp_mss); 1382 /* (StevensUNP p463), if TCP listening socket is blocking, then 1383 it may block in accept, even if select() says readable. */ 1384 (void)set_nonblock(sock); 1385 if(nsd->options->ip_freebind) 1386 (void)set_ip_freebind(sock); 1387 if(nsd->options->ip_transparent) 1388 (void)set_ip_transparent(sock); 1389 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1390 return -1; 1391 if(sock->fib != -1 && set_setfib(sock) == -1) 1392 return -1; 1393 1394 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1395 char buf[256]; 1396 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1397 log_msg(LOG_ERR, "can't bind tcp socket %s: %s", 1398 buf, strerror(errno)); 1399 return -1; 1400 } 1401 1402 #ifdef USE_TCP_FASTOPEN 1403 (void)set_tcp_fastopen(sock); 1404 #endif 1405 1406 if(listen(sock->s, TCP_BACKLOG) == -1) { 1407 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 1408 return -1; 1409 } 1410 1411 return 1; 1412 } 1413 1414 /* 1415 * Initialize the server, reuseport, create and bind the sockets. 1416 */ 1417 int 1418 server_init(struct nsd *nsd) 1419 { 1420 size_t i; 1421 int reuseport = 1; /* Determine if REUSEPORT works. */ 1422 1423 /* open server interface ports */ 1424 for(i = 0; i < nsd->ifs; i++) { 1425 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 || 1426 open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1) 1427 { 1428 return -1; 1429 } 1430 } 1431 1432 if(nsd->reuseport && reuseport) { 1433 size_t ifs = nsd->ifs * nsd->reuseport; 1434 1435 /* increase the size of the interface arrays, there are going 1436 * to be separate interface file descriptors for every server 1437 * instance */ 1438 region_remove_cleanup(nsd->region, free, nsd->udp); 1439 region_remove_cleanup(nsd->region, free, nsd->tcp); 1440 1441 nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp)); 1442 nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp)); 1443 region_add_cleanup(nsd->region, free, nsd->udp); 1444 region_add_cleanup(nsd->region, free, nsd->tcp); 1445 if(ifs > nsd->ifs) { 1446 memset(&nsd->udp[nsd->ifs], 0, 1447 (ifs-nsd->ifs)*sizeof(*nsd->udp)); 1448 memset(&nsd->tcp[nsd->ifs], 0, 1449 (ifs-nsd->ifs)*sizeof(*nsd->tcp)); 1450 } 1451 1452 for(i = nsd->ifs; i < ifs; i++) { 1453 nsd->udp[i] = nsd->udp[i%nsd->ifs]; 1454 nsd->udp[i].s = -1; 1455 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) { 1456 return -1; 1457 } 1458 /* Turn off REUSEPORT for TCP by copying the socket 1459 * file descriptor. 1460 * This means we should not close TCP used by 1461 * other servers in reuseport enabled mode, in 1462 * server_child(). 1463 */ 1464 nsd->tcp[i] = nsd->tcp[i%nsd->ifs]; 1465 } 1466 1467 nsd->ifs = ifs; 1468 } else { 1469 nsd->reuseport = 0; 1470 } 1471 1472 /* open server interface ports for verifiers */ 1473 for(i = 0; i < nsd->verify_ifs; i++) { 1474 if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 || 1475 open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1) 1476 { 1477 return -1; 1478 } 1479 } 1480 1481 return 0; 1482 } 1483 1484 /* 1485 * Prepare the server for take off. 1486 * 1487 */ 1488 int 1489 server_prepare(struct nsd *nsd) 1490 { 1491 #ifdef RATELIMIT 1492 /* set secret modifier for hashing (rate limits) */ 1493 #ifdef HAVE_GETRANDOM 1494 uint32_t v; 1495 if(getrandom(&v, sizeof(v), 0) == -1) { 1496 log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno)); 1497 exit(1); 1498 } 1499 hash_set_raninit(v); 1500 #elif defined(HAVE_ARC4RANDOM) 1501 hash_set_raninit(arc4random()); 1502 #else 1503 uint32_t v = getpid() ^ time(NULL); 1504 srandom((unsigned long)v); 1505 # ifdef HAVE_SSL 1506 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1507 hash_set_raninit(v); 1508 else 1509 # endif 1510 hash_set_raninit(random()); 1511 #endif 1512 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1513 nsd->options->rrl_ratelimit, 1514 nsd->options->rrl_whitelist_ratelimit, 1515 nsd->options->rrl_slip, 1516 nsd->options->rrl_ipv4_prefix_length, 1517 nsd->options->rrl_ipv6_prefix_length); 1518 #endif /* RATELIMIT */ 1519 1520 /* Open the database... */ 1521 if ((nsd->db = namedb_open(nsd->options)) == NULL) { 1522 log_msg(LOG_ERR, "unable to open the database: %s", strerror(errno)); 1523 unlink(nsd->task[0]->fname); 1524 unlink(nsd->task[1]->fname); 1525 #ifdef USE_ZONE_STATS 1526 unlink(nsd->zonestatfname[0]); 1527 unlink(nsd->zonestatfname[1]); 1528 #endif 1529 #ifdef BIND8_STATS 1530 server_stat_free(nsd); 1531 #endif 1532 xfrd_del_tempdir(nsd); 1533 return -1; 1534 } 1535 /* check if zone files can be read */ 1536 /* NULL for taskudb because we send soainfo in a moment, batched up, 1537 * for all zones */ 1538 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1539 zonestatid_tree_set(nsd); 1540 1541 compression_table_capacity = 0; 1542 initialize_dname_compression_tables(nsd); 1543 1544 #ifdef BIND8_STATS 1545 /* Initialize times... */ 1546 time(&nsd->st->boot); 1547 set_bind8_alarm(nsd); 1548 #endif /* BIND8_STATS */ 1549 1550 return 0; 1551 } 1552 1553 /* 1554 * Fork the required number of servers. 1555 */ 1556 static int 1557 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1558 int* xfrd_sock_p) 1559 { 1560 size_t i; 1561 1562 /* Start all child servers initially. */ 1563 for (i = 0; i < nsd->child_count; ++i) { 1564 nsd->children[i].pid = 0; 1565 } 1566 1567 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1568 } 1569 1570 static void 1571 server_close_socket(struct nsd_socket *sock) 1572 { 1573 if(sock->s != -1) { 1574 close(sock->s); 1575 sock->s = -1; 1576 } 1577 } 1578 1579 void 1580 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1581 { 1582 size_t i; 1583 1584 /* Close all the sockets... */ 1585 for (i = 0; i < n; ++i) { 1586 server_close_socket(&sockets[i]); 1587 } 1588 } 1589 1590 /* 1591 * Close the sockets, shutdown the server and exit. 1592 * Does not return. 1593 */ 1594 void 1595 server_shutdown(struct nsd *nsd) 1596 { 1597 size_t i; 1598 1599 server_close_all_sockets(nsd->udp, nsd->ifs); 1600 server_close_all_sockets(nsd->tcp, nsd->ifs); 1601 /* CHILD: close command channel to parent */ 1602 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1603 { 1604 close(nsd->this_child->parent_fd); 1605 nsd->this_child->parent_fd = -1; 1606 } 1607 /* SERVER: close command channels to children */ 1608 if(!nsd->this_child) 1609 { 1610 for(i=0; i < nsd->child_count; ++i) 1611 if(nsd->children[i].child_fd != -1) 1612 { 1613 close(nsd->children[i].child_fd); 1614 nsd->children[i].child_fd = -1; 1615 } 1616 } 1617 1618 tsig_finalize(); 1619 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1620 #ifdef HAVE_SSL 1621 if (nsd->tls_ctx) 1622 SSL_CTX_free(nsd->tls_ctx); 1623 #endif 1624 1625 #ifdef MEMCLEAN /* OS collects memory pages */ 1626 #ifdef RATELIMIT 1627 rrl_mmap_deinit_keep_mmap(); 1628 #endif 1629 #ifdef USE_DNSTAP 1630 dt_collector_destroy(nsd->dt_collector, nsd); 1631 #endif 1632 udb_base_free_keep_mmap(nsd->task[0]); 1633 udb_base_free_keep_mmap(nsd->task[1]); 1634 namedb_free_ixfr(nsd->db); 1635 namedb_close(nsd->db); 1636 nsd_options_destroy(nsd->options); 1637 region_destroy(nsd->region); 1638 #endif 1639 log_finalize(); 1640 exit(0); 1641 } 1642 1643 void 1644 server_prepare_xfrd(struct nsd* nsd) 1645 { 1646 char tmpfile[256]; 1647 /* create task mmaps */ 1648 nsd->mytask = 0; 1649 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1650 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1651 nsd->task[0] = task_file_create(tmpfile); 1652 if(!nsd->task[0]) { 1653 #ifdef USE_ZONE_STATS 1654 unlink(nsd->zonestatfname[0]); 1655 unlink(nsd->zonestatfname[1]); 1656 #endif 1657 #ifdef BIND8_STATS 1658 server_stat_free(nsd); 1659 #endif 1660 xfrd_del_tempdir(nsd); 1661 exit(1); 1662 } 1663 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1664 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1665 nsd->task[1] = task_file_create(tmpfile); 1666 if(!nsd->task[1]) { 1667 unlink(nsd->task[0]->fname); 1668 #ifdef USE_ZONE_STATS 1669 unlink(nsd->zonestatfname[0]); 1670 unlink(nsd->zonestatfname[1]); 1671 #endif 1672 #ifdef BIND8_STATS 1673 server_stat_free(nsd); 1674 #endif 1675 xfrd_del_tempdir(nsd); 1676 exit(1); 1677 } 1678 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1679 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1680 /* create xfrd listener structure */ 1681 nsd->xfrd_listener = region_alloc(nsd->region, 1682 sizeof(netio_handler_type)); 1683 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1684 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1685 nsd->xfrd_listener->fd = -1; 1686 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1687 nsd; 1688 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1689 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1690 } 1691 1692 1693 void 1694 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1695 { 1696 pid_t pid; 1697 int sockets[2] = {0,0}; 1698 struct ipc_handler_conn_data *data; 1699 1700 if(nsd->xfrd_listener->fd != -1) 1701 close(nsd->xfrd_listener->fd); 1702 if(del_db) { 1703 /* recreate taskdb that xfrd was using, it may be corrupt */ 1704 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1705 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1706 nsd->task[1-nsd->mytask]->fname = NULL; 1707 /* free alloc already, so udb does not shrink itself */ 1708 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1709 nsd->task[1-nsd->mytask]->alloc = NULL; 1710 udb_base_free(nsd->task[1-nsd->mytask]); 1711 /* create new file, overwrite the old one */ 1712 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1713 free(tmpfile); 1714 } 1715 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1716 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1717 return; 1718 } 1719 pid = fork(); 1720 switch (pid) { 1721 case -1: 1722 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1723 break; 1724 default: 1725 /* PARENT: close first socket, use second one */ 1726 close(sockets[0]); 1727 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1728 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1729 } 1730 if(del_db) xfrd_free_namedb(nsd); 1731 /* use other task than I am using, since if xfrd died and is 1732 * restarted, the reload is using nsd->mytask */ 1733 nsd->mytask = 1 - nsd->mytask; 1734 1735 #ifdef HAVE_SETPROCTITLE 1736 setproctitle("xfrd"); 1737 #endif 1738 #ifdef USE_LOG_PROCESS_ROLE 1739 log_set_process_role("xfrd"); 1740 #endif 1741 #ifdef HAVE_CPUSET_T 1742 if(nsd->use_cpu_affinity) { 1743 set_cpu_affinity(nsd->xfrd_cpuset); 1744 } 1745 #endif 1746 1747 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1748 /* ENOTREACH */ 1749 break; 1750 case 0: 1751 /* CHILD: close second socket, use first one */ 1752 close(sockets[1]); 1753 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1754 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1755 } 1756 nsd->xfrd_listener->fd = sockets[0]; 1757 #ifdef HAVE_SETPROCTITLE 1758 setproctitle("main"); 1759 #endif 1760 #ifdef USE_LOG_PROCESS_ROLE 1761 log_set_process_role("main"); 1762 #endif 1763 break; 1764 } 1765 /* server-parent only */ 1766 nsd->xfrd_listener->timeout = NULL; 1767 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1768 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1769 /* clear ongoing ipc reads */ 1770 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1771 data->conn->is_reading = 0; 1772 } 1773 1774 /** add all soainfo to taskdb */ 1775 static void 1776 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1777 { 1778 struct radnode* n; 1779 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1780 /* add all SOA INFO to mytask */ 1781 udb_ptr_init(&task_last, taskudb); 1782 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1783 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1784 } 1785 udb_ptr_unlink(&task_last, taskudb); 1786 } 1787 1788 void 1789 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1790 { 1791 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1792 * parent fills one taskdb with soas, xfrd fills other with expires. 1793 * then they exchange and process. 1794 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1795 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1796 * expire notifications can be sent back via a normal reload later 1797 * (xfrd will wait for current running reload to finish if any). 1798 */ 1799 sig_atomic_t cmd = 0; 1800 pid_t mypid; 1801 int xfrd_sock = nsd->xfrd_listener->fd; 1802 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1803 udb_ptr t; 1804 if(!shortsoa) { 1805 if(nsd->signal_hint_shutdown) { 1806 shutdown: 1807 log_msg(LOG_WARNING, "signal received, shutting down..."); 1808 server_close_all_sockets(nsd->udp, nsd->ifs); 1809 server_close_all_sockets(nsd->tcp, nsd->ifs); 1810 daemon_remote_close(nsd->rc); 1811 /* Unlink it if possible... */ 1812 unlinkpid(nsd->pidfile); 1813 unlink(nsd->task[0]->fname); 1814 unlink(nsd->task[1]->fname); 1815 #ifdef USE_ZONE_STATS 1816 unlink(nsd->zonestatfname[0]); 1817 unlink(nsd->zonestatfname[1]); 1818 #endif 1819 #ifdef BIND8_STATS 1820 server_stat_free(nsd); 1821 #endif 1822 server_shutdown(nsd); 1823 /* ENOTREACH */ 1824 exit(0); 1825 } 1826 } 1827 if(shortsoa) { 1828 /* put SOA in xfrd task because mytask may be in use */ 1829 taskudb = nsd->task[1-nsd->mytask]; 1830 } 1831 1832 add_all_soa_to_task(nsd, taskudb); 1833 if(!shortsoa) { 1834 /* wait for xfrd to signal task is ready, RELOAD signal */ 1835 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1836 cmd != NSD_RELOAD) { 1837 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1838 exit(1); 1839 } 1840 if(nsd->signal_hint_shutdown) { 1841 goto shutdown; 1842 } 1843 } 1844 /* give xfrd our task, signal it with RELOAD_DONE */ 1845 task_process_sync(taskudb); 1846 cmd = NSD_RELOAD_DONE; 1847 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1848 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1849 (int)nsd->pid, strerror(errno)); 1850 } 1851 mypid = getpid(); 1852 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1853 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1854 strerror(errno)); 1855 } 1856 1857 if(!shortsoa) { 1858 /* process the xfrd task works (expiry data) */ 1859 nsd->mytask = 1 - nsd->mytask; 1860 taskudb = nsd->task[nsd->mytask]; 1861 task_remap(taskudb); 1862 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1863 while(!udb_ptr_is_null(&t)) { 1864 task_process_expire(nsd->db, TASKLIST(&t)); 1865 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1866 } 1867 udb_ptr_unlink(&t, taskudb); 1868 task_clear(taskudb); 1869 1870 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1871 cmd = NSD_RELOAD_DONE; 1872 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1873 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1874 (int)nsd->pid, strerror(errno)); 1875 } 1876 } 1877 } 1878 1879 #ifdef HAVE_SSL 1880 static void 1881 log_crypto_from_err(const char* str, unsigned long err) 1882 { 1883 /* error:[error code]:[library name]:[function name]:[reason string] */ 1884 char buf[128]; 1885 unsigned long e; 1886 ERR_error_string_n(err, buf, sizeof(buf)); 1887 log_msg(LOG_ERR, "%s crypto %s", str, buf); 1888 while( (e=ERR_get_error()) ) { 1889 ERR_error_string_n(e, buf, sizeof(buf)); 1890 log_msg(LOG_ERR, "and additionally crypto %s", buf); 1891 } 1892 } 1893 1894 void 1895 log_crypto_err(const char* str) 1896 { 1897 log_crypto_from_err(str, ERR_get_error()); 1898 } 1899 1900 /** true if the ssl handshake error has to be squelched from the logs */ 1901 static int 1902 squelch_err_ssl_handshake(unsigned long err) 1903 { 1904 if(verbosity >= 3) 1905 return 0; /* only squelch on low verbosity */ 1906 /* this is very specific, we could filter on ERR_GET_REASON() 1907 * (the third element in ERR_PACK) */ 1908 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1909 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1910 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1911 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1912 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1913 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1914 #endif 1915 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1916 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1917 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1918 # ifdef SSL_R_VERSION_TOO_LOW 1919 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1920 # endif 1921 #endif 1922 ) 1923 return 1; 1924 return 0; 1925 } 1926 1927 void 1928 perform_openssl_init(void) 1929 { 1930 /* init SSL library */ 1931 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS 1932 ERR_load_crypto_strings(); 1933 #endif 1934 #if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS) 1935 ERR_load_SSL_strings(); 1936 #endif 1937 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO) 1938 OpenSSL_add_all_algorithms(); 1939 #else 1940 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS 1941 | OPENSSL_INIT_ADD_ALL_DIGESTS 1942 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); 1943 #endif 1944 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL) 1945 (void)SSL_library_init(); 1946 #else 1947 OPENSSL_init_ssl(0, NULL); 1948 #endif 1949 1950 if(!RAND_status()) { 1951 /* try to seed it */ 1952 unsigned char buf[256]; 1953 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid(); 1954 size_t i; 1955 v = seed; 1956 for(i=0; i<256/sizeof(v); i++) { 1957 memmove(buf+i*sizeof(v), &v, sizeof(v)); 1958 v = v*seed + (unsigned int)i; 1959 } 1960 RAND_seed(buf, 256); 1961 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time"); 1962 } 1963 } 1964 1965 static int 1966 get_ocsp(char *filename, unsigned char **ocsp) 1967 { 1968 BIO *bio; 1969 OCSP_RESPONSE *response; 1970 int len = -1; 1971 unsigned char *p, *buf; 1972 assert(filename); 1973 1974 if ((bio = BIO_new_file(filename, "r")) == NULL) { 1975 log_crypto_err("get_ocsp: BIO_new_file failed"); 1976 return -1; 1977 } 1978 1979 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) { 1980 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed"); 1981 BIO_free(bio); 1982 return -1; 1983 } 1984 1985 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) { 1986 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed"); 1987 OCSP_RESPONSE_free(response); 1988 BIO_free(bio); 1989 return -1; 1990 } 1991 1992 if ((buf = malloc((size_t) len)) == NULL) { 1993 log_msg(LOG_ERR, "get_ocsp: malloc failed"); 1994 OCSP_RESPONSE_free(response); 1995 BIO_free(bio); 1996 return -1; 1997 } 1998 1999 p = buf; 2000 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) { 2001 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed"); 2002 free(buf); 2003 OCSP_RESPONSE_free(response); 2004 BIO_free(bio); 2005 return -1; 2006 } 2007 2008 OCSP_RESPONSE_free(response); 2009 BIO_free(bio); 2010 2011 *ocsp = buf; 2012 return len; 2013 } 2014 2015 /* further setup ssl ctx after the keys are loaded */ 2016 static void 2017 listen_sslctx_setup_2(void* ctxt) 2018 { 2019 SSL_CTX* ctx = (SSL_CTX*)ctxt; 2020 (void)ctx; 2021 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO 2022 if(!SSL_CTX_set_ecdh_auto(ctx,1)) { 2023 /* ENOTREACH */ 2024 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE"); 2025 } 2026 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME) 2027 if(1) { 2028 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1); 2029 if (!ecdh) { 2030 log_crypto_err("could not find p256, not enabling ECDHE"); 2031 } else { 2032 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 2033 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 2034 } 2035 EC_KEY_free (ecdh); 2036 } 2037 } 2038 #endif 2039 } 2040 2041 static int 2042 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 2043 { 2044 if(ocspdata) { 2045 unsigned char *p; 2046 if ((p=malloc(ocspdata_len)) == NULL) { 2047 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 2048 return SSL_TLSEXT_ERR_NOACK; 2049 } 2050 memcpy(p, ocspdata, ocspdata_len); 2051 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 2052 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 2053 free(p); 2054 return SSL_TLSEXT_ERR_NOACK; 2055 } 2056 return SSL_TLSEXT_ERR_OK; 2057 } else { 2058 return SSL_TLSEXT_ERR_NOACK; 2059 } 2060 } 2061 2062 SSL_CTX* 2063 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 2064 { 2065 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 2066 if(!ctx) { 2067 log_crypto_err("could not SSL_CTX_new"); 2068 return NULL; 2069 } 2070 /* no SSLv2, SSLv3 because has defects */ 2071 #if SSL_OP_NO_SSLv2 != 0 2072 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 2073 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 2074 SSL_CTX_free(ctx); 2075 return NULL; 2076 } 2077 #endif 2078 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 2079 != SSL_OP_NO_SSLv3){ 2080 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 2081 SSL_CTX_free(ctx); 2082 return 0; 2083 } 2084 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 2085 /* if we have tls 1.1 disable 1.0 */ 2086 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 2087 != SSL_OP_NO_TLSv1){ 2088 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 2089 SSL_CTX_free(ctx); 2090 return 0; 2091 } 2092 #endif 2093 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 2094 /* if we have tls 1.2 disable 1.1 */ 2095 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 2096 != SSL_OP_NO_TLSv1_1){ 2097 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 2098 SSL_CTX_free(ctx); 2099 return 0; 2100 } 2101 #endif 2102 #if defined(SSL_OP_NO_RENEGOTIATION) 2103 /* disable client renegotiation */ 2104 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 2105 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 2106 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 2107 SSL_CTX_free(ctx); 2108 return 0; 2109 } 2110 #endif 2111 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 2112 /* if we detect system-wide crypto policies, use those */ 2113 if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) { 2114 /* if we have sha256, set the cipher list to have no known vulns */ 2115 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 2116 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 2117 } 2118 #endif 2119 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 2120 SSL_OP_CIPHER_SERVER_PREFERENCE) != 2121 SSL_OP_CIPHER_SERVER_PREFERENCE) { 2122 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 2123 SSL_CTX_free(ctx); 2124 return 0; 2125 } 2126 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 2127 SSL_CTX_set_security_level(ctx, 0); 2128 #endif 2129 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 2130 log_msg(LOG_ERR, "error for cert file: %s", pem); 2131 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 2132 SSL_CTX_free(ctx); 2133 return NULL; 2134 } 2135 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 2136 log_msg(LOG_ERR, "error for private key file: %s", key); 2137 log_crypto_err("Error in SSL_CTX use_PrivateKey_file"); 2138 SSL_CTX_free(ctx); 2139 return NULL; 2140 } 2141 if(!SSL_CTX_check_private_key(ctx)) { 2142 log_msg(LOG_ERR, "error for key file: %s", key); 2143 log_crypto_err("Error in SSL_CTX check_private_key"); 2144 SSL_CTX_free(ctx); 2145 return NULL; 2146 } 2147 listen_sslctx_setup_2(ctx); 2148 if(verifypem && verifypem[0]) { 2149 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 2150 log_crypto_err("Error in SSL_CTX verify locations"); 2151 SSL_CTX_free(ctx); 2152 return NULL; 2153 } 2154 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem)); 2155 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); 2156 } 2157 return ctx; 2158 } 2159 2160 SSL_CTX* 2161 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile) 2162 { 2163 char *key, *pem; 2164 SSL_CTX *ctx; 2165 2166 key = nsd->options->tls_service_key; 2167 pem = nsd->options->tls_service_pem; 2168 if(!key || key[0] == 0) { 2169 log_msg(LOG_ERR, "error: no tls-service-key file specified"); 2170 return NULL; 2171 } 2172 if(!pem || pem[0] == 0) { 2173 log_msg(LOG_ERR, "error: no tls-service-pem file specified"); 2174 return NULL; 2175 } 2176 2177 /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but 2178 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/ 2179 ctx = server_tls_ctx_setup(key, pem, verifypem); 2180 if(!ctx) { 2181 log_msg(LOG_ERR, "could not setup server TLS context"); 2182 return NULL; 2183 } 2184 if(ocspfile && ocspfile[0]) { 2185 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) { 2186 log_crypto_err("Error reading OCSPfile"); 2187 SSL_CTX_free(ctx); 2188 return NULL; 2189 } else { 2190 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile)); 2191 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) { 2192 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb"); 2193 SSL_CTX_free(ctx); 2194 return NULL; 2195 } 2196 } 2197 } 2198 return ctx; 2199 } 2200 2201 /* check if tcp_handler_accept_data created for TLS dedicated port */ 2202 int 2203 using_tls_port(struct sockaddr* addr, const char* tls_port) 2204 { 2205 in_port_t port = 0; 2206 2207 if (addr->sa_family == AF_INET) 2208 port = ((struct sockaddr_in*)addr)->sin_port; 2209 #ifndef HAVE_STRUCT_SOCKADDR_IN6 2210 else 2211 port = ((struct sockaddr_in6*)addr)->sin6_port; 2212 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */ 2213 if (atoi(tls_port) == ntohs(port)) 2214 return 1; 2215 2216 return 0; 2217 } 2218 #endif 2219 2220 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 2221 ssize_t 2222 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 2223 { 2224 uint8_t* buf = (uint8_t*) p; 2225 ssize_t total = 0; 2226 struct pollfd fd; 2227 memset(&fd, 0, sizeof(fd)); 2228 fd.fd = s; 2229 fd.events = POLLIN; 2230 2231 while( total < sz) { 2232 ssize_t ret; 2233 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 2234 if(ret == -1) { 2235 if(errno == EAGAIN) 2236 /* blocking read */ 2237 continue; 2238 if(errno == EINTR) { 2239 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2240 return -1; 2241 /* other signals can be handled later */ 2242 continue; 2243 } 2244 /* some error */ 2245 return -1; 2246 } 2247 if(ret == 0) { 2248 /* operation timed out */ 2249 return -2; 2250 } 2251 ret = read(s, buf+total, sz-total); 2252 if(ret == -1) { 2253 if(errno == EAGAIN) 2254 /* blocking read */ 2255 continue; 2256 if(errno == EINTR) { 2257 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2258 return -1; 2259 /* other signals can be handled later */ 2260 continue; 2261 } 2262 /* some error */ 2263 return -1; 2264 } 2265 if(ret == 0) { 2266 /* closed connection! */ 2267 return 0; 2268 } 2269 total += ret; 2270 } 2271 return total; 2272 } 2273 2274 static void 2275 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 2276 { 2277 sig_atomic_t cmd = NSD_QUIT_SYNC; 2278 udb_ptr t, next; 2279 udb_base* u = nsd->task[nsd->mytask]; 2280 udb_ptr_init(&next, u); 2281 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 2282 udb_base_set_userdata(u, 0); 2283 while(!udb_ptr_is_null(&t)) { 2284 /* store next in list so this one can be deleted or reused */ 2285 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 2286 udb_rptr_zero(&TASKLIST(&t)->next, u); 2287 2288 /* process task t */ 2289 /* append results for task t and update last_task */ 2290 task_process_in_reload(nsd, u, last_task, &t); 2291 2292 /* go to next */ 2293 udb_ptr_set_ptr(&t, u, &next); 2294 2295 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2296 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2297 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2298 if(cmd == NSD_QUIT) { 2299 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2300 /* unlink files of remainder of tasks */ 2301 while(!udb_ptr_is_null(&t)) { 2302 if(TASKLIST(&t)->task_type == task_apply_xfr) { 2303 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 2304 } 2305 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2306 } 2307 udb_ptr_unlink(&t, u); 2308 udb_ptr_unlink(&next, u); 2309 exit(0); 2310 } 2311 } 2312 2313 } 2314 udb_ptr_unlink(&t, u); 2315 udb_ptr_unlink(&next, u); 2316 } 2317 2318 void server_verify(struct nsd *nsd, int cmdsocket); 2319 2320 struct quit_sync_event_data { 2321 struct event_base* base; 2322 size_t read; 2323 union { 2324 uint8_t buf[sizeof(sig_atomic_t)]; 2325 sig_atomic_t cmd; 2326 } to_read; 2327 }; 2328 2329 static void server_reload_handle_sigchld(int sig, short event, 2330 void* ATTR_UNUSED(arg)) 2331 { 2332 assert(sig == SIGCHLD); 2333 assert(event & EV_SIGNAL); 2334 2335 /* reap the exited old-serve child(s) */ 2336 while(waitpid(-1, NULL, WNOHANG) > 0) { 2337 /* pass */ 2338 } 2339 } 2340 2341 static void server_reload_handle_quit_sync_ack(int cmdsocket, short event, 2342 void* arg) 2343 { 2344 struct quit_sync_event_data* cb_data = 2345 (struct quit_sync_event_data*)arg; 2346 ssize_t r; 2347 2348 if(event & EV_TIMEOUT) { 2349 sig_atomic_t cmd = NSD_QUIT_SYNC; 2350 2351 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2352 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) { 2353 log_msg(LOG_ERR, "problems sending command from " 2354 "reload to old-main: %s", strerror(errno)); 2355 } 2356 /* Wait for cmdsocket to become readable or for next timeout, 2357 * (this works because event is added EV_TIMEOUT|EV_PERSIST). 2358 */ 2359 return; 2360 } 2361 assert(event & EV_READ); 2362 assert(cb_data->read < sizeof(cb_data->to_read.cmd)); 2363 2364 r = read(cmdsocket, cb_data->to_read.buf + cb_data->read, 2365 sizeof(cb_data->to_read.cmd) - cb_data->read); 2366 if(r == 0) { 2367 log_msg(LOG_ERR, "reload: old-main quit during quit sync"); 2368 cb_data->to_read.cmd = NSD_RELOAD; 2369 2370 } else if(r == -1) { 2371 if(errno == EAGAIN || errno == EINTR) 2372 return; 2373 2374 log_msg(LOG_ERR, "reload: could not wait for parent to quit: " 2375 "%s", strerror(errno)); 2376 cb_data->to_read.cmd = NSD_RELOAD; 2377 2378 } else if (cb_data->read + r < sizeof(cb_data->to_read.cmd)) { 2379 /* More to read */ 2380 cb_data->read += r; 2381 return; 2382 2383 } else { 2384 assert(cb_data->read + r == sizeof(cb_data->to_read.cmd)); 2385 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d", 2386 (int)cb_data->to_read.cmd)); 2387 } 2388 /* Done */ 2389 event_base_loopexit(cb_data->base, NULL); 2390 } 2391 2392 /* 2393 * Reload the database, stop parent, re-fork children and continue. 2394 * as server_main. 2395 */ 2396 static void 2397 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2398 int cmdsocket) 2399 { 2400 pid_t mypid; 2401 sig_atomic_t cmd; 2402 udb_ptr last_task; 2403 struct sigaction old_sigchld, ign_sigchld; 2404 struct radnode* node; 2405 zone_type* zone; 2406 enum soainfo_hint hint; 2407 struct quit_sync_event_data cb_data; 2408 struct event signal_event, cmd_event; 2409 struct timeval reload_sync_timeout; 2410 2411 /* ignore SIGCHLD from the previous server_main that used this pid */ 2412 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2413 ign_sigchld.sa_handler = SIG_IGN; 2414 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2415 2416 #ifdef HAVE_CPUSET_T 2417 if(nsd->use_cpu_affinity) { 2418 set_cpu_affinity(nsd->cpuset); 2419 } 2420 #endif 2421 2422 /* see what tasks we got from xfrd */ 2423 task_remap(nsd->task[nsd->mytask]); 2424 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2425 reload_process_tasks(nsd, &last_task, cmdsocket); 2426 2427 #ifndef NDEBUG 2428 if(nsd_debug_level >= 1) 2429 region_log_stats(nsd->db->region); 2430 #endif /* NDEBUG */ 2431 initialize_dname_compression_tables(nsd); 2432 2433 #ifdef BIND8_STATS 2434 /* Restart dumping stats if required. */ 2435 time(&nsd->st->boot); 2436 set_bind8_alarm(nsd); 2437 /* Switch to a different set of stat array for new server processes, 2438 * because they can briefly coexist with the old processes. They 2439 * have their own stat structure. */ 2440 nsd->stat_current = (nsd->stat_current==0?1:0); 2441 #endif 2442 #ifdef USE_ZONE_STATS 2443 server_zonestat_realloc(nsd); /* realloc for new children */ 2444 server_zonestat_switch(nsd); 2445 #endif 2446 2447 if(nsd->options->verify_enable) { 2448 #ifdef RATELIMIT 2449 /* allocate resources for rate limiting. use a slot that is guaranteed 2450 not mapped to a file so no persistent data is overwritten */ 2451 rrl_init(nsd->child_count + 1); 2452 #endif 2453 2454 /* spin-up server and execute verifiers for each zone */ 2455 server_verify(nsd, cmdsocket); 2456 #ifdef RATELIMIT 2457 /* deallocate rate limiting resources */ 2458 rrl_deinit(nsd->child_count + 1); 2459 #endif 2460 } 2461 2462 for(node = radix_first(nsd->db->zonetree); 2463 node != NULL; 2464 node = radix_next(node)) 2465 { 2466 zone = (zone_type *)node->elem; 2467 if(zone->is_updated) { 2468 if(zone->is_bad) { 2469 nsd->mode = NSD_RELOAD_FAILED; 2470 hint = soainfo_bad; 2471 } else { 2472 hint = soainfo_ok; 2473 } 2474 /* update(s), verified or not, possibly with subsequent 2475 skipped update(s). skipped update(s) are picked up 2476 by failed update check in xfrd */ 2477 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2478 zone, hint); 2479 } else if(zone->is_skipped) { 2480 /* corrupt or inconsistent update without preceding 2481 update(s), communicate soainfo_gone */ 2482 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2483 zone, soainfo_gone); 2484 } 2485 zone->is_updated = 0; 2486 zone->is_skipped = 0; 2487 } 2488 2489 if(nsd->mode == NSD_RELOAD_FAILED) { 2490 exit(NSD_RELOAD_FAILED); 2491 } 2492 2493 /* listen for the signals of failed children again */ 2494 sigaction(SIGCHLD, &old_sigchld, NULL); 2495 #ifdef USE_DNSTAP 2496 if (nsd->dt_collector) { 2497 int *swap_fd_send; 2498 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes")); 2499 /* Swap fd_send with fd_swap so old serve child and new serve 2500 * childs will not write to the same pipe ends simultaneously */ 2501 swap_fd_send = nsd->dt_collector_fd_send; 2502 nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap; 2503 nsd->dt_collector_fd_swap = swap_fd_send; 2504 2505 } 2506 #endif 2507 /* Start new child processes */ 2508 if (server_start_children(nsd, server_region, netio, &nsd-> 2509 xfrd_listener->fd) != 0) { 2510 send_children_quit(nsd); 2511 exit(1); 2512 } 2513 2514 /* if the old-main has quit, we must quit too, poll the fd for cmds */ 2515 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2516 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2517 if(cmd == NSD_QUIT) { 2518 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2519 send_children_quit(nsd); 2520 exit(0); 2521 } 2522 } 2523 2524 /* Send quit command to old-main: blocking, wait for receipt. 2525 * The old-main process asks the old-serve processes to quit, however 2526 * if a reload succeeded before, this process is the parent of the 2527 * old-serve processes, so we need to reap the children for it. 2528 */ 2529 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2530 cmd = NSD_QUIT_SYNC; 2531 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 2532 { 2533 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 2534 strerror(errno)); 2535 } 2536 2537 reload_sync_timeout.tv_sec = RELOAD_SYNC_TIMEOUT; 2538 reload_sync_timeout.tv_usec = 0; 2539 2540 cb_data.base = nsd_child_event_base(); 2541 cb_data.to_read.cmd = cmd; 2542 cb_data.read = 0; 2543 2544 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, 2545 server_reload_handle_sigchld, NULL); 2546 if(event_base_set(cb_data.base, &signal_event) != 0 2547 || event_add(&signal_event, NULL) != 0) { 2548 log_msg(LOG_ERR, "NSD quit sync: could not add signal event"); 2549 } 2550 2551 event_set(&cmd_event, cmdsocket, EV_READ|EV_TIMEOUT|EV_PERSIST, 2552 server_reload_handle_quit_sync_ack, &cb_data); 2553 if(event_base_set(cb_data.base, &cmd_event) != 0 2554 || event_add(&cmd_event, &reload_sync_timeout) != 0) { 2555 log_msg(LOG_ERR, "NSD quit sync: could not add command event"); 2556 } 2557 2558 /* short-lived main loop */ 2559 event_base_dispatch(cb_data.base); 2560 2561 /* remove command and signal event handlers */ 2562 event_del(&cmd_event); 2563 event_del(&signal_event); 2564 event_base_free(cb_data.base); 2565 cmd = cb_data.to_read.cmd; 2566 2567 if(cmd == NSD_QUIT) { 2568 /* small race condition possible here, parent got quit cmd. */ 2569 send_children_quit(nsd); 2570 exit(1); 2571 } 2572 assert(cmd == NSD_RELOAD); 2573 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2574 task_process_sync(nsd->task[nsd->mytask]); 2575 #ifdef USE_ZONE_STATS 2576 server_zonestat_realloc(nsd); /* realloc for next children */ 2577 #endif 2578 2579 /* send soainfo to the xfrd process, signal it that reload is done, 2580 * it picks up the taskudb */ 2581 cmd = NSD_RELOAD_DONE; 2582 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2583 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2584 strerror(errno)); 2585 } 2586 mypid = getpid(); 2587 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2588 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2589 strerror(errno)); 2590 } 2591 2592 /* try to reopen file */ 2593 if (nsd->file_rotation_ok) 2594 log_reopen(nsd->log_filename, 1); 2595 /* exit reload, continue as new server_main */ 2596 } 2597 2598 /* 2599 * Get the mode depending on the signal hints that have been received. 2600 * Multiple signal hints can be received and will be handled in turn. 2601 */ 2602 static sig_atomic_t 2603 server_signal_mode(struct nsd *nsd) 2604 { 2605 if(nsd->signal_hint_quit) { 2606 nsd->signal_hint_quit = 0; 2607 return NSD_QUIT; 2608 } 2609 else if(nsd->signal_hint_shutdown) { 2610 nsd->signal_hint_shutdown = 0; 2611 return NSD_SHUTDOWN; 2612 } 2613 else if(nsd->signal_hint_child) { 2614 nsd->signal_hint_child = 0; 2615 return NSD_REAP_CHILDREN; 2616 } 2617 else if(nsd->signal_hint_reload) { 2618 nsd->signal_hint_reload = 0; 2619 return NSD_RELOAD; 2620 } 2621 else if(nsd->signal_hint_reload_hup) { 2622 nsd->signal_hint_reload_hup = 0; 2623 return NSD_RELOAD_REQ; 2624 } 2625 else if(nsd->signal_hint_stats) { 2626 nsd->signal_hint_stats = 0; 2627 #ifdef BIND8_STATS 2628 set_bind8_alarm(nsd); 2629 #endif 2630 return NSD_STATS; 2631 } 2632 else if(nsd->signal_hint_statsusr) { 2633 nsd->signal_hint_statsusr = 0; 2634 return NSD_STATS; 2635 } 2636 return NSD_RUN; 2637 } 2638 2639 /* 2640 * The main server simply waits for signals and child processes to 2641 * terminate. Child processes are restarted as necessary. 2642 */ 2643 void 2644 server_main(struct nsd *nsd) 2645 { 2646 region_type *server_region = region_create(xalloc, free); 2647 netio_type *netio = netio_create(server_region); 2648 netio_handler_type reload_listener; 2649 int reload_sockets[2] = {-1, -1}; 2650 struct timespec timeout_spec; 2651 int status; 2652 pid_t child_pid; 2653 pid_t reload_pid = -1; 2654 sig_atomic_t mode; 2655 2656 /* Ensure we are the main process */ 2657 assert(nsd->server_kind == NSD_SERVER_MAIN); 2658 2659 /* Add listener for the XFRD process */ 2660 netio_add_handler(netio, nsd->xfrd_listener); 2661 2662 #ifdef BIND8_STATS 2663 nsd->st = &nsd->stat_map[0]; 2664 nsd->st->db_disk = 0; 2665 nsd->st->db_mem = region_get_mem(nsd->db->region); 2666 #endif 2667 2668 /* Start the child processes that handle incoming queries */ 2669 if (server_start_children(nsd, server_region, netio, 2670 &nsd->xfrd_listener->fd) != 0) { 2671 send_children_quit(nsd); 2672 exit(1); 2673 } 2674 reload_listener.fd = -1; 2675 2676 /* This_child MUST be 0, because this is the parent process */ 2677 assert(nsd->this_child == 0); 2678 2679 /* Run the server until we get a shutdown signal */ 2680 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2681 /* Did we receive a signal that changes our mode? */ 2682 if(mode == NSD_RUN) { 2683 nsd->mode = mode = server_signal_mode(nsd); 2684 } 2685 2686 switch (mode) { 2687 case NSD_RUN: 2688 /* see if any child processes terminated */ 2689 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 2690 int is_child = delete_child_pid(nsd, child_pid); 2691 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 2692 if(nsd->children[is_child].child_fd == -1) 2693 nsd->children[is_child].has_exited = 1; 2694 parent_check_all_children_exited(nsd); 2695 } else if(is_child != -1) { 2696 log_msg(LOG_WARNING, 2697 "server %d died unexpectedly with status %d, restarting", 2698 (int) child_pid, status); 2699 restart_child_servers(nsd, server_region, netio, 2700 &nsd->xfrd_listener->fd); 2701 } else if (child_pid == reload_pid) { 2702 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2703 pid_t mypid; 2704 log_msg(LOG_WARNING, 2705 "Reload process %d failed with status %d, continuing with old database", 2706 (int) child_pid, status); 2707 #ifdef HAVE_SETPROCTITLE 2708 setproctitle("main"); 2709 #endif 2710 #ifdef USE_LOG_PROCESS_ROLE 2711 log_set_process_role("main"); 2712 #endif 2713 reload_pid = -1; 2714 if(reload_listener.fd != -1) close(reload_listener.fd); 2715 netio_remove_handler(netio, &reload_listener); 2716 reload_listener.fd = -1; 2717 reload_listener.event_types = NETIO_EVENT_NONE; 2718 task_process_sync(nsd->task[nsd->mytask]); 2719 /* inform xfrd reload attempt ended */ 2720 if(!write_socket(nsd->xfrd_listener->fd, 2721 &cmd, sizeof(cmd))) { 2722 log_msg(LOG_ERR, "problems " 2723 "sending SOAEND to xfrd: %s", 2724 strerror(errno)); 2725 } 2726 mypid = getpid(); 2727 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2728 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2729 strerror(errno)); 2730 } 2731 #ifdef USE_DNSTAP 2732 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) { 2733 log_msg(LOG_WARNING, 2734 "dnstap-collector %d terminated with status %d", 2735 (int) child_pid, status); 2736 if(nsd->dt_collector) { 2737 dt_collector_close(nsd->dt_collector, nsd); 2738 dt_collector_destroy(nsd->dt_collector, nsd); 2739 nsd->dt_collector = NULL; 2740 } 2741 /* Only respawn a crashed (or exited) 2742 * dnstap-collector when not reloading, 2743 * to not induce a reload during a 2744 * reload (which would seriously 2745 * disrupt nsd procedures and lead to 2746 * unpredictable results)! 2747 * 2748 * This will *leave* a dnstap-collector 2749 * process terminated, but because 2750 * signalling of the reload process to 2751 * the main process to respawn in this 2752 * situation will be cumbersome, and 2753 * because this situation is so 2754 * specific (and therefore hopefully 2755 * extremely rare or non-existing at 2756 * all), plus the fact that we are left 2757 * with a perfectly function NSD 2758 * (besides not logging dnstap 2759 * messages), I consider it acceptable 2760 * to leave this unresolved. 2761 */ 2762 if(reload_pid == -1 && nsd->options->dnstap_enable) { 2763 nsd->dt_collector = dt_collector_create(nsd); 2764 dt_collector_start(nsd->dt_collector, nsd); 2765 nsd->mode = NSD_RELOAD_REQ; 2766 } 2767 #endif 2768 } else if(status != 0) { 2769 /* check for status, because we get 2770 * the old-servermain because reload 2771 * is the process-parent of old-main, 2772 * and we get older server-processes 2773 * that are exiting after a reload */ 2774 log_msg(LOG_WARNING, 2775 "process %d terminated with status %d", 2776 (int) child_pid, status); 2777 } 2778 } 2779 if (child_pid == -1) { 2780 if (errno == EINTR) { 2781 continue; 2782 } 2783 if (errno != ECHILD) 2784 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2785 } 2786 if (nsd->mode != NSD_RUN) 2787 break; 2788 2789 /* timeout to collect processes. In case no sigchild happens. */ 2790 timeout_spec.tv_sec = 1; 2791 timeout_spec.tv_nsec = 0; 2792 2793 /* listen on ports, timeout for collecting terminated children */ 2794 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2795 if (errno != EINTR) { 2796 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2797 } 2798 } 2799 if(nsd->restart_children) { 2800 restart_child_servers(nsd, server_region, netio, 2801 &nsd->xfrd_listener->fd); 2802 nsd->restart_children = 0; 2803 } 2804 if(nsd->reload_failed) { 2805 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2806 pid_t mypid; 2807 nsd->reload_failed = 0; 2808 log_msg(LOG_WARNING, 2809 "Reload process %d failed, continuing with old database", 2810 (int) reload_pid); 2811 #ifdef HAVE_SETPROCTITLE 2812 setproctitle("main"); 2813 #endif 2814 #ifdef USE_LOG_PROCESS_ROLE 2815 log_set_process_role("main"); 2816 #endif 2817 reload_pid = -1; 2818 if(reload_listener.fd != -1) close(reload_listener.fd); 2819 netio_remove_handler(netio, &reload_listener); 2820 reload_listener.fd = -1; 2821 reload_listener.event_types = NETIO_EVENT_NONE; 2822 task_process_sync(nsd->task[nsd->mytask]); 2823 /* inform xfrd reload attempt ended */ 2824 if(!write_socket(nsd->xfrd_listener->fd, 2825 &cmd, sizeof(cmd))) { 2826 log_msg(LOG_ERR, "problems " 2827 "sending SOAEND to xfrd: %s", 2828 strerror(errno)); 2829 } 2830 mypid = getpid(); 2831 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2832 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2833 strerror(errno)); 2834 } 2835 } 2836 2837 break; 2838 case NSD_RELOAD_REQ: { 2839 sig_atomic_t cmd = NSD_RELOAD_REQ; 2840 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2841 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2842 "main: ipc send reload_req to xfrd")); 2843 if(!write_socket(nsd->xfrd_listener->fd, 2844 &cmd, sizeof(cmd))) { 2845 log_msg(LOG_ERR, "server_main: could not send " 2846 "reload_req to xfrd: %s", strerror(errno)); 2847 } 2848 nsd->mode = NSD_RUN; 2849 } break; 2850 case NSD_RELOAD: 2851 /* Continue to run nsd after reload */ 2852 nsd->mode = NSD_RUN; 2853 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2854 if (reload_pid != -1) { 2855 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2856 (int) reload_pid); 2857 break; 2858 } 2859 2860 /* switch the mytask to keep track of who owns task*/ 2861 nsd->mytask = 1 - nsd->mytask; 2862 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2863 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2864 reload_pid = -1; 2865 break; 2866 } 2867 2868 /* Do actual reload */ 2869 reload_pid = fork(); 2870 switch (reload_pid) { 2871 case -1: 2872 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2873 break; 2874 default: 2875 /* PARENT */ 2876 close(reload_sockets[0]); 2877 #ifdef HAVE_SETPROCTITLE 2878 setproctitle("load"); 2879 #endif 2880 #ifdef USE_LOG_PROCESS_ROLE 2881 log_set_process_role("load"); 2882 #endif 2883 server_reload(nsd, server_region, netio, 2884 reload_sockets[1]); 2885 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2886 #ifdef HAVE_SETPROCTITLE 2887 setproctitle("main"); 2888 #endif 2889 #ifdef USE_LOG_PROCESS_ROLE 2890 log_set_process_role("main"); 2891 #endif 2892 close(reload_sockets[1]); 2893 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2894 /* drop stale xfrd ipc data */ 2895 ((struct ipc_handler_conn_data*)nsd-> 2896 xfrd_listener->user_data) 2897 ->conn->is_reading = 0; 2898 reload_pid = -1; 2899 reload_listener.fd = -1; 2900 reload_listener.event_types = NETIO_EVENT_NONE; 2901 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2902 break; 2903 case 0: 2904 /* CHILD */ 2905 /* server_main keep running until NSD_QUIT_SYNC 2906 * received from reload. */ 2907 close(reload_sockets[1]); 2908 #ifdef HAVE_SETPROCTITLE 2909 setproctitle("old-main"); 2910 #endif 2911 #ifdef USE_LOG_PROCESS_ROLE 2912 log_set_process_role("old-main"); 2913 #endif 2914 reload_listener.fd = reload_sockets[0]; 2915 reload_listener.timeout = NULL; 2916 reload_listener.user_data = nsd; 2917 reload_listener.event_types = NETIO_EVENT_READ; 2918 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2919 netio_add_handler(netio, &reload_listener); 2920 reload_pid = getppid(); 2921 break; 2922 } 2923 break; 2924 case NSD_QUIT_SYNC: 2925 /* synchronisation of xfrd, parent and reload */ 2926 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2927 sig_atomic_t cmd = NSD_RELOAD; 2928 /* stop xfrd ipc writes in progress */ 2929 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2930 "main: ipc send indication reload")); 2931 if(!write_socket(nsd->xfrd_listener->fd, 2932 &cmd, sizeof(cmd))) { 2933 log_msg(LOG_ERR, "server_main: could not send reload " 2934 "indication to xfrd: %s", strerror(errno)); 2935 } 2936 /* wait for ACK from xfrd */ 2937 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2938 nsd->quit_sync_done = 1; 2939 } 2940 nsd->mode = NSD_RUN; 2941 break; 2942 case NSD_QUIT: 2943 /* silent shutdown during reload */ 2944 if(reload_listener.fd != -1) { 2945 /* acknowledge the quit, to sync reload that we will really quit now */ 2946 sig_atomic_t cmd = NSD_RELOAD; 2947 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2948 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2949 log_msg(LOG_ERR, "server_main: " 2950 "could not ack quit: %s", strerror(errno)); 2951 } 2952 close(reload_listener.fd); 2953 } 2954 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2955 /* only quit children after xfrd has acked */ 2956 send_children_quit(nsd); 2957 2958 #ifdef MEMCLEAN /* OS collects memory pages */ 2959 region_destroy(server_region); 2960 #endif 2961 server_shutdown(nsd); 2962 2963 /* ENOTREACH */ 2964 break; 2965 case NSD_SHUTDOWN: 2966 break; 2967 case NSD_REAP_CHILDREN: 2968 /* continue; wait for child in run loop */ 2969 nsd->mode = NSD_RUN; 2970 break; 2971 case NSD_STATS: 2972 #ifdef BIND8_STATS 2973 set_children_stats(nsd); 2974 #endif 2975 nsd->mode = NSD_RUN; 2976 break; 2977 default: 2978 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2979 nsd->mode = NSD_RUN; 2980 break; 2981 } 2982 } 2983 log_msg(LOG_WARNING, "signal received, shutting down..."); 2984 2985 /* close opened ports to avoid race with restart of nsd */ 2986 server_close_all_sockets(nsd->udp, nsd->ifs); 2987 server_close_all_sockets(nsd->tcp, nsd->ifs); 2988 daemon_remote_close(nsd->rc); 2989 send_children_quit_and_wait(nsd); 2990 2991 /* Unlink it if possible... */ 2992 unlinkpid(nsd->pidfile); 2993 unlink(nsd->task[0]->fname); 2994 unlink(nsd->task[1]->fname); 2995 #ifdef USE_ZONE_STATS 2996 unlink(nsd->zonestatfname[0]); 2997 unlink(nsd->zonestatfname[1]); 2998 #endif 2999 #ifdef BIND8_STATS 3000 server_stat_free(nsd); 3001 #endif 3002 #ifdef USE_DNSTAP 3003 dt_collector_close(nsd->dt_collector, nsd); 3004 #endif 3005 3006 if(reload_listener.fd != -1) { 3007 sig_atomic_t cmd = NSD_QUIT; 3008 DEBUG(DEBUG_IPC,1, (LOG_INFO, 3009 "main: ipc send quit to reload-process")); 3010 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 3011 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 3012 strerror(errno)); 3013 } 3014 fsync(reload_listener.fd); 3015 close(reload_listener.fd); 3016 /* wait for reload to finish processing */ 3017 while(1) { 3018 if(waitpid(reload_pid, NULL, 0) == -1) { 3019 if(errno == EINTR) continue; 3020 if(errno == ECHILD) break; 3021 log_msg(LOG_ERR, "waitpid(reload %d): %s", 3022 (int)reload_pid, strerror(errno)); 3023 } 3024 break; 3025 } 3026 } 3027 if(nsd->xfrd_listener->fd != -1) { 3028 /* complete quit, stop xfrd */ 3029 sig_atomic_t cmd = NSD_QUIT; 3030 DEBUG(DEBUG_IPC,1, (LOG_INFO, 3031 "main: ipc send quit to xfrd")); 3032 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 3033 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 3034 strerror(errno)); 3035 } 3036 fsync(nsd->xfrd_listener->fd); 3037 close(nsd->xfrd_listener->fd); 3038 (void)kill(nsd->pid, SIGTERM); 3039 } 3040 3041 #ifdef MEMCLEAN /* OS collects memory pages */ 3042 region_destroy(server_region); 3043 #endif 3044 server_shutdown(nsd); 3045 } 3046 3047 static query_state_type 3048 server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p) 3049 { 3050 return query_process(query, nsd, now_p); 3051 } 3052 3053 static query_state_type 3054 server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p) 3055 { 3056 #ifdef RATELIMIT 3057 if(query_process(query, nsd, now_p) != QUERY_DISCARDED) { 3058 if(query->edns.cookie_status != COOKIE_VALID 3059 && query->edns.cookie_status != COOKIE_VALID_REUSE 3060 && rrl_process_query(query)) 3061 return rrl_slip(query); 3062 else return QUERY_PROCESSED; 3063 } 3064 return QUERY_DISCARDED; 3065 #else 3066 return query_process(query, nsd, now_p); 3067 #endif 3068 } 3069 3070 const char* 3071 nsd_event_vs(void) 3072 { 3073 #ifdef USE_MINI_EVENT 3074 return ""; 3075 #else 3076 return event_get_version(); 3077 #endif 3078 } 3079 3080 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS) 3081 static const char* ub_ev_backend2str(int b) 3082 { 3083 switch(b) { 3084 case EVBACKEND_SELECT: return "select"; 3085 case EVBACKEND_POLL: return "poll"; 3086 case EVBACKEND_EPOLL: return "epoll"; 3087 case EVBACKEND_KQUEUE: return "kqueue"; 3088 case EVBACKEND_DEVPOLL: return "devpoll"; 3089 case EVBACKEND_PORT: return "evport"; 3090 } 3091 return "unknown"; 3092 } 3093 #endif 3094 3095 const char* 3096 nsd_event_method(void) 3097 { 3098 #ifdef USE_MINI_EVENT 3099 return "select"; 3100 #else 3101 struct event_base* b = nsd_child_event_base(); 3102 const char* m; 3103 # ifdef EV_FEATURE_BACKENDS 3104 m = ub_ev_backend2str(ev_backend((struct ev_loop*)b)); 3105 # elif defined(HAVE_EVENT_BASE_GET_METHOD) 3106 m = event_base_get_method(b); 3107 # else 3108 m = "?"; 3109 # endif 3110 # ifdef MEMCLEAN 3111 event_base_free(b); 3112 # endif 3113 return m; 3114 #endif 3115 } 3116 3117 struct event_base* 3118 nsd_child_event_base(void) 3119 { 3120 struct event_base* base; 3121 #ifdef USE_MINI_EVENT 3122 static time_t secs; 3123 static struct timeval now; 3124 base = event_init(&secs, &now); 3125 #else 3126 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 3127 /* libev */ 3128 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 3129 # else 3130 /* libevent */ 3131 # ifdef HAVE_EVENT_BASE_NEW 3132 base = event_base_new(); 3133 # else 3134 base = event_init(); 3135 # endif 3136 # endif 3137 #endif 3138 return base; 3139 } 3140 3141 static void 3142 add_udp_handler( 3143 struct nsd *nsd, 3144 struct nsd_socket *sock, 3145 struct udp_handler_data *data) 3146 { 3147 struct event *handler = &data->event; 3148 3149 data->nsd = nsd; 3150 data->socket = sock; 3151 3152 if(nsd->options->proxy_protocol_port && 3153 sockaddr_uses_proxy_protocol_port(nsd->options, 3154 (struct sockaddr *)&sock->addr.ai_addr)) { 3155 data->pp2_enabled = 1; 3156 } 3157 3158 memset(handler, 0, sizeof(*handler)); 3159 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 3160 if(event_base_set(nsd->event_base, handler) != 0) 3161 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 3162 if(event_add(handler, NULL) != 0) 3163 log_msg(LOG_ERR, "nsd udp: event_add failed"); 3164 } 3165 3166 void 3167 add_tcp_handler( 3168 struct nsd *nsd, 3169 struct nsd_socket *sock, 3170 struct tcp_accept_handler_data *data) 3171 { 3172 struct event *handler = &data->event; 3173 3174 data->nsd = nsd; 3175 data->socket = sock; 3176 3177 if(nsd->options->proxy_protocol_port && 3178 sockaddr_uses_proxy_protocol_port(nsd->options, 3179 (struct sockaddr *)&sock->addr.ai_addr)) { 3180 data->pp2_enabled = 1; 3181 } 3182 3183 #ifdef HAVE_SSL 3184 if (nsd->tls_ctx && 3185 nsd->options->tls_port && 3186 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 3187 { 3188 data->tls_accept = 1; 3189 if(verbosity >= 2) { 3190 char buf[48]; 3191 addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 3192 VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 3193 } 3194 } else { 3195 data->tls_accept = 0; 3196 } 3197 #endif 3198 3199 memset(handler, 0, sizeof(*handler)); 3200 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 3201 if(event_base_set(nsd->event_base, handler) != 0) 3202 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 3203 if(event_add(handler, NULL) != 0) 3204 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 3205 data->event_added = 1; 3206 } 3207 3208 /* 3209 * Serve DNS request to verifiers (short-lived) 3210 */ 3211 void server_verify(struct nsd *nsd, int cmdsocket) 3212 { 3213 size_t size = 0; 3214 struct event cmd_event, signal_event, exit_event; 3215 struct zone *zone; 3216 3217 assert(nsd != NULL); 3218 3219 zone = verify_next_zone(nsd, NULL); 3220 if(zone == NULL) 3221 return; 3222 3223 nsd->server_region = region_create(xalloc, free); 3224 nsd->event_base = nsd_child_event_base(); 3225 3226 nsd->next_zone_to_verify = zone; 3227 nsd->verifier_count = 0; 3228 nsd->verifier_limit = nsd->options->verifier_count; 3229 size = sizeof(struct verifier) * nsd->verifier_limit; 3230 if(pipe(nsd->verifier_pipe) == -1) { 3231 log_msg(LOG_ERR, "verify: could not create pipe: %s", 3232 strerror(errno)); 3233 goto fail_pipe; 3234 } 3235 fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC); 3236 fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC); 3237 nsd->verifiers = region_alloc_zero(nsd->server_region, size); 3238 3239 for(size_t i = 0; i < nsd->verifier_limit; i++) { 3240 nsd->verifiers[i].nsd = nsd; 3241 nsd->verifiers[i].zone = NULL; 3242 nsd->verifiers[i].pid = -1; 3243 nsd->verifiers[i].output_stream.fd = -1; 3244 nsd->verifiers[i].output_stream.priority = LOG_INFO; 3245 nsd->verifiers[i].error_stream.fd = -1; 3246 nsd->verifiers[i].error_stream.priority = LOG_ERR; 3247 } 3248 3249 event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd); 3250 if(event_base_set(nsd->event_base, &cmd_event) != 0 || 3251 event_add(&cmd_event, NULL) != 0) 3252 { 3253 log_msg(LOG_ERR, "verify: could not add command event"); 3254 goto fail; 3255 } 3256 3257 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd); 3258 if(event_base_set(nsd->event_base, &signal_event) != 0 || 3259 signal_add(&signal_event, NULL) != 0) 3260 { 3261 log_msg(LOG_ERR, "verify: could not add signal event"); 3262 goto fail; 3263 } 3264 3265 event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd); 3266 if(event_base_set(nsd->event_base, &exit_event) != 0 || 3267 event_add(&exit_event, NULL) != 0) 3268 { 3269 log_msg(LOG_ERR, "verify: could not add exit event"); 3270 goto fail; 3271 } 3272 3273 memset(msgs, 0, sizeof(msgs)); 3274 for (int i = 0; i < NUM_RECV_PER_SELECT; i++) { 3275 queries[i] = query_create(nsd->server_region, 3276 compressed_dname_offsets, 3277 compression_table_size, compressed_dnames); 3278 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3279 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3280 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3281 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3282 msgs[i].msg_hdr.msg_iovlen = 1; 3283 msgs[i].msg_hdr.msg_name = &queries[i]->remote_addr; 3284 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen; 3285 } 3286 3287 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3288 struct udp_handler_data *data; 3289 data = region_alloc_zero( 3290 nsd->server_region, sizeof(*data)); 3291 add_udp_handler(nsd, &nsd->verify_udp[i], data); 3292 } 3293 3294 tcp_accept_handler_count = nsd->verify_ifs; 3295 tcp_accept_handlers = region_alloc_array(nsd->server_region, 3296 nsd->verify_ifs, sizeof(*tcp_accept_handlers)); 3297 3298 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3299 struct tcp_accept_handler_data *data; 3300 data = &tcp_accept_handlers[i]; 3301 memset(data, 0, sizeof(*data)); 3302 add_tcp_handler(nsd, &nsd->verify_tcp[i], data); 3303 } 3304 3305 while(nsd->next_zone_to_verify != NULL && 3306 nsd->verifier_count < nsd->verifier_limit) 3307 { 3308 verify_zone(nsd, nsd->next_zone_to_verify); 3309 nsd->next_zone_to_verify 3310 = verify_next_zone(nsd, nsd->next_zone_to_verify); 3311 } 3312 3313 /* short-lived main loop */ 3314 event_base_dispatch(nsd->event_base); 3315 3316 /* remove command and exit event handlers */ 3317 event_del(&exit_event); 3318 event_del(&signal_event); 3319 event_del(&cmd_event); 3320 3321 assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT); 3322 assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT); 3323 fail: 3324 close(nsd->verifier_pipe[0]); 3325 close(nsd->verifier_pipe[1]); 3326 fail_pipe: 3327 event_base_free(nsd->event_base); 3328 region_destroy(nsd->server_region); 3329 3330 nsd->event_base = NULL; 3331 nsd->server_region = NULL; 3332 nsd->verifier_limit = 0; 3333 nsd->verifier_pipe[0] = -1; 3334 nsd->verifier_pipe[1] = -1; 3335 nsd->verifiers = NULL; 3336 } 3337 3338 /* 3339 * Serve DNS requests. 3340 */ 3341 void 3342 server_child(struct nsd *nsd) 3343 { 3344 size_t i, from, numifs; 3345 region_type *server_region = region_create(xalloc, free); 3346 struct event_base* event_base = nsd_child_event_base(); 3347 sig_atomic_t mode; 3348 #ifdef USE_LOG_PROCESS_ROLE 3349 static char child_name[20]; 3350 #endif 3351 3352 if(!event_base) { 3353 log_msg(LOG_ERR, "nsd server could not create event base"); 3354 exit(1); 3355 } 3356 nsd->event_base = event_base; 3357 nsd->server_region = server_region; 3358 3359 #ifdef RATELIMIT 3360 rrl_init(nsd->this_child->child_num); 3361 #endif 3362 3363 assert(nsd->server_kind != NSD_SERVER_MAIN); 3364 3365 #ifdef HAVE_SETPROCTITLE 3366 setproctitle("server %d", nsd->this_child->child_num + 1); 3367 #endif 3368 #ifdef USE_LOG_PROCESS_ROLE 3369 snprintf(child_name, sizeof(child_name), "srv%d", 3370 nsd->this_child->child_num + 1); 3371 log_set_process_role(child_name); 3372 #endif 3373 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 3374 3375 #ifdef HAVE_CPUSET_T 3376 if(nsd->use_cpu_affinity) { 3377 set_cpu_affinity(nsd->this_child->cpuset); 3378 } 3379 #endif 3380 #ifdef BIND8_STATS 3381 nsd->st = &nsd->stats_per_child[nsd->stat_current] 3382 [nsd->this_child->child_num]; 3383 nsd->st->boot = nsd->stat_map[0].boot; 3384 memcpy(&nsd->stat_proc, nsd->st, sizeof(nsd->stat_proc)); 3385 #endif 3386 3387 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 3388 server_close_all_sockets(nsd->tcp, nsd->ifs); 3389 } 3390 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 3391 server_close_all_sockets(nsd->udp, nsd->ifs); 3392 } 3393 3394 if (nsd->this_child->parent_fd != -1) { 3395 struct event *handler; 3396 struct ipc_handler_conn_data* user_data = 3397 (struct ipc_handler_conn_data*)region_alloc( 3398 server_region, sizeof(struct ipc_handler_conn_data)); 3399 user_data->nsd = nsd; 3400 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 3401 3402 handler = (struct event*) region_alloc( 3403 server_region, sizeof(*handler)); 3404 memset(handler, 0, sizeof(*handler)); 3405 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 3406 EV_READ, child_handle_parent_command, user_data); 3407 if(event_base_set(event_base, handler) != 0) 3408 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 3409 if(event_add(handler, NULL) != 0) 3410 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 3411 } 3412 3413 if(nsd->reuseport) { 3414 numifs = nsd->ifs / nsd->reuseport; 3415 from = numifs * nsd->this_child->child_num; 3416 if(from+numifs > nsd->ifs) { /* should not happen */ 3417 from = 0; 3418 numifs = nsd->ifs; 3419 } 3420 } else { 3421 from = 0; 3422 numifs = nsd->ifs; 3423 } 3424 3425 if (nsd->server_kind & NSD_SERVER_UDP) { 3426 int child = nsd->this_child->child_num; 3427 memset(msgs, 0, sizeof(msgs)); 3428 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 3429 queries[i] = query_create(server_region, 3430 compressed_dname_offsets, 3431 compression_table_size, compressed_dnames); 3432 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3433 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3434 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3435 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3436 msgs[i].msg_hdr.msg_iovlen = 1; 3437 msgs[i].msg_hdr.msg_name = &queries[i]->remote_addr; 3438 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen; 3439 } 3440 3441 for (i = 0; i < nsd->ifs; i++) { 3442 int listen; 3443 struct udp_handler_data *data; 3444 3445 listen = nsd_bitset_isset(nsd->udp[i].servers, child); 3446 3447 if(i >= from && i < (from + numifs) && listen) { 3448 data = region_alloc_zero( 3449 nsd->server_region, sizeof(*data)); 3450 add_udp_handler(nsd, &nsd->udp[i], data); 3451 } else { 3452 /* close sockets intended for other servers */ 3453 server_close_socket(&nsd->udp[i]); 3454 } 3455 } 3456 } 3457 3458 /* 3459 * Keep track of all the TCP accept handlers so we can enable 3460 * and disable them based on the current number of active TCP 3461 * connections. 3462 */ 3463 if (nsd->server_kind & NSD_SERVER_TCP) { 3464 int child = nsd->this_child->child_num; 3465 tcp_accept_handler_count = numifs; 3466 tcp_accept_handlers = region_alloc_array(server_region, 3467 numifs, sizeof(*tcp_accept_handlers)); 3468 3469 for (i = 0; i < nsd->ifs; i++) { 3470 int listen; 3471 struct tcp_accept_handler_data *data; 3472 3473 listen = nsd_bitset_isset(nsd->tcp[i].servers, child); 3474 3475 if(i >= from && i < (from + numifs) && listen) { 3476 data = &tcp_accept_handlers[i-from]; 3477 memset(data, 0, sizeof(*data)); 3478 add_tcp_handler(nsd, &nsd->tcp[i], data); 3479 } else { 3480 /* close sockets intended for other servers */ 3481 /* 3482 * uncomment this once tcp servers are no 3483 * longer copied in the tcp fd copy line 3484 * in server_init(). 3485 server_close_socket(&nsd->tcp[i]); 3486 */ 3487 /* close sockets not meant for this server*/ 3488 if(!listen) 3489 server_close_socket(&nsd->tcp[i]); 3490 } 3491 } 3492 } else { 3493 tcp_accept_handler_count = 0; 3494 } 3495 3496 /* The main loop... */ 3497 while ((mode = nsd->mode) != NSD_QUIT) { 3498 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 3499 3500 /* Do we need to do the statistics... */ 3501 if (mode == NSD_STATS) { 3502 #ifdef BIND8_STATS 3503 int p = nsd->st_period; 3504 nsd->st_period = 1; /* force stats printout */ 3505 /* Dump the statistics */ 3506 bind8_stats(nsd); 3507 nsd->st_period = p; 3508 #else /* !BIND8_STATS */ 3509 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 3510 #endif /* BIND8_STATS */ 3511 3512 nsd->mode = NSD_RUN; 3513 } 3514 else if (mode == NSD_REAP_CHILDREN) { 3515 /* got signal, notify parent. parent reaps terminated children. */ 3516 if (nsd->this_child->parent_fd != -1) { 3517 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 3518 if (write(nsd->this_child->parent_fd, 3519 &parent_notify, 3520 sizeof(parent_notify)) == -1) 3521 { 3522 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 3523 (int) nsd->this_child->pid, strerror(errno)); 3524 } 3525 } else /* no parent, so reap 'em */ 3526 while (waitpid(-1, NULL, WNOHANG) > 0) ; 3527 nsd->mode = NSD_RUN; 3528 } 3529 else if(mode == NSD_RUN) { 3530 /* Wait for a query... */ 3531 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3532 if (errno != EINTR) { 3533 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3534 break; 3535 } 3536 } 3537 } else if(mode == NSD_QUIT) { 3538 /* ignore here, quit */ 3539 } else { 3540 log_msg(LOG_ERR, "mode bad value %d, back to service.", 3541 (int)mode); 3542 nsd->mode = NSD_RUN; 3543 } 3544 } 3545 3546 service_remaining_tcp(nsd); 3547 #ifdef BIND8_STATS 3548 bind8_stats(nsd); 3549 #endif /* BIND8_STATS */ 3550 3551 #ifdef MEMCLEAN /* OS collects memory pages */ 3552 #ifdef RATELIMIT 3553 rrl_deinit(nsd->this_child->child_num); 3554 #endif 3555 event_base_free(event_base); 3556 region_destroy(server_region); 3557 #endif 3558 server_shutdown(nsd); 3559 } 3560 3561 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 3562 { 3563 int* timed_out = (int*)arg; 3564 assert(event & EV_TIMEOUT); (void)event; 3565 /* wake up the service tcp thread, note event is no longer 3566 * registered */ 3567 *timed_out = 1; 3568 } 3569 3570 void 3571 service_remaining_tcp(struct nsd* nsd) 3572 { 3573 struct tcp_handler_data* p; 3574 struct event_base* event_base; 3575 /* check if it is needed */ 3576 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 3577 return; 3578 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 3579 #ifdef USE_DNSTAP 3580 /* remove dnstap collector, we cannot write there because the new 3581 * child process is using the file descriptor, or the child 3582 * process after that. */ 3583 dt_collector_destroy(nsd->dt_collector, nsd); 3584 nsd->dt_collector = NULL; 3585 #endif 3586 /* setup event base */ 3587 event_base = nsd_child_event_base(); 3588 if(!event_base) { 3589 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 3590 return; 3591 } 3592 /* register tcp connections */ 3593 for(p = tcp_active_list; p != NULL; p = p->next) { 3594 struct timeval timeout; 3595 int fd = p->event.ev_fd; 3596 #ifdef USE_MINI_EVENT 3597 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 3598 #else 3599 short event = p->event.ev_events & (EV_READ|EV_WRITE); 3600 #endif 3601 void (*fn)(int, short, void*); 3602 #ifdef HAVE_SSL 3603 if(p->tls) { 3604 if((event&EV_READ)) 3605 fn = handle_tls_reading; 3606 else fn = handle_tls_writing; 3607 } else { 3608 #endif 3609 if((event&EV_READ)) 3610 fn = handle_tcp_reading; 3611 else fn = handle_tcp_writing; 3612 #ifdef HAVE_SSL 3613 } 3614 #endif 3615 3616 p->tcp_no_more_queries = 1; 3617 /* set timeout to 3 seconds (previously 1/10 second) */ 3618 if(p->tcp_timeout > 3000) 3619 p->tcp_timeout = 3000; 3620 timeout.tv_sec = p->tcp_timeout / 1000; 3621 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 3622 event_del(&p->event); 3623 memset(&p->event, 0, sizeof(p->event)); 3624 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 3625 fn, p); 3626 if(event_base_set(event_base, &p->event) != 0) 3627 log_msg(LOG_ERR, "event base set failed"); 3628 if(event_add(&p->event, &timeout) != 0) 3629 log_msg(LOG_ERR, "event add failed"); 3630 } 3631 3632 /* handle it */ 3633 while(nsd->current_tcp_count > 0) { 3634 mode_t m = server_signal_mode(nsd); 3635 struct event timeout; 3636 struct timeval tv; 3637 int timed_out = 0; 3638 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 3639 m == NSD_REAP_CHILDREN) { 3640 /* quit */ 3641 break; 3642 } 3643 /* timer */ 3644 /* have to do something every 3 seconds */ 3645 tv.tv_sec = 3; 3646 tv.tv_usec = 0; 3647 memset(&timeout, 0, sizeof(timeout)); 3648 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 3649 &timed_out); 3650 if(event_base_set(event_base, &timeout) != 0) 3651 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 3652 if(event_add(&timeout, &tv) != 0) 3653 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 3654 3655 /* service loop */ 3656 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3657 if (errno != EINTR) { 3658 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3659 break; 3660 } 3661 } 3662 if(!timed_out) { 3663 event_del(&timeout); 3664 } else { 3665 /* timed out, quit */ 3666 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 3667 break; 3668 } 3669 } 3670 #ifdef MEMCLEAN 3671 event_base_free(event_base); 3672 #endif 3673 /* continue to quit after return */ 3674 } 3675 3676 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 3677 * are always used, even if nonblocking operations are broken, in which case 3678 * NUM_RECV_PER_SELECT is defined to 1 (one). 3679 */ 3680 #if defined(HAVE_RECVMMSG) 3681 #define nsd_recvmmsg recvmmsg 3682 #else /* !HAVE_RECVMMSG */ 3683 3684 static int 3685 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 3686 int flags, struct timespec *timeout) 3687 { 3688 unsigned int vpos = 0; 3689 ssize_t rcvd; 3690 3691 /* timeout is ignored, ensure caller does not expect it to work */ 3692 assert(timeout == NULL); (void)timeout; 3693 3694 while(vpos < vlen) { 3695 rcvd = recvfrom(sockfd, 3696 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3697 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3698 flags, 3699 msgvec[vpos].msg_hdr.msg_name, 3700 &msgvec[vpos].msg_hdr.msg_namelen); 3701 if(rcvd < 0) { 3702 break; 3703 } else { 3704 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 3705 msgvec[vpos].msg_len = (unsigned int)rcvd; 3706 vpos++; 3707 } 3708 } 3709 3710 if(vpos) { 3711 /* error will be picked up next time */ 3712 return (int)vpos; 3713 } else if(errno == 0) { 3714 return 0; 3715 } else if(errno == EAGAIN) { 3716 return 0; 3717 } 3718 3719 return -1; 3720 } 3721 #endif /* HAVE_RECVMMSG */ 3722 3723 #ifdef HAVE_SENDMMSG 3724 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__) 3725 #else /* !HAVE_SENDMMSG */ 3726 3727 static int 3728 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 3729 { 3730 unsigned int vpos = 0; 3731 ssize_t snd; 3732 3733 while(vpos < vlen) { 3734 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 3735 snd = sendto(sockfd, 3736 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3737 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3738 flags, 3739 msgvec[vpos].msg_hdr.msg_name, 3740 msgvec[vpos].msg_hdr.msg_namelen); 3741 if(snd < 0) { 3742 break; 3743 } else { 3744 msgvec[vpos].msg_len = (unsigned int)snd; 3745 vpos++; 3746 } 3747 } 3748 3749 if(vpos) { 3750 return (int)vpos; 3751 } else if(errno == 0) { 3752 return 0; 3753 } 3754 3755 return -1; 3756 } 3757 #endif /* HAVE_SENDMMSG */ 3758 3759 static int 3760 port_is_zero( 3761 #ifdef INET6 3762 struct sockaddr_storage *addr 3763 #else 3764 struct sockaddr_in *addr 3765 #endif 3766 ) 3767 { 3768 #ifdef INET6 3769 if(addr->ss_family == AF_INET6) { 3770 return (((struct sockaddr_in6 *)addr)->sin6_port) == 0; 3771 } else if(addr->ss_family == AF_INET) { 3772 return (((struct sockaddr_in *)addr)->sin_port) == 0; 3773 } 3774 return 0; 3775 #else 3776 if(addr->sin_family == AF_INET) { 3777 return addr->sin_port == 0; 3778 } 3779 return 0; 3780 #endif 3781 } 3782 3783 /* Parses the PROXYv2 header from buf and updates the struct. 3784 * Returns 1 on success, 0 on failure. */ 3785 static int 3786 consume_pp2_header(struct buffer* buf, struct query* q, int stream) 3787 { 3788 size_t size; 3789 struct pp2_header* header; 3790 int err = pp2_read_header(buffer_begin(buf), buffer_remaining(buf)); 3791 if(err) { 3792 VERBOSITY(4, (LOG_ERR, "proxy-protocol: could not parse " 3793 "PROXYv2 header: %s", pp_lookup_error(err))); 3794 return 0; 3795 } 3796 header = (struct pp2_header*)buffer_begin(buf); 3797 size = PP2_HEADER_SIZE + read_uint16(&header->len); 3798 if(size > buffer_limit(buf)) { 3799 VERBOSITY(4, (LOG_ERR, "proxy-protocol: not enough buffer " 3800 "size to read PROXYv2 header")); 3801 return 0; 3802 } 3803 if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) { 3804 /* A connection from the proxy itself. 3805 * No need to do anything with addresses. */ 3806 goto done; 3807 } 3808 if(header->fam_prot == PP2_UNSPEC_UNSPEC) { 3809 /* Unspecified family and protocol. This could be used for 3810 * health checks by proxies. 3811 * No need to do anything with addresses. */ 3812 goto done; 3813 } 3814 /* Read the proxied address */ 3815 switch(header->fam_prot) { 3816 case PP2_INET_STREAM: 3817 case PP2_INET_DGRAM: 3818 { 3819 struct sockaddr_in* addr = 3820 (struct sockaddr_in*)&q->client_addr; 3821 addr->sin_family = AF_INET; 3822 memmove(&addr->sin_addr.s_addr, 3823 &header->addr.addr4.src_addr, 4); 3824 memmove(&addr->sin_port, &header->addr.addr4.src_port, 3825 2); 3826 q->client_addrlen = (socklen_t)sizeof(struct sockaddr_in); 3827 } 3828 /* Ignore the destination address; it should be us. */ 3829 break; 3830 #ifdef INET6 3831 case PP2_INET6_STREAM: 3832 case PP2_INET6_DGRAM: 3833 { 3834 struct sockaddr_in6* addr = 3835 (struct sockaddr_in6*)&q->client_addr; 3836 memset(addr, 0, sizeof(*addr)); 3837 addr->sin6_family = AF_INET6; 3838 memmove(&addr->sin6_addr, 3839 header->addr.addr6.src_addr, 16); 3840 memmove(&addr->sin6_port, &header->addr.addr6.src_port, 3841 2); 3842 q->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6); 3843 } 3844 /* Ignore the destination address; it should be us. */ 3845 break; 3846 #endif /* INET6 */ 3847 default: 3848 VERBOSITY(2, (LOG_ERR, "proxy-protocol: unsupported " 3849 "family and protocol 0x%x", 3850 (int)header->fam_prot)); 3851 return 0; 3852 } 3853 q->is_proxied = 1; 3854 done: 3855 if(!stream) { 3856 /* We are reading a whole packet; 3857 * Move the rest of the data to overwrite the PROXYv2 header */ 3858 /* XXX can we do better to avoid memmove? */ 3859 memmove(header, ((char*)header)+size, buffer_limit(buf)-size); 3860 buffer_set_limit(buf, buffer_limit(buf)-size); 3861 } 3862 return 1; 3863 } 3864 3865 static void 3866 handle_udp(int fd, short event, void* arg) 3867 { 3868 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3869 int received, sent, recvcount, i; 3870 struct query *q; 3871 uint32_t now = 0; 3872 3873 if (!(event & EV_READ)) { 3874 return; 3875 } 3876 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3877 /* this printf strangely gave a performance increase on Linux */ 3878 /* printf("recvcount %d \n", recvcount); */ 3879 if (recvcount == -1) { 3880 if (errno != EAGAIN && errno != EINTR) { 3881 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3882 STATUP(data->nsd, rxerr); 3883 /* No zone statup */ 3884 } 3885 /* Simply no data available */ 3886 return; 3887 } 3888 for (i = 0; i < recvcount; i++) { 3889 loopstart: 3890 received = msgs[i].msg_len; 3891 queries[i]->remote_addrlen = msgs[i].msg_hdr.msg_namelen; 3892 queries[i]->client_addrlen = (socklen_t)sizeof(queries[i]->client_addr); 3893 queries[i]->is_proxied = 0; 3894 q = queries[i]; 3895 if (received == -1) { 3896 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3897 #if defined(HAVE_RECVMMSG) 3898 msgs[i].msg_hdr.msg_flags 3899 #else 3900 errno 3901 #endif 3902 )); 3903 STATUP(data->nsd, rxerr); 3904 /* No zone statup */ 3905 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3906 iovecs[i].iov_len = buffer_remaining(q->packet); 3907 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen; 3908 goto swap_drop; 3909 } 3910 3911 /* Account... */ 3912 #ifdef BIND8_STATS 3913 if (data->socket->addr.ai_family == AF_INET) { 3914 STATUP(data->nsd, qudp); 3915 } else if (data->socket->addr.ai_family == AF_INET6) { 3916 STATUP(data->nsd, qudp6); 3917 } 3918 #endif 3919 3920 buffer_skip(q->packet, received); 3921 buffer_flip(q->packet); 3922 if(data->pp2_enabled && !consume_pp2_header(q->packet, q, 0)) { 3923 VERBOSITY(2, (LOG_ERR, "proxy-protocol: could not " 3924 "consume PROXYv2 header")); 3925 goto swap_drop; 3926 } 3927 if(!q->is_proxied) { 3928 q->client_addrlen = q->remote_addrlen; 3929 memmove(&q->client_addr, &q->remote_addr, 3930 q->remote_addrlen); 3931 } 3932 #ifdef USE_DNSTAP 3933 /* 3934 * sending UDP-query with server address (local) and client address to dnstap process 3935 */ 3936 log_addr("query from client", &q->client_addr); 3937 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3938 if(verbosity >= 6 && q->is_proxied) 3939 log_addr("query via proxy", &q->remote_addr); 3940 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->client_addr, q->client_addrlen, 3941 q->tcp, q->packet); 3942 #endif /* USE_DNSTAP */ 3943 3944 /* Process and answer the query... */ 3945 if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) { 3946 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3947 STATUP(data->nsd, nona); 3948 ZTATUP(data->nsd, q->zone, nona); 3949 } 3950 3951 #ifdef USE_ZONE_STATS 3952 if (data->socket->addr.ai_family == AF_INET) { 3953 ZTATUP(data->nsd, q->zone, qudp); 3954 } else if (data->socket->addr.ai_family == AF_INET6) { 3955 ZTATUP(data->nsd, q->zone, qudp6); 3956 } 3957 #endif 3958 3959 /* Add EDNS0 and TSIG info if necessary. */ 3960 query_add_optional(q, data->nsd, &now); 3961 3962 buffer_flip(q->packet); 3963 iovecs[i].iov_len = buffer_remaining(q->packet); 3964 #ifdef BIND8_STATS 3965 /* Account the rcode & TC... */ 3966 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3967 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3968 if (TC(q->packet)) { 3969 STATUP(data->nsd, truncated); 3970 ZTATUP(data->nsd, q->zone, truncated); 3971 } 3972 #endif /* BIND8_STATS */ 3973 #ifdef USE_DNSTAP 3974 /* 3975 * sending UDP-response with server address (local) and client address to dnstap process 3976 */ 3977 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 3978 log_addr("response to client", &q->client_addr); 3979 if(verbosity >= 6 && q->is_proxied) 3980 log_addr("response via proxy", &q->remote_addr); 3981 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, 3982 &q->client_addr, q->client_addrlen, q->tcp, q->packet, 3983 q->zone); 3984 #endif /* USE_DNSTAP */ 3985 } else { 3986 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3987 iovecs[i].iov_len = buffer_remaining(q->packet); 3988 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen; 3989 swap_drop: 3990 STATUP(data->nsd, dropped); 3991 ZTATUP(data->nsd, q->zone, dropped); 3992 if(i != recvcount-1) { 3993 /* swap with last and decrease recvcount */ 3994 struct mmsghdr mtmp = msgs[i]; 3995 struct iovec iotmp = iovecs[i]; 3996 recvcount--; 3997 msgs[i] = msgs[recvcount]; 3998 iovecs[i] = iovecs[recvcount]; 3999 queries[i] = queries[recvcount]; 4000 msgs[recvcount] = mtmp; 4001 iovecs[recvcount] = iotmp; 4002 queries[recvcount] = q; 4003 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 4004 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 4005 goto loopstart; 4006 } else { recvcount --; } 4007 } 4008 } 4009 4010 /* send until all are sent */ 4011 i = 0; 4012 while(i<recvcount) { 4013 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 4014 if(sent == -1) { 4015 if(errno == ENOBUFS || 4016 #ifdef EWOULDBLOCK 4017 errno == EWOULDBLOCK || 4018 #endif 4019 errno == EAGAIN) { 4020 /* block to wait until send buffer avail */ 4021 int flag, errstore; 4022 if((flag = fcntl(fd, F_GETFL)) == -1) { 4023 log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno)); 4024 flag = 0; 4025 } 4026 flag &= ~O_NONBLOCK; 4027 if(fcntl(fd, F_SETFL, flag) == -1) 4028 log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno)); 4029 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 4030 errstore = errno; 4031 flag |= O_NONBLOCK; 4032 if(fcntl(fd, F_SETFL, flag) == -1) 4033 log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno)); 4034 if(sent != -1) { 4035 i += sent; 4036 continue; 4037 } 4038 errno = errstore; 4039 } 4040 if(errno == EINVAL) { 4041 /* skip the invalid argument entry, 4042 * send the remaining packets in the list */ 4043 if(!(port_is_zero((void*)&queries[i]->remote_addr) && 4044 verbosity < 3)) { 4045 const char* es = strerror(errno); 4046 char a[64]; 4047 addrport2str((void*)&queries[i]->remote_addr, a, sizeof(a)); 4048 log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 4049 } 4050 i += 1; 4051 continue; 4052 } 4053 /* don't log transient network full errors, unless 4054 * on higher verbosity */ 4055 if(!(errno == ENOBUFS && verbosity < 1) && 4056 #ifdef EWOULDBLOCK 4057 errno != EWOULDBLOCK && 4058 #endif 4059 errno != EAGAIN) { 4060 const char* es = strerror(errno); 4061 char a[64]; 4062 addrport2str((void*)&queries[i]->remote_addr, a, sizeof(a)); 4063 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 4064 } 4065 #ifdef BIND8_STATS 4066 data->nsd->st->txerr += recvcount-i; 4067 #endif /* BIND8_STATS */ 4068 break; 4069 } 4070 i += sent; 4071 } 4072 for(i=0; i<recvcount; i++) { 4073 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 4074 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 4075 msgs[i].msg_hdr.msg_namelen = queries[i]->remote_addrlen; 4076 } 4077 } 4078 4079 #ifdef HAVE_SSL 4080 /* 4081 * Setup an event for the tcp handler. 4082 */ 4083 static void 4084 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 4085 int fd, short event) 4086 { 4087 struct timeval timeout; 4088 struct event_base* ev_base; 4089 4090 timeout.tv_sec = data->nsd->tcp_timeout; 4091 timeout.tv_usec = 0L; 4092 4093 ev_base = data->event.ev_base; 4094 event_del(&data->event); 4095 memset(&data->event, 0, sizeof(data->event)); 4096 event_set(&data->event, fd, event, fn, data); 4097 if(event_base_set(ev_base, &data->event) != 0) 4098 log_msg(LOG_ERR, "event base set failed"); 4099 if(event_add(&data->event, &timeout) != 0) 4100 log_msg(LOG_ERR, "event add failed"); 4101 } 4102 #endif /* HAVE_SSL */ 4103 4104 static void 4105 cleanup_tcp_handler(struct tcp_handler_data* data) 4106 { 4107 event_del(&data->event); 4108 #ifdef HAVE_SSL 4109 if(data->tls) { 4110 SSL_shutdown(data->tls); 4111 SSL_free(data->tls); 4112 data->tls = NULL; 4113 } 4114 #endif 4115 data->pp2_header_state = pp2_header_none; 4116 close(data->event.ev_fd); 4117 if(data->prev) 4118 data->prev->next = data->next; 4119 else tcp_active_list = data->next; 4120 if(data->next) 4121 data->next->prev = data->prev; 4122 4123 /* 4124 * Enable the TCP accept handlers when the current number of 4125 * TCP connections is about to drop below the maximum number 4126 * of TCP connections. 4127 */ 4128 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 4129 configure_handler_event_types(EV_READ|EV_PERSIST); 4130 if(slowaccept) { 4131 event_del(&slowaccept_event); 4132 slowaccept = 0; 4133 } 4134 } 4135 --data->nsd->current_tcp_count; 4136 assert(data->nsd->current_tcp_count >= 0); 4137 4138 region_destroy(data->region); 4139 } 4140 4141 /* Read more data into the buffer for tcp read. Pass the amount of additional 4142 * data required. Returns false if nothing needs to be done this event, or 4143 * true if the additional data is in the buffer. */ 4144 static int 4145 more_read_buf_tcp(int fd, struct tcp_handler_data* data, void* bufpos, 4146 size_t add_amount, ssize_t* received) 4147 { 4148 *received = read(fd, bufpos, add_amount); 4149 if (*received == -1) { 4150 if (errno == EAGAIN || errno == EINTR) { 4151 /* 4152 * Read would block, wait until more 4153 * data is available. 4154 */ 4155 return 0; 4156 } else { 4157 char buf[48]; 4158 addr2str(&data->query->remote_addr, buf, sizeof(buf)); 4159 #ifdef ECONNRESET 4160 if (verbosity >= 2 || errno != ECONNRESET) 4161 #endif /* ECONNRESET */ 4162 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 4163 cleanup_tcp_handler(data); 4164 return 0; 4165 } 4166 } else if (*received == 0) { 4167 /* EOF */ 4168 cleanup_tcp_handler(data); 4169 return 0; 4170 } 4171 return 1; 4172 } 4173 4174 static void 4175 handle_tcp_reading(int fd, short event, void* arg) 4176 { 4177 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4178 ssize_t received; 4179 struct event_base* ev_base; 4180 struct timeval timeout; 4181 uint32_t now = 0; 4182 4183 if ((event & EV_TIMEOUT)) { 4184 /* Connection timed out. */ 4185 cleanup_tcp_handler(data); 4186 return; 4187 } 4188 4189 if ((data->nsd->tcp_query_count > 0 && 4190 data->query_count >= data->nsd->tcp_query_count) || 4191 (data->query_count > 0 && data->tcp_no_more_queries)) 4192 { 4193 /* No more queries allowed on this tcp connection. */ 4194 cleanup_tcp_handler(data); 4195 return; 4196 } 4197 4198 assert((event & EV_READ)); 4199 4200 if (data->bytes_transmitted == 0 && data->query_needs_reset) { 4201 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 4202 data->query_needs_reset = 0; 4203 } 4204 4205 if(data->pp2_enabled && data->pp2_header_state != pp2_header_done) { 4206 struct pp2_header* header = NULL; 4207 size_t want_read_size = 0; 4208 size_t current_read_size = 0; 4209 if(data->pp2_header_state == pp2_header_none) { 4210 want_read_size = PP2_HEADER_SIZE; 4211 if(buffer_remaining(data->query->packet) < 4212 want_read_size) { 4213 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header")); 4214 cleanup_tcp_handler(data); 4215 return; 4216 } 4217 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading fixed part of PROXYv2 header (len %lu)", (unsigned long)want_read_size)); 4218 current_read_size = want_read_size; 4219 if(data->bytes_transmitted < current_read_size) { 4220 if(!more_read_buf_tcp(fd, data, 4221 (void*)buffer_at(data->query->packet, 4222 data->bytes_transmitted), 4223 current_read_size - data->bytes_transmitted, 4224 &received)) 4225 return; 4226 data->bytes_transmitted += received; 4227 buffer_skip(data->query->packet, received); 4228 if(data->bytes_transmitted != current_read_size) 4229 return; 4230 data->pp2_header_state = pp2_header_init; 4231 } 4232 } 4233 if(data->pp2_header_state == pp2_header_init) { 4234 int err; 4235 err = pp2_read_header(buffer_begin(data->query->packet), 4236 buffer_limit(data->query->packet)); 4237 if(err) { 4238 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not parse PROXYv2 header: %s", pp_lookup_error(err))); 4239 cleanup_tcp_handler(data); 4240 return; 4241 } 4242 header = (struct pp2_header*)buffer_begin(data->query->packet); 4243 want_read_size = ntohs(header->len); 4244 if(buffer_limit(data->query->packet) < 4245 PP2_HEADER_SIZE + want_read_size) { 4246 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header")); 4247 cleanup_tcp_handler(data); 4248 return; 4249 } 4250 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading variable part of PROXYv2 header (len %lu)", (unsigned long)want_read_size)); 4251 current_read_size = PP2_HEADER_SIZE + want_read_size; 4252 if(want_read_size == 0) { 4253 /* nothing more to read; header is complete */ 4254 data->pp2_header_state = pp2_header_done; 4255 } else if(data->bytes_transmitted < current_read_size) { 4256 if(!more_read_buf_tcp(fd, data, 4257 (void*)buffer_at(data->query->packet, 4258 data->bytes_transmitted), 4259 current_read_size - data->bytes_transmitted, 4260 &received)) 4261 return; 4262 data->bytes_transmitted += received; 4263 buffer_skip(data->query->packet, received); 4264 if(data->bytes_transmitted != current_read_size) 4265 return; 4266 data->pp2_header_state = pp2_header_done; 4267 } 4268 } 4269 if(data->pp2_header_state != pp2_header_done || !header) { 4270 VERBOSITY(6, (LOG_ERR, "proxy-protocol: wrong state for the PROXYv2 header")); 4271 4272 cleanup_tcp_handler(data); 4273 return; 4274 } 4275 buffer_flip(data->query->packet); 4276 if(!consume_pp2_header(data->query->packet, data->query, 1)) { 4277 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not consume PROXYv2 header")); 4278 4279 cleanup_tcp_handler(data); 4280 return; 4281 } 4282 /* Clear and reset the buffer to read the following 4283 * DNS packet(s). */ 4284 buffer_clear(data->query->packet); 4285 data->bytes_transmitted = 0; 4286 } 4287 4288 /* 4289 * Check if we received the leading packet length bytes yet. 4290 */ 4291 if (data->bytes_transmitted < sizeof(uint16_t)) { 4292 if(!more_read_buf_tcp(fd, data, 4293 (char*) &data->query->tcplen + data->bytes_transmitted, 4294 sizeof(uint16_t) - data->bytes_transmitted, &received)) 4295 return; 4296 data->bytes_transmitted += received; 4297 if (data->bytes_transmitted < sizeof(uint16_t)) { 4298 /* 4299 * Not done with the tcplen yet, wait for more 4300 * data to become available. 4301 */ 4302 return; 4303 } 4304 assert(data->bytes_transmitted == sizeof(uint16_t)); 4305 4306 data->query->tcplen = ntohs(data->query->tcplen); 4307 4308 /* 4309 * Minimum query size is: 4310 * 4311 * Size of the header (12) 4312 * + Root domain name (1) 4313 * + Query class (2) 4314 * + Query type (2) 4315 */ 4316 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 4317 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 4318 cleanup_tcp_handler(data); 4319 return; 4320 } 4321 4322 if (data->query->tcplen > data->query->maxlen) { 4323 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 4324 cleanup_tcp_handler(data); 4325 return; 4326 } 4327 4328 buffer_set_limit(data->query->packet, data->query->tcplen); 4329 } 4330 4331 assert(buffer_remaining(data->query->packet) > 0); 4332 4333 /* Read the (remaining) query data. */ 4334 if(!more_read_buf_tcp(fd, data, buffer_current(data->query->packet), 4335 buffer_remaining(data->query->packet), &received)) 4336 return; 4337 data->bytes_transmitted += received; 4338 buffer_skip(data->query->packet, received); 4339 if (buffer_remaining(data->query->packet) > 0) { 4340 /* 4341 * Message not yet complete, wait for more data to 4342 * become available. 4343 */ 4344 return; 4345 } 4346 4347 assert(buffer_position(data->query->packet) == data->query->tcplen); 4348 4349 /* Account... */ 4350 #ifdef BIND8_STATS 4351 #ifndef INET6 4352 STATUP(data->nsd, ctcp); 4353 #else 4354 if (data->query->remote_addr.ss_family == AF_INET) { 4355 STATUP(data->nsd, ctcp); 4356 } else if (data->query->remote_addr.ss_family == AF_INET6) { 4357 STATUP(data->nsd, ctcp6); 4358 } 4359 #endif 4360 #endif /* BIND8_STATS */ 4361 4362 /* We have a complete query, process it. */ 4363 4364 /* tcp-query-count: handle query counter ++ */ 4365 data->query_count++; 4366 4367 buffer_flip(data->query->packet); 4368 #ifdef USE_DNSTAP 4369 /* 4370 * and send TCP-query with found address (local) and client address to dnstap process 4371 */ 4372 log_addr("query from client", &data->query->client_addr); 4373 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 4374 if(verbosity >= 6 && data->query->is_proxied) 4375 log_addr("query via proxy", &data->query->remote_addr); 4376 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr, 4377 data->query->client_addrlen, data->query->tcp, data->query->packet); 4378 #endif /* USE_DNSTAP */ 4379 data->query_state = server_process_query(data->nsd, data->query, &now); 4380 if (data->query_state == QUERY_DISCARDED) { 4381 /* Drop the packet and the entire connection... */ 4382 STATUP(data->nsd, dropped); 4383 ZTATUP(data->nsd, data->query->zone, dropped); 4384 cleanup_tcp_handler(data); 4385 return; 4386 } 4387 4388 #ifdef BIND8_STATS 4389 if (RCODE(data->query->packet) == RCODE_OK 4390 && !AA(data->query->packet)) 4391 { 4392 STATUP(data->nsd, nona); 4393 ZTATUP(data->nsd, data->query->zone, nona); 4394 } 4395 #endif /* BIND8_STATS */ 4396 4397 #ifdef USE_ZONE_STATS 4398 #ifndef INET6 4399 ZTATUP(data->nsd, data->query->zone, ctcp); 4400 #else 4401 if (data->query->remote_addr.ss_family == AF_INET) { 4402 ZTATUP(data->nsd, data->query->zone, ctcp); 4403 } else if (data->query->remote_addr.ss_family == AF_INET6) { 4404 ZTATUP(data->nsd, data->query->zone, ctcp6); 4405 } 4406 #endif 4407 #endif /* USE_ZONE_STATS */ 4408 4409 query_add_optional(data->query, data->nsd, &now); 4410 4411 /* Switch to the tcp write handler. */ 4412 buffer_flip(data->query->packet); 4413 data->query->tcplen = buffer_remaining(data->query->packet); 4414 #ifdef BIND8_STATS 4415 /* Account the rcode & TC... */ 4416 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4417 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4418 if (TC(data->query->packet)) { 4419 STATUP(data->nsd, truncated); 4420 ZTATUP(data->nsd, data->query->zone, truncated); 4421 } 4422 #endif /* BIND8_STATS */ 4423 #ifdef USE_DNSTAP 4424 /* 4425 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4426 */ 4427 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4428 log_addr("response to client", &data->query->client_addr); 4429 if(verbosity >= 6 && data->query->is_proxied) 4430 log_addr("response via proxy", &data->query->remote_addr); 4431 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr, 4432 data->query->client_addrlen, data->query->tcp, data->query->packet, 4433 data->query->zone); 4434 #endif /* USE_DNSTAP */ 4435 data->bytes_transmitted = 0; 4436 4437 timeout.tv_sec = data->tcp_timeout / 1000; 4438 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4439 4440 ev_base = data->event.ev_base; 4441 event_del(&data->event); 4442 memset(&data->event, 0, sizeof(data->event)); 4443 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 4444 handle_tcp_writing, data); 4445 if(event_base_set(ev_base, &data->event) != 0) 4446 log_msg(LOG_ERR, "event base set tcpr failed"); 4447 if(event_add(&data->event, &timeout) != 0) 4448 log_msg(LOG_ERR, "event add tcpr failed"); 4449 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 4450 handle_tcp_writing(fd, EV_WRITE, data); 4451 } 4452 4453 static void 4454 handle_tcp_writing(int fd, short event, void* arg) 4455 { 4456 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4457 ssize_t sent; 4458 struct query *q = data->query; 4459 struct timeval timeout; 4460 struct event_base* ev_base; 4461 uint32_t now = 0; 4462 4463 if ((event & EV_TIMEOUT)) { 4464 /* Connection timed out. */ 4465 cleanup_tcp_handler(data); 4466 return; 4467 } 4468 4469 assert((event & EV_WRITE)); 4470 4471 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4472 /* Writing the response packet length. */ 4473 uint16_t n_tcplen = htons(q->tcplen); 4474 #ifdef HAVE_WRITEV 4475 struct iovec iov[2]; 4476 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 4477 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 4478 iov[1].iov_base = buffer_begin(q->packet); 4479 iov[1].iov_len = buffer_limit(q->packet); 4480 sent = writev(fd, iov, 2); 4481 #else /* HAVE_WRITEV */ 4482 sent = write(fd, 4483 (const char *) &n_tcplen + data->bytes_transmitted, 4484 sizeof(n_tcplen) - data->bytes_transmitted); 4485 #endif /* HAVE_WRITEV */ 4486 if (sent == -1) { 4487 if (errno == EAGAIN || errno == EINTR) { 4488 /* 4489 * Write would block, wait until 4490 * socket becomes writable again. 4491 */ 4492 return; 4493 } else { 4494 #ifdef ECONNRESET 4495 if(verbosity >= 2 || errno != ECONNRESET) 4496 #endif /* ECONNRESET */ 4497 #ifdef EPIPE 4498 if(verbosity >= 2 || errno != EPIPE) 4499 #endif /* EPIPE 'broken pipe' */ 4500 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4501 cleanup_tcp_handler(data); 4502 return; 4503 } 4504 } 4505 4506 data->bytes_transmitted += sent; 4507 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4508 /* 4509 * Writing not complete, wait until socket 4510 * becomes writable again. 4511 */ 4512 return; 4513 } 4514 4515 #ifdef HAVE_WRITEV 4516 sent -= sizeof(n_tcplen); 4517 /* handle potential 'packet done' code */ 4518 goto packet_could_be_done; 4519 #endif 4520 } 4521 4522 sent = write(fd, 4523 buffer_current(q->packet), 4524 buffer_remaining(q->packet)); 4525 if (sent == -1) { 4526 if (errno == EAGAIN || errno == EINTR) { 4527 /* 4528 * Write would block, wait until 4529 * socket becomes writable again. 4530 */ 4531 return; 4532 } else { 4533 #ifdef ECONNRESET 4534 if(verbosity >= 2 || errno != ECONNRESET) 4535 #endif /* ECONNRESET */ 4536 #ifdef EPIPE 4537 if(verbosity >= 2 || errno != EPIPE) 4538 #endif /* EPIPE 'broken pipe' */ 4539 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4540 cleanup_tcp_handler(data); 4541 return; 4542 } 4543 } 4544 4545 data->bytes_transmitted += sent; 4546 #ifdef HAVE_WRITEV 4547 packet_could_be_done: 4548 #endif 4549 buffer_skip(q->packet, sent); 4550 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4551 /* 4552 * Still more data to write when socket becomes 4553 * writable again. 4554 */ 4555 return; 4556 } 4557 4558 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4559 4560 if (data->query_state == QUERY_IN_AXFR || 4561 data->query_state == QUERY_IN_IXFR) { 4562 /* Continue processing AXFR and writing back results. */ 4563 buffer_clear(q->packet); 4564 if(data->query_state == QUERY_IN_AXFR) 4565 data->query_state = query_axfr(data->nsd, q, 0); 4566 else data->query_state = query_ixfr(data->nsd, q); 4567 if (data->query_state != QUERY_PROCESSED) { 4568 query_add_optional(data->query, data->nsd, &now); 4569 4570 /* Reset data. */ 4571 buffer_flip(q->packet); 4572 q->tcplen = buffer_remaining(q->packet); 4573 data->bytes_transmitted = 0; 4574 /* Reset timeout. */ 4575 timeout.tv_sec = data->tcp_timeout / 1000; 4576 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4577 ev_base = data->event.ev_base; 4578 event_del(&data->event); 4579 memset(&data->event, 0, sizeof(data->event)); 4580 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 4581 handle_tcp_writing, data); 4582 if(event_base_set(ev_base, &data->event) != 0) 4583 log_msg(LOG_ERR, "event base set tcpw failed"); 4584 if(event_add(&data->event, &timeout) != 0) 4585 log_msg(LOG_ERR, "event add tcpw failed"); 4586 4587 /* 4588 * Write data if/when the socket is writable 4589 * again. 4590 */ 4591 return; 4592 } 4593 } 4594 4595 /* 4596 * Done sending, wait for the next request to arrive on the 4597 * TCP socket by installing the TCP read handler. 4598 */ 4599 if ((data->nsd->tcp_query_count > 0 && 4600 data->query_count >= data->nsd->tcp_query_count) || 4601 data->tcp_no_more_queries) { 4602 4603 (void) shutdown(fd, SHUT_WR); 4604 } 4605 4606 data->bytes_transmitted = 0; 4607 data->query_needs_reset = 1; 4608 4609 timeout.tv_sec = data->tcp_timeout / 1000; 4610 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4611 ev_base = data->event.ev_base; 4612 event_del(&data->event); 4613 memset(&data->event, 0, sizeof(data->event)); 4614 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 4615 handle_tcp_reading, data); 4616 if(event_base_set(ev_base, &data->event) != 0) 4617 log_msg(LOG_ERR, "event base set tcpw failed"); 4618 if(event_add(&data->event, &timeout) != 0) 4619 log_msg(LOG_ERR, "event add tcpw failed"); 4620 } 4621 4622 #ifdef HAVE_SSL 4623 /** create SSL object and associate fd */ 4624 static SSL* 4625 incoming_ssl_fd(SSL_CTX* ctx, int fd) 4626 { 4627 SSL* ssl = SSL_new((SSL_CTX*)ctx); 4628 if(!ssl) { 4629 log_crypto_err("could not SSL_new"); 4630 return NULL; 4631 } 4632 SSL_set_accept_state(ssl); 4633 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); 4634 if(!SSL_set_fd(ssl, fd)) { 4635 log_crypto_err("could not SSL_set_fd"); 4636 SSL_free(ssl); 4637 return NULL; 4638 } 4639 return ssl; 4640 } 4641 4642 /** TLS handshake to upgrade TCP connection */ 4643 static int 4644 tls_handshake(struct tcp_handler_data* data, int fd, int writing) 4645 { 4646 int r; 4647 if(data->shake_state == tls_hs_read_event) { 4648 /* read condition satisfied back to writing */ 4649 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4650 data->shake_state = tls_hs_none; 4651 return 1; 4652 } 4653 if(data->shake_state == tls_hs_write_event) { 4654 /* write condition satisfied back to reading */ 4655 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4656 data->shake_state = tls_hs_none; 4657 return 1; 4658 } 4659 4660 /* (continue to) setup the TLS connection */ 4661 ERR_clear_error(); 4662 r = SSL_do_handshake(data->tls); 4663 4664 if(r != 1) { 4665 int want = SSL_get_error(data->tls, r); 4666 if(want == SSL_ERROR_WANT_READ) { 4667 if(data->shake_state == tls_hs_read) { 4668 /* try again later */ 4669 return 1; 4670 } 4671 data->shake_state = tls_hs_read; 4672 /* switch back to reading mode */ 4673 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4674 return 1; 4675 } else if(want == SSL_ERROR_WANT_WRITE) { 4676 if(data->shake_state == tls_hs_write) { 4677 /* try again later */ 4678 return 1; 4679 } 4680 data->shake_state = tls_hs_write; 4681 /* switch back to writing mode */ 4682 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4683 return 1; 4684 } else { 4685 if(r == 0) 4686 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely")); 4687 else { 4688 unsigned long err = ERR_get_error(); 4689 if(!squelch_err_ssl_handshake(err)) { 4690 char a[64], s[256]; 4691 addr2str(&data->query->remote_addr, a, sizeof(a)); 4692 snprintf(s, sizeof(s), "TLS handshake failed from %s", a); 4693 log_crypto_from_err(s, err); 4694 } 4695 } 4696 cleanup_tcp_handler(data); 4697 return 0; 4698 } 4699 } 4700 4701 /* Use to log successful upgrade for testing - could be removed*/ 4702 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded.")); 4703 /* set back to the event we need to have when reading (or writing) */ 4704 if(data->shake_state == tls_hs_read && writing) { 4705 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4706 } else if(data->shake_state == tls_hs_write && !writing) { 4707 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4708 } 4709 data->shake_state = tls_hs_none; 4710 return 1; 4711 } 4712 4713 /* Read more data into the buffer for tls read. Pass the amount of additional 4714 * data required. Returns false if nothing needs to be done this event, or 4715 * true if the additional data is in the buffer. */ 4716 static int 4717 more_read_buf_tls(int fd, struct tcp_handler_data* data, void* bufpos, 4718 size_t add_amount, ssize_t* received) 4719 { 4720 ERR_clear_error(); 4721 if((*received=SSL_read(data->tls, bufpos, add_amount)) <= 0) { 4722 int want = SSL_get_error(data->tls, *received); 4723 if(want == SSL_ERROR_ZERO_RETURN) { 4724 cleanup_tcp_handler(data); 4725 return 0; /* shutdown, closed */ 4726 } else if(want == SSL_ERROR_WANT_READ) { 4727 /* wants to be called again */ 4728 return 0; 4729 } 4730 else if(want == SSL_ERROR_WANT_WRITE) { 4731 /* switch to writing */ 4732 data->shake_state = tls_hs_write_event; 4733 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4734 return 0; 4735 } 4736 cleanup_tcp_handler(data); 4737 log_crypto_err("could not SSL_read"); 4738 return 0; 4739 } 4740 return 1; 4741 } 4742 4743 /** handle TLS reading of incoming query */ 4744 static void 4745 handle_tls_reading(int fd, short event, void* arg) 4746 { 4747 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4748 ssize_t received; 4749 uint32_t now = 0; 4750 4751 if ((event & EV_TIMEOUT)) { 4752 /* Connection timed out. */ 4753 cleanup_tcp_handler(data); 4754 return; 4755 } 4756 4757 if ((data->nsd->tcp_query_count > 0 && 4758 data->query_count >= data->nsd->tcp_query_count) || 4759 (data->query_count > 0 && data->tcp_no_more_queries)) 4760 { 4761 /* No more queries allowed on this tcp connection. */ 4762 cleanup_tcp_handler(data); 4763 return; 4764 } 4765 4766 assert((event & EV_READ)); 4767 4768 if (data->bytes_transmitted == 0 && data->query_needs_reset) { 4769 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 4770 data->query_needs_reset = 0; 4771 } 4772 4773 if(data->shake_state != tls_hs_none) { 4774 if(!tls_handshake(data, fd, 0)) 4775 return; 4776 if(data->shake_state != tls_hs_none) 4777 return; 4778 } 4779 4780 if(data->pp2_enabled && data->pp2_header_state != pp2_header_done) { 4781 struct pp2_header* header = NULL; 4782 size_t want_read_size = 0; 4783 size_t current_read_size = 0; 4784 if(data->pp2_header_state == pp2_header_none) { 4785 want_read_size = PP2_HEADER_SIZE; 4786 if(buffer_remaining(data->query->packet) < 4787 want_read_size) { 4788 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header")); 4789 cleanup_tcp_handler(data); 4790 return; 4791 } 4792 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading fixed part of PROXYv2 header (len %lu)", (unsigned long)want_read_size)); 4793 current_read_size = want_read_size; 4794 if(data->bytes_transmitted < current_read_size) { 4795 if(!more_read_buf_tls(fd, data, 4796 buffer_at(data->query->packet, 4797 data->bytes_transmitted), 4798 current_read_size - data->bytes_transmitted, 4799 &received)) 4800 return; 4801 data->bytes_transmitted += received; 4802 buffer_skip(data->query->packet, received); 4803 if(data->bytes_transmitted != current_read_size) 4804 return; 4805 data->pp2_header_state = pp2_header_init; 4806 } 4807 } 4808 if(data->pp2_header_state == pp2_header_init) { 4809 int err; 4810 err = pp2_read_header(buffer_begin(data->query->packet), 4811 buffer_limit(data->query->packet)); 4812 if(err) { 4813 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not parse PROXYv2 header: %s", pp_lookup_error(err))); 4814 cleanup_tcp_handler(data); 4815 return; 4816 } 4817 header = (struct pp2_header*)buffer_begin(data->query->packet); 4818 want_read_size = ntohs(header->len); 4819 if(buffer_limit(data->query->packet) < 4820 PP2_HEADER_SIZE + want_read_size) { 4821 VERBOSITY(6, (LOG_ERR, "proxy-protocol: not enough buffer size to read PROXYv2 header")); 4822 cleanup_tcp_handler(data); 4823 return; 4824 } 4825 VERBOSITY(6, (LOG_INFO, "proxy-protocol: reading variable part of PROXYv2 header (len %lu)", (unsigned long)want_read_size)); 4826 current_read_size = PP2_HEADER_SIZE + want_read_size; 4827 if(want_read_size == 0) { 4828 /* nothing more to read; header is complete */ 4829 data->pp2_header_state = pp2_header_done; 4830 } else if(data->bytes_transmitted < current_read_size) { 4831 if(!more_read_buf_tls(fd, data, 4832 buffer_at(data->query->packet, 4833 data->bytes_transmitted), 4834 current_read_size - data->bytes_transmitted, 4835 &received)) 4836 return; 4837 data->bytes_transmitted += received; 4838 buffer_skip(data->query->packet, received); 4839 if(data->bytes_transmitted != current_read_size) 4840 return; 4841 data->pp2_header_state = pp2_header_done; 4842 } 4843 } 4844 if(data->pp2_header_state != pp2_header_done || !header) { 4845 VERBOSITY(6, (LOG_ERR, "proxy-protocol: wrong state for the PROXYv2 header")); 4846 cleanup_tcp_handler(data); 4847 return; 4848 } 4849 buffer_flip(data->query->packet); 4850 if(!consume_pp2_header(data->query->packet, data->query, 1)) { 4851 VERBOSITY(6, (LOG_ERR, "proxy-protocol: could not consume PROXYv2 header")); 4852 cleanup_tcp_handler(data); 4853 return; 4854 } 4855 /* Clear and reset the buffer to read the following 4856 * DNS packet(s). */ 4857 buffer_clear(data->query->packet); 4858 data->bytes_transmitted = 0; 4859 } 4860 /* 4861 * Check if we received the leading packet length bytes yet. 4862 */ 4863 if(data->bytes_transmitted < sizeof(uint16_t)) { 4864 if(!more_read_buf_tls(fd, data, 4865 (char *) &data->query->tcplen + data->bytes_transmitted, 4866 sizeof(uint16_t) - data->bytes_transmitted, &received)) 4867 return; 4868 data->bytes_transmitted += received; 4869 if (data->bytes_transmitted < sizeof(uint16_t)) { 4870 /* 4871 * Not done with the tcplen yet, wait for more 4872 * data to become available. 4873 */ 4874 return; 4875 } 4876 4877 assert(data->bytes_transmitted == sizeof(uint16_t)); 4878 4879 data->query->tcplen = ntohs(data->query->tcplen); 4880 4881 /* 4882 * Minimum query size is: 4883 * 4884 * Size of the header (12) 4885 * + Root domain name (1) 4886 * + Query class (2) 4887 * + Query type (2) 4888 */ 4889 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 4890 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 4891 cleanup_tcp_handler(data); 4892 return; 4893 } 4894 4895 if (data->query->tcplen > data->query->maxlen) { 4896 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 4897 cleanup_tcp_handler(data); 4898 return; 4899 } 4900 4901 buffer_set_limit(data->query->packet, data->query->tcplen); 4902 } 4903 4904 assert(buffer_remaining(data->query->packet) > 0); 4905 4906 /* Read the (remaining) query data. */ 4907 if(!more_read_buf_tls(fd, data, buffer_current(data->query->packet), 4908 buffer_remaining(data->query->packet), &received)) 4909 return; 4910 data->bytes_transmitted += received; 4911 buffer_skip(data->query->packet, received); 4912 if (buffer_remaining(data->query->packet) > 0) { 4913 /* 4914 * Message not yet complete, wait for more data to 4915 * become available. 4916 */ 4917 return; 4918 } 4919 4920 assert(buffer_position(data->query->packet) == data->query->tcplen); 4921 4922 /* Account... */ 4923 #ifndef INET6 4924 STATUP(data->nsd, ctls); 4925 #else 4926 if (data->query->remote_addr.ss_family == AF_INET) { 4927 STATUP(data->nsd, ctls); 4928 } else if (data->query->remote_addr.ss_family == AF_INET6) { 4929 STATUP(data->nsd, ctls6); 4930 } 4931 #endif 4932 4933 /* We have a complete query, process it. */ 4934 4935 /* tcp-query-count: handle query counter ++ */ 4936 data->query_count++; 4937 4938 buffer_flip(data->query->packet); 4939 #ifdef USE_DNSTAP 4940 /* 4941 * and send TCP-query with found address (local) and client address to dnstap process 4942 */ 4943 log_addr("query from client", &data->query->client_addr); 4944 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 4945 if(verbosity >= 6 && data->query->is_proxied) 4946 log_addr("query via proxy", &data->query->remote_addr); 4947 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr, 4948 data->query->client_addrlen, data->query->tcp, data->query->packet); 4949 #endif /* USE_DNSTAP */ 4950 data->query_state = server_process_query(data->nsd, data->query, &now); 4951 if (data->query_state == QUERY_DISCARDED) { 4952 /* Drop the packet and the entire connection... */ 4953 STATUP(data->nsd, dropped); 4954 ZTATUP(data->nsd, data->query->zone, dropped); 4955 cleanup_tcp_handler(data); 4956 return; 4957 } 4958 4959 #ifdef BIND8_STATS 4960 if (RCODE(data->query->packet) == RCODE_OK 4961 && !AA(data->query->packet)) 4962 { 4963 STATUP(data->nsd, nona); 4964 ZTATUP(data->nsd, data->query->zone, nona); 4965 } 4966 #endif /* BIND8_STATS */ 4967 4968 #ifdef USE_ZONE_STATS 4969 #ifndef INET6 4970 ZTATUP(data->nsd, data->query->zone, ctls); 4971 #else 4972 if (data->query->remote_addr.ss_family == AF_INET) { 4973 ZTATUP(data->nsd, data->query->zone, ctls); 4974 } else if (data->query->remote_addr.ss_family == AF_INET6) { 4975 ZTATUP(data->nsd, data->query->zone, ctls6); 4976 } 4977 #endif 4978 #endif /* USE_ZONE_STATS */ 4979 4980 query_add_optional(data->query, data->nsd, &now); 4981 4982 /* Switch to the tcp write handler. */ 4983 buffer_flip(data->query->packet); 4984 data->query->tcplen = buffer_remaining(data->query->packet); 4985 #ifdef BIND8_STATS 4986 /* Account the rcode & TC... */ 4987 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4988 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4989 if (TC(data->query->packet)) { 4990 STATUP(data->nsd, truncated); 4991 ZTATUP(data->nsd, data->query->zone, truncated); 4992 } 4993 #endif /* BIND8_STATS */ 4994 #ifdef USE_DNSTAP 4995 /* 4996 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4997 */ 4998 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4999 log_addr("response to client", &data->query->client_addr); 5000 if(verbosity >= 6 && data->query->is_proxied) 5001 log_addr("response via proxy", &data->query->remote_addr); 5002 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->client_addr, 5003 data->query->client_addrlen, data->query->tcp, data->query->packet, 5004 data->query->zone); 5005 #endif /* USE_DNSTAP */ 5006 data->bytes_transmitted = 0; 5007 5008 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 5009 5010 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 5011 handle_tls_writing(fd, EV_WRITE, data); 5012 } 5013 5014 /** handle TLS writing of outgoing response */ 5015 static void 5016 handle_tls_writing(int fd, short event, void* arg) 5017 { 5018 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 5019 ssize_t sent; 5020 struct query *q = data->query; 5021 /* static variable that holds reassembly buffer used to put the 5022 * TCP length in front of the packet, like writev. */ 5023 static buffer_type* global_tls_temp_buffer = NULL; 5024 buffer_type* write_buffer; 5025 uint32_t now = 0; 5026 5027 if ((event & EV_TIMEOUT)) { 5028 /* Connection timed out. */ 5029 cleanup_tcp_handler(data); 5030 return; 5031 } 5032 5033 assert((event & EV_WRITE)); 5034 5035 if(data->shake_state != tls_hs_none) { 5036 if(!tls_handshake(data, fd, 1)) 5037 return; 5038 if(data->shake_state != tls_hs_none) 5039 return; 5040 } 5041 5042 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE); 5043 5044 /* If we are writing the start of a message, we must include the length 5045 * this is done with a copy into write_buffer. */ 5046 write_buffer = NULL; 5047 if (data->bytes_transmitted == 0) { 5048 if(!global_tls_temp_buffer) { 5049 /* gets deallocated when nsd shuts down from 5050 * nsd.region */ 5051 global_tls_temp_buffer = buffer_create(nsd.region, 5052 QIOBUFSZ + sizeof(q->tcplen)); 5053 if (!global_tls_temp_buffer) { 5054 return; 5055 } 5056 } 5057 write_buffer = global_tls_temp_buffer; 5058 buffer_clear(write_buffer); 5059 buffer_write_u16(write_buffer, q->tcplen); 5060 buffer_write(write_buffer, buffer_current(q->packet), 5061 (int)buffer_remaining(q->packet)); 5062 buffer_flip(write_buffer); 5063 } else { 5064 write_buffer = q->packet; 5065 } 5066 5067 /* Write the response */ 5068 ERR_clear_error(); 5069 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer)); 5070 if(sent <= 0) { 5071 int want = SSL_get_error(data->tls, sent); 5072 if(want == SSL_ERROR_ZERO_RETURN) { 5073 cleanup_tcp_handler(data); 5074 /* closed */ 5075 } else if(want == SSL_ERROR_WANT_READ) { 5076 /* switch back to reading */ 5077 data->shake_state = tls_hs_read_event; 5078 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 5079 } else if(want != SSL_ERROR_WANT_WRITE) { 5080 cleanup_tcp_handler(data); 5081 log_crypto_err("could not SSL_write"); 5082 } 5083 return; 5084 } 5085 5086 buffer_skip(write_buffer, sent); 5087 if(buffer_remaining(write_buffer) != 0) { 5088 /* If not all sent, sync up the real buffer if it wasn't used.*/ 5089 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) { 5090 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen)); 5091 } 5092 } 5093 5094 data->bytes_transmitted += sent; 5095 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 5096 /* 5097 * Still more data to write when socket becomes 5098 * writable again. 5099 */ 5100 return; 5101 } 5102 5103 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 5104 5105 if (data->query_state == QUERY_IN_AXFR || 5106 data->query_state == QUERY_IN_IXFR) { 5107 /* Continue processing AXFR and writing back results. */ 5108 buffer_clear(q->packet); 5109 if(data->query_state == QUERY_IN_AXFR) 5110 data->query_state = query_axfr(data->nsd, q, 0); 5111 else data->query_state = query_ixfr(data->nsd, q); 5112 if (data->query_state != QUERY_PROCESSED) { 5113 query_add_optional(data->query, data->nsd, &now); 5114 5115 /* Reset data. */ 5116 buffer_flip(q->packet); 5117 q->tcplen = buffer_remaining(q->packet); 5118 data->bytes_transmitted = 0; 5119 /* Reset to writing mode. */ 5120 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 5121 5122 /* 5123 * Write data if/when the socket is writable 5124 * again. 5125 */ 5126 return; 5127 } 5128 } 5129 5130 /* 5131 * Done sending, wait for the next request to arrive on the 5132 * TCP socket by installing the TCP read handler. 5133 */ 5134 if ((data->nsd->tcp_query_count > 0 && 5135 data->query_count >= data->nsd->tcp_query_count) || 5136 data->tcp_no_more_queries) { 5137 5138 (void) shutdown(fd, SHUT_WR); 5139 } 5140 5141 data->bytes_transmitted = 0; 5142 data->query_needs_reset = 1; 5143 5144 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 5145 } 5146 #endif 5147 5148 static void 5149 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 5150 void* ATTR_UNUSED(arg)) 5151 { 5152 if(slowaccept) { 5153 configure_handler_event_types(EV_PERSIST | EV_READ); 5154 slowaccept = 0; 5155 } 5156 } 5157 5158 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 5159 { 5160 #ifndef HAVE_ACCEPT4 5161 int s = accept(fd, addr, addrlen); 5162 if (s != -1) { 5163 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 5164 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 5165 close(s); 5166 s = -1; 5167 errno=EINTR; /* stop error printout as error in accept4 5168 by setting this errno, it omits printout, in 5169 later code that calls nsd_accept4 */ 5170 } 5171 } 5172 return s; 5173 #else 5174 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 5175 #endif /* HAVE_ACCEPT4 */ 5176 } 5177 5178 /* 5179 * Handle an incoming TCP connection. The connection is accepted and 5180 * a new TCP reader event handler is added. The TCP handler 5181 * is responsible for cleanup when the connection is closed. 5182 */ 5183 static void 5184 handle_tcp_accept(int fd, short event, void* arg) 5185 { 5186 struct tcp_accept_handler_data *data 5187 = (struct tcp_accept_handler_data *) arg; 5188 int s; 5189 int reject = 0; 5190 struct tcp_handler_data *tcp_data; 5191 region_type *tcp_region; 5192 #ifdef INET6 5193 struct sockaddr_storage addr; 5194 #else 5195 struct sockaddr_in addr; 5196 #endif 5197 socklen_t addrlen; 5198 struct timeval timeout; 5199 5200 if (!(event & EV_READ)) { 5201 return; 5202 } 5203 5204 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 5205 reject = data->nsd->options->tcp_reject_overflow; 5206 if (!reject) { 5207 return; 5208 } 5209 } 5210 5211 /* Accept it... */ 5212 addrlen = sizeof(addr); 5213 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 5214 if (s == -1) { 5215 /** 5216 * EMFILE and ENFILE is a signal that the limit of open 5217 * file descriptors has been reached. Pause accept(). 5218 * EINTR is a signal interrupt. The others are various OS ways 5219 * of saying that the client has closed the connection. 5220 */ 5221 if (errno == EMFILE || errno == ENFILE) { 5222 if (!slowaccept) { 5223 /* disable accept events */ 5224 struct timeval tv; 5225 configure_handler_event_types(0); 5226 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 5227 tv.tv_usec = 0L; 5228 memset(&slowaccept_event, 0, 5229 sizeof(slowaccept_event)); 5230 event_set(&slowaccept_event, -1, EV_TIMEOUT, 5231 handle_slowaccept_timeout, NULL); 5232 (void)event_base_set(data->event.ev_base, 5233 &slowaccept_event); 5234 (void)event_add(&slowaccept_event, &tv); 5235 slowaccept = 1; 5236 /* We don't want to spam the logs here */ 5237 } 5238 } else if (errno != EINTR 5239 && errno != EWOULDBLOCK 5240 #ifdef ECONNABORTED 5241 && errno != ECONNABORTED 5242 #endif /* ECONNABORTED */ 5243 #ifdef EPROTO 5244 && errno != EPROTO 5245 #endif /* EPROTO */ 5246 ) { 5247 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 5248 } 5249 return; 5250 } 5251 5252 if (reject) { 5253 shutdown(s, SHUT_RDWR); 5254 close(s); 5255 return; 5256 } 5257 5258 /* 5259 * This region is deallocated when the TCP connection is 5260 * closed by the TCP handler. 5261 */ 5262 tcp_region = region_create(xalloc, free); 5263 tcp_data = (struct tcp_handler_data *) region_alloc( 5264 tcp_region, sizeof(struct tcp_handler_data)); 5265 tcp_data->region = tcp_region; 5266 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 5267 compression_table_size, compressed_dnames); 5268 tcp_data->nsd = data->nsd; 5269 tcp_data->query_count = 0; 5270 #ifdef HAVE_SSL 5271 tcp_data->shake_state = tls_hs_none; 5272 tcp_data->tls = NULL; 5273 #endif 5274 tcp_data->query_needs_reset = 1; 5275 tcp_data->pp2_enabled = data->pp2_enabled; 5276 tcp_data->pp2_header_state = pp2_header_none; 5277 tcp_data->prev = NULL; 5278 tcp_data->next = NULL; 5279 5280 tcp_data->query_state = QUERY_PROCESSED; 5281 tcp_data->bytes_transmitted = 0; 5282 memcpy(&tcp_data->query->remote_addr, &addr, addrlen); 5283 tcp_data->query->remote_addrlen = addrlen; 5284 /* Copy remote_address to client_address. 5285 * Simplest way/time for streams to do that. */ 5286 memcpy(&tcp_data->query->client_addr, &addr, addrlen); 5287 tcp_data->query->client_addrlen = addrlen; 5288 tcp_data->query->is_proxied = 0; 5289 5290 tcp_data->tcp_no_more_queries = 0; 5291 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 5292 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 5293 /* very busy, give smaller timeout */ 5294 tcp_data->tcp_timeout = 200; 5295 } 5296 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 5297 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 5298 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 5299 5300 #ifdef USE_DNSTAP 5301 /* save the address of the connection */ 5302 tcp_data->socket = data->socket; 5303 #endif /* USE_DNSTAP */ 5304 5305 #ifdef HAVE_SSL 5306 if (data->tls_accept) { 5307 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 5308 if(!tcp_data->tls) { 5309 close(s); 5310 return; 5311 } 5312 tcp_data->shake_state = tls_hs_read; 5313 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 5314 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 5315 handle_tls_reading, tcp_data); 5316 } else { 5317 #endif 5318 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 5319 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 5320 handle_tcp_reading, tcp_data); 5321 #ifdef HAVE_SSL 5322 } 5323 #endif 5324 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 5325 log_msg(LOG_ERR, "cannot set tcp event base"); 5326 close(s); 5327 region_destroy(tcp_region); 5328 return; 5329 } 5330 if(event_add(&tcp_data->event, &timeout) != 0) { 5331 log_msg(LOG_ERR, "cannot add tcp to event base"); 5332 close(s); 5333 region_destroy(tcp_region); 5334 return; 5335 } 5336 if(tcp_active_list) { 5337 tcp_active_list->prev = tcp_data; 5338 tcp_data->next = tcp_active_list; 5339 } 5340 tcp_active_list = tcp_data; 5341 5342 /* 5343 * Keep track of the total number of TCP handlers installed so 5344 * we can stop accepting connections when the maximum number 5345 * of simultaneous TCP connections is reached. 5346 * 5347 * If tcp-reject-overflow is enabled, however, then we do not 5348 * change the handler event type; we keep it as-is and accept 5349 * overflow TCP connections only so that we can forcibly kill 5350 * them off. 5351 */ 5352 ++data->nsd->current_tcp_count; 5353 if (!data->nsd->options->tcp_reject_overflow && 5354 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 5355 { 5356 configure_handler_event_types(0); 5357 } 5358 } 5359 5360 static void 5361 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 5362 { 5363 size_t i; 5364 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 5365 for (i = 0; i < nsd->child_count; ++i) { 5366 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 5367 if (write(nsd->children[i].child_fd, 5368 &command, 5369 sizeof(command)) == -1) 5370 { 5371 if(errno != EAGAIN && errno != EINTR) 5372 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 5373 (int) command, 5374 (int) nsd->children[i].pid, 5375 strerror(errno)); 5376 } else if (timeout > 0) { 5377 (void)block_read(NULL, 5378 nsd->children[i].child_fd, 5379 &command, sizeof(command), timeout); 5380 } 5381 fsync(nsd->children[i].child_fd); 5382 close(nsd->children[i].child_fd); 5383 nsd->children[i].child_fd = -1; 5384 } 5385 } 5386 } 5387 5388 static void 5389 send_children_quit(struct nsd* nsd) 5390 { 5391 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 5392 send_children_command(nsd, NSD_QUIT, 0); 5393 } 5394 5395 static void 5396 send_children_quit_and_wait(struct nsd* nsd) 5397 { 5398 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 5399 send_children_command(nsd, NSD_QUIT_CHILD, 3); 5400 } 5401 5402 #ifdef BIND8_STATS 5403 static void 5404 set_children_stats(struct nsd* nsd) 5405 { 5406 size_t i; 5407 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 5408 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 5409 for (i = 0; i < nsd->child_count; ++i) { 5410 nsd->children[i].need_to_send_STATS = 1; 5411 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 5412 } 5413 } 5414 #endif /* BIND8_STATS */ 5415 5416 static void 5417 configure_handler_event_types(short event_types) 5418 { 5419 size_t i; 5420 5421 for (i = 0; i < tcp_accept_handler_count; ++i) { 5422 struct event* handler = &tcp_accept_handlers[i].event; 5423 if(event_types) { 5424 /* reassign */ 5425 int fd = handler->ev_fd; 5426 struct event_base* base = handler->ev_base; 5427 if(tcp_accept_handlers[i].event_added) 5428 event_del(handler); 5429 memset(handler, 0, sizeof(*handler)); 5430 event_set(handler, fd, event_types, 5431 handle_tcp_accept, &tcp_accept_handlers[i]); 5432 if(event_base_set(base, handler) != 0) 5433 log_msg(LOG_ERR, "conhand: cannot event_base"); 5434 if(event_add(handler, NULL) != 0) 5435 log_msg(LOG_ERR, "conhand: cannot event_add"); 5436 tcp_accept_handlers[i].event_added = 1; 5437 } else { 5438 /* remove */ 5439 if(tcp_accept_handlers[i].event_added) { 5440 event_del(handler); 5441 tcp_accept_handlers[i].event_added = 0; 5442 } 5443 } 5444 } 5445 } 5446