1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2011, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include <config.h> 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <sys/socket.h> 15 #include <sys/wait.h> 16 17 #include <netinet/in.h> 18 #include <arpa/inet.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <stddef.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 #include <unistd.h> 30 #include <fcntl.h> 31 #include <netdb.h> 32 #ifndef SHUT_WR 33 #define SHUT_WR 1 34 #endif 35 36 #include "axfr.h" 37 #include "namedb.h" 38 #include "netio.h" 39 #include "xfrd.h" 40 #include "xfrd-tcp.h" 41 #include "difffile.h" 42 #include "nsec3.h" 43 #include "ipc.h" 44 45 /* 46 * Data for the UDP handlers. 47 */ 48 struct udp_handler_data 49 { 50 struct nsd *nsd; 51 struct nsd_socket *socket; 52 query_type *query; 53 }; 54 55 /* 56 * Data for the TCP accept handlers. Most data is simply passed along 57 * to the TCP connection handler. 58 */ 59 struct tcp_accept_handler_data { 60 struct nsd *nsd; 61 struct nsd_socket *socket; 62 size_t tcp_accept_handler_count; 63 netio_handler_type *tcp_accept_handlers; 64 }; 65 66 /* 67 * Data for the TCP connection handlers. 68 * 69 * The TCP handlers use non-blocking I/O. This is necessary to avoid 70 * blocking the entire server on a slow TCP connection, but does make 71 * reading from and writing to the socket more complicated. 72 * 73 * Basically, whenever a read/write would block (indicated by the 74 * EAGAIN errno variable) we remember the position we were reading 75 * from/writing to and return from the TCP reading/writing event 76 * handler. When the socket becomes readable/writable again we 77 * continue from the same position. 78 */ 79 struct tcp_handler_data 80 { 81 /* 82 * The region used to allocate all TCP connection related 83 * data, including this structure. This region is destroyed 84 * when the connection is closed. 85 */ 86 region_type* region; 87 88 /* 89 * The global nsd structure. 90 */ 91 struct nsd* nsd; 92 93 /* 94 * The current query data for this TCP connection. 95 */ 96 query_type* query; 97 98 /* 99 * These fields are used to enable the TCP accept handlers 100 * when the number of TCP connection drops below the maximum 101 * number of TCP connections. 102 */ 103 size_t tcp_accept_handler_count; 104 netio_handler_type* tcp_accept_handlers; 105 106 /* 107 * The query_state is used to remember if we are performing an 108 * AXFR, if we're done processing, or if we should discard the 109 * query and connection. 110 */ 111 query_state_type query_state; 112 113 /* 114 * The bytes_transmitted field is used to remember the number 115 * of bytes transmitted when receiving or sending a DNS 116 * packet. The count includes the two additional bytes used 117 * to specify the packet length on a TCP connection. 118 */ 119 size_t bytes_transmitted; 120 121 /* 122 * The number of queries handled by this specific TCP connection. 123 */ 124 int query_count; 125 }; 126 127 /* 128 * Handle incoming queries on the UDP server sockets. 129 */ 130 static void handle_udp(netio_type *netio, 131 netio_handler_type *handler, 132 netio_event_types_type event_types); 133 134 /* 135 * Handle incoming connections on the TCP sockets. These handlers 136 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 137 * connection) but are disabled when the number of current TCP 138 * connections is equal to the maximum number of TCP connections. 139 * Disabling is done by changing the handler to wait for the 140 * NETIO_EVENT_NONE type. This is done using the function 141 * configure_tcp_accept_handlers. 142 */ 143 static void handle_tcp_accept(netio_type *netio, 144 netio_handler_type *handler, 145 netio_event_types_type event_types); 146 147 /* 148 * Handle incoming queries on a TCP connection. The TCP connections 149 * are configured to be non-blocking and the handler may be called 150 * multiple times before a complete query is received. 151 */ 152 static void handle_tcp_reading(netio_type *netio, 153 netio_handler_type *handler, 154 netio_event_types_type event_types); 155 156 /* 157 * Handle outgoing responses on a TCP connection. The TCP connections 158 * are configured to be non-blocking and the handler may be called 159 * multiple times before a complete response is sent. 160 */ 161 static void handle_tcp_writing(netio_type *netio, 162 netio_handler_type *handler, 163 netio_event_types_type event_types); 164 165 /* 166 * Send all children the quit nonblocking, then close pipe. 167 */ 168 static void send_children_quit(struct nsd* nsd); 169 170 /* set childrens flags to send NSD_STATS to them */ 171 #ifdef BIND8_STATS 172 static void set_children_stats(struct nsd* nsd); 173 #endif /* BIND8_STATS */ 174 175 /* 176 * Change the event types the HANDLERS are interested in to 177 * EVENT_TYPES. 178 */ 179 static void configure_handler_event_types(size_t count, 180 netio_handler_type *handlers, 181 netio_event_types_type event_types); 182 183 /* 184 * start xfrdaemon (again). 185 */ 186 static pid_t 187 server_start_xfrd(struct nsd *nsd, netio_handler_type* handler); 188 189 static uint16_t *compressed_dname_offsets = 0; 190 static uint32_t compression_table_capacity = 0; 191 static uint32_t compression_table_size = 0; 192 193 /* 194 * Remove the specified pid from the list of child pids. Returns -1 if 195 * the pid is not in the list, child_num otherwise. The field is set to 0. 196 */ 197 static int 198 delete_child_pid(struct nsd *nsd, pid_t pid) 199 { 200 size_t i; 201 for (i = 0; i < nsd->child_count; ++i) { 202 if (nsd->children[i].pid == pid) { 203 nsd->children[i].pid = 0; 204 if(!nsd->children[i].need_to_exit) { 205 if(nsd->children[i].child_fd > 0) 206 close(nsd->children[i].child_fd); 207 nsd->children[i].child_fd = -1; 208 if(nsd->children[i].handler) 209 nsd->children[i].handler->fd = -1; 210 } 211 return i; 212 } 213 } 214 return -1; 215 } 216 217 /* 218 * Restart child servers if necessary. 219 */ 220 static int 221 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 222 int* xfrd_sock_p) 223 { 224 struct main_ipc_handler_data *ipc_data; 225 size_t i; 226 int sv[2]; 227 228 /* Fork the child processes... */ 229 for (i = 0; i < nsd->child_count; ++i) { 230 if (nsd->children[i].pid <= 0) { 231 if (nsd->children[i].child_fd > 0) 232 close(nsd->children[i].child_fd); 233 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 234 log_msg(LOG_ERR, "socketpair: %s", 235 strerror(errno)); 236 return -1; 237 } 238 nsd->children[i].child_fd = sv[0]; 239 nsd->children[i].parent_fd = sv[1]; 240 nsd->children[i].pid = fork(); 241 switch (nsd->children[i].pid) { 242 default: /* SERVER MAIN */ 243 close(nsd->children[i].parent_fd); 244 nsd->children[i].parent_fd = -1; 245 if(!nsd->children[i].handler) 246 { 247 ipc_data = (struct main_ipc_handler_data*) region_alloc( 248 region, sizeof(struct main_ipc_handler_data)); 249 ipc_data->nsd = nsd; 250 ipc_data->child = &nsd->children[i]; 251 ipc_data->child_num = i; 252 ipc_data->xfrd_sock = xfrd_sock_p; 253 ipc_data->packet = buffer_create(region, QIOBUFSZ); 254 ipc_data->forward_mode = 0; 255 ipc_data->got_bytes = 0; 256 ipc_data->total_bytes = 0; 257 ipc_data->acl_num = 0; 258 ipc_data->busy_writing_zone_state = 0; 259 ipc_data->write_conn = xfrd_tcp_create(region); 260 nsd->children[i].handler = (struct netio_handler*) region_alloc( 261 region, sizeof(struct netio_handler)); 262 nsd->children[i].handler->fd = nsd->children[i].child_fd; 263 nsd->children[i].handler->timeout = NULL; 264 nsd->children[i].handler->user_data = ipc_data; 265 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 266 nsd->children[i].handler->event_handler = parent_handle_child_command; 267 netio_add_handler(netio, nsd->children[i].handler); 268 } 269 /* clear any ongoing ipc */ 270 ipc_data = (struct main_ipc_handler_data*) 271 nsd->children[i].handler->user_data; 272 ipc_data->forward_mode = 0; 273 ipc_data->busy_writing_zone_state = 0; 274 /* restart - update fd */ 275 nsd->children[i].handler->fd = nsd->children[i].child_fd; 276 break; 277 case 0: /* CHILD */ 278 nsd->pid = 0; 279 nsd->child_count = 0; 280 nsd->server_kind = nsd->children[i].kind; 281 nsd->this_child = &nsd->children[i]; 282 /* remove signal flags inherited from parent 283 the parent will handle them. */ 284 nsd->signal_hint_reload = 0; 285 nsd->signal_hint_child = 0; 286 nsd->signal_hint_quit = 0; 287 nsd->signal_hint_shutdown = 0; 288 nsd->signal_hint_stats = 0; 289 nsd->signal_hint_statsusr = 0; 290 close(nsd->this_child->child_fd); 291 nsd->this_child->child_fd = -1; 292 server_child(nsd); 293 /* NOTREACH */ 294 exit(0); 295 case -1: 296 log_msg(LOG_ERR, "fork failed: %s", 297 strerror(errno)); 298 return -1; 299 } 300 } 301 } 302 return 0; 303 } 304 305 #ifdef BIND8_STATS 306 static void set_bind8_alarm(struct nsd* nsd) 307 { 308 /* resync so that the next alarm is on the next whole minute */ 309 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 310 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 311 } 312 #endif 313 314 static void 315 cleanup_dname_compression_tables(void *ptr) 316 { 317 free(ptr); 318 compressed_dname_offsets = NULL; 319 compression_table_capacity = 0; 320 } 321 322 static void 323 initialize_dname_compression_tables(struct nsd *nsd) 324 { 325 size_t needed = domain_table_count(nsd->db->domains) + 1; 326 needed += EXTRA_DOMAIN_NUMBERS; 327 if(compression_table_capacity < needed) { 328 if(compressed_dname_offsets) { 329 region_remove_cleanup(nsd->db->region, 330 cleanup_dname_compression_tables, 331 compressed_dname_offsets); 332 free(compressed_dname_offsets); 333 } 334 compressed_dname_offsets = (uint16_t *) xalloc( 335 needed * sizeof(uint16_t)); 336 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 337 compressed_dname_offsets); 338 compression_table_capacity = needed; 339 compression_table_size=domain_table_count(nsd->db->domains)+1; 340 } 341 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 342 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 343 } 344 345 /* 346 * Initialize the server, create and bind the sockets. 347 * 348 */ 349 int 350 server_init(struct nsd *nsd) 351 { 352 size_t i; 353 #if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU))) 354 int on = 1; 355 #endif 356 357 /* UDP */ 358 359 /* Make a socket... */ 360 for (i = 0; i < nsd->ifs; i++) { 361 if (!nsd->udp[i].addr) { 362 nsd->udp[i].s = -1; 363 continue; 364 } 365 if ((nsd->udp[i].s = socket(nsd->udp[i].addr->ai_family, nsd->udp[i].addr->ai_socktype, 0)) == -1) { 366 #if defined(INET6) 367 if (nsd->udp[i].addr->ai_family == AF_INET6 && 368 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 369 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported"); 370 continue; 371 } 372 #endif /* INET6 */ 373 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 374 return -1; 375 } 376 377 #if defined(INET6) 378 if (nsd->udp[i].addr->ai_family == AF_INET6) { 379 # if defined(IPV6_V6ONLY) 380 if (setsockopt(nsd->udp[i].s, 381 IPPROTO_IPV6, IPV6_V6ONLY, 382 &on, sizeof(on)) < 0) 383 { 384 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 385 strerror(errno)); 386 return -1; 387 } 388 # endif 389 # if defined(IPV6_USE_MIN_MTU) 390 /* 391 * There is no fragmentation of IPv6 datagrams 392 * during forwarding in the network. Therefore 393 * we do not send UDP datagrams larger than 394 * the minimum IPv6 MTU of 1280 octets. The 395 * EDNS0 message length can be larger if the 396 * network stack supports IPV6_USE_MIN_MTU. 397 */ 398 if (setsockopt(nsd->udp[i].s, 399 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 400 &on, sizeof(on)) < 0) 401 { 402 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", 403 strerror(errno)); 404 return -1; 405 } 406 # elif defined(IPV6_MTU) 407 /* 408 * On Linux, PMTUD is disabled by default for datagrams 409 * so set the MTU equal to the MIN MTU to get the same. 410 */ 411 on = IPV6_MIN_MTU; 412 if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU, 413 &on, sizeof(on)) < 0) 414 { 415 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", 416 strerror(errno)); 417 return -1; 418 } 419 on = 1; 420 # endif 421 } 422 #endif 423 #if defined(AF_INET) 424 if (nsd->udp[i].addr->ai_family == AF_INET) { 425 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 426 int action = IP_PMTUDISC_DONT; 427 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, 428 IP_MTU_DISCOVER, &action, sizeof(action)) < 0) 429 { 430 log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 431 strerror(errno)); 432 return -1; 433 } 434 # elif defined(IP_DONTFRAG) 435 int off = 0; 436 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG, 437 &off, sizeof(off)) < 0) 438 { 439 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 440 strerror(errno)); 441 return -1; 442 } 443 # endif 444 } 445 #endif 446 /* set it nonblocking */ 447 /* otherwise, on OSes with thundering herd problems, the 448 UDP recv could block NSD after select returns readable. */ 449 if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { 450 log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno)); 451 } 452 453 /* Bind it... */ 454 if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) { 455 log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno)); 456 return -1; 457 } 458 } 459 460 /* TCP */ 461 462 /* Make a socket... */ 463 for (i = 0; i < nsd->ifs; i++) { 464 if (!nsd->tcp[i].addr) { 465 nsd->tcp[i].s = -1; 466 continue; 467 } 468 if ((nsd->tcp[i].s = socket(nsd->tcp[i].addr->ai_family, nsd->tcp[i].addr->ai_socktype, 0)) == -1) { 469 #if defined(INET6) 470 if (nsd->tcp[i].addr->ai_family == AF_INET6 && 471 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 472 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported"); 473 continue; 474 } 475 #endif /* INET6 */ 476 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 477 return -1; 478 } 479 480 #ifdef SO_REUSEADDR 481 if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) { 482 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno)); 483 } 484 #endif /* SO_REUSEADDR */ 485 486 #if defined(INET6) && defined(IPV6_V6ONLY) 487 if (nsd->tcp[i].addr->ai_family == AF_INET6 && 488 setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) 489 { 490 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno)); 491 return -1; 492 } 493 #endif 494 /* set it nonblocking */ 495 /* (StevensUNP p463), if tcp listening socket is blocking, then 496 it may block in accept, even if select() says readable. */ 497 if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) { 498 log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno)); 499 } 500 501 /* Bind it... */ 502 if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) { 503 log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno)); 504 return -1; 505 } 506 507 /* Listen to it... */ 508 if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) { 509 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 510 return -1; 511 } 512 } 513 514 return 0; 515 } 516 517 /* 518 * Prepare the server for take off. 519 * 520 */ 521 int 522 server_prepare(struct nsd *nsd) 523 { 524 /* Open the database... */ 525 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, nsd->child_count)) == NULL) { 526 log_msg(LOG_ERR, "unable to open the database %s: %s", 527 nsd->dbfile, strerror(errno)); 528 return -1; 529 } 530 531 /* Read diff file */ 532 if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { 533 log_msg(LOG_ERR, "The diff file contains errors. Will continue " 534 "without it"); 535 } 536 537 #ifdef NSEC3 538 prehash(nsd->db, 0); 539 #endif 540 541 compression_table_capacity = 0; 542 initialize_dname_compression_tables(nsd); 543 544 #ifdef BIND8_STATS 545 /* Initialize times... */ 546 time(&nsd->st.boot); 547 set_bind8_alarm(nsd); 548 #endif /* BIND8_STATS */ 549 550 return 0; 551 } 552 553 /* 554 * Fork the required number of servers. 555 */ 556 static int 557 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 558 int* xfrd_sock_p) 559 { 560 size_t i; 561 562 /* Start all child servers initially. */ 563 for (i = 0; i < nsd->child_count; ++i) { 564 nsd->children[i].pid = 0; 565 } 566 567 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 568 } 569 570 static void 571 close_all_sockets(struct nsd_socket sockets[], size_t n) 572 { 573 size_t i; 574 575 /* Close all the sockets... */ 576 for (i = 0; i < n; ++i) { 577 if (sockets[i].s != -1) { 578 close(sockets[i].s); 579 freeaddrinfo(sockets[i].addr); 580 sockets[i].s = -1; 581 } 582 } 583 } 584 585 /* 586 * Close the sockets, shutdown the server and exit. 587 * Does not return. 588 * 589 */ 590 static void 591 server_shutdown(struct nsd *nsd) 592 { 593 size_t i; 594 595 close_all_sockets(nsd->udp, nsd->ifs); 596 close_all_sockets(nsd->tcp, nsd->ifs); 597 /* CHILD: close command channel to parent */ 598 if(nsd->this_child && nsd->this_child->parent_fd > 0) 599 { 600 close(nsd->this_child->parent_fd); 601 nsd->this_child->parent_fd = -1; 602 } 603 /* SERVER: close command channels to children */ 604 if(!nsd->this_child) 605 { 606 for(i=0; i < nsd->child_count; ++i) 607 if(nsd->children[i].child_fd > 0) 608 { 609 close(nsd->children[i].child_fd); 610 nsd->children[i].child_fd = -1; 611 } 612 } 613 614 log_finalize(); 615 tsig_finalize(); 616 617 nsd_options_destroy(nsd->options); 618 region_destroy(nsd->region); 619 620 exit(0); 621 } 622 623 static pid_t 624 server_start_xfrd(struct nsd *nsd, netio_handler_type* handler) 625 { 626 pid_t pid; 627 int sockets[2] = {0,0}; 628 zone_type* zone; 629 struct ipc_handler_conn_data *data; 630 /* no need to send updates for zones, because xfrd will read from fork-memory */ 631 for(zone = nsd->db->zones; zone; zone=zone->next) { 632 zone->updated = 0; 633 } 634 635 if(handler->fd != -1) 636 close(handler->fd); 637 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 638 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 639 return -1; 640 } 641 pid = fork(); 642 switch (pid) { 643 case -1: 644 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 645 break; 646 case 0: 647 /* CHILD: close first socket, use second one */ 648 close(sockets[0]); 649 xfrd_init(sockets[1], nsd); 650 /* ENOTREACH */ 651 break; 652 default: 653 /* PARENT: close second socket, use first one */ 654 close(sockets[1]); 655 handler->fd = sockets[0]; 656 break; 657 } 658 /* PARENT only */ 659 handler->timeout = NULL; 660 handler->event_types = NETIO_EVENT_READ; 661 handler->event_handler = parent_handle_xfrd_command; 662 /* clear ongoing ipc reads */ 663 data = (struct ipc_handler_conn_data *) handler->user_data; 664 data->conn->is_reading = 0; 665 return pid; 666 } 667 668 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 669 static ssize_t 670 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 671 { 672 uint8_t* buf = (uint8_t*) p; 673 ssize_t total = 0; 674 fd_set rfds; 675 struct timeval tv; 676 FD_ZERO(&rfds); 677 678 while( total < sz) { 679 ssize_t ret; 680 FD_SET(s, &rfds); 681 tv.tv_sec = timeout; 682 tv.tv_usec = 0; 683 ret = select(s+1, &rfds, NULL, NULL, timeout==-1?NULL:&tv); 684 if(ret == -1) { 685 if(errno == EAGAIN) 686 /* blocking read */ 687 continue; 688 if(errno == EINTR) { 689 if(nsd->signal_hint_quit || nsd->signal_hint_shutdown) 690 return -1; 691 /* other signals can be handled later */ 692 continue; 693 } 694 /* some error */ 695 return -1; 696 } 697 if(ret == 0) { 698 /* operation timed out */ 699 return -2; 700 } 701 ret = read(s, buf+total, sz-total); 702 if(ret == -1) { 703 if(errno == EAGAIN) 704 /* blocking read */ 705 continue; 706 if(errno == EINTR) { 707 if(nsd->signal_hint_quit || nsd->signal_hint_shutdown) 708 return -1; 709 /* other signals can be handled later */ 710 continue; 711 } 712 /* some error */ 713 return -1; 714 } 715 if(ret == 0) { 716 /* closed connection! */ 717 return 0; 718 } 719 total += ret; 720 } 721 return total; 722 } 723 724 /* 725 * Reload the database, stop parent, re-fork children and continue. 726 * as server_main. 727 */ 728 static void 729 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 730 int cmdsocket, int* xfrd_sock_p) 731 { 732 pid_t old_pid; 733 sig_atomic_t cmd = NSD_QUIT_SYNC; 734 zone_type* zone; 735 int xfrd_sock = *xfrd_sock_p; 736 int ret; 737 738 if(db_crc_different(nsd->db) == 0) { 739 DEBUG(DEBUG_XFRD,1, (LOG_INFO, 740 "CRC the same. skipping %s.", nsd->db->filename)); 741 } else { 742 DEBUG(DEBUG_XFRD,1, (LOG_INFO, 743 "CRC different. reread of %s.", nsd->db->filename)); 744 namedb_close(nsd->db); 745 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, 746 nsd->child_count)) == NULL) { 747 log_msg(LOG_ERR, "unable to reload the database: %s", strerror(errno)); 748 exit(1); 749 } 750 } 751 if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { 752 log_msg(LOG_ERR, "unable to load the diff file: %s", nsd->options->difffile); 753 exit(1); 754 } 755 log_msg(LOG_INFO, "memory recyclebin holds %lu bytes", (unsigned long) 756 region_get_recycle_size(nsd->db->region)); 757 #ifndef NDEBUG 758 if(nsd_debug_level >= 1) 759 region_log_stats(nsd->db->region); 760 #endif /* NDEBUG */ 761 #ifdef NSEC3 762 prehash(nsd->db, 1); 763 #endif /* NSEC3 */ 764 765 initialize_dname_compression_tables(nsd); 766 767 /* Get our new process id */ 768 old_pid = nsd->pid; 769 nsd->pid = getpid(); 770 771 #ifdef BIND8_STATS 772 /* Restart dumping stats if required. */ 773 time(&nsd->st.boot); 774 set_bind8_alarm(nsd); 775 #endif 776 777 /* Start new child processes */ 778 if (server_start_children(nsd, server_region, netio, xfrd_sock_p) != 0) { 779 send_children_quit(nsd); 780 exit(1); 781 } 782 783 /* if the parent has quit, we must quit too, poll the fd for cmds */ 784 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 785 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", cmd)); 786 if(cmd == NSD_QUIT) { 787 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 788 send_children_quit(nsd); 789 exit(0); 790 } 791 } 792 793 /* Overwrite pid before closing old parent, to avoid race condition: 794 * - parent process already closed 795 * - pidfile still contains old_pid 796 * - control script contacts parent process, using contents of pidfile 797 */ 798 if (writepid(nsd) == -1) { 799 log_msg(LOG_ERR, "cannot overwrite the pidfile %s: %s", nsd->pidfile, strerror(errno)); 800 } 801 802 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 803 /* Send quit command to parent: blocking, wait for receipt. */ 804 do { 805 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 806 if (write_socket(cmdsocket, &cmd, sizeof(cmd)) == -1) 807 { 808 log_msg(LOG_ERR, "problems sending command from reload %d to oldnsd %d: %s", 809 (int)nsd->pid, (int)old_pid, strerror(errno)); 810 } 811 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 812 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 813 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 814 RELOAD_SYNC_TIMEOUT); 815 if(ret == -2) { 816 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 817 } 818 } while (ret == -2); 819 if(ret == -1) { 820 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 821 strerror(errno)); 822 } 823 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, cmd)); 824 if(cmd == NSD_QUIT) { 825 /* small race condition possible here, parent got quit cmd. */ 826 send_children_quit(nsd); 827 unlinkpid(nsd->pidfile); 828 exit(1); 829 } 830 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 831 832 /* inform xfrd of new SOAs */ 833 cmd = NSD_SOA_BEGIN; 834 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 835 log_msg(LOG_ERR, "problems sending soa begin from reload %d to xfrd: %s", 836 (int)nsd->pid, strerror(errno)); 837 } 838 for(zone= nsd->db->zones; zone; zone = zone->next) { 839 uint16_t sz; 840 const dname_type *dname_ns=0, *dname_em=0; 841 if(zone->updated == 0) 842 continue; 843 DEBUG(DEBUG_IPC,1, (LOG_INFO, "nsd: sending soa info for zone %s", 844 dname_to_string(domain_dname(zone->apex),0))); 845 cmd = NSD_SOA_INFO; 846 sz = dname_total_size(domain_dname(zone->apex)); 847 if(zone->soa_rrset) { 848 dname_ns = domain_dname( 849 rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[0])); 850 dname_em = domain_dname( 851 rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[1])); 852 sz += sizeof(uint32_t)*6 + sizeof(uint8_t)*2 853 + dname_ns->name_size + dname_em->name_size; 854 } 855 sz = htons(sz); 856 /* use blocking writes */ 857 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd)) || 858 !write_socket(xfrd_sock, &sz, sizeof(sz)) || 859 !write_socket(xfrd_sock, domain_dname(zone->apex), 860 dname_total_size(domain_dname(zone->apex)))) 861 { 862 log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", 863 (int)nsd->pid, strerror(errno)); 864 } 865 if(zone->soa_rrset) { 866 uint32_t ttl = htonl(zone->soa_rrset->rrs[0].ttl); 867 assert(dname_ns && dname_em); 868 assert(zone->soa_rrset->rr_count > 0); 869 assert(rrset_rrtype(zone->soa_rrset) == TYPE_SOA); 870 assert(zone->soa_rrset->rrs[0].rdata_count == 7); 871 if(!write_socket(xfrd_sock, &ttl, sizeof(uint32_t)) 872 || !write_socket(xfrd_sock, &dname_ns->name_size, sizeof(uint8_t)) 873 || !write_socket(xfrd_sock, dname_name(dname_ns), dname_ns->name_size) 874 || !write_socket(xfrd_sock, &dname_em->name_size, sizeof(uint8_t)) 875 || !write_socket(xfrd_sock, dname_name(dname_em), dname_em->name_size) 876 || !write_socket(xfrd_sock, rdata_atom_data( 877 zone->soa_rrset->rrs[0].rdatas[2]), sizeof(uint32_t)) 878 || !write_socket(xfrd_sock, rdata_atom_data( 879 zone->soa_rrset->rrs[0].rdatas[3]), sizeof(uint32_t)) 880 || !write_socket(xfrd_sock, rdata_atom_data( 881 zone->soa_rrset->rrs[0].rdatas[4]), sizeof(uint32_t)) 882 || !write_socket(xfrd_sock, rdata_atom_data( 883 zone->soa_rrset->rrs[0].rdatas[5]), sizeof(uint32_t)) 884 || !write_socket(xfrd_sock, rdata_atom_data( 885 zone->soa_rrset->rrs[0].rdatas[6]), sizeof(uint32_t))) 886 { 887 log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", 888 (int)nsd->pid, strerror(errno)); 889 } 890 } 891 zone->updated = 0; 892 } 893 cmd = NSD_SOA_END; 894 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 895 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 896 (int)nsd->pid, strerror(errno)); 897 } 898 899 /* try to reopen file */ 900 if (nsd->file_rotation_ok) 901 log_reopen(nsd->log_filename, 1); 902 /* exit reload, continue as new server_main */ 903 } 904 905 /* 906 * Get the mode depending on the signal hints that have been received. 907 * Multiple signal hints can be received and will be handled in turn. 908 */ 909 static sig_atomic_t 910 server_signal_mode(struct nsd *nsd) 911 { 912 if(nsd->signal_hint_quit) { 913 nsd->signal_hint_quit = 0; 914 return NSD_QUIT; 915 } 916 else if(nsd->signal_hint_shutdown) { 917 nsd->signal_hint_shutdown = 0; 918 return NSD_SHUTDOWN; 919 } 920 else if(nsd->signal_hint_child) { 921 nsd->signal_hint_child = 0; 922 return NSD_REAP_CHILDREN; 923 } 924 else if(nsd->signal_hint_reload) { 925 nsd->signal_hint_reload = 0; 926 return NSD_RELOAD; 927 } 928 else if(nsd->signal_hint_stats) { 929 nsd->signal_hint_stats = 0; 930 #ifdef BIND8_STATS 931 set_bind8_alarm(nsd); 932 #endif 933 return NSD_STATS; 934 } 935 else if(nsd->signal_hint_statsusr) { 936 nsd->signal_hint_statsusr = 0; 937 return NSD_STATS; 938 } 939 return NSD_RUN; 940 } 941 942 /* 943 * The main server simply waits for signals and child processes to 944 * terminate. Child processes are restarted as necessary. 945 */ 946 void 947 server_main(struct nsd *nsd) 948 { 949 region_type *server_region = region_create(xalloc, free); 950 netio_type *netio = netio_create(server_region); 951 netio_handler_type reload_listener; 952 netio_handler_type xfrd_listener; 953 int reload_sockets[2] = {-1, -1}; 954 struct timespec timeout_spec; 955 int fd; 956 int status; 957 pid_t child_pid; 958 pid_t reload_pid = -1; 959 pid_t xfrd_pid = -1; 960 sig_atomic_t mode; 961 962 /* Ensure we are the main process */ 963 assert(nsd->server_kind == NSD_SERVER_MAIN); 964 965 xfrd_listener.user_data = (struct ipc_handler_conn_data*)region_alloc( 966 server_region, sizeof(struct ipc_handler_conn_data)); 967 xfrd_listener.fd = -1; 968 ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->nsd = nsd; 969 ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->conn = 970 xfrd_tcp_create(server_region); 971 972 /* Start the XFRD process */ 973 xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); 974 netio_add_handler(netio, &xfrd_listener); 975 976 /* Start the child processes that handle incoming queries */ 977 if (server_start_children(nsd, server_region, netio, &xfrd_listener.fd) != 0) { 978 send_children_quit(nsd); 979 exit(1); 980 } 981 reload_listener.fd = -1; 982 983 /* This_child MUST be 0, because this is the parent process */ 984 assert(nsd->this_child == 0); 985 986 /* Run the server until we get a shutdown signal */ 987 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 988 /* Did we receive a signal that changes our mode? */ 989 if(mode == NSD_RUN) { 990 nsd->mode = mode = server_signal_mode(nsd); 991 } 992 993 switch (mode) { 994 case NSD_RUN: 995 /* see if any child processes terminated */ 996 while((child_pid = waitpid(0, &status, WNOHANG)) != -1 && child_pid != 0) { 997 int is_child = delete_child_pid(nsd, child_pid); 998 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 999 if(nsd->children[is_child].child_fd == -1) 1000 nsd->children[is_child].has_exited = 1; 1001 parent_check_all_children_exited(nsd); 1002 } else if(is_child != -1) { 1003 log_msg(LOG_WARNING, 1004 "server %d died unexpectedly with status %d, restarting", 1005 (int) child_pid, status); 1006 restart_child_servers(nsd, server_region, netio, 1007 &xfrd_listener.fd); 1008 } else if (child_pid == reload_pid) { 1009 sig_atomic_t cmd = NSD_SOA_END; 1010 log_msg(LOG_WARNING, 1011 "Reload process %d failed with status %d, continuing with old database", 1012 (int) child_pid, status); 1013 reload_pid = -1; 1014 if(reload_listener.fd > 0) close(reload_listener.fd); 1015 reload_listener.fd = -1; 1016 reload_listener.event_types = NETIO_EVENT_NONE; 1017 /* inform xfrd reload attempt ended */ 1018 if(!write_socket(xfrd_listener.fd, 1019 &cmd, sizeof(cmd)) == -1) { 1020 log_msg(LOG_ERR, "problems " 1021 "sending SOAEND to xfrd: %s", 1022 strerror(errno)); 1023 } 1024 } else if (child_pid == xfrd_pid) { 1025 log_msg(LOG_WARNING, 1026 "xfrd process %d failed with status %d, restarting ", 1027 (int) child_pid, status); 1028 xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); 1029 } else { 1030 log_msg(LOG_WARNING, 1031 "Unknown child %d terminated with status %d", 1032 (int) child_pid, status); 1033 } 1034 } 1035 if (child_pid == -1) { 1036 if (errno == EINTR) { 1037 continue; 1038 } 1039 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 1040 } 1041 if (nsd->mode != NSD_RUN) 1042 break; 1043 1044 /* timeout to collect processes. In case no sigchild happens. */ 1045 timeout_spec.tv_sec = 60; 1046 timeout_spec.tv_nsec = 0; 1047 1048 /* listen on ports, timeout for collecting terminated children */ 1049 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 1050 if (errno != EINTR) { 1051 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 1052 } 1053 } 1054 1055 break; 1056 case NSD_RELOAD: 1057 /* Continue to run nsd after reload */ 1058 nsd->mode = NSD_RUN; 1059 1060 if (reload_pid != -1) { 1061 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 1062 (int) reload_pid); 1063 break; 1064 } 1065 1066 log_msg(LOG_WARNING, "signal received, reloading..."); 1067 1068 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 1069 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 1070 reload_pid = -1; 1071 break; 1072 } 1073 1074 /* Do actual reload */ 1075 reload_pid = fork(); 1076 switch (reload_pid) { 1077 case -1: 1078 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 1079 break; 1080 case 0: 1081 /* CHILD */ 1082 close(reload_sockets[0]); 1083 server_reload(nsd, server_region, netio, 1084 reload_sockets[1], &xfrd_listener.fd); 1085 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 1086 close(reload_sockets[1]); 1087 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 1088 /* drop stale xfrd ipc data */ 1089 ((struct ipc_handler_conn_data*)xfrd_listener.user_data) 1090 ->conn->is_reading = 0; 1091 reload_pid = -1; 1092 reload_listener.fd = -1; 1093 reload_listener.event_types = NETIO_EVENT_NONE; 1094 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 1095 break; 1096 default: 1097 /* PARENT, keep running until NSD_QUIT_SYNC 1098 * received from CHILD. 1099 */ 1100 close(reload_sockets[1]); 1101 reload_listener.fd = reload_sockets[0]; 1102 reload_listener.timeout = NULL; 1103 reload_listener.user_data = nsd; 1104 reload_listener.event_types = NETIO_EVENT_READ; 1105 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 1106 netio_add_handler(netio, &reload_listener); 1107 break; 1108 } 1109 break; 1110 case NSD_QUIT_SYNC: 1111 /* synchronisation of xfrd, parent and reload */ 1112 if(!nsd->quit_sync_done && reload_listener.fd > 0) { 1113 sig_atomic_t cmd = NSD_RELOAD; 1114 /* stop xfrd ipc writes in progress */ 1115 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1116 "main: ipc send indication reload")); 1117 if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { 1118 log_msg(LOG_ERR, "server_main: could not send reload " 1119 "indication to xfrd: %s", strerror(errno)); 1120 } 1121 /* wait for ACK from xfrd */ 1122 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 1123 nsd->quit_sync_done = 1; 1124 } 1125 nsd->mode = NSD_RUN; 1126 break; 1127 case NSD_QUIT: 1128 /* silent shutdown during reload */ 1129 if(reload_listener.fd > 0) { 1130 /* acknowledge the quit, to sync reload that we will really quit now */ 1131 sig_atomic_t cmd = NSD_RELOAD; 1132 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 1133 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1134 log_msg(LOG_ERR, "server_main: " 1135 "could not ack quit: %s", strerror(errno)); 1136 } 1137 close(reload_listener.fd); 1138 } 1139 /* only quit children after xfrd has acked */ 1140 send_children_quit(nsd); 1141 1142 namedb_fd_close(nsd->db); 1143 region_destroy(server_region); 1144 server_shutdown(nsd); 1145 1146 /* ENOTREACH */ 1147 break; 1148 case NSD_SHUTDOWN: 1149 send_children_quit(nsd); 1150 log_msg(LOG_WARNING, "signal received, shutting down..."); 1151 break; 1152 case NSD_REAP_CHILDREN: 1153 /* continue; wait for child in run loop */ 1154 nsd->mode = NSD_RUN; 1155 break; 1156 case NSD_STATS: 1157 #ifdef BIND8_STATS 1158 set_children_stats(nsd); 1159 #endif 1160 nsd->mode = NSD_RUN; 1161 break; 1162 default: 1163 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", nsd->mode); 1164 nsd->mode = NSD_RUN; 1165 break; 1166 } 1167 } 1168 1169 /* Truncate the pid file. */ 1170 if ((fd = open(nsd->pidfile, O_WRONLY | O_TRUNC, 0644)) == -1) { 1171 log_msg(LOG_ERR, "can not truncate the pid file %s: %s", nsd->pidfile, strerror(errno)); 1172 } 1173 close(fd); 1174 1175 /* Unlink it if possible... */ 1176 unlinkpid(nsd->pidfile); 1177 1178 if(reload_listener.fd > 0) { 1179 sig_atomic_t cmd = NSD_QUIT; 1180 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1181 "main: ipc send quit to reload-process")); 1182 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1183 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 1184 strerror(errno)); 1185 } 1186 fsync(reload_listener.fd); 1187 close(reload_listener.fd); 1188 } 1189 if(xfrd_listener.fd > 0) { 1190 /* complete quit, stop xfrd */ 1191 sig_atomic_t cmd = NSD_QUIT; 1192 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1193 "main: ipc send quit to xfrd")); 1194 if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { 1195 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 1196 strerror(errno)); 1197 } 1198 fsync(xfrd_listener.fd); 1199 close(xfrd_listener.fd); 1200 (void)kill(xfrd_pid, SIGTERM); 1201 } 1202 1203 namedb_fd_close(nsd->db); 1204 region_destroy(server_region); 1205 server_shutdown(nsd); 1206 } 1207 1208 static query_state_type 1209 server_process_query(struct nsd *nsd, struct query *query) 1210 { 1211 return query_process(query, nsd); 1212 } 1213 1214 1215 /* 1216 * Serve DNS requests. 1217 */ 1218 void 1219 server_child(struct nsd *nsd) 1220 { 1221 size_t i; 1222 region_type *server_region = region_create(xalloc, free); 1223 netio_type *netio = netio_create(server_region); 1224 netio_handler_type *tcp_accept_handlers; 1225 query_type *udp_query; 1226 sig_atomic_t mode; 1227 1228 assert(nsd->server_kind != NSD_SERVER_MAIN); 1229 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 1230 1231 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 1232 close_all_sockets(nsd->tcp, nsd->ifs); 1233 } 1234 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 1235 close_all_sockets(nsd->udp, nsd->ifs); 1236 } 1237 1238 if (nsd->this_child && nsd->this_child->parent_fd != -1) { 1239 netio_handler_type *handler; 1240 1241 handler = (netio_handler_type *) region_alloc( 1242 server_region, sizeof(netio_handler_type)); 1243 handler->fd = nsd->this_child->parent_fd; 1244 handler->timeout = NULL; 1245 handler->user_data = (struct ipc_handler_conn_data*)region_alloc( 1246 server_region, sizeof(struct ipc_handler_conn_data)); 1247 ((struct ipc_handler_conn_data*)handler->user_data)->nsd = nsd; 1248 ((struct ipc_handler_conn_data*)handler->user_data)->conn = 1249 xfrd_tcp_create(server_region); 1250 handler->event_types = NETIO_EVENT_READ; 1251 handler->event_handler = child_handle_parent_command; 1252 netio_add_handler(netio, handler); 1253 } 1254 1255 if (nsd->server_kind & NSD_SERVER_UDP) { 1256 udp_query = query_create(server_region, 1257 compressed_dname_offsets, compression_table_size); 1258 1259 for (i = 0; i < nsd->ifs; ++i) { 1260 struct udp_handler_data *data; 1261 netio_handler_type *handler; 1262 1263 data = (struct udp_handler_data *) region_alloc( 1264 server_region, 1265 sizeof(struct udp_handler_data)); 1266 data->query = udp_query; 1267 data->nsd = nsd; 1268 data->socket = &nsd->udp[i]; 1269 1270 handler = (netio_handler_type *) region_alloc( 1271 server_region, sizeof(netio_handler_type)); 1272 handler->fd = nsd->udp[i].s; 1273 handler->timeout = NULL; 1274 handler->user_data = data; 1275 handler->event_types = NETIO_EVENT_READ; 1276 handler->event_handler = handle_udp; 1277 netio_add_handler(netio, handler); 1278 } 1279 } 1280 1281 /* 1282 * Keep track of all the TCP accept handlers so we can enable 1283 * and disable them based on the current number of active TCP 1284 * connections. 1285 */ 1286 tcp_accept_handlers = (netio_handler_type *) region_alloc( 1287 server_region, nsd->ifs * sizeof(netio_handler_type)); 1288 if (nsd->server_kind & NSD_SERVER_TCP) { 1289 for (i = 0; i < nsd->ifs; ++i) { 1290 struct tcp_accept_handler_data *data; 1291 netio_handler_type *handler; 1292 1293 data = (struct tcp_accept_handler_data *) region_alloc( 1294 server_region, 1295 sizeof(struct tcp_accept_handler_data)); 1296 data->nsd = nsd; 1297 data->socket = &nsd->tcp[i]; 1298 data->tcp_accept_handler_count = nsd->ifs; 1299 data->tcp_accept_handlers = tcp_accept_handlers; 1300 1301 handler = &tcp_accept_handlers[i]; 1302 handler->fd = nsd->tcp[i].s; 1303 handler->timeout = NULL; 1304 handler->user_data = data; 1305 handler->event_types = NETIO_EVENT_READ; 1306 handler->event_handler = handle_tcp_accept; 1307 netio_add_handler(netio, handler); 1308 } 1309 } 1310 1311 /* The main loop... */ 1312 while ((mode = nsd->mode) != NSD_QUIT) { 1313 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 1314 1315 /* Do we need to do the statistics... */ 1316 if (mode == NSD_STATS) { 1317 #ifdef BIND8_STATS 1318 /* Dump the statistics */ 1319 bind8_stats(nsd); 1320 #else /* !BIND8_STATS */ 1321 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 1322 #endif /* BIND8_STATS */ 1323 1324 nsd->mode = NSD_RUN; 1325 } 1326 else if (mode == NSD_REAP_CHILDREN) { 1327 /* got signal, notify parent. parent reaps terminated children. */ 1328 if (nsd->this_child->parent_fd > 0) { 1329 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 1330 if (write(nsd->this_child->parent_fd, 1331 &parent_notify, 1332 sizeof(parent_notify)) == -1) 1333 { 1334 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 1335 (int) nsd->this_child->pid, strerror(errno)); 1336 } 1337 } else /* no parent, so reap 'em */ 1338 while (waitpid(0, NULL, WNOHANG) > 0) ; 1339 nsd->mode = NSD_RUN; 1340 } 1341 else if(mode == NSD_RUN) { 1342 /* Wait for a query... */ 1343 if (netio_dispatch(netio, NULL, NULL) == -1) { 1344 if (errno != EINTR) { 1345 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 1346 break; 1347 } 1348 } 1349 } else if(mode == NSD_QUIT) { 1350 /* ignore here, quit */ 1351 } else { 1352 log_msg(LOG_ERR, "mode bad value %d, back to service.", 1353 mode); 1354 nsd->mode = NSD_RUN; 1355 } 1356 } 1357 1358 #ifdef BIND8_STATS 1359 bind8_stats(nsd); 1360 #endif /* BIND8_STATS */ 1361 1362 namedb_fd_close(nsd->db); 1363 region_destroy(server_region); 1364 server_shutdown(nsd); 1365 } 1366 1367 1368 static void 1369 handle_udp(netio_type *ATTR_UNUSED(netio), 1370 netio_handler_type *handler, 1371 netio_event_types_type event_types) 1372 { 1373 struct udp_handler_data *data 1374 = (struct udp_handler_data *) handler->user_data; 1375 int received, sent; 1376 struct query *q = data->query; 1377 1378 if (!(event_types & NETIO_EVENT_READ)) { 1379 return; 1380 } 1381 1382 /* Account... */ 1383 if (data->socket->addr->ai_family == AF_INET) { 1384 STATUP(data->nsd, qudp); 1385 } else if (data->socket->addr->ai_family == AF_INET6) { 1386 STATUP(data->nsd, qudp6); 1387 } 1388 1389 /* Initialize the query... */ 1390 query_reset(q, UDP_MAX_MESSAGE_LEN, 0); 1391 1392 received = recvfrom(handler->fd, 1393 buffer_begin(q->packet), 1394 buffer_remaining(q->packet), 1395 0, 1396 (struct sockaddr *)&q->addr, 1397 &q->addrlen); 1398 if (received == -1) { 1399 if (errno != EAGAIN && errno != EINTR) { 1400 log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); 1401 STATUP(data->nsd, rxerr); 1402 } 1403 } else { 1404 buffer_skip(q->packet, received); 1405 buffer_flip(q->packet); 1406 1407 /* Process and answer the query... */ 1408 if (server_process_query(data->nsd, q) != QUERY_DISCARDED) { 1409 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 1410 STATUP(data->nsd, nona); 1411 } 1412 1413 /* Add EDNS0 and TSIG info if necessary. */ 1414 query_add_optional(q, data->nsd); 1415 1416 buffer_flip(q->packet); 1417 1418 sent = sendto(handler->fd, 1419 buffer_begin(q->packet), 1420 buffer_remaining(q->packet), 1421 0, 1422 (struct sockaddr *) &q->addr, 1423 q->addrlen); 1424 if (sent == -1) { 1425 log_msg(LOG_ERR, "sendto failed: %s", strerror(errno)); 1426 STATUP(data->nsd, txerr); 1427 } else if ((size_t) sent != buffer_remaining(q->packet)) { 1428 log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet)); 1429 } else { 1430 #ifdef BIND8_STATS 1431 /* Account the rcode & TC... */ 1432 STATUP2(data->nsd, rcode, RCODE(q->packet)); 1433 if (TC(q->packet)) 1434 STATUP(data->nsd, truncated); 1435 #endif /* BIND8_STATS */ 1436 } 1437 } else { 1438 STATUP(data->nsd, dropped); 1439 } 1440 } 1441 } 1442 1443 1444 static void 1445 cleanup_tcp_handler(netio_type *netio, netio_handler_type *handler) 1446 { 1447 struct tcp_handler_data *data 1448 = (struct tcp_handler_data *) handler->user_data; 1449 netio_remove_handler(netio, handler); 1450 close(handler->fd); 1451 1452 /* 1453 * Enable the TCP accept handlers when the current number of 1454 * TCP connections is about to drop below the maximum number 1455 * of TCP connections. 1456 */ 1457 if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 1458 configure_handler_event_types(data->tcp_accept_handler_count, 1459 data->tcp_accept_handlers, 1460 NETIO_EVENT_READ); 1461 } 1462 --data->nsd->current_tcp_count; 1463 assert(data->nsd->current_tcp_count >= 0); 1464 1465 region_destroy(data->region); 1466 } 1467 1468 static void 1469 handle_tcp_reading(netio_type *netio, 1470 netio_handler_type *handler, 1471 netio_event_types_type event_types) 1472 { 1473 struct tcp_handler_data *data 1474 = (struct tcp_handler_data *) handler->user_data; 1475 ssize_t received; 1476 1477 if (event_types & NETIO_EVENT_TIMEOUT) { 1478 /* Connection timed out. */ 1479 cleanup_tcp_handler(netio, handler); 1480 return; 1481 } 1482 1483 if (data->nsd->tcp_query_count > 0 && 1484 data->query_count >= data->nsd->tcp_query_count) { 1485 /* No more queries allowed on this tcp connection. */ 1486 cleanup_tcp_handler(netio, handler); 1487 return; 1488 } 1489 1490 assert(event_types & NETIO_EVENT_READ); 1491 1492 if (data->bytes_transmitted == 0) { 1493 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 1494 } 1495 1496 /* 1497 * Check if we received the leading packet length bytes yet. 1498 */ 1499 if (data->bytes_transmitted < sizeof(uint16_t)) { 1500 received = read(handler->fd, 1501 (char *) &data->query->tcplen 1502 + data->bytes_transmitted, 1503 sizeof(uint16_t) - data->bytes_transmitted); 1504 if (received == -1) { 1505 if (errno == EAGAIN || errno == EINTR) { 1506 /* 1507 * Read would block, wait until more 1508 * data is available. 1509 */ 1510 return; 1511 } else { 1512 #ifdef ECONNRESET 1513 if (verbosity >= 2 || errno != ECONNRESET) 1514 #endif /* ECONNRESET */ 1515 log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); 1516 cleanup_tcp_handler(netio, handler); 1517 return; 1518 } 1519 } else if (received == 0) { 1520 /* EOF */ 1521 cleanup_tcp_handler(netio, handler); 1522 return; 1523 } 1524 1525 data->bytes_transmitted += received; 1526 if (data->bytes_transmitted < sizeof(uint16_t)) { 1527 /* 1528 * Not done with the tcplen yet, wait for more 1529 * data to become available. 1530 */ 1531 return; 1532 } 1533 1534 assert(data->bytes_transmitted == sizeof(uint16_t)); 1535 1536 data->query->tcplen = ntohs(data->query->tcplen); 1537 1538 /* 1539 * Minimum query size is: 1540 * 1541 * Size of the header (12) 1542 * + Root domain name (1) 1543 * + Query class (2) 1544 * + Query type (2) 1545 */ 1546 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 1547 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 1548 cleanup_tcp_handler(netio, handler); 1549 return; 1550 } 1551 1552 if (data->query->tcplen > data->query->maxlen) { 1553 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 1554 cleanup_tcp_handler(netio, handler); 1555 return; 1556 } 1557 1558 buffer_set_limit(data->query->packet, data->query->tcplen); 1559 } 1560 1561 assert(buffer_remaining(data->query->packet) > 0); 1562 1563 /* Read the (remaining) query data. */ 1564 received = read(handler->fd, 1565 buffer_current(data->query->packet), 1566 buffer_remaining(data->query->packet)); 1567 if (received == -1) { 1568 if (errno == EAGAIN || errno == EINTR) { 1569 /* 1570 * Read would block, wait until more data is 1571 * available. 1572 */ 1573 return; 1574 } else { 1575 #ifdef ECONNRESET 1576 if (verbosity >= 2 || errno != ECONNRESET) 1577 #endif /* ECONNRESET */ 1578 log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); 1579 cleanup_tcp_handler(netio, handler); 1580 return; 1581 } 1582 } else if (received == 0) { 1583 /* EOF */ 1584 cleanup_tcp_handler(netio, handler); 1585 return; 1586 } 1587 1588 data->bytes_transmitted += received; 1589 buffer_skip(data->query->packet, received); 1590 if (buffer_remaining(data->query->packet) > 0) { 1591 /* 1592 * Message not yet complete, wait for more data to 1593 * become available. 1594 */ 1595 return; 1596 } 1597 1598 assert(buffer_position(data->query->packet) == data->query->tcplen); 1599 1600 /* Account... */ 1601 #ifndef INET6 1602 STATUP(data->nsd, ctcp); 1603 #else 1604 if (data->query->addr.ss_family == AF_INET) { 1605 STATUP(data->nsd, ctcp); 1606 } else if (data->query->addr.ss_family == AF_INET6) { 1607 STATUP(data->nsd, ctcp6); 1608 } 1609 #endif 1610 1611 /* We have a complete query, process it. */ 1612 1613 /* tcp-query-count: handle query counter ++ */ 1614 data->query_count++; 1615 1616 buffer_flip(data->query->packet); 1617 data->query_state = server_process_query(data->nsd, data->query); 1618 if (data->query_state == QUERY_DISCARDED) { 1619 /* Drop the packet and the entire connection... */ 1620 STATUP(data->nsd, dropped); 1621 cleanup_tcp_handler(netio, handler); 1622 return; 1623 } 1624 1625 if (RCODE(data->query->packet) == RCODE_OK 1626 && !AA(data->query->packet)) 1627 { 1628 STATUP(data->nsd, nona); 1629 } 1630 1631 query_add_optional(data->query, data->nsd); 1632 1633 /* Switch to the tcp write handler. */ 1634 buffer_flip(data->query->packet); 1635 data->query->tcplen = buffer_remaining(data->query->packet); 1636 data->bytes_transmitted = 0; 1637 1638 handler->timeout->tv_sec = data->nsd->tcp_timeout; 1639 handler->timeout->tv_nsec = 0L; 1640 timespec_add(handler->timeout, netio_current_time(netio)); 1641 1642 handler->event_types = NETIO_EVENT_WRITE | NETIO_EVENT_TIMEOUT; 1643 handler->event_handler = handle_tcp_writing; 1644 } 1645 1646 static void 1647 handle_tcp_writing(netio_type *netio, 1648 netio_handler_type *handler, 1649 netio_event_types_type event_types) 1650 { 1651 struct tcp_handler_data *data 1652 = (struct tcp_handler_data *) handler->user_data; 1653 ssize_t sent; 1654 struct query *q = data->query; 1655 1656 if (event_types & NETIO_EVENT_TIMEOUT) { 1657 /* Connection timed out. */ 1658 cleanup_tcp_handler(netio, handler); 1659 return; 1660 } 1661 1662 assert(event_types & NETIO_EVENT_WRITE); 1663 1664 if (data->bytes_transmitted < sizeof(q->tcplen)) { 1665 /* Writing the response packet length. */ 1666 uint16_t n_tcplen = htons(q->tcplen); 1667 sent = write(handler->fd, 1668 (const char *) &n_tcplen + data->bytes_transmitted, 1669 sizeof(n_tcplen) - data->bytes_transmitted); 1670 if (sent == -1) { 1671 if (errno == EAGAIN || errno == EINTR) { 1672 /* 1673 * Write would block, wait until 1674 * socket becomes writable again. 1675 */ 1676 return; 1677 } else { 1678 #ifdef ECONNRESET 1679 if(verbosity >= 2 || errno != ECONNRESET) 1680 #endif /* ECONNRESET */ 1681 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 1682 cleanup_tcp_handler(netio, handler); 1683 return; 1684 } 1685 } 1686 1687 data->bytes_transmitted += sent; 1688 if (data->bytes_transmitted < sizeof(q->tcplen)) { 1689 /* 1690 * Writing not complete, wait until socket 1691 * becomes writable again. 1692 */ 1693 return; 1694 } 1695 1696 assert(data->bytes_transmitted == sizeof(q->tcplen)); 1697 } 1698 1699 assert(data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)); 1700 1701 sent = write(handler->fd, 1702 buffer_current(q->packet), 1703 buffer_remaining(q->packet)); 1704 if (sent == -1) { 1705 if (errno == EAGAIN || errno == EINTR) { 1706 /* 1707 * Write would block, wait until 1708 * socket becomes writable again. 1709 */ 1710 return; 1711 } else { 1712 #ifdef ECONNRESET 1713 if(verbosity >= 2 || errno != ECONNRESET) 1714 #endif /* ECONNRESET */ 1715 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 1716 cleanup_tcp_handler(netio, handler); 1717 return; 1718 } 1719 } 1720 1721 buffer_skip(q->packet, sent); 1722 data->bytes_transmitted += sent; 1723 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 1724 /* 1725 * Still more data to write when socket becomes 1726 * writable again. 1727 */ 1728 return; 1729 } 1730 1731 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 1732 1733 if (data->query_state == QUERY_IN_AXFR) { 1734 /* Continue processing AXFR and writing back results. */ 1735 buffer_clear(q->packet); 1736 data->query_state = query_axfr(data->nsd, q); 1737 if (data->query_state != QUERY_PROCESSED) { 1738 query_add_optional(data->query, data->nsd); 1739 1740 /* Reset data. */ 1741 buffer_flip(q->packet); 1742 q->tcplen = buffer_remaining(q->packet); 1743 data->bytes_transmitted = 0; 1744 /* Reset timeout. */ 1745 handler->timeout->tv_sec = data->nsd->tcp_timeout; 1746 handler->timeout->tv_nsec = 0; 1747 timespec_add(handler->timeout, netio_current_time(netio)); 1748 1749 /* 1750 * Write data if/when the socket is writable 1751 * again. 1752 */ 1753 return; 1754 } 1755 } 1756 1757 /* 1758 * Done sending, wait for the next request to arrive on the 1759 * TCP socket by installing the TCP read handler. 1760 */ 1761 if (data->nsd->tcp_query_count > 0 && 1762 data->query_count >= data->nsd->tcp_query_count) { 1763 1764 (void) shutdown(handler->fd, SHUT_WR); 1765 } 1766 1767 data->bytes_transmitted = 0; 1768 1769 handler->timeout->tv_sec = data->nsd->tcp_timeout; 1770 handler->timeout->tv_nsec = 0; 1771 timespec_add(handler->timeout, netio_current_time(netio)); 1772 1773 handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; 1774 handler->event_handler = handle_tcp_reading; 1775 } 1776 1777 1778 /* 1779 * Handle an incoming TCP connection. The connection is accepted and 1780 * a new TCP reader event handler is added to NETIO. The TCP handler 1781 * is responsible for cleanup when the connection is closed. 1782 */ 1783 static void 1784 handle_tcp_accept(netio_type *netio, 1785 netio_handler_type *handler, 1786 netio_event_types_type event_types) 1787 { 1788 struct tcp_accept_handler_data *data 1789 = (struct tcp_accept_handler_data *) handler->user_data; 1790 int s; 1791 struct tcp_handler_data *tcp_data; 1792 region_type *tcp_region; 1793 netio_handler_type *tcp_handler; 1794 #ifdef INET6 1795 struct sockaddr_storage addr; 1796 #else 1797 struct sockaddr_in addr; 1798 #endif 1799 socklen_t addrlen; 1800 1801 if (!(event_types & NETIO_EVENT_READ)) { 1802 return; 1803 } 1804 1805 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 1806 return; 1807 } 1808 1809 /* Accept it... */ 1810 addrlen = sizeof(addr); 1811 s = accept(handler->fd, (struct sockaddr *) &addr, &addrlen); 1812 if (s == -1) { 1813 /* EINTR is a signal interrupt. The others are various OS ways 1814 of saying that the client has closed the connection. */ 1815 if ( errno != EINTR 1816 && errno != EWOULDBLOCK 1817 #ifdef ECONNABORTED 1818 && errno != ECONNABORTED 1819 #endif /* ECONNABORTED */ 1820 #ifdef EPROTO 1821 && errno != EPROTO 1822 #endif /* EPROTO */ 1823 ) { 1824 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 1825 } 1826 return; 1827 } 1828 1829 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 1830 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 1831 close(s); 1832 return; 1833 } 1834 1835 /* 1836 * This region is deallocated when the TCP connection is 1837 * closed by the TCP handler. 1838 */ 1839 tcp_region = region_create(xalloc, free); 1840 tcp_data = (struct tcp_handler_data *) region_alloc( 1841 tcp_region, sizeof(struct tcp_handler_data)); 1842 tcp_data->region = tcp_region; 1843 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 1844 compression_table_size); 1845 tcp_data->nsd = data->nsd; 1846 tcp_data->query_count = 0; 1847 1848 tcp_data->tcp_accept_handler_count = data->tcp_accept_handler_count; 1849 tcp_data->tcp_accept_handlers = data->tcp_accept_handlers; 1850 1851 tcp_data->query_state = QUERY_PROCESSED; 1852 tcp_data->bytes_transmitted = 0; 1853 memcpy(&tcp_data->query->addr, &addr, addrlen); 1854 tcp_data->query->addrlen = addrlen; 1855 1856 tcp_handler = (netio_handler_type *) region_alloc( 1857 tcp_region, sizeof(netio_handler_type)); 1858 tcp_handler->fd = s; 1859 tcp_handler->timeout = (struct timespec *) region_alloc( 1860 tcp_region, sizeof(struct timespec)); 1861 tcp_handler->timeout->tv_sec = data->nsd->tcp_timeout; 1862 tcp_handler->timeout->tv_nsec = 0L; 1863 timespec_add(tcp_handler->timeout, netio_current_time(netio)); 1864 1865 tcp_handler->user_data = tcp_data; 1866 tcp_handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; 1867 tcp_handler->event_handler = handle_tcp_reading; 1868 1869 netio_add_handler(netio, tcp_handler); 1870 1871 /* 1872 * Keep track of the total number of TCP handlers installed so 1873 * we can stop accepting connections when the maximum number 1874 * of simultaneous TCP connections is reached. 1875 */ 1876 ++data->nsd->current_tcp_count; 1877 if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 1878 configure_handler_event_types(data->tcp_accept_handler_count, 1879 data->tcp_accept_handlers, 1880 NETIO_EVENT_NONE); 1881 } 1882 } 1883 1884 static void 1885 send_children_quit(struct nsd* nsd) 1886 { 1887 sig_atomic_t command = NSD_QUIT; 1888 size_t i; 1889 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 1890 for (i = 0; i < nsd->child_count; ++i) { 1891 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd > 0) { 1892 if (write(nsd->children[i].child_fd, 1893 &command, 1894 sizeof(command)) == -1) 1895 { 1896 if(errno != EAGAIN && errno != EINTR) 1897 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 1898 (int) command, 1899 (int) nsd->children[i].pid, 1900 strerror(errno)); 1901 } 1902 fsync(nsd->children[i].child_fd); 1903 close(nsd->children[i].child_fd); 1904 nsd->children[i].child_fd = -1; 1905 } 1906 } 1907 } 1908 1909 #ifdef BIND8_STATS 1910 static void 1911 set_children_stats(struct nsd* nsd) 1912 { 1913 size_t i; 1914 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 1915 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 1916 for (i = 0; i < nsd->child_count; ++i) { 1917 nsd->children[i].need_to_send_STATS = 1; 1918 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 1919 } 1920 } 1921 #endif /* BIND8_STATS */ 1922 1923 static void 1924 configure_handler_event_types(size_t count, 1925 netio_handler_type *handlers, 1926 netio_event_types_type event_types) 1927 { 1928 size_t i; 1929 1930 assert(handlers); 1931 1932 for (i = 0; i < count; ++i) { 1933 handlers[i].event_types = event_types; 1934 } 1935 } 1936