1 /* 2 * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn. 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include <config.h> 11 #include <assert.h> 12 #include <errno.h> 13 #include <fcntl.h> 14 #include <unistd.h> 15 #include <stdlib.h> 16 #include "xfrd-tcp.h" 17 #include "buffer.h" 18 #include "packet.h" 19 #include "dname.h" 20 #include "options.h" 21 #include "namedb.h" 22 #include "xfrd.h" 23 #include "util.h" 24 25 xfrd_tcp_set_t* xfrd_tcp_set_create(struct region* region) 26 { 27 int i; 28 xfrd_tcp_set_t* tcp_set = region_alloc(region, sizeof(xfrd_tcp_set_t)); 29 memset(tcp_set, 0, sizeof(xfrd_tcp_set_t)); 30 tcp_set->tcp_count = 0; 31 tcp_set->tcp_waiting_first = 0; 32 tcp_set->tcp_waiting_last = 0; 33 for(i=0; i<XFRD_MAX_TCP; i++) 34 tcp_set->tcp_state[i] = xfrd_tcp_create(region); 35 return tcp_set; 36 } 37 38 void 39 xfrd_setup_packet(buffer_type* packet, 40 uint16_t type, uint16_t klass, const dname_type* dname) 41 { 42 /* Set up the header */ 43 buffer_clear(packet); 44 ID_SET(packet, qid_generate()); 45 FLAGS_SET(packet, 0); 46 OPCODE_SET(packet, OPCODE_QUERY); 47 QDCOUNT_SET(packet, 1); 48 ANCOUNT_SET(packet, 0); 49 NSCOUNT_SET(packet, 0); 50 ARCOUNT_SET(packet, 0); 51 buffer_skip(packet, QHEADERSZ); 52 53 /* The question record. */ 54 buffer_write(packet, dname_name(dname), dname->name_size); 55 buffer_write_u16(packet, type); 56 buffer_write_u16(packet, klass); 57 } 58 59 static socklen_t 60 #ifdef INET6 61 xfrd_acl_sockaddr(acl_options_t* acl, unsigned int port, 62 struct sockaddr_storage *sck) 63 #else 64 xfrd_acl_sockaddr(acl_options_t* acl, unsigned int port, 65 struct sockaddr_in *sck, const char* fromto) 66 #endif /* INET6 */ 67 { 68 /* setup address structure */ 69 #ifdef INET6 70 memset(sck, 0, sizeof(struct sockaddr_storage)); 71 #else 72 memset(sck, 0, sizeof(struct sockaddr_in)); 73 #endif 74 if(acl->is_ipv6) { 75 #ifdef INET6 76 struct sockaddr_in6* sa = (struct sockaddr_in6*)sck; 77 sa->sin6_family = AF_INET6; 78 sa->sin6_port = htons(port); 79 sa->sin6_addr = acl->addr.addr6; 80 return sizeof(struct sockaddr_in6); 81 #else 82 log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \ 83 INET6.", fromto, acl->ip_address_spec); 84 return 0; 85 #endif 86 } else { 87 struct sockaddr_in* sa = (struct sockaddr_in*)sck; 88 sa->sin_family = AF_INET; 89 sa->sin_port = htons(port); 90 sa->sin_addr = acl->addr.addr; 91 return sizeof(struct sockaddr_in); 92 } 93 } 94 95 socklen_t 96 #ifdef INET6 97 xfrd_acl_sockaddr_to(acl_options_t* acl, struct sockaddr_storage *to) 98 #else 99 xfrd_acl_sockaddr_to(acl_options_t* acl, struct sockaddr_in *to) 100 #endif /* INET6 */ 101 { 102 unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT); 103 #ifdef INET6 104 return xfrd_acl_sockaddr(acl, port, to); 105 #else 106 return xfrd_acl_sockaddr(acl, port, to, "to"); 107 #endif /* INET6 */ 108 } 109 110 socklen_t 111 #ifdef INET6 112 xfrd_acl_sockaddr_frm(acl_options_t* acl, struct sockaddr_storage *frm) 113 #else 114 xfrd_acl_sockaddr_frm(acl_options_t* acl, struct sockaddr_in *frm) 115 #endif /* INET6 */ 116 { 117 unsigned int port = acl->port?acl->port:0; 118 #ifdef INET6 119 return xfrd_acl_sockaddr(acl, port, frm); 120 #else 121 return xfrd_acl_sockaddr(acl, port, frm, "from"); 122 #endif /* INET6 */ 123 } 124 125 void 126 xfrd_write_soa_buffer(struct buffer* packet, 127 const dname_type* apex, struct xfrd_soa* soa) 128 { 129 size_t rdlength_pos; 130 uint16_t rdlength; 131 buffer_write(packet, dname_name(apex), apex->name_size); 132 133 /* already in network order */ 134 buffer_write(packet, &soa->type, sizeof(soa->type)); 135 buffer_write(packet, &soa->klass, sizeof(soa->klass)); 136 buffer_write(packet, &soa->ttl, sizeof(soa->ttl)); 137 rdlength_pos = buffer_position(packet); 138 buffer_skip(packet, sizeof(rdlength)); 139 140 /* uncompressed dnames */ 141 buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]); 142 buffer_write(packet, soa->email+1, soa->email[0]); 143 144 buffer_write(packet, &soa->serial, sizeof(uint32_t)); 145 buffer_write(packet, &soa->refresh, sizeof(uint32_t)); 146 buffer_write(packet, &soa->retry, sizeof(uint32_t)); 147 buffer_write(packet, &soa->expire, sizeof(uint32_t)); 148 buffer_write(packet, &soa->minimum, sizeof(uint32_t)); 149 150 /* write length of RR */ 151 rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength); 152 buffer_write_u16_at(packet, rdlength_pos, rdlength); 153 } 154 155 xfrd_tcp_t* 156 xfrd_tcp_create(region_type* region) 157 { 158 xfrd_tcp_t* tcp_state = (xfrd_tcp_t*)region_alloc( 159 region, sizeof(xfrd_tcp_t)); 160 memset(tcp_state, 0, sizeof(xfrd_tcp_t)); 161 tcp_state->packet = buffer_create(region, QIOBUFSZ); 162 tcp_state->fd = -1; 163 164 return tcp_state; 165 } 166 167 void 168 xfrd_tcp_obtain(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 169 { 170 assert(zone->tcp_conn == -1); 171 assert(zone->tcp_waiting == 0); 172 173 if(set->tcp_count < XFRD_MAX_TCP) { 174 int i; 175 assert(!set->tcp_waiting_first); 176 set->tcp_count ++; 177 /* find a free tcp_buffer */ 178 for(i=0; i<XFRD_MAX_TCP; i++) { 179 if(set->tcp_state[i]->fd == -1) { 180 zone->tcp_conn = i; 181 break; 182 } 183 } 184 185 assert(zone->tcp_conn != -1); 186 187 zone->tcp_waiting = 0; 188 189 /* stop udp use (if any) */ 190 if(zone->zone_handler.fd != -1) 191 xfrd_udp_release(zone); 192 193 if(!xfrd_tcp_open(set, zone)) 194 return; 195 196 xfrd_tcp_xfr(set, zone); 197 return; 198 } 199 /* wait, at end of line */ 200 DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp " 201 "connections (%d) reached.", XFRD_MAX_TCP)); 202 zone->tcp_waiting_next = 0; 203 zone->tcp_waiting = 1; 204 if(!set->tcp_waiting_last) { 205 set->tcp_waiting_first = zone; 206 set->tcp_waiting_last = zone; 207 } else { 208 set->tcp_waiting_last->tcp_waiting_next = zone; 209 set->tcp_waiting_last = zone; 210 } 211 xfrd_unset_timer(zone); 212 } 213 214 int 215 xfrd_tcp_open(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 216 { 217 int fd, family, conn; 218 219 #ifdef INET6 220 struct sockaddr_storage to; 221 #else 222 struct sockaddr_in to; 223 #endif /* INET6 */ 224 socklen_t to_len; 225 226 assert(zone->tcp_conn != -1); 227 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s", 228 zone->apex_str, zone->master->ip_address_spec)); 229 set->tcp_state[zone->tcp_conn]->is_reading = 0; 230 set->tcp_state[zone->tcp_conn]->total_bytes = 0; 231 set->tcp_state[zone->tcp_conn]->msglen = 0; 232 233 if(zone->master->is_ipv6) { 234 #ifdef INET6 235 family = PF_INET6; 236 #else 237 xfrd_set_refresh_now(zone); 238 xfrd_tcp_release(set, zone); 239 return 0; 240 #endif 241 } else { 242 family = PF_INET; 243 } 244 fd = socket(family, SOCK_STREAM, IPPROTO_TCP); 245 set->tcp_state[zone->tcp_conn]->fd = fd; 246 if(fd == -1) { 247 log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s", 248 zone->master->ip_address_spec, strerror(errno)); 249 xfrd_set_refresh_now(zone); 250 xfrd_tcp_release(set, zone); 251 return 0; 252 } 253 if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { 254 log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno)); 255 xfrd_set_refresh_now(zone); 256 xfrd_tcp_release(set, zone); 257 return 0; 258 } 259 260 to_len = xfrd_acl_sockaddr_to(zone->master, &to); 261 262 /* bind it */ 263 if (!xfrd_bind_local_interface(fd, 264 zone->zone_options->outgoing_interface, zone->master, 1)) { 265 266 xfrd_set_refresh_now(zone); 267 xfrd_tcp_release(set, zone); 268 return 0; 269 } 270 271 conn = connect(fd, (struct sockaddr*)&to, to_len); 272 if (conn == -1 && errno != EINPROGRESS) { 273 log_msg(LOG_ERR, "xfrd: connect %s failed: %s", 274 zone->master->ip_address_spec, strerror(errno)); 275 xfrd_set_refresh_now(zone); 276 xfrd_tcp_release(set, zone); 277 return 0; 278 } 279 280 zone->zone_handler.fd = fd; 281 zone->zone_handler.event_types = NETIO_EVENT_TIMEOUT|NETIO_EVENT_WRITE; 282 xfrd_set_timer(zone, xfrd_time() + set->tcp_timeout); 283 return 1; 284 } 285 286 void 287 xfrd_tcp_xfr(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 288 { 289 xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn]; 290 assert(zone->tcp_conn != -1); 291 assert(zone->tcp_waiting == 0); 292 /* start AXFR or IXFR for the zone */ 293 if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only || 294 zone->master->ixfr_disabled) { 295 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer " 296 "(AXFR) for %s to %s", 297 zone->apex_str, zone->master->ip_address_spec)); 298 299 xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex); 300 } else { 301 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone " 302 "transfer (IXFR) for %s to %s", 303 zone->apex_str, zone->master->ip_address_spec)); 304 305 xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex); 306 NSCOUNT_SET(tcp->packet, 1); 307 xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk); 308 } 309 zone->query_id = ID(tcp->packet); 310 zone->msg_seq_nr = 0; 311 zone->msg_rr_count = 0; 312 if(zone->master->key_options && zone->master->key_options->tsig_key) { 313 xfrd_tsig_sign_request(tcp->packet, &zone->tsig, zone->master); 314 } 315 buffer_flip(tcp->packet); 316 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id)); 317 tcp->msglen = buffer_limit(tcp->packet); 318 /* wait for select to complete connect before write */ 319 } 320 321 static void 322 tcp_conn_ready_for_reading(xfrd_tcp_t* tcp) 323 { 324 tcp->total_bytes = 0; 325 tcp->msglen = 0; 326 buffer_clear(tcp->packet); 327 } 328 329 int conn_write(xfrd_tcp_t* tcp) 330 { 331 ssize_t sent; 332 333 if(tcp->total_bytes < sizeof(tcp->msglen)) { 334 uint16_t sendlen = htons(tcp->msglen); 335 sent = write(tcp->fd, 336 (const char*)&sendlen + tcp->total_bytes, 337 sizeof(tcp->msglen) - tcp->total_bytes); 338 339 if(sent == -1) { 340 if(errno == EAGAIN || errno == EINTR) { 341 /* write would block, try later */ 342 return 0; 343 } else { 344 return -1; 345 } 346 } 347 348 tcp->total_bytes += sent; 349 if(tcp->total_bytes < sizeof(tcp->msglen)) { 350 /* incomplete write, resume later */ 351 return 0; 352 } 353 assert(tcp->total_bytes == sizeof(tcp->msglen)); 354 } 355 356 assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)); 357 358 sent = write(tcp->fd, 359 buffer_current(tcp->packet), 360 buffer_remaining(tcp->packet)); 361 if(sent == -1) { 362 if(errno == EAGAIN || errno == EINTR) { 363 /* write would block, try later */ 364 return 0; 365 } else { 366 return -1; 367 } 368 } 369 370 buffer_skip(tcp->packet, sent); 371 tcp->total_bytes += sent; 372 373 if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) { 374 /* more to write when socket becomes writable again */ 375 return 0; 376 } 377 378 assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)); 379 return 1; 380 } 381 382 void 383 xfrd_tcp_write(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 384 { 385 int ret; 386 xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn]; 387 assert(zone->tcp_conn != -1); 388 if(tcp->total_bytes == 0) { 389 /* check for pending error from nonblocking connect */ 390 /* from Stevens, unix network programming, vol1, 3rd ed, p450 */ 391 int error = 0; 392 socklen_t len = sizeof(error); 393 if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){ 394 error = errno; /* on solaris errno is error */ 395 } 396 if(error == EINPROGRESS || error == EWOULDBLOCK) 397 return; /* try again later */ 398 if(error != 0) { 399 log_msg(LOG_ERR, "Could not tcp connect to %s: %s", 400 zone->master->ip_address_spec, strerror(error)); 401 xfrd_set_refresh_now(zone); 402 xfrd_tcp_release(set, zone); 403 return; 404 } 405 } 406 ret = conn_write(tcp); 407 if(ret == -1) { 408 log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno)); 409 xfrd_set_refresh_now(zone); 410 xfrd_tcp_release(set, zone); 411 return; 412 } 413 if(ret == 0) { 414 return; /* write again later */ 415 } 416 /* done writing, get ready for reading */ 417 tcp->is_reading = 1; 418 tcp_conn_ready_for_reading(tcp); 419 zone->zone_handler.event_types = NETIO_EVENT_READ|NETIO_EVENT_TIMEOUT; 420 xfrd_tcp_read(set, zone); 421 } 422 423 int 424 conn_read(xfrd_tcp_t* tcp) 425 { 426 ssize_t received; 427 /* receive leading packet length bytes */ 428 if(tcp->total_bytes < sizeof(tcp->msglen)) { 429 received = read(tcp->fd, 430 (char*) &tcp->msglen + tcp->total_bytes, 431 sizeof(tcp->msglen) - tcp->total_bytes); 432 if(received == -1) { 433 if(errno == EAGAIN || errno == EINTR) { 434 /* read would block, try later */ 435 return 0; 436 } else { 437 #ifdef ECONNRESET 438 if (verbosity >= 2 || errno != ECONNRESET) 439 #endif /* ECONNRESET */ 440 log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno)); 441 return -1; 442 } 443 } else if(received == 0) { 444 /* EOF */ 445 return -1; 446 } 447 tcp->total_bytes += received; 448 if(tcp->total_bytes < sizeof(tcp->msglen)) { 449 /* not complete yet, try later */ 450 return 0; 451 } 452 453 assert(tcp->total_bytes == sizeof(tcp->msglen)); 454 tcp->msglen = ntohs(tcp->msglen); 455 456 if(tcp->msglen > buffer_capacity(tcp->packet)) { 457 log_msg(LOG_ERR, "buffer too small, dropping connection"); 458 return 0; 459 } 460 buffer_set_limit(tcp->packet, tcp->msglen); 461 } 462 463 assert(buffer_remaining(tcp->packet) > 0); 464 465 received = read(tcp->fd, buffer_current(tcp->packet), 466 buffer_remaining(tcp->packet)); 467 if(received == -1) { 468 if(errno == EAGAIN || errno == EINTR) { 469 /* read would block, try later */ 470 return 0; 471 } else { 472 #ifdef ECONNRESET 473 if (verbosity >= 2 || errno != ECONNRESET) 474 #endif /* ECONNRESET */ 475 log_msg(LOG_ERR, "tcp read %s", strerror(errno)); 476 return -1; 477 } 478 } else if(received == 0) { 479 /* EOF */ 480 return -1; 481 } 482 483 tcp->total_bytes += received; 484 buffer_skip(tcp->packet, received); 485 486 if(buffer_remaining(tcp->packet) > 0) { 487 /* not complete yet, wait for more */ 488 return 0; 489 } 490 491 /* completed */ 492 assert(buffer_position(tcp->packet) == tcp->msglen); 493 return 1; 494 } 495 496 void 497 xfrd_tcp_read(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 498 { 499 xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn]; 500 int ret; 501 502 assert(zone->tcp_conn != -1); 503 ret = conn_read(tcp); 504 if(ret == -1) { 505 xfrd_set_refresh_now(zone); 506 xfrd_tcp_release(set, zone); 507 return; 508 } 509 if(ret == 0) 510 return; 511 512 /* completed msg */ 513 buffer_flip(tcp->packet); 514 switch(xfrd_handle_received_xfr_packet(zone, tcp->packet)) { 515 case xfrd_packet_more: 516 tcp_conn_ready_for_reading(tcp); 517 break; 518 case xfrd_packet_transfer: 519 case xfrd_packet_newlease: 520 xfrd_tcp_release(set, zone); 521 assert(zone->round_num == -1); 522 break; 523 case xfrd_packet_notimpl: 524 zone->master->ixfr_disabled = time(NULL); 525 xfrd_tcp_release(set, zone); 526 /* query next server */ 527 xfrd_make_request(zone); 528 break; 529 case xfrd_packet_bad: 530 case xfrd_packet_tcp: 531 default: 532 xfrd_tcp_release(set, zone); 533 /* query next server */ 534 xfrd_make_request(zone); 535 break; 536 } 537 } 538 539 void 540 xfrd_tcp_release(xfrd_tcp_set_t* set, xfrd_zone_t* zone) 541 { 542 int conn = zone->tcp_conn; 543 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s", 544 zone->apex_str, zone->master->ip_address_spec)); 545 assert(zone->tcp_conn != -1); 546 assert(zone->tcp_waiting == 0); 547 zone->tcp_conn = -1; 548 zone->tcp_waiting = 0; 549 zone->zone_handler.fd = -1; 550 zone->zone_handler.event_types = NETIO_EVENT_READ|NETIO_EVENT_TIMEOUT; 551 552 if(set->tcp_state[conn]->fd != -1) 553 close(set->tcp_state[conn]->fd); 554 555 set->tcp_state[conn]->fd = -1; 556 557 if(set->tcp_count == XFRD_MAX_TCP && set->tcp_waiting_first) { 558 /* pop first waiting process */ 559 zone = set->tcp_waiting_first; 560 if(set->tcp_waiting_last == zone) 561 set->tcp_waiting_last = 0; 562 563 set->tcp_waiting_first = zone->tcp_waiting_next; 564 zone->tcp_waiting_next = 0; 565 /* start it */ 566 assert(zone->tcp_conn == -1); 567 zone->tcp_conn = conn; 568 zone->tcp_waiting = 0; 569 /* stop udp (if any) */ 570 if(zone->zone_handler.fd != -1) 571 xfrd_udp_release(zone); 572 573 if(!xfrd_tcp_open(set, zone)) 574 return; 575 576 xfrd_tcp_xfr(set, zone); 577 } 578 else { 579 assert(!set->tcp_waiting_first); 580 set->tcp_count --; 581 assert(set->tcp_count >= 0); 582 } 583 } 584