xref: /openbsd/usr.sbin/nsd/xfrd-tcp.c (revision 3d8817e4)
1 /*
2  * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include <config.h>
11 #include <assert.h>
12 #include <errno.h>
13 #include <fcntl.h>
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include "xfrd-tcp.h"
17 #include "buffer.h"
18 #include "packet.h"
19 #include "dname.h"
20 #include "options.h"
21 #include "namedb.h"
22 #include "xfrd.h"
23 #include "util.h"
24 
25 xfrd_tcp_set_t* xfrd_tcp_set_create(struct region* region)
26 {
27 	int i;
28 	xfrd_tcp_set_t* tcp_set = region_alloc(region, sizeof(xfrd_tcp_set_t));
29 	memset(tcp_set, 0, sizeof(xfrd_tcp_set_t));
30 	tcp_set->tcp_count = 0;
31 	tcp_set->tcp_waiting_first = 0;
32 	tcp_set->tcp_waiting_last = 0;
33 	for(i=0; i<XFRD_MAX_TCP; i++)
34 		tcp_set->tcp_state[i] = xfrd_tcp_create(region);
35 	return tcp_set;
36 }
37 
38 void
39 xfrd_setup_packet(buffer_type* packet,
40 	uint16_t type, uint16_t klass, const dname_type* dname)
41 {
42 	/* Set up the header */
43 	buffer_clear(packet);
44 	ID_SET(packet, qid_generate());
45 	FLAGS_SET(packet, 0);
46 	OPCODE_SET(packet, OPCODE_QUERY);
47 	QDCOUNT_SET(packet, 1);
48 	ANCOUNT_SET(packet, 0);
49 	NSCOUNT_SET(packet, 0);
50 	ARCOUNT_SET(packet, 0);
51 	buffer_skip(packet, QHEADERSZ);
52 
53 	/* The question record. */
54 	buffer_write(packet, dname_name(dname), dname->name_size);
55 	buffer_write_u16(packet, type);
56 	buffer_write_u16(packet, klass);
57 }
58 
59 static socklen_t
60 #ifdef INET6
61 xfrd_acl_sockaddr(acl_options_t* acl, unsigned int port,
62 	struct sockaddr_storage *sck)
63 #else
64 xfrd_acl_sockaddr(acl_options_t* acl, unsigned int port,
65 	struct sockaddr_in *sck, const char* fromto)
66 #endif /* INET6 */
67 {
68 	/* setup address structure */
69 #ifdef INET6
70 	memset(sck, 0, sizeof(struct sockaddr_storage));
71 #else
72 	memset(sck, 0, sizeof(struct sockaddr_in));
73 #endif
74 	if(acl->is_ipv6) {
75 #ifdef INET6
76 		struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
77 		sa->sin6_family = AF_INET6;
78 		sa->sin6_port = htons(port);
79 		sa->sin6_addr = acl->addr.addr6;
80 		return sizeof(struct sockaddr_in6);
81 #else
82 		log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
83 INET6.", fromto, acl->ip_address_spec);
84 		return 0;
85 #endif
86 	} else {
87 		struct sockaddr_in* sa = (struct sockaddr_in*)sck;
88 		sa->sin_family = AF_INET;
89 		sa->sin_port = htons(port);
90 		sa->sin_addr = acl->addr.addr;
91 		return sizeof(struct sockaddr_in);
92 	}
93 }
94 
95 socklen_t
96 #ifdef INET6
97 xfrd_acl_sockaddr_to(acl_options_t* acl, struct sockaddr_storage *to)
98 #else
99 xfrd_acl_sockaddr_to(acl_options_t* acl, struct sockaddr_in *to)
100 #endif /* INET6 */
101 {
102 	unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
103 #ifdef INET6
104 	return xfrd_acl_sockaddr(acl, port, to);
105 #else
106 	return xfrd_acl_sockaddr(acl, port, to, "to");
107 #endif /* INET6 */
108 }
109 
110 socklen_t
111 #ifdef INET6
112 xfrd_acl_sockaddr_frm(acl_options_t* acl, struct sockaddr_storage *frm)
113 #else
114 xfrd_acl_sockaddr_frm(acl_options_t* acl, struct sockaddr_in *frm)
115 #endif /* INET6 */
116 {
117 	unsigned int port = acl->port?acl->port:0;
118 #ifdef INET6
119 	return xfrd_acl_sockaddr(acl, port, frm);
120 #else
121 	return xfrd_acl_sockaddr(acl, port, frm, "from");
122 #endif /* INET6 */
123 }
124 
125 void
126 xfrd_write_soa_buffer(struct buffer* packet,
127 	const dname_type* apex, struct xfrd_soa* soa)
128 {
129 	size_t rdlength_pos;
130 	uint16_t rdlength;
131 	buffer_write(packet, dname_name(apex), apex->name_size);
132 
133 	/* already in network order */
134 	buffer_write(packet, &soa->type, sizeof(soa->type));
135 	buffer_write(packet, &soa->klass, sizeof(soa->klass));
136 	buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
137 	rdlength_pos = buffer_position(packet);
138 	buffer_skip(packet, sizeof(rdlength));
139 
140 	/* uncompressed dnames */
141 	buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
142 	buffer_write(packet, soa->email+1, soa->email[0]);
143 
144 	buffer_write(packet, &soa->serial, sizeof(uint32_t));
145 	buffer_write(packet, &soa->refresh, sizeof(uint32_t));
146 	buffer_write(packet, &soa->retry, sizeof(uint32_t));
147 	buffer_write(packet, &soa->expire, sizeof(uint32_t));
148 	buffer_write(packet, &soa->minimum, sizeof(uint32_t));
149 
150 	/* write length of RR */
151 	rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
152 	buffer_write_u16_at(packet, rdlength_pos, rdlength);
153 }
154 
155 xfrd_tcp_t*
156 xfrd_tcp_create(region_type* region)
157 {
158 	xfrd_tcp_t* tcp_state = (xfrd_tcp_t*)region_alloc(
159 		region, sizeof(xfrd_tcp_t));
160 	memset(tcp_state, 0, sizeof(xfrd_tcp_t));
161 	tcp_state->packet = buffer_create(region, QIOBUFSZ);
162 	tcp_state->fd = -1;
163 
164 	return tcp_state;
165 }
166 
167 void
168 xfrd_tcp_obtain(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
169 {
170 	assert(zone->tcp_conn == -1);
171 	assert(zone->tcp_waiting == 0);
172 
173 	if(set->tcp_count < XFRD_MAX_TCP) {
174 		int i;
175 		assert(!set->tcp_waiting_first);
176 		set->tcp_count ++;
177 		/* find a free tcp_buffer */
178 		for(i=0; i<XFRD_MAX_TCP; i++) {
179 			if(set->tcp_state[i]->fd == -1) {
180 				zone->tcp_conn = i;
181 				break;
182 			}
183 		}
184 
185 		assert(zone->tcp_conn != -1);
186 
187 		zone->tcp_waiting = 0;
188 
189 		/* stop udp use (if any) */
190 		if(zone->zone_handler.fd != -1)
191 			xfrd_udp_release(zone);
192 
193 		if(!xfrd_tcp_open(set, zone))
194 			return;
195 
196 		xfrd_tcp_xfr(set, zone);
197 		return;
198 	}
199 	/* wait, at end of line */
200 	DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
201 		"connections (%d) reached.", XFRD_MAX_TCP));
202 	zone->tcp_waiting_next = 0;
203 	zone->tcp_waiting = 1;
204 	if(!set->tcp_waiting_last) {
205 		set->tcp_waiting_first = zone;
206 		set->tcp_waiting_last = zone;
207 	} else {
208 		set->tcp_waiting_last->tcp_waiting_next = zone;
209 		set->tcp_waiting_last = zone;
210 	}
211 	xfrd_unset_timer(zone);
212 }
213 
214 int
215 xfrd_tcp_open(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
216 {
217 	int fd, family, conn;
218 
219 #ifdef INET6
220 	struct sockaddr_storage to;
221 #else
222 	struct sockaddr_in to;
223 #endif /* INET6 */
224 	socklen_t to_len;
225 
226 	assert(zone->tcp_conn != -1);
227 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
228 		zone->apex_str, zone->master->ip_address_spec));
229 	set->tcp_state[zone->tcp_conn]->is_reading = 0;
230 	set->tcp_state[zone->tcp_conn]->total_bytes = 0;
231 	set->tcp_state[zone->tcp_conn]->msglen = 0;
232 
233 	if(zone->master->is_ipv6) {
234 #ifdef INET6
235 		family = PF_INET6;
236 #else
237 		xfrd_set_refresh_now(zone);
238 		xfrd_tcp_release(set, zone);
239 		return 0;
240 #endif
241 	} else {
242 		family = PF_INET;
243 	}
244 	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
245 	set->tcp_state[zone->tcp_conn]->fd = fd;
246 	if(fd == -1) {
247 		log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
248 			zone->master->ip_address_spec, strerror(errno));
249 		xfrd_set_refresh_now(zone);
250 		xfrd_tcp_release(set, zone);
251 		return 0;
252 	}
253 	if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
254 		log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
255 		xfrd_set_refresh_now(zone);
256 		xfrd_tcp_release(set, zone);
257 		return 0;
258 	}
259 
260 	to_len = xfrd_acl_sockaddr_to(zone->master, &to);
261 
262 	/* bind it */
263 	if (!xfrd_bind_local_interface(fd,
264 		zone->zone_options->outgoing_interface, zone->master, 1)) {
265 
266 		xfrd_set_refresh_now(zone);
267 		xfrd_tcp_release(set, zone);
268 		return 0;
269         }
270 
271 	conn = connect(fd, (struct sockaddr*)&to, to_len);
272 	if (conn == -1 && errno != EINPROGRESS) {
273 		log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
274 			zone->master->ip_address_spec, strerror(errno));
275 		xfrd_set_refresh_now(zone);
276 		xfrd_tcp_release(set, zone);
277 		return 0;
278 	}
279 
280 	zone->zone_handler.fd = fd;
281 	zone->zone_handler.event_types = NETIO_EVENT_TIMEOUT|NETIO_EVENT_WRITE;
282 	xfrd_set_timer(zone, xfrd_time() + set->tcp_timeout);
283 	return 1;
284 }
285 
286 void
287 xfrd_tcp_xfr(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
288 {
289 	xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn];
290 	assert(zone->tcp_conn != -1);
291 	assert(zone->tcp_waiting == 0);
292 	/* start AXFR or IXFR for the zone */
293 	if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
294 						zone->master->ixfr_disabled) {
295 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
296 						"(AXFR) for %s to %s",
297 			zone->apex_str, zone->master->ip_address_spec));
298 
299 		xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex);
300 	} else {
301 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
302 						"transfer (IXFR) for %s to %s",
303 			zone->apex_str, zone->master->ip_address_spec));
304 
305 		xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex);
306         	NSCOUNT_SET(tcp->packet, 1);
307 		xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
308 	}
309 	zone->query_id = ID(tcp->packet);
310 	zone->msg_seq_nr = 0;
311 	zone->msg_rr_count = 0;
312 	if(zone->master->key_options && zone->master->key_options->tsig_key) {
313 		xfrd_tsig_sign_request(tcp->packet, &zone->tsig, zone->master);
314 	}
315 	buffer_flip(tcp->packet);
316 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
317 	tcp->msglen = buffer_limit(tcp->packet);
318 	/* wait for select to complete connect before write */
319 }
320 
321 static void
322 tcp_conn_ready_for_reading(xfrd_tcp_t* tcp)
323 {
324 	tcp->total_bytes = 0;
325 	tcp->msglen = 0;
326 	buffer_clear(tcp->packet);
327 }
328 
329 int conn_write(xfrd_tcp_t* tcp)
330 {
331 	ssize_t sent;
332 
333 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
334 		uint16_t sendlen = htons(tcp->msglen);
335 		sent = write(tcp->fd,
336 			(const char*)&sendlen + tcp->total_bytes,
337 			sizeof(tcp->msglen) - tcp->total_bytes);
338 
339 		if(sent == -1) {
340 			if(errno == EAGAIN || errno == EINTR) {
341 				/* write would block, try later */
342 				return 0;
343 			} else {
344 				return -1;
345 			}
346 		}
347 
348 		tcp->total_bytes += sent;
349 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
350 			/* incomplete write, resume later */
351 			return 0;
352 		}
353 		assert(tcp->total_bytes == sizeof(tcp->msglen));
354 	}
355 
356 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
357 
358 	sent = write(tcp->fd,
359 		buffer_current(tcp->packet),
360 		buffer_remaining(tcp->packet));
361 	if(sent == -1) {
362 		if(errno == EAGAIN || errno == EINTR) {
363 			/* write would block, try later */
364 			return 0;
365 		} else {
366 			return -1;
367 		}
368 	}
369 
370 	buffer_skip(tcp->packet, sent);
371 	tcp->total_bytes += sent;
372 
373 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
374 		/* more to write when socket becomes writable again */
375 		return 0;
376 	}
377 
378 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
379 	return 1;
380 }
381 
382 void
383 xfrd_tcp_write(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
384 {
385 	int ret;
386 	xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn];
387 	assert(zone->tcp_conn != -1);
388 	if(tcp->total_bytes == 0) {
389 		/* check for pending error from nonblocking connect */
390 		/* from Stevens, unix network programming, vol1, 3rd ed, p450 */
391 		int error = 0;
392 		socklen_t len = sizeof(error);
393 		if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
394 			error = errno; /* on solaris errno is error */
395 		}
396 		if(error == EINPROGRESS || error == EWOULDBLOCK)
397 			return; /* try again later */
398 		if(error != 0) {
399 			log_msg(LOG_ERR, "Could not tcp connect to %s: %s",
400 				zone->master->ip_address_spec, strerror(error));
401 			xfrd_set_refresh_now(zone);
402 			xfrd_tcp_release(set, zone);
403 			return;
404 		}
405 	}
406 	ret = conn_write(tcp);
407 	if(ret == -1) {
408 		log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
409 		xfrd_set_refresh_now(zone);
410 		xfrd_tcp_release(set, zone);
411 		return;
412 	}
413 	if(ret == 0) {
414 		return; /* write again later */
415 	}
416 	/* done writing, get ready for reading */
417 	tcp->is_reading = 1;
418 	tcp_conn_ready_for_reading(tcp);
419 	zone->zone_handler.event_types = NETIO_EVENT_READ|NETIO_EVENT_TIMEOUT;
420 	xfrd_tcp_read(set, zone);
421 }
422 
423 int
424 conn_read(xfrd_tcp_t* tcp)
425 {
426 	ssize_t received;
427 	/* receive leading packet length bytes */
428 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
429 		received = read(tcp->fd,
430 			(char*) &tcp->msglen + tcp->total_bytes,
431 			sizeof(tcp->msglen) - tcp->total_bytes);
432 		if(received == -1) {
433 			if(errno == EAGAIN || errno == EINTR) {
434 				/* read would block, try later */
435 				return 0;
436 			} else {
437 #ifdef ECONNRESET
438 				if (verbosity >= 2 || errno != ECONNRESET)
439 #endif /* ECONNRESET */
440 				log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
441 				return -1;
442 			}
443 		} else if(received == 0) {
444 			/* EOF */
445 			return -1;
446 		}
447 		tcp->total_bytes += received;
448 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
449 			/* not complete yet, try later */
450 			return 0;
451 		}
452 
453 		assert(tcp->total_bytes == sizeof(tcp->msglen));
454 		tcp->msglen = ntohs(tcp->msglen);
455 
456 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
457 			log_msg(LOG_ERR, "buffer too small, dropping connection");
458 			return 0;
459 		}
460 		buffer_set_limit(tcp->packet, tcp->msglen);
461 	}
462 
463 	assert(buffer_remaining(tcp->packet) > 0);
464 
465 	received = read(tcp->fd, buffer_current(tcp->packet),
466 		buffer_remaining(tcp->packet));
467 	if(received == -1) {
468 		if(errno == EAGAIN || errno == EINTR) {
469 			/* read would block, try later */
470 			return 0;
471 		} else {
472 #ifdef ECONNRESET
473 			if (verbosity >= 2 || errno != ECONNRESET)
474 #endif /* ECONNRESET */
475 			log_msg(LOG_ERR, "tcp read %s", strerror(errno));
476 			return -1;
477 		}
478 	} else if(received == 0) {
479 		/* EOF */
480 		return -1;
481 	}
482 
483 	tcp->total_bytes += received;
484 	buffer_skip(tcp->packet, received);
485 
486 	if(buffer_remaining(tcp->packet) > 0) {
487 		/* not complete yet, wait for more */
488 		return 0;
489 	}
490 
491 	/* completed */
492 	assert(buffer_position(tcp->packet) == tcp->msglen);
493 	return 1;
494 }
495 
496 void
497 xfrd_tcp_read(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
498 {
499 	xfrd_tcp_t* tcp = set->tcp_state[zone->tcp_conn];
500 	int ret;
501 
502 	assert(zone->tcp_conn != -1);
503 	ret = conn_read(tcp);
504 	if(ret == -1) {
505 		xfrd_set_refresh_now(zone);
506 		xfrd_tcp_release(set, zone);
507 		return;
508 	}
509 	if(ret == 0)
510 		return;
511 
512 	/* completed msg */
513 	buffer_flip(tcp->packet);
514 	switch(xfrd_handle_received_xfr_packet(zone, tcp->packet)) {
515 		case xfrd_packet_more:
516 			tcp_conn_ready_for_reading(tcp);
517 			break;
518 		case xfrd_packet_transfer:
519 		case xfrd_packet_newlease:
520 			xfrd_tcp_release(set, zone);
521 			assert(zone->round_num == -1);
522 			break;
523 		case xfrd_packet_notimpl:
524 			zone->master->ixfr_disabled = time(NULL);
525 			xfrd_tcp_release(set, zone);
526 			/* query next server */
527 			xfrd_make_request(zone);
528 			break;
529 		case xfrd_packet_bad:
530 		case xfrd_packet_tcp:
531 		default:
532 			xfrd_tcp_release(set, zone);
533 			/* query next server */
534 			xfrd_make_request(zone);
535 			break;
536 	}
537 }
538 
539 void
540 xfrd_tcp_release(xfrd_tcp_set_t* set, xfrd_zone_t* zone)
541 {
542 	int conn = zone->tcp_conn;
543 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
544 		zone->apex_str, zone->master->ip_address_spec));
545 	assert(zone->tcp_conn != -1);
546 	assert(zone->tcp_waiting == 0);
547 	zone->tcp_conn = -1;
548 	zone->tcp_waiting = 0;
549 	zone->zone_handler.fd = -1;
550 	zone->zone_handler.event_types = NETIO_EVENT_READ|NETIO_EVENT_TIMEOUT;
551 
552 	if(set->tcp_state[conn]->fd != -1)
553 		close(set->tcp_state[conn]->fd);
554 
555 	set->tcp_state[conn]->fd = -1;
556 
557 	if(set->tcp_count == XFRD_MAX_TCP && set->tcp_waiting_first) {
558 		/* pop first waiting process */
559 		zone = set->tcp_waiting_first;
560 		if(set->tcp_waiting_last == zone)
561 			set->tcp_waiting_last = 0;
562 
563 		set->tcp_waiting_first = zone->tcp_waiting_next;
564 		zone->tcp_waiting_next = 0;
565 		/* start it */
566 		assert(zone->tcp_conn == -1);
567 		zone->tcp_conn = conn;
568 		zone->tcp_waiting = 0;
569 		/* stop udp (if any) */
570 		if(zone->zone_handler.fd != -1)
571 			xfrd_udp_release(zone);
572 
573 		if(!xfrd_tcp_open(set, zone))
574 			return;
575 
576 		xfrd_tcp_xfr(set, zone);
577 	}
578 	else {
579 		assert(!set->tcp_waiting_first);
580 		set->tcp_count --;
581 		assert(set->tcp_count >= 0);
582 	}
583 }
584