xref: /openbsd/usr.sbin/nsd/xfrd-tcp.c (revision bf87c3c0)
1 /*
2  * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 #include <assert.h>
12 #include <errno.h>
13 #include <fcntl.h>
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include <sys/uio.h>
17 #include "nsd.h"
18 #include "xfrd-tcp.h"
19 #include "buffer.h"
20 #include "packet.h"
21 #include "dname.h"
22 #include "options.h"
23 #include "namedb.h"
24 #include "xfrd.h"
25 #include "xfrd-disk.h"
26 #include "util.h"
27 #ifdef HAVE_TLS_1_3
28 #include <openssl/ssl.h>
29 #include <openssl/err.h>
30 #endif
31 
32 #ifdef HAVE_TLS_1_3
33 void log_crypto_err(const char* str); /* in server.c */
34 
35 static SSL_CTX*
create_ssl_context()36 create_ssl_context()
37 {
38 	SSL_CTX *ctx;
39 	unsigned char protos[] = { 3, 'd', 'o', 't' };
40 	ctx = SSL_CTX_new(TLS_client_method());
41 	if (!ctx) {
42 		log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
43 	}
44 	else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
45 		SSL_CTX_free(ctx);
46 		log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
47 		return NULL;
48 	}
49 	/* Only trust 1.3 as per the specification */
50 	else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
51 		SSL_CTX_free(ctx);
52 		log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
53 		return NULL;
54 	}
55 
56 	if (SSL_CTX_set_alpn_protos(ctx, protos, sizeof(protos)) != 0) {
57 		SSL_CTX_free(ctx);
58 		log_msg(LOG_ERR, "xfrd tls: Unable to set ALPN protocols");
59 		return NULL;
60 	}
61 	return ctx;
62 }
63 
64 static int
tls_verify_callback(int preverify_ok,X509_STORE_CTX * ctx)65 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
66 {
67 	int err = X509_STORE_CTX_get_error(ctx);
68 	int depth = X509_STORE_CTX_get_error_depth(ctx);
69 
70 	// report the specific cert error here - will need custom verify code if
71 	// SPKI pins are supported
72 	if (!preverify_ok)
73 		log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
74 				err,
75 				depth,
76 				X509_verify_cert_error_string(err));
77 	return preverify_ok;
78 }
79 
80 static int
setup_ssl(struct xfrd_tcp_pipeline * tp,struct xfrd_tcp_set * tcp_set,const char * auth_domain_name)81 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
82 		  const char* auth_domain_name)
83 {
84 	if (!tcp_set->ssl_ctx) {
85 		log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
86 		return 0;
87 	}
88 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
89 						 auth_domain_name));
90 	tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
91 	if(!tp->ssl) {
92 		log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
93 		return 0;
94 	}
95 	SSL_set_connect_state(tp->ssl);
96 	(void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
97 	if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
98 		log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
99 		SSL_free(tp->ssl);
100 		tp->ssl = NULL;
101 		return 0;
102 	}
103 
104 	SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
105 	if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
106 		log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
107 		auth_domain_name);
108 		SSL_free(tp->ssl);
109 		tp->ssl = NULL;
110 		return 0;
111 	}
112 	return 1;
113 }
114 
115 static int
ssl_handshake(struct xfrd_tcp_pipeline * tp)116 ssl_handshake(struct xfrd_tcp_pipeline* tp)
117 {
118 	int ret;
119 
120 	ERR_clear_error();
121 	ret = SSL_do_handshake(tp->ssl);
122 	if(ret == 1) {
123 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
124 		tp->handshake_done = 1;
125 		return 1;
126 	}
127 	tp->handshake_want = SSL_get_error(tp->ssl, ret);
128 	if(tp->handshake_want == SSL_ERROR_WANT_READ
129 	|| tp->handshake_want == SSL_ERROR_WANT_WRITE)
130 		return 1;
131 
132 	return 0;
133 }
134 
/*
 * Passphrase callback: copies the NUL terminated string that u points to
 * into buf (at most size bytes, terminator included) and returns the
 * length of what ended up in buf.  The rwflag argument is ignored.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	const char* passphrase = (char*)u;

	strlcpy(buf, passphrase, size);
	return strlen(buf);
}
140 
141 #endif
142 
143 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
144 static int
xfrd_pipe_cmp(const void * a,const void * b)145 xfrd_pipe_cmp(const void* a, const void* b)
146 {
147 	const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
148 	const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
149 	int r;
150 	if(x == y)
151 		return 0;
152 	if(y->key.ip_len != x->key.ip_len)
153 		/* subtraction works because nonnegative and small numbers */
154 		return (int)y->key.ip_len - (int)x->key.ip_len;
155 	r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
156 	if(r != 0)
157 		return r;
158 	/* sort that num_unused is sorted ascending, */
159 	if(x->key.num_unused != y->key.num_unused) {
160 		return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
161 	}
162 	/* different pipelines are different still, even with same numunused*/
163 	return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
164 }
165 
xfrd_tcp_set_create(struct region * region,const char * tls_cert_bundle,int tcp_max,int tcp_pipeline)166 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
167 {
168 	int i;
169 	struct xfrd_tcp_set* tcp_set = region_alloc(region,
170 		sizeof(struct xfrd_tcp_set));
171 	memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
172 	tcp_set->tcp_state = NULL;
173 	tcp_set->tcp_max = tcp_max;
174 	tcp_set->tcp_pipeline = tcp_pipeline;
175 	tcp_set->tcp_count = 0;
176 	tcp_set->tcp_waiting_first = 0;
177 	tcp_set->tcp_waiting_last = 0;
178 #ifdef HAVE_TLS_1_3
179 	/* Set up SSL context */
180 	tcp_set->ssl_ctx = create_ssl_context();
181 	if (tcp_set->ssl_ctx == NULL)
182 		log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
183 
184 	else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
185 				tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
186 		log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
187 				tls_cert_bundle);
188 	}
189 #else
190 	(void)tls_cert_bundle;
191 	log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
192 #endif
193 	tcp_set->tcp_state = region_alloc(region,
194 		sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
195 	for(i=0; i<tcp_set->tcp_max; i++)
196 		tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
197 			tcp_pipeline);
198 	tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
199 	return tcp_set;
200 }
201 
pipeline_id_compare(const void * x,const void * y)202 static int pipeline_id_compare(const void* x, const void* y)
203 {
204 	struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
205 	struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
206 	if(a->id < b->id)
207 		return -1;
208 	if(a->id > b->id)
209 		return 1;
210 	return 0;
211 }
212 
/*
 * Fill array with num ID values.
 *
 * When num is 65536 the values 0..65535 are written in order.  Otherwise
 * num distinct values below max are picked with the Robert Floyd sampling
 * algorithm, drawing the random numbers from random_generate().
 * max must not exceed 65536.
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	uint8_t taken[65536];
	int filled = 0;
	int upper;

	if(num == 65536) {
		/* all of them, loop and insert */
		int value;
		for(value = 0; value < num; value++)
			array[value] = (uint16_t)value;
		return;
	}
	assert(max <= 65536);
	/* taken[v] is nonzero once v has been handed out */
	memset(taken, 0, sizeof(taken[0])*max);
	for(upper = max-num; upper < max; upper++) {
		/* random_generate(n) yields a value from 0..n-1 */
		int candidate = 0;
		if(upper+1 > 1)
			candidate = random_generate(upper+1);
		/* on a collision, upper itself is used instead */
		if(taken[candidate])
			candidate = upper;
		array[filled++] = candidate;
		taken[candidate] = 1;
	}
}
245 
246 static void
clear_pipeline_entry(struct xfrd_tcp_pipeline * tp,rbnode_type * node)247 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
248 {
249 	struct xfrd_tcp_pipeline_id *n;
250 	if(node == NULL || node == RBTREE_NULL)
251 		return;
252 	clear_pipeline_entry(tp, node->left);
253 	node->left = NULL;
254 	clear_pipeline_entry(tp, node->right);
255 	node->right = NULL;
256 	/* move the node into the free list */
257 	n = (struct xfrd_tcp_pipeline_id*)node;
258 	n->next_free = tp->pipe_id_free_list;
259 	tp->pipe_id_free_list = n;
260 }
261 
262 static void
xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline * tp)263 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
264 {
265 	/* move entries into free list */
266 	clear_pipeline_entry(tp, tp->zone_per_id->root);
267 	/* clear the tree */
268 	tp->zone_per_id->count = 0;
269 	tp->zone_per_id->root = RBTREE_NULL;
270 }
271 
272 static void
xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline * tp)273 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
274 {
275 	tp->key.node.key = tp;
276 	tp->key.num_unused = tp->pipe_num;
277 	tp->key.num_skip = 0;
278 	tp->tcp_send_first = NULL;
279 	tp->tcp_send_last = NULL;
280 	xfrd_tcp_pipeline_cleanup(tp);
281 	pick_id_values(tp->unused, tp->pipe_num, 65536);
282 }
283 
284 struct xfrd_tcp_pipeline*
xfrd_tcp_pipeline_create(region_type * region,int tcp_pipeline)285 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
286 {
287 	int i;
288 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
289 		region_alloc_zero(region, sizeof(*tp));
290 	if(tcp_pipeline < 0)
291 		tcp_pipeline = 0;
292 	if(tcp_pipeline > 65536)
293 		tcp_pipeline = 65536; /* max 16 bit ID numbers */
294 	tp->pipe_num = tcp_pipeline;
295 	tp->key.num_unused = tp->pipe_num;
296 	tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
297 	tp->pipe_id_free_list = NULL;
298 	for(i=0; i<tp->pipe_num; i++) {
299 		struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
300 			region_alloc_zero(region, sizeof(*n));
301 		n->next_free = tp->pipe_id_free_list;
302 		tp->pipe_id_free_list = n;
303 	}
304 	tp->unused = (uint16_t*)region_alloc_zero(region,
305 		sizeof(tp->unused[0])*tp->pipe_num);
306 	tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
307 	tp->tcp_w = xfrd_tcp_create(region, 512);
308 	xfrd_tcp_pipeline_init(tp);
309 	return tp;
310 }
311 
312 static struct xfrd_zone*
xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline * tp,uint16_t id)313 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
314 {
315 	struct xfrd_tcp_pipeline_id key;
316 	rbnode_type* n;
317 	memset(&key, 0, sizeof(key));
318 	key.node.key = &key;
319 	key.id = id;
320 	n = rbtree_search(tp->zone_per_id, &key);
321 	if(n && n != RBTREE_NULL) {
322 		return ((struct xfrd_tcp_pipeline_id*)n)->zone;
323 	}
324 	return NULL;
325 }
326 
327 static void
xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline * tp,uint16_t id,struct xfrd_zone * zone)328 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
329 	struct xfrd_zone* zone)
330 {
331 	struct xfrd_tcp_pipeline_id* n;
332 	/* because there are tp->pipe_num preallocated entries, and we have
333 	 * only tp->pipe_num id values, the list cannot be empty now. */
334 	assert(tp->pipe_id_free_list != NULL);
335 	/* pick up next free xfrd_tcp_pipeline_id node */
336 	n = tp->pipe_id_free_list;
337 	tp->pipe_id_free_list = n->next_free;
338 	n->next_free = NULL;
339 	memset(&n->node, 0, sizeof(n->node));
340 	n->node.key = n;
341 	n->id = id;
342 	n->zone = zone;
343 	rbtree_insert(tp->zone_per_id, &n->node);
344 }
345 
346 static void
xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline * tp,uint16_t id)347 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
348 {
349 	struct xfrd_tcp_pipeline_id key;
350 	rbnode_type* node;
351 	memset(&key, 0, sizeof(key));
352 	key.node.key = &key;
353 	key.id = id;
354 	node = rbtree_delete(tp->zone_per_id, &key);
355 	if(node && node != RBTREE_NULL) {
356 		struct xfrd_tcp_pipeline_id* n =
357 			(struct xfrd_tcp_pipeline_id*)node;
358 		n->next_free = tp->pipe_id_free_list;
359 		tp->pipe_id_free_list = n;
360 	}
361 }
362 
363 static void
xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline * tp,uint16_t id)364 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
365 {
366 	struct xfrd_tcp_pipeline_id key;
367 	rbnode_type* n;
368 	memset(&key, 0, sizeof(key));
369 	key.node.key = &key;
370 	key.id = id;
371 	n = rbtree_search(tp->zone_per_id, &key);
372 	if(n && n != RBTREE_NULL) {
373 		struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
374 		zid->zone = TCP_NULL_SKIP;
375 	}
376 }
377 
378 void
xfrd_setup_packet(buffer_type * packet,uint16_t type,uint16_t klass,const dname_type * dname,uint16_t qid)379 xfrd_setup_packet(buffer_type* packet,
380 	uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid)
381 {
382 	/* Set up the header */
383 	buffer_clear(packet);
384 	ID_SET(packet, qid);
385 	FLAGS_SET(packet, 0);
386 	OPCODE_SET(packet, OPCODE_QUERY);
387 	QDCOUNT_SET(packet, 1);
388 	ANCOUNT_SET(packet, 0);
389 	NSCOUNT_SET(packet, 0);
390 	ARCOUNT_SET(packet, 0);
391 	buffer_skip(packet, QHEADERSZ);
392 
393 	/* The question record. */
394 	buffer_write(packet, dname_name(dname), dname->name_size);
395 	buffer_write_u16(packet, type);
396 	buffer_write_u16(packet, klass);
397 }
398 
399 static socklen_t
400 #ifdef INET6
xfrd_acl_sockaddr(acl_options_type * acl,unsigned int port,struct sockaddr_storage * sck)401 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
402 	struct sockaddr_storage *sck)
403 #else
404 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
405 	struct sockaddr_in *sck, const char* fromto)
406 #endif /* INET6 */
407 {
408 	/* setup address structure */
409 #ifdef INET6
410 	memset(sck, 0, sizeof(struct sockaddr_storage));
411 #else
412 	memset(sck, 0, sizeof(struct sockaddr_in));
413 #endif
414 	if(acl->is_ipv6) {
415 #ifdef INET6
416 		struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
417 		sa->sin6_family = AF_INET6;
418 		sa->sin6_port = htons(port);
419 		sa->sin6_addr = acl->addr.addr6;
420 		return sizeof(struct sockaddr_in6);
421 #else
422 		log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
423 INET6.", fromto, acl->ip_address_spec);
424 		return 0;
425 #endif
426 	} else {
427 		struct sockaddr_in* sa = (struct sockaddr_in*)sck;
428 		sa->sin_family = AF_INET;
429 		sa->sin_port = htons(port);
430 		sa->sin_addr = acl->addr.addr;
431 		return sizeof(struct sockaddr_in);
432 	}
433 }
434 
435 socklen_t
436 #ifdef INET6
xfrd_acl_sockaddr_to(acl_options_type * acl,struct sockaddr_storage * to)437 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
438 #else
439 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
440 #endif /* INET6 */
441 {
442 #ifdef HAVE_TLS_1_3
443 	unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
444 						(unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
445 #else
446 	unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
447 #endif
448 #ifdef INET6
449 	return xfrd_acl_sockaddr(acl, port, to);
450 #else
451 	return xfrd_acl_sockaddr(acl, port, to, "to");
452 #endif /* INET6 */
453 }
454 
455 socklen_t
456 #ifdef INET6
xfrd_acl_sockaddr_frm(acl_options_type * acl,struct sockaddr_storage * frm)457 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
458 #else
459 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
460 #endif /* INET6 */
461 {
462 	unsigned int port = acl->port?acl->port:0;
463 #ifdef INET6
464 	return xfrd_acl_sockaddr(acl, port, frm);
465 #else
466 	return xfrd_acl_sockaddr(acl, port, frm, "from");
467 #endif /* INET6 */
468 }
469 
470 void
xfrd_write_soa_buffer(struct buffer * packet,const dname_type * apex,struct xfrd_soa * soa)471 xfrd_write_soa_buffer(struct buffer* packet,
472 	const dname_type* apex, struct xfrd_soa* soa)
473 {
474 	size_t rdlength_pos;
475 	uint16_t rdlength;
476 	buffer_write(packet, dname_name(apex), apex->name_size);
477 
478 	/* already in network order */
479 	buffer_write(packet, &soa->type, sizeof(soa->type));
480 	buffer_write(packet, &soa->klass, sizeof(soa->klass));
481 	buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
482 	rdlength_pos = buffer_position(packet);
483 	buffer_skip(packet, sizeof(rdlength));
484 
485 	/* uncompressed dnames */
486 	buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
487 	buffer_write(packet, soa->email+1, soa->email[0]);
488 
489 	buffer_write(packet, &soa->serial, sizeof(uint32_t));
490 	buffer_write(packet, &soa->refresh, sizeof(uint32_t));
491 	buffer_write(packet, &soa->retry, sizeof(uint32_t));
492 	buffer_write(packet, &soa->expire, sizeof(uint32_t));
493 	buffer_write(packet, &soa->minimum, sizeof(uint32_t));
494 
495 	/* write length of RR */
496 	rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
497 	buffer_write_u16_at(packet, rdlength_pos, rdlength);
498 }
499 
500 struct xfrd_tcp*
xfrd_tcp_create(region_type * region,size_t bufsize)501 xfrd_tcp_create(region_type* region, size_t bufsize)
502 {
503 	struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
504 		region, sizeof(struct xfrd_tcp));
505 	memset(tcp_state, 0, sizeof(struct xfrd_tcp));
506 	tcp_state->packet = buffer_create(region, bufsize);
507 	tcp_state->fd = -1;
508 
509 	return tcp_state;
510 }
511 
512 static struct xfrd_tcp_pipeline*
pipeline_find(struct xfrd_tcp_set * set,xfrd_zone_type * zone)513 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
514 {
515 	rbnode_type* sme = NULL;
516 	struct xfrd_tcp_pipeline* r;
517 	/* smaller buf than a full pipeline with 64kb ID array, only need
518 	 * the front part with the key info, this front part contains the
519 	 * members that the compare function uses. */
520 	struct xfrd_tcp_pipeline_key k, *key=&k;
521 	key->node.key = key;
522 	key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
523 	key->num_unused = set->tcp_pipeline;
524 	/* lookup existing tcp transfer to the master with highest unused */
525 	if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
526 		/* exact match, strange, fully unused tcp cannot be open */
527 		assert(0);
528 	}
529 	if(!sme)
530 		return NULL;
531 	r = (struct xfrd_tcp_pipeline*)sme->key;
532 	/* <= key pointed at, is the master correct ? */
533 	if(r->key.ip_len != key->ip_len)
534 		return NULL;
535 	if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
536 		return NULL;
537 	/* correct master, is there a slot free for this transfer? */
538 	if(r->key.num_unused == 0)
539 		return NULL;
540 	return r;
541 }
542 
543 /* remove zone from tcp waiting list */
544 static void
tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set * set,xfrd_zone_type * zone)545 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
546 {
547 	assert(zone->tcp_waiting);
548 	set->tcp_waiting_first = zone->tcp_waiting_next;
549 	if(zone->tcp_waiting_next)
550 		zone->tcp_waiting_next->tcp_waiting_prev = NULL;
551 	else	set->tcp_waiting_last = 0;
552 	zone->tcp_waiting_next = 0;
553 	zone->tcp_waiting = 0;
554 }
555 
556 /* remove zone from tcp pipe write-wait list */
557 static void
tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)558 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
559 {
560 	if(zone->in_tcp_send) {
561 		if(zone->tcp_send_prev)
562 			zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
563 		else	tp->tcp_send_first=zone->tcp_send_next;
564 		if(zone->tcp_send_next)
565 			zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
566 		else	tp->tcp_send_last=zone->tcp_send_prev;
567 		zone->in_tcp_send = 0;
568 	}
569 }
570 
571 /* remove first from write-wait list */
572 static void
tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)573 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
574 {
575 	tp->tcp_send_first = zone->tcp_send_next;
576 	if(tp->tcp_send_first)
577 		tp->tcp_send_first->tcp_send_prev = NULL;
578 	else	tp->tcp_send_last = NULL;
579 	zone->in_tcp_send = 0;
580 }
581 
582 /* remove zone from tcp pipe ID map */
583 static void
tcp_pipe_id_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone,int alsotree)584 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
585 	int alsotree)
586 {
587 	assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
588 	if(alsotree)
589 		xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
590 	tp->unused[tp->key.num_unused] = zone->query_id;
591 	/* must remove and re-add for sort order in tree */
592 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
593 	tp->key.num_unused++;
594 	(void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
595 }
596 
597 /* stop the tcp pipe (and all its zones need to retry) */
598 static void
xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline * tp)599 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
600 {
601 	struct xfrd_tcp_pipeline_id* zid;
602 	int conn = -1;
603 	assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
604 	assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
605 	/* need to retry for all the zones connected to it */
606 	/* these could use different lists and go to a different nextmaster*/
607 	RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
608 		xfrd_zone_type* zone = zid->zone;
609 		if(zone && zone != TCP_NULL_SKIP) {
610 			assert(zone->query_id == zid->id);
611 			conn = zone->tcp_conn;
612 			zone->tcp_conn = -1;
613 			zone->tcp_waiting = 0;
614 			tcp_pipe_sendlist_remove(tp, zone);
615 			tcp_pipe_id_remove(tp, zone, 0);
616 			xfrd_set_refresh_now(zone);
617 		}
618 	}
619 	xfrd_tcp_pipeline_cleanup(tp);
620 	assert(conn != -1);
621 	/* now release the entire tcp pipe */
622 	xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
623 }
624 
625 static void
tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline * tp)626 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
627 {
628 	int fd = tp->handler.ev_fd;
629 	struct timeval tv;
630 	tv.tv_sec = xfrd->tcp_set->tcp_timeout;
631 	tv.tv_usec = 0;
632 	if(tp->handler_added)
633 		event_del(&tp->handler);
634 	memset(&tp->handler, 0, sizeof(tp->handler));
635 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
636 #ifdef HAVE_TLS_1_3
637 		( tp->ssl
638 		? ( tp->handshake_done ?  ( tp->tcp_send_first ? EV_WRITE : 0 )
639 		  : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
640 		: tp->tcp_send_first ? EV_WRITE : 0 ),
641 #else
642 		( tp->tcp_send_first ? EV_WRITE : 0 ),
643 #endif
644 		xfrd_handle_tcp_pipe, tp);
645 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
646 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
647 	if(event_add(&tp->handler, &tv) != 0)
648 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
649 	tp->handler_added = 1;
650 }
651 
652 /* handle event from fd of tcp pipe */
653 void
xfrd_handle_tcp_pipe(int ATTR_UNUSED (fd),short event,void * arg)654 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
655 {
656 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
657 	if((event & EV_WRITE)) {
658 		tcp_pipe_reset_timeout(tp);
659 		if(tp->tcp_send_first) {
660 			DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
661 				tp->tcp_send_first->apex_str));
662 			xfrd_tcp_write(tp, tp->tcp_send_first);
663 		}
664 	}
665 	if((event & EV_READ) && tp->handler_added) {
666 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
667 		tcp_pipe_reset_timeout(tp);
668 		xfrd_tcp_read(tp);
669 	}
670 	if((event & EV_TIMEOUT) && tp->handler_added) {
671 		/* tcp connection timed out */
672 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
673 		xfrd_tcp_pipe_stop(tp);
674 	}
675 }
676 
677 /* add a zone to the pipeline, it starts to want to write its query */
678 static void
pipeline_setup_new_zone(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)679 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
680 	xfrd_zone_type* zone)
681 {
682 	/* assign the ID */
683 	int idx;
684 	assert(tp->key.num_unused > 0);
685 	/* we pick a random ID, even though it is TCP anyway */
686 	idx = random_generate(tp->key.num_unused);
687 	zone->query_id = tp->unused[idx];
688 	tp->unused[idx] = tp->unused[tp->key.num_unused-1];
689 	xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
690 	/* decrement unused counter, and fixup tree */
691 	(void)rbtree_delete(set->pipetree, &tp->key.node);
692 	tp->key.num_unused--;
693 	(void)rbtree_insert(set->pipetree, &tp->key.node);
694 
695 	/* add to sendlist, at end */
696 	zone->tcp_send_next = NULL;
697 	zone->tcp_send_prev = tp->tcp_send_last;
698 	zone->in_tcp_send = 1;
699 	if(tp->tcp_send_last)
700 		tp->tcp_send_last->tcp_send_next = zone;
701 	else	tp->tcp_send_first = zone;
702 	tp->tcp_send_last = zone;
703 
704 	/* is it first in line? */
705 	if(tp->tcp_send_first == zone) {
706 		xfrd_tcp_setup_write_packet(tp, zone);
707 		/* add write to event handler */
708 		tcp_pipe_reset_timeout(tp);
709 	}
710 }
711 
712 void
xfrd_tcp_obtain(struct xfrd_tcp_set * set,xfrd_zone_type * zone)713 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
714 {
715 	struct xfrd_tcp_pipeline* tp;
716 	assert(zone->tcp_conn == -1);
717 	assert(zone->tcp_waiting == 0);
718 
719 	if(set->tcp_count < set->tcp_max) {
720 		int i;
721 		assert(!set->tcp_waiting_first);
722 		set->tcp_count ++;
723 		/* find a free tcp_buffer */
724 		for(i=0; i<set->tcp_max; i++) {
725 			if(set->tcp_state[i]->tcp_r->fd == -1) {
726 				zone->tcp_conn = i;
727 				break;
728 			}
729 		}
730 		/** What if there is no free tcp_buffer? return; */
731 		if (zone->tcp_conn < 0) {
732 			return;
733 		}
734 
735 		tp = set->tcp_state[zone->tcp_conn];
736 		zone->tcp_waiting = 0;
737 
738 		/* stop udp use (if any) */
739 		if(zone->zone_handler.ev_fd != -1)
740 			xfrd_udp_release(zone);
741 
742 		if(!xfrd_tcp_open(set, tp, zone)) {
743 			zone->tcp_conn = -1;
744 			set->tcp_count --;
745 			xfrd_set_refresh_now(zone);
746 			return;
747 		}
748 		/* ip and ip_len set by tcp_open */
749 		xfrd_tcp_pipeline_init(tp);
750 
751 		/* insert into tree */
752 		(void)rbtree_insert(set->pipetree, &tp->key.node);
753 		xfrd_deactivate_zone(zone);
754 		xfrd_unset_timer(zone);
755 		pipeline_setup_new_zone(set, tp, zone);
756 		return;
757 	}
758 	/* check for a pipeline to the same master with unused ID */
759 	if((tp = pipeline_find(set, zone))!= NULL) {
760 		int i;
761 		if(zone->zone_handler.ev_fd != -1)
762 			xfrd_udp_release(zone);
763 		for(i=0; i<set->tcp_max; i++) {
764 			if(set->tcp_state[i] == tp)
765 				zone->tcp_conn = i;
766 		}
767 		xfrd_deactivate_zone(zone);
768 		xfrd_unset_timer(zone);
769 		pipeline_setup_new_zone(set, tp, zone);
770 		return;
771 	}
772 
773 	/* wait, at end of line */
774 	DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
775 		"connections (%d) reached.", set->tcp_max));
776 	zone->tcp_waiting_next = 0;
777 	zone->tcp_waiting_prev = set->tcp_waiting_last;
778 	zone->tcp_waiting = 1;
779 	if(!set->tcp_waiting_last) {
780 		set->tcp_waiting_first = zone;
781 		set->tcp_waiting_last = zone;
782 	} else {
783 		set->tcp_waiting_last->tcp_waiting_next = zone;
784 		set->tcp_waiting_last = zone;
785 	}
786 	xfrd_deactivate_zone(zone);
787 	xfrd_unset_timer(zone);
788 }
789 
790 int
xfrd_tcp_open(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)791 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
792 	xfrd_zone_type* zone)
793 {
794 	int fd, family, conn;
795 	struct timeval tv;
796 	assert(zone->tcp_conn != -1);
797 
798 	/* if there is no next master, fallback to use the first one */
799 	/* but there really should be a master set */
800 	if(!zone->master) {
801 		zone->master = zone->zone_options->pattern->request_xfr;
802 		zone->master_num = 0;
803 	}
804 
805 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
806 		zone->apex_str, zone->master->ip_address_spec));
807 	tp->tcp_r->is_reading = 1;
808 	tp->tcp_r->total_bytes = 0;
809 	tp->tcp_r->msglen = 0;
810 	buffer_clear(tp->tcp_r->packet);
811 	tp->tcp_w->is_reading = 0;
812 	tp->tcp_w->total_bytes = 0;
813 	tp->tcp_w->msglen = 0;
814 	tp->connection_established = 0;
815 
816 	if(zone->master->is_ipv6) {
817 #ifdef INET6
818 		family = PF_INET6;
819 #else
820 		xfrd_set_refresh_now(zone);
821 		return 0;
822 #endif
823 	} else {
824 		family = PF_INET;
825 	}
826 	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
827 	if(fd == -1) {
828 		/* squelch 'Address family not supported by protocol' at low
829 		 * verbosity levels */
830 		if(errno != EAFNOSUPPORT || verbosity > 2)
831 		    log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
832 			zone->master->ip_address_spec, strerror(errno));
833 		xfrd_set_refresh_now(zone);
834 		return 0;
835 	}
836 	if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
837 		log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
838 		close(fd);
839 		xfrd_set_refresh_now(zone);
840 		return 0;
841 	}
842 
843 	if(xfrd->nsd->outgoing_tcp_mss > 0) {
844 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
845 		if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
846 			(void*)&xfrd->nsd->outgoing_tcp_mss,
847 			sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
848 			log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
849 					"failed: %s", strerror(errno));
850 		}
851 #else
852 		log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
853 #endif
854 	}
855 
856 	tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
857 
858 	/* bind it */
859 	if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
860 		outgoing_interface, zone->master, 1)) {
861 		close(fd);
862 		xfrd_set_refresh_now(zone);
863 		return 0;
864         }
865 
866 	conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
867 	if (conn == -1 && errno != EINPROGRESS) {
868 		log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
869 			zone->master->ip_address_spec, strerror(errno));
870 		close(fd);
871 		xfrd_set_refresh_now(zone);
872 		return 0;
873 	}
874 	tp->tcp_r->fd = fd;
875 	tp->tcp_w->fd = fd;
876 
877 	/* Check if an tls_auth name is configured which means we should try to
878 	   establish an SSL connection */
879 	if (zone->master->tls_auth_options &&
880 		zone->master->tls_auth_options->auth_domain_name) {
881 #ifdef HAVE_TLS_1_3
882 		if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
883 			log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
884 					zone->apex_str, zone->master->ip_address_spec);
885 			close(fd);
886 			xfrd_set_refresh_now(zone);
887 			return 0;
888 		}
889 
890 		/* Load client certificate (if provided) */
891 		if (zone->master->tls_auth_options->client_cert &&
892 		    zone->master->tls_auth_options->client_key) {
893 			if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
894 			                                       zone->master->tls_auth_options->client_cert) != 1) {
895 				log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
896 			}
897 
898 			if (zone->master->tls_auth_options->client_key_pw) {
899 				SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
900 				SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
901 			}
902 
903 			if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
904 				log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
905 			}
906 		}
907 
908 		tp->handshake_done = 0;
909 		if(!ssl_handshake(tp)) {
910 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
911 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
912 					"for %s to %s: %s", zone->apex_str,
913 					zone->master->ip_address_spec,
914 					strerror(errno));
915 
916 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
917 				char errmsg[1024];
918 				snprintf(errmsg, sizeof(errmsg), "xfrd: "
919 					"TLS handshake failed for %s to %s",
920 					zone->apex_str,
921 					zone->master->ip_address_spec);
922 				log_crypto_err(errmsg);
923 			} else {
924 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
925 					"for %s to %s with %d", zone->apex_str,
926 					zone->master->ip_address_spec,
927 					tp->handshake_want);
928 			}
929 			close(fd);
930 			xfrd_set_refresh_now(zone);
931 			return 0;
932 		}
933 #else
934 		log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
935 						 "not supported for %s to %s",
936 						  zone->apex_str, zone->master->ip_address_spec);
937 		close(fd);
938 		xfrd_set_refresh_now(zone);
939 		return 0;
940 #endif
941 	}
942 
943 	/* set the tcp pipe event */
944 	if(tp->handler_added)
945 		event_del(&tp->handler);
946 	memset(&tp->handler, 0, sizeof(tp->handler));
947 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
948 #ifdef HAVE_TLS_1_3
949 		( !tp->ssl
950 		|| tp->handshake_done
951 		|| tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
952 #else
953 		EV_WRITE,
954 #endif
955 	        xfrd_handle_tcp_pipe, tp);
956 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
957 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
958 	tv.tv_sec = set->tcp_timeout;
959 	tv.tv_usec = 0;
960 	if(event_add(&tp->handler, &tv) != 0)
961 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
962 	tp->handler_added = 1;
963 	return 1;
964 }
965 
966 void
xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)967 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
968 {
969 	struct xfrd_tcp* tcp = tp->tcp_w;
970 	assert(zone->tcp_conn != -1);
971 	assert(zone->tcp_waiting == 0);
972 	/* start AXFR or IXFR for the zone */
973 	if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
974 		zone->master->ixfr_disabled ||
975 		/* if zone expired, after the first round, do not ask for
976 		 * IXFR any more, but full AXFR (of any serial number) */
977 		(zone->state == xfrd_zone_expired && zone->round_num != 0)) {
978 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
979 						"(AXFR) for %s to %s",
980 			zone->apex_str, zone->master->ip_address_spec));
981 
982 		xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
983 			zone->query_id);
984 		xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
985 	} else {
986 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
987 						"transfer (IXFR) for %s to %s",
988 			zone->apex_str, zone->master->ip_address_spec));
989 
990 		xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
991 			zone->query_id);
992 		xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
993 		NSCOUNT_SET(tcp->packet, 1);
994 		xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
995 	}
996 	if(zone->master->key_options && zone->master->key_options->tsig_key) {
997 		xfrd_tsig_sign_request(
998 			tcp->packet, &zone->latest_xfr->tsig, zone->master);
999 	}
1000 	buffer_flip(tcp->packet);
1001 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
1002 	tcp->msglen = buffer_limit(tcp->packet);
1003 	tcp->total_bytes = 0;
1004 }
1005 
1006 static void
tcp_conn_ready_for_reading(struct xfrd_tcp * tcp)1007 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
1008 {
1009 	tcp->total_bytes = 0;
1010 	tcp->msglen = 0;
1011 	buffer_clear(tcp->packet);
1012 }
1013 
#ifdef HAVE_TLS_1_3
/*
 * Classify an unsuccessful (<= 0) return value of SSL_write().
 *
 * Returns 0 if the write merely could not make progress right now and must
 * be retried on a later event, -1 if the connection has failed. On -1 the
 * errno value of the failed write is restored for the caller to report.
 */
static int
conn_write_ssl_failed(SSL* ssl, int sent)
{
	/* capture errno first; the calls below may overwrite it */
	int saved_errno = errno;
	int err = SSL_get_error(ssl, sent);

	if(err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) {
		/* the TLS layer is waiting for the socket, not an error */
		return 0;
	}
	if(sent == -1 && (saved_errno == EAGAIN || saved_errno == EINTR)) {
		/* write would block, try later */
		return 0;
	}
	log_msg(LOG_ERR, "xfrd: generic write problem with tls");
	errno = saved_errno;
	return -1;
}

/*
 * Write the pending message of tcp over the TLS connection ssl: first the
 * two byte length prefix (tcp->msglen in network byte order), then the
 * contents of tcp->packet. Progress is tracked in tcp->total_bytes and the
 * buffer position, so the function can be called again after a partial
 * write.
 *
 * Returns 1 when the whole message has been written, 0 when it has to be
 * called again later, and -1 on failure.
 */
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	int request_length;
	int sent;

	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		uint16_t sendlen = htons(tcp->msglen);

		/* send (the remainder of) the length prefix */
		request_length = sizeof(tcp->msglen) - tcp->total_bytes;
		ERR_clear_error();
		sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
			request_length);
		if(sent <= 0) {
			return conn_write_ssl_failed(ssl, sent);
		}

		/* at most the prefix itself was offered, so no packet bytes
		 * can have been consumed here */
		tcp->total_bytes += sent;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* incomplete write, resume later */
			return 0;
		}
		assert(tcp->total_bytes >= sizeof(tcp->msglen));
	}

	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));

	request_length = buffer_remaining(tcp->packet);
	ERR_clear_error();
	sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
	if(sent <= 0) {
		return conn_write_ssl_failed(ssl, sent);
	}

	buffer_skip(tcp->packet, sent);
	tcp->total_bytes += sent;

	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
		/* more to write when socket becomes writable again */
		return 0;
	}

	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
	return 1;
}
#endif
1086 
conn_write(struct xfrd_tcp * tcp)1087 int conn_write(struct xfrd_tcp* tcp)
1088 {
1089 	ssize_t sent;
1090 
1091 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1092 		uint16_t sendlen = htons(tcp->msglen);
1093 #ifdef HAVE_WRITEV
1094 		struct iovec iov[2];
1095 		iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
1096 		iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
1097 		iov[1].iov_base = buffer_begin(tcp->packet);
1098 		iov[1].iov_len = buffer_limit(tcp->packet);
1099 		sent = writev(tcp->fd, iov, 2);
1100 #else /* HAVE_WRITEV */
1101 		sent = write(tcp->fd,
1102 			(const char*)&sendlen + tcp->total_bytes,
1103 			sizeof(tcp->msglen) - tcp->total_bytes);
1104 #endif /* HAVE_WRITEV */
1105 
1106 		if(sent == -1) {
1107 			if(errno == EAGAIN || errno == EINTR) {
1108 				/* write would block, try later */
1109 				return 0;
1110 			} else {
1111 				return -1;
1112 			}
1113 		}
1114 
1115 		tcp->total_bytes += sent;
1116 		if(sent > (ssize_t)sizeof(tcp->msglen))
1117 			buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
1118 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1119 			/* incomplete write, resume later */
1120 			return 0;
1121 		}
1122 #ifdef HAVE_WRITEV
1123 		if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
1124 			/* packet done */
1125 			return 1;
1126 		}
1127 #endif
1128 		assert(tcp->total_bytes >= sizeof(tcp->msglen));
1129 	}
1130 
1131 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
1132 
1133 	sent = write(tcp->fd,
1134 		buffer_current(tcp->packet),
1135 		buffer_remaining(tcp->packet));
1136 	if(sent == -1) {
1137 		if(errno == EAGAIN || errno == EINTR) {
1138 			/* write would block, try later */
1139 			return 0;
1140 		} else {
1141 			return -1;
1142 		}
1143 	}
1144 
1145 	buffer_skip(tcp->packet, sent);
1146 	tcp->total_bytes += sent;
1147 
1148 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
1149 		/* more to write when socket becomes writable again */
1150 		return 0;
1151 	}
1152 
1153 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
1154 	return 1;
1155 }
1156 
1157 void
xfrd_tcp_write(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)1158 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
1159 {
1160 	int ret;
1161 	struct xfrd_tcp* tcp = tp->tcp_w;
1162 	assert(zone->tcp_conn != -1);
1163 	assert(zone == tp->tcp_send_first);
1164 	/* see if for non-established connection, there is a connect error */
1165 	if(!tp->connection_established) {
1166 		/* check for pending error from nonblocking connect */
1167 		/* from Stevens, unix network programming, vol1, 3rd ed, p450 */
1168 		int error = 0;
1169 		socklen_t len = sizeof(error);
1170 		if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
1171 			error = errno; /* on solaris errno is error */
1172 		}
1173 		if(error == EINPROGRESS || error == EWOULDBLOCK)
1174 			return; /* try again later */
1175 		if(error != 0) {
1176 			log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
1177 				zone->apex_str, zone->master->ip_address_spec,
1178 				strerror(error));
1179 			xfrd_tcp_pipe_stop(tp);
1180 			return;
1181 		}
1182 	}
1183 #ifdef HAVE_TLS_1_3
1184 	if (tp->ssl) {
1185 		if(tp->handshake_done) {
1186 			ret = conn_write_ssl(tcp, tp->ssl);
1187 
1188 		} else if(ssl_handshake(tp)) {
1189 			tcp_pipe_reset_timeout(tp); /* reschedule */
1190 			return;
1191 
1192 		} else {
1193 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1194 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1195 					strerror(errno));
1196 
1197 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
1198 				log_crypto_err("xfrd: TLS handshake failed");
1199 			} else {
1200 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1201 					"with value: %d", tp->handshake_want);
1202 			}
1203 			xfrd_tcp_pipe_stop(tp);
1204 			return;
1205 		}
1206 	} else
1207 #endif
1208 		ret = conn_write(tcp);
1209 	if(ret == -1) {
1210 		log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1211 		xfrd_tcp_pipe_stop(tp);
1212 		return;
1213 	}
1214 	if(tcp->total_bytes != 0 && !tp->connection_established)
1215 		tp->connection_established = 1;
1216 	if(ret == 0) {
1217 		return; /* write again later */
1218 	}
1219 	/* done writing this message */
1220 
1221 	/* remove first zone from sendlist */
1222 	tcp_pipe_sendlist_popfirst(tp, zone);
1223 
1224 	/* see if other zone wants to write; init; let it write (now) */
1225 	/* and use a loop, because 64k stack calls is a too much */
1226 	while(tp->tcp_send_first) {
1227 		/* setup to write for this zone */
1228 		xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
1229 		/* attempt to write for this zone (if success, continue loop)*/
1230 #ifdef HAVE_TLS_1_3
1231 		if (tp->ssl)
1232 			ret = conn_write_ssl(tcp, tp->ssl);
1233 		else
1234 #endif
1235 			ret = conn_write(tcp);
1236 		if(ret == -1) {
1237 			log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1238 			xfrd_tcp_pipe_stop(tp);
1239 			return;
1240 		}
1241 		if(ret == 0)
1242 			return; /* write again later */
1243 		tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
1244 	}
1245 
1246 	/* if sendlist empty, remove WRITE from event */
1247 
1248 	/* listen to READ, and not WRITE events */
1249 	assert(tp->tcp_send_first == NULL);
1250 	tcp_pipe_reset_timeout(tp);
1251 }
1252 
#ifdef HAVE_TLS_1_3
/*
 * Classify an unsuccessful (<= 0) return value of SSL_read().
 *
 * reading_length is nonzero when the failed read was for the two byte
 * length prefix; it only selects the text of the logged message.
 *
 * Returns 0 if the read merely could not make progress right now and must
 * be retried on a later event, -1 if the connection was closed or has
 * failed. On -1 the errno value of the failed read is restored.
 */
static int
conn_read_ssl_failed(SSL* ssl, ssize_t received, int reading_length)
{
	/* capture errno first; the calls below may overwrite it */
	int saved_errno = errno;
	int err = SSL_get_error(ssl, (int)received);

	if(err == SSL_ERROR_WANT_READ) {
		/* the TLS layer needs more data from the socket */
		return 0;
	}
	if(err == SSL_ERROR_ZERO_RETURN) {
		/* EOF */
		return -1;
	}
	if(received == -1 &&
		(saved_errno == EAGAIN || saved_errno == EINTR)) {
		/* read would block, try later */
		return 0;
	}

	if(err == SSL_ERROR_SYSCALL)
		log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(saved_errno));
	else
		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);

	if(received == -1) {
#ifdef ECONNRESET
		if (verbosity >= 2 || saved_errno != ECONNRESET)
#endif /* ECONNRESET */
		{
			if(reading_length)
				log_msg(LOG_ERR, "tls read sz: %s",
					strerror(saved_errno));
			else
				log_msg(LOG_ERR, "tcp read %s",
					strerror(saved_errno));
		}
	}
	/* received == 0 without a clean TLS shutdown is treated as EOF too */
	errno = saved_errno;
	return -1;
}

/*
 * Read (part of) a length prefixed message from the TLS connection ssl
 * into tcp: first the two byte length, stored in tcp->msglen and converted
 * to host byte order once complete, then tcp->msglen bytes into
 * tcp->packet. Progress is tracked in tcp->total_bytes and the buffer
 * position, so the function can be called again after a partial read.
 *
 * Returns 1 when a complete message is in tcp->packet, 0 when it has to be
 * called again later, and -1 on EOF, on a read failure, or when the
 * announced length does not fit the packet buffer.
 */
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t received;

	/* receive leading packet length bytes */
	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		ERR_clear_error();
		received = SSL_read(ssl,
			(char*) &tcp->msglen + tcp->total_bytes,
			sizeof(tcp->msglen) - tcp->total_bytes);
		if(received <= 0) {
			return conn_read_ssl_failed(ssl, received, 1);
		}
		tcp->total_bytes += received;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* not complete yet, try later */
			return 0;
		}

		assert(tcp->total_bytes == sizeof(tcp->msglen));
		tcp->msglen = ntohs(tcp->msglen);

		if(tcp->msglen == 0) {
			buffer_set_limit(tcp->packet, tcp->msglen);
			return 1;
		}
		if(tcp->msglen > buffer_capacity(tcp->packet)) {
			/* The message can not be stored; continuing to read
			 * would desynchronize the stream, so fail and let the
			 * caller close the connection. */
			log_msg(LOG_ERR, "buffer too small, dropping connection");
			return -1;
		}
		buffer_set_limit(tcp->packet, tcp->msglen);
	}

	assert(buffer_remaining(tcp->packet) > 0);
	ERR_clear_error();

	received = SSL_read(ssl, buffer_current(tcp->packet),
		buffer_remaining(tcp->packet));
	if(received <= 0) {
		return conn_read_ssl_failed(ssl, received, 0);
	}

	tcp->total_bytes += received;
	buffer_skip(tcp->packet, received);

	if(buffer_remaining(tcp->packet) > 0) {
		/* not complete yet, wait for more */
		return 0;
	}

	/* completed */
	assert(buffer_position(tcp->packet) == tcp->msglen);
	return 1;
}
#endif
1359 
1360 int
conn_read(struct xfrd_tcp * tcp)1361 conn_read(struct xfrd_tcp* tcp)
1362 {
1363 	ssize_t received;
1364 	/* receive leading packet length bytes */
1365 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1366 		received = read(tcp->fd,
1367 			(char*) &tcp->msglen + tcp->total_bytes,
1368 			sizeof(tcp->msglen) - tcp->total_bytes);
1369 		if(received == -1) {
1370 			if(errno == EAGAIN || errno == EINTR) {
1371 				/* read would block, try later */
1372 				return 0;
1373 			} else {
1374 #ifdef ECONNRESET
1375 				if (verbosity >= 2 || errno != ECONNRESET)
1376 #endif /* ECONNRESET */
1377 				log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
1378 				return -1;
1379 			}
1380 		} else if(received == 0) {
1381 			/* EOF */
1382 			return -1;
1383 		}
1384 		tcp->total_bytes += received;
1385 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1386 			/* not complete yet, try later */
1387 			return 0;
1388 		}
1389 
1390 		assert(tcp->total_bytes == sizeof(tcp->msglen));
1391 		tcp->msglen = ntohs(tcp->msglen);
1392 
1393 		if(tcp->msglen == 0) {
1394 			buffer_set_limit(tcp->packet, tcp->msglen);
1395 			return 1;
1396 		}
1397 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
1398 			log_msg(LOG_ERR, "buffer too small, dropping connection");
1399 			return 0;
1400 		}
1401 		buffer_set_limit(tcp->packet, tcp->msglen);
1402 	}
1403 
1404 	assert(buffer_remaining(tcp->packet) > 0);
1405 
1406 	received = read(tcp->fd, buffer_current(tcp->packet),
1407 		buffer_remaining(tcp->packet));
1408 	if(received == -1) {
1409 		if(errno == EAGAIN || errno == EINTR) {
1410 			/* read would block, try later */
1411 			return 0;
1412 		} else {
1413 #ifdef ECONNRESET
1414 			if (verbosity >= 2 || errno != ECONNRESET)
1415 #endif /* ECONNRESET */
1416 			log_msg(LOG_ERR, "tcp read %s", strerror(errno));
1417 			return -1;
1418 		}
1419 	} else if(received == 0) {
1420 		/* EOF */
1421 		return -1;
1422 	}
1423 
1424 	tcp->total_bytes += received;
1425 	buffer_skip(tcp->packet, received);
1426 
1427 	if(buffer_remaining(tcp->packet) > 0) {
1428 		/* not complete yet, wait for more */
1429 		return 0;
1430 	}
1431 
1432 	/* completed */
1433 	assert(buffer_position(tcp->packet) == tcp->msglen);
1434 	return 1;
1435 }
1436 
1437 void
xfrd_tcp_read(struct xfrd_tcp_pipeline * tp)1438 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
1439 {
1440 	xfrd_zone_type* zone;
1441 	struct xfrd_tcp* tcp = tp->tcp_r;
1442 	int ret;
1443 	enum xfrd_packet_result pkt_result;
1444 #ifdef HAVE_TLS_1_3
1445 	if(tp->ssl) {
1446 		if(tp->handshake_done) {
1447 			ret = conn_read_ssl(tcp, tp->ssl);
1448 
1449 		} else if(ssl_handshake(tp)) {
1450 			tcp_pipe_reset_timeout(tp); /* reschedule */
1451 			return;
1452 
1453 		} else {
1454 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1455 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1456 					strerror(errno));
1457 
1458 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
1459 				log_crypto_err("xfrd: TLS handshake failed");
1460 			} else {
1461 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1462 					"with value: %d", tp->handshake_want);
1463 			}
1464 			xfrd_tcp_pipe_stop(tp);
1465 			return;
1466 		}
1467 	} else
1468 #endif
1469 		ret = conn_read(tcp);
1470 	if(ret == -1) {
1471 		if(errno != 0)
1472 			log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
1473 		else
1474 			log_msg(LOG_ERR, "xfrd: failed reading tcp: closed");
1475 		xfrd_tcp_pipe_stop(tp);
1476 		return;
1477 	}
1478 	if(ret == 0)
1479 		return;
1480 	/* completed msg */
1481 	buffer_flip(tcp->packet);
1482 	/* see which ID number it is, if skip, handle skip, NULL: warn */
1483 	if(tcp->msglen < QHEADERSZ) {
1484 		/* too short for DNS header, skip it */
1485 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1486 			"xfrd: tcp skip response that is too short"));
1487 		tcp_conn_ready_for_reading(tcp);
1488 		return;
1489 	}
1490 	zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
1491 	if(!zone || zone == TCP_NULL_SKIP) {
1492 		/* no zone for this id? skip it */
1493 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1494 			"xfrd: tcp skip response with %s ID",
1495 			zone?"set-to-skip":"unknown"));
1496 		tcp_conn_ready_for_reading(tcp);
1497 		return;
1498 	}
1499 	assert(zone->tcp_conn != -1);
1500 
1501 	/* handle message for zone */
1502 	pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
1503 	/* setup for reading the next packet on this connection */
1504 	tcp_conn_ready_for_reading(tcp);
1505 	switch(pkt_result) {
1506 		case xfrd_packet_more:
1507 			/* wait for next packet */
1508 			break;
1509 		case xfrd_packet_newlease:
1510 			/* set to skip if more packets with this ID */
1511 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1512 			tp->key.num_skip++;
1513 			/* fall through to remove zone from tp */
1514 			/* fallthrough */
1515 		case xfrd_packet_transfer:
1516 			if(zone->zone_options->pattern->multi_primary_check) {
1517 				xfrd_tcp_release(xfrd->tcp_set, zone);
1518 				xfrd_make_request(zone);
1519 				break;
1520 			}
1521 			xfrd_tcp_release(xfrd->tcp_set, zone);
1522 			assert(zone->round_num == -1);
1523 			break;
1524 		case xfrd_packet_notimpl:
1525 			xfrd_disable_ixfr(zone);
1526 			xfrd_tcp_release(xfrd->tcp_set, zone);
1527 			/* query next server */
1528 			xfrd_make_request(zone);
1529 			break;
1530 		case xfrd_packet_bad:
1531 		case xfrd_packet_tcp:
1532 		default:
1533 			/* set to skip if more packets with this ID */
1534 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1535 			tp->key.num_skip++;
1536 			xfrd_tcp_release(xfrd->tcp_set, zone);
1537 			/* query next server */
1538 			xfrd_make_request(zone);
1539 			break;
1540 	}
1541 }
1542 
1543 void
xfrd_tcp_release(struct xfrd_tcp_set * set,xfrd_zone_type * zone)1544 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
1545 {
1546 	int conn = zone->tcp_conn;
1547 	struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
1548 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
1549 		zone->apex_str, zone->master->ip_address_spec));
1550 	assert(zone->tcp_conn != -1);
1551 	assert(zone->tcp_waiting == 0);
1552 	zone->tcp_conn = -1;
1553 	zone->tcp_waiting = 0;
1554 
1555 	/* remove from tcp_send list */
1556 	tcp_pipe_sendlist_remove(tp, zone);
1557 	/* remove it from the ID list */
1558 	if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
1559 		tcp_pipe_id_remove(tp, zone, 1);
1560 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
1561 		tp->key.num_unused));
1562 	/* if pipe was full, but no more, then see if waiting element is
1563 	 * for the same master, and can fill the unused ID */
1564 	if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
1565 #ifdef INET6
1566 		struct sockaddr_storage to;
1567 #else
1568 		struct sockaddr_in to;
1569 #endif
1570 		socklen_t to_len = xfrd_acl_sockaddr_to(
1571 			set->tcp_waiting_first->master, &to);
1572 		if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
1573 			/* use this connection for the waiting zone */
1574 			zone = set->tcp_waiting_first;
1575 			assert(zone->tcp_conn == -1);
1576 			zone->tcp_conn = conn;
1577 			tcp_zone_waiting_list_popfirst(set, zone);
1578 			if(zone->zone_handler.ev_fd != -1)
1579 				xfrd_udp_release(zone);
1580 			xfrd_unset_timer(zone);
1581 			pipeline_setup_new_zone(set, tp, zone);
1582 			return;
1583 		}
1584 		/* waiting zone did not go to same server */
1585 	}
1586 
1587 	/* if all unused, or only skipped leftover, close the pipeline */
1588 	if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
1589 		xfrd_tcp_pipe_release(set, tp, conn);
1590 }
1591 
1592 void
xfrd_tcp_pipe_release(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,int conn)1593 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
1594 	int conn)
1595 {
1596 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
1597 	/* one handler per tcp pipe */
1598 	if(tp->handler_added)
1599 		event_del(&tp->handler);
1600 	tp->handler_added = 0;
1601 
1602 #ifdef HAVE_TLS_1_3
1603 	/* close SSL */
1604 	if (tp->ssl) {
1605 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
1606 		SSL_shutdown(tp->ssl);
1607 		SSL_free(tp->ssl);
1608 		tp->ssl = NULL;
1609 	}
1610 #endif
1611 
1612 	/* fd in tcp_r and tcp_w is the same, close once */
1613 	if(tp->tcp_r->fd != -1)
1614 		close(tp->tcp_r->fd);
1615 	tp->tcp_r->fd = -1;
1616 	tp->tcp_w->fd = -1;
1617 
1618 	/* remove from pipetree */
1619 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
1620 
1621 	/* a waiting zone can use the free tcp slot (to another server) */
1622 	/* if that zone fails to set-up or connect, we try to start the next
1623 	 * waiting zone in the list */
1624 	while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
1625 		/* pop first waiting process */
1626 		xfrd_zone_type* zone = set->tcp_waiting_first;
1627 		/* start it */
1628 		assert(zone->tcp_conn == -1);
1629 		zone->tcp_conn = conn;
1630 		tcp_zone_waiting_list_popfirst(set, zone);
1631 
1632 		/* stop udp (if any) */
1633 		if(zone->zone_handler.ev_fd != -1)
1634 			xfrd_udp_release(zone);
1635 		if(!xfrd_tcp_open(set, tp, zone)) {
1636 			zone->tcp_conn = -1;
1637 			xfrd_set_refresh_now(zone);
1638 			/* try to start the next zone (if any) */
1639 			continue;
1640 		}
1641 		/* re-init this tcppipe */
1642 		/* ip and ip_len set by tcp_open */
1643 		xfrd_tcp_pipeline_init(tp);
1644 
1645 		/* insert into tree */
1646 		(void)rbtree_insert(set->pipetree, &tp->key.node);
1647 		/* setup write */
1648 		xfrd_unset_timer(zone);
1649 		pipeline_setup_new_zone(set, tp, zone);
1650 		/* started a task, no need for cleanups, so return */
1651 		return;
1652 	}
1653 	/* no task to start, cleanup */
1654 	assert(!set->tcp_waiting_first);
1655 	set->tcp_count --;
1656 	assert(set->tcp_count >= 0);
1657 }
1658 
1659