xref: /netbsd/external/bsd/nsd/dist/xfrd-tcp.c (revision 66a1527d)
1 /*
2  * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 #include <assert.h>
12 #include <errno.h>
13 #include <fcntl.h>
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include <sys/uio.h>
17 #include "nsd.h"
18 #include "xfrd-tcp.h"
19 #include "buffer.h"
20 #include "packet.h"
21 #include "dname.h"
22 #include "options.h"
23 #include "namedb.h"
24 #include "xfrd.h"
25 #include "xfrd-disk.h"
26 #include "util.h"
27 #ifdef HAVE_TLS_1_3
28 #include <openssl/ssl.h>
29 #include <openssl/err.h>
30 #endif
31 
32 #ifdef HAVE_TLS_1_3
33 void log_crypto_err(const char* str); /* in server.c */
34 
35 static SSL_CTX*
create_ssl_context()36 create_ssl_context()
37 {
38 	SSL_CTX *ctx;
39 	ctx = SSL_CTX_new(TLS_client_method());
40 	if (!ctx) {
41 		log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
42 	}
43 	else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
44 		SSL_CTX_free(ctx);
45 		log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
46 		return NULL;
47 	}
48 	/* Only trust 1.3 as per the specification */
49 	else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
50 		SSL_CTX_free(ctx);
51 		log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
52 		return NULL;
53 	}
54 	return ctx;
55 }
56 
57 static int
tls_verify_callback(int preverify_ok,X509_STORE_CTX * ctx)58 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
59 {
60 	int err = X509_STORE_CTX_get_error(ctx);
61 	int depth = X509_STORE_CTX_get_error_depth(ctx);
62 
63 	// report the specific cert error here - will need custom verify code if
64 	// SPKI pins are supported
65 	if (!preverify_ok)
66 		log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
67 				err,
68 				depth,
69 				X509_verify_cert_error_string(err));
70 	return preverify_ok;
71 }
72 
73 static int
setup_ssl(struct xfrd_tcp_pipeline * tp,struct xfrd_tcp_set * tcp_set,const char * auth_domain_name)74 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
75 		  const char* auth_domain_name)
76 {
77 	if (!tcp_set->ssl_ctx) {
78 		log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
79 		return 0;
80 	}
81 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
82 						 auth_domain_name));
83 	tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
84 	if(!tp->ssl) {
85 		log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
86 		return 0;
87 	}
88 	SSL_set_connect_state(tp->ssl);
89 	(void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
90 	if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
91 		log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
92 		SSL_free(tp->ssl);
93 		tp->ssl = NULL;
94 		return 0;
95 	}
96 
97 	SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
98 	if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
99 		log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
100 		auth_domain_name);
101 		SSL_free(tp->ssl);
102 		tp->ssl = NULL;
103 		return 0;
104 	}
105 	return 1;
106 }
107 
108 static int
ssl_handshake(struct xfrd_tcp_pipeline * tp)109 ssl_handshake(struct xfrd_tcp_pipeline* tp)
110 {
111 	int ret;
112 
113 	ERR_clear_error();
114 	ret = SSL_do_handshake(tp->ssl);
115 	if(ret == 1) {
116 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
117 		tp->handshake_done = 1;
118 		return 1;
119 	}
120 	tp->handshake_want = SSL_get_error(tp->ssl, ret);
121 	if(tp->handshake_want == SSL_ERROR_WANT_READ
122 	|| tp->handshake_want == SSL_ERROR_WANT_WRITE)
123 		return 1;
124 
125 	return 0;
126 }
127 
/*
 * Passphrase callback for loading a private key: copies the
 * NUL-terminated passphrase passed as userdata u into buf (at most size
 * bytes including the terminator) and returns the length of what was
 * stored in buf.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	const char *passphrase = (char*)u;

	strlcpy(buf, passphrase, size);
	return strlen(buf);
}
133 
134 #endif
135 
136 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
137 static int
xfrd_pipe_cmp(const void * a,const void * b)138 xfrd_pipe_cmp(const void* a, const void* b)
139 {
140 	const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
141 	const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
142 	int r;
143 	if(x == y)
144 		return 0;
145 	if(y->key.ip_len != x->key.ip_len)
146 		/* subtraction works because nonnegative and small numbers */
147 		return (int)y->key.ip_len - (int)x->key.ip_len;
148 	r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
149 	if(r != 0)
150 		return r;
151 	/* sort that num_unused is sorted ascending, */
152 	if(x->key.num_unused != y->key.num_unused) {
153 		return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
154 	}
155 	/* different pipelines are different still, even with same numunused*/
156 	return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
157 }
158 
xfrd_tcp_set_create(struct region * region,const char * tls_cert_bundle,int tcp_max,int tcp_pipeline)159 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
160 {
161 	int i;
162 	struct xfrd_tcp_set* tcp_set = region_alloc(region,
163 		sizeof(struct xfrd_tcp_set));
164 	memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
165 	tcp_set->tcp_state = NULL;
166 	tcp_set->tcp_max = tcp_max;
167 	tcp_set->tcp_pipeline = tcp_pipeline;
168 	tcp_set->tcp_count = 0;
169 	tcp_set->tcp_waiting_first = 0;
170 	tcp_set->tcp_waiting_last = 0;
171 #ifdef HAVE_TLS_1_3
172 	/* Set up SSL context */
173 	tcp_set->ssl_ctx = create_ssl_context();
174 	if (tcp_set->ssl_ctx == NULL)
175 		log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
176 
177 	else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
178 				tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
179 		log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
180 				tls_cert_bundle);
181 	}
182 #else
183 	(void)tls_cert_bundle;
184 	log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
185 #endif
186 	tcp_set->tcp_state = region_alloc(region,
187 		sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
188 	for(i=0; i<tcp_set->tcp_max; i++)
189 		tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
190 			tcp_pipeline);
191 	tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
192 	return tcp_set;
193 }
194 
pipeline_id_compare(const void * x,const void * y)195 static int pipeline_id_compare(const void* x, const void* y)
196 {
197 	struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
198 	struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
199 	if(a->id < b->id)
200 		return -1;
201 	if(a->id > b->id)
202 		return 1;
203 	return 0;
204 }
205 
/*
 * Fill array with num distinct values drawn from 0..max-1.
 * When num is 65536 every 16 bit value is written in ascending order
 * and no random numbers are drawn.  Otherwise the values are sampled
 * with random_generate().
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	/* one flag byte per candidate value, nonzero once it is picked */
	uint8_t taken[65536];
	int upper, filled;

	if(num == 65536) {
		/* all of them, loop and insert */
		int v;
		for(v = 0; v < num; v++)
			array[v] = (uint16_t)v;
		return;
	}
	assert(max <= 65536);
	/* This uses the Robert Floyd sampling algorithm */
	memset(taken, 0, sizeof(taken[0])*max);
	filled = 0;
	for(upper = max-num; upper < max; upper++) {
		/* random generate creates from 0..arg-1 */
		int candidate = (upper+1 <= 1) ? 0 : random_generate(upper+1);
		/* when the candidate was drawn before, take upper instead */
		int chosen = taken[candidate] ? upper : candidate;
		array[filled++] = chosen;
		taken[chosen] = 1;
	}
}
238 
239 static void
clear_pipeline_entry(struct xfrd_tcp_pipeline * tp,rbnode_type * node)240 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
241 {
242 	struct xfrd_tcp_pipeline_id *n;
243 	if(node == NULL || node == RBTREE_NULL)
244 		return;
245 	clear_pipeline_entry(tp, node->left);
246 	node->left = NULL;
247 	clear_pipeline_entry(tp, node->right);
248 	node->right = NULL;
249 	/* move the node into the free list */
250 	n = (struct xfrd_tcp_pipeline_id*)node;
251 	n->next_free = tp->pipe_id_free_list;
252 	tp->pipe_id_free_list = n;
253 }
254 
255 static void
xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline * tp)256 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
257 {
258 	/* move entries into free list */
259 	clear_pipeline_entry(tp, tp->zone_per_id->root);
260 	/* clear the tree */
261 	tp->zone_per_id->count = 0;
262 	tp->zone_per_id->root = RBTREE_NULL;
263 }
264 
265 static void
xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline * tp)266 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
267 {
268 	tp->key.node.key = tp;
269 	tp->key.num_unused = tp->pipe_num;
270 	tp->key.num_skip = 0;
271 	tp->tcp_send_first = NULL;
272 	tp->tcp_send_last = NULL;
273 	xfrd_tcp_pipeline_cleanup(tp);
274 	pick_id_values(tp->unused, tp->pipe_num, 65536);
275 }
276 
277 struct xfrd_tcp_pipeline*
xfrd_tcp_pipeline_create(region_type * region,int tcp_pipeline)278 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
279 {
280 	int i;
281 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
282 		region_alloc_zero(region, sizeof(*tp));
283 	if(tcp_pipeline < 0)
284 		tcp_pipeline = 0;
285 	if(tcp_pipeline > 65536)
286 		tcp_pipeline = 65536; /* max 16 bit ID numbers */
287 	tp->pipe_num = tcp_pipeline;
288 	tp->key.num_unused = tp->pipe_num;
289 	tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
290 	tp->pipe_id_free_list = NULL;
291 	for(i=0; i<tp->pipe_num; i++) {
292 		struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
293 			region_alloc_zero(region, sizeof(*n));
294 		n->next_free = tp->pipe_id_free_list;
295 		tp->pipe_id_free_list = n;
296 	}
297 	tp->unused = (uint16_t*)region_alloc_zero(region,
298 		sizeof(tp->unused[0])*tp->pipe_num);
299 	tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
300 	tp->tcp_w = xfrd_tcp_create(region, 512);
301 	xfrd_tcp_pipeline_init(tp);
302 	return tp;
303 }
304 
305 static struct xfrd_zone*
xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline * tp,uint16_t id)306 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
307 {
308 	struct xfrd_tcp_pipeline_id key;
309 	rbnode_type* n;
310 	memset(&key, 0, sizeof(key));
311 	key.node.key = &key;
312 	key.id = id;
313 	n = rbtree_search(tp->zone_per_id, &key);
314 	if(n && n != RBTREE_NULL) {
315 		return ((struct xfrd_tcp_pipeline_id*)n)->zone;
316 	}
317 	return NULL;
318 }
319 
320 static void
xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline * tp,uint16_t id,struct xfrd_zone * zone)321 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
322 	struct xfrd_zone* zone)
323 {
324 	struct xfrd_tcp_pipeline_id* n;
325 	/* because there are tp->pipe_num preallocated entries, and we have
326 	 * only tp->pipe_num id values, the list cannot be empty now. */
327 	assert(tp->pipe_id_free_list != NULL);
328 	/* pick up next free xfrd_tcp_pipeline_id node */
329 	n = tp->pipe_id_free_list;
330 	tp->pipe_id_free_list = n->next_free;
331 	n->next_free = NULL;
332 	memset(&n->node, 0, sizeof(n->node));
333 	n->node.key = n;
334 	n->id = id;
335 	n->zone = zone;
336 	rbtree_insert(tp->zone_per_id, &n->node);
337 }
338 
339 static void
xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline * tp,uint16_t id)340 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
341 {
342 	struct xfrd_tcp_pipeline_id key;
343 	rbnode_type* node;
344 	memset(&key, 0, sizeof(key));
345 	key.node.key = &key;
346 	key.id = id;
347 	node = rbtree_delete(tp->zone_per_id, &key);
348 	if(node && node != RBTREE_NULL) {
349 		struct xfrd_tcp_pipeline_id* n =
350 			(struct xfrd_tcp_pipeline_id*)node;
351 		n->next_free = tp->pipe_id_free_list;
352 		tp->pipe_id_free_list = n;
353 	}
354 }
355 
356 static void
xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline * tp,uint16_t id)357 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
358 {
359 	struct xfrd_tcp_pipeline_id key;
360 	rbnode_type* n;
361 	memset(&key, 0, sizeof(key));
362 	key.node.key = &key;
363 	key.id = id;
364 	n = rbtree_search(tp->zone_per_id, &key);
365 	if(n && n != RBTREE_NULL) {
366 		struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
367 		zid->zone = TCP_NULL_SKIP;
368 	}
369 }
370 
371 void
xfrd_setup_packet(buffer_type * packet,uint16_t type,uint16_t klass,const dname_type * dname,uint16_t qid)372 xfrd_setup_packet(buffer_type* packet,
373 	uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid)
374 {
375 	/* Set up the header */
376 	buffer_clear(packet);
377 	ID_SET(packet, qid);
378 	FLAGS_SET(packet, 0);
379 	OPCODE_SET(packet, OPCODE_QUERY);
380 	QDCOUNT_SET(packet, 1);
381 	ANCOUNT_SET(packet, 0);
382 	NSCOUNT_SET(packet, 0);
383 	ARCOUNT_SET(packet, 0);
384 	buffer_skip(packet, QHEADERSZ);
385 
386 	/* The question record. */
387 	buffer_write(packet, dname_name(dname), dname->name_size);
388 	buffer_write_u16(packet, type);
389 	buffer_write_u16(packet, klass);
390 }
391 
392 static socklen_t
393 #ifdef INET6
xfrd_acl_sockaddr(acl_options_type * acl,unsigned int port,struct sockaddr_storage * sck)394 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
395 	struct sockaddr_storage *sck)
396 #else
397 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
398 	struct sockaddr_in *sck, const char* fromto)
399 #endif /* INET6 */
400 {
401 	/* setup address structure */
402 #ifdef INET6
403 	memset(sck, 0, sizeof(struct sockaddr_storage));
404 #else
405 	memset(sck, 0, sizeof(struct sockaddr_in));
406 #endif
407 	if(acl->is_ipv6) {
408 #ifdef INET6
409 		struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
410 		sa->sin6_family = AF_INET6;
411 		sa->sin6_port = htons(port);
412 		sa->sin6_addr = acl->addr.addr6;
413 		return sizeof(struct sockaddr_in6);
414 #else
415 		log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
416 INET6.", fromto, acl->ip_address_spec);
417 		return 0;
418 #endif
419 	} else {
420 		struct sockaddr_in* sa = (struct sockaddr_in*)sck;
421 		sa->sin_family = AF_INET;
422 		sa->sin_port = htons(port);
423 		sa->sin_addr = acl->addr.addr;
424 		return sizeof(struct sockaddr_in);
425 	}
426 }
427 
428 socklen_t
429 #ifdef INET6
xfrd_acl_sockaddr_to(acl_options_type * acl,struct sockaddr_storage * to)430 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
431 #else
432 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
433 #endif /* INET6 */
434 {
435 #ifdef HAVE_TLS_1_3
436 	unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
437 						(unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
438 #else
439 	unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
440 #endif
441 #ifdef INET6
442 	return xfrd_acl_sockaddr(acl, port, to);
443 #else
444 	return xfrd_acl_sockaddr(acl, port, to, "to");
445 #endif /* INET6 */
446 }
447 
448 socklen_t
449 #ifdef INET6
xfrd_acl_sockaddr_frm(acl_options_type * acl,struct sockaddr_storage * frm)450 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
451 #else
452 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
453 #endif /* INET6 */
454 {
455 	unsigned int port = acl->port?acl->port:0;
456 #ifdef INET6
457 	return xfrd_acl_sockaddr(acl, port, frm);
458 #else
459 	return xfrd_acl_sockaddr(acl, port, frm, "from");
460 #endif /* INET6 */
461 }
462 
463 void
xfrd_write_soa_buffer(struct buffer * packet,const dname_type * apex,struct xfrd_soa * soa)464 xfrd_write_soa_buffer(struct buffer* packet,
465 	const dname_type* apex, struct xfrd_soa* soa)
466 {
467 	size_t rdlength_pos;
468 	uint16_t rdlength;
469 	buffer_write(packet, dname_name(apex), apex->name_size);
470 
471 	/* already in network order */
472 	buffer_write(packet, &soa->type, sizeof(soa->type));
473 	buffer_write(packet, &soa->klass, sizeof(soa->klass));
474 	buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
475 	rdlength_pos = buffer_position(packet);
476 	buffer_skip(packet, sizeof(rdlength));
477 
478 	/* uncompressed dnames */
479 	buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
480 	buffer_write(packet, soa->email+1, soa->email[0]);
481 
482 	buffer_write(packet, &soa->serial, sizeof(uint32_t));
483 	buffer_write(packet, &soa->refresh, sizeof(uint32_t));
484 	buffer_write(packet, &soa->retry, sizeof(uint32_t));
485 	buffer_write(packet, &soa->expire, sizeof(uint32_t));
486 	buffer_write(packet, &soa->minimum, sizeof(uint32_t));
487 
488 	/* write length of RR */
489 	rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
490 	buffer_write_u16_at(packet, rdlength_pos, rdlength);
491 }
492 
493 struct xfrd_tcp*
xfrd_tcp_create(region_type * region,size_t bufsize)494 xfrd_tcp_create(region_type* region, size_t bufsize)
495 {
496 	struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
497 		region, sizeof(struct xfrd_tcp));
498 	memset(tcp_state, 0, sizeof(struct xfrd_tcp));
499 	tcp_state->packet = buffer_create(region, bufsize);
500 	tcp_state->fd = -1;
501 
502 	return tcp_state;
503 }
504 
505 static struct xfrd_tcp_pipeline*
pipeline_find(struct xfrd_tcp_set * set,xfrd_zone_type * zone)506 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
507 {
508 	rbnode_type* sme = NULL;
509 	struct xfrd_tcp_pipeline* r;
510 	/* smaller buf than a full pipeline with 64kb ID array, only need
511 	 * the front part with the key info, this front part contains the
512 	 * members that the compare function uses. */
513 	struct xfrd_tcp_pipeline_key k, *key=&k;
514 	key->node.key = key;
515 	key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
516 	key->num_unused = set->tcp_pipeline;
517 	/* lookup existing tcp transfer to the master with highest unused */
518 	if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
519 		/* exact match, strange, fully unused tcp cannot be open */
520 		assert(0);
521 	}
522 	if(!sme)
523 		return NULL;
524 	r = (struct xfrd_tcp_pipeline*)sme->key;
525 	/* <= key pointed at, is the master correct ? */
526 	if(r->key.ip_len != key->ip_len)
527 		return NULL;
528 	if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
529 		return NULL;
530 	/* correct master, is there a slot free for this transfer? */
531 	if(r->key.num_unused == 0)
532 		return NULL;
533 	return r;
534 }
535 
536 /* remove zone from tcp waiting list */
537 static void
tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set * set,xfrd_zone_type * zone)538 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
539 {
540 	assert(zone->tcp_waiting);
541 	set->tcp_waiting_first = zone->tcp_waiting_next;
542 	if(zone->tcp_waiting_next)
543 		zone->tcp_waiting_next->tcp_waiting_prev = NULL;
544 	else	set->tcp_waiting_last = 0;
545 	zone->tcp_waiting_next = 0;
546 	zone->tcp_waiting = 0;
547 }
548 
549 /* remove zone from tcp pipe write-wait list */
550 static void
tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)551 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
552 {
553 	if(zone->in_tcp_send) {
554 		if(zone->tcp_send_prev)
555 			zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
556 		else	tp->tcp_send_first=zone->tcp_send_next;
557 		if(zone->tcp_send_next)
558 			zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
559 		else	tp->tcp_send_last=zone->tcp_send_prev;
560 		zone->in_tcp_send = 0;
561 	}
562 }
563 
564 /* remove first from write-wait list */
565 static void
tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)566 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
567 {
568 	tp->tcp_send_first = zone->tcp_send_next;
569 	if(tp->tcp_send_first)
570 		tp->tcp_send_first->tcp_send_prev = NULL;
571 	else	tp->tcp_send_last = NULL;
572 	zone->in_tcp_send = 0;
573 }
574 
575 /* remove zone from tcp pipe ID map */
576 static void
tcp_pipe_id_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone,int alsotree)577 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
578 	int alsotree)
579 {
580 	assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
581 	if(alsotree)
582 		xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
583 	tp->unused[tp->key.num_unused] = zone->query_id;
584 	/* must remove and re-add for sort order in tree */
585 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
586 	tp->key.num_unused++;
587 	(void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
588 }
589 
590 /* stop the tcp pipe (and all its zones need to retry) */
591 static void
xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline * tp)592 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
593 {
594 	struct xfrd_tcp_pipeline_id* zid;
595 	int conn = -1;
596 	assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
597 	assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
598 	/* need to retry for all the zones connected to it */
599 	/* these could use different lists and go to a different nextmaster*/
600 	RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
601 		xfrd_zone_type* zone = zid->zone;
602 		if(zone && zone != TCP_NULL_SKIP) {
603 			assert(zone->query_id == zid->id);
604 			conn = zone->tcp_conn;
605 			zone->tcp_conn = -1;
606 			zone->tcp_waiting = 0;
607 			tcp_pipe_sendlist_remove(tp, zone);
608 			tcp_pipe_id_remove(tp, zone, 0);
609 			xfrd_set_refresh_now(zone);
610 		}
611 	}
612 	xfrd_tcp_pipeline_cleanup(tp);
613 	assert(conn != -1);
614 	/* now release the entire tcp pipe */
615 	xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
616 }
617 
618 static void
tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline * tp)619 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
620 {
621 	int fd = tp->handler.ev_fd;
622 	struct timeval tv;
623 	tv.tv_sec = xfrd->tcp_set->tcp_timeout;
624 	tv.tv_usec = 0;
625 	if(tp->handler_added)
626 		event_del(&tp->handler);
627 	memset(&tp->handler, 0, sizeof(tp->handler));
628 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
629 #ifdef HAVE_TLS_1_3
630 		( tp->ssl
631 		? ( tp->handshake_done ?  ( tp->tcp_send_first ? EV_WRITE : 0 )
632 		  : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
633 		: tp->tcp_send_first ? EV_WRITE : 0 ),
634 #else
635 		( tp->tcp_send_first ? EV_WRITE : 0 ),
636 #endif
637 		xfrd_handle_tcp_pipe, tp);
638 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
639 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
640 	if(event_add(&tp->handler, &tv) != 0)
641 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
642 	tp->handler_added = 1;
643 }
644 
645 /* handle event from fd of tcp pipe */
646 void
xfrd_handle_tcp_pipe(int ATTR_UNUSED (fd),short event,void * arg)647 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
648 {
649 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
650 	if((event & EV_WRITE)) {
651 		tcp_pipe_reset_timeout(tp);
652 		if(tp->tcp_send_first) {
653 			DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
654 				tp->tcp_send_first->apex_str));
655 			xfrd_tcp_write(tp, tp->tcp_send_first);
656 		}
657 	}
658 	if((event & EV_READ) && tp->handler_added) {
659 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
660 		tcp_pipe_reset_timeout(tp);
661 		xfrd_tcp_read(tp);
662 	}
663 	if((event & EV_TIMEOUT) && tp->handler_added) {
664 		/* tcp connection timed out */
665 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
666 		xfrd_tcp_pipe_stop(tp);
667 	}
668 }
669 
670 /* add a zone to the pipeline, it starts to want to write its query */
671 static void
pipeline_setup_new_zone(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)672 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
673 	xfrd_zone_type* zone)
674 {
675 	/* assign the ID */
676 	int idx;
677 	assert(tp->key.num_unused > 0);
678 	/* we pick a random ID, even though it is TCP anyway */
679 	idx = random_generate(tp->key.num_unused);
680 	zone->query_id = tp->unused[idx];
681 	tp->unused[idx] = tp->unused[tp->key.num_unused-1];
682 	xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
683 	/* decrement unused counter, and fixup tree */
684 	(void)rbtree_delete(set->pipetree, &tp->key.node);
685 	tp->key.num_unused--;
686 	(void)rbtree_insert(set->pipetree, &tp->key.node);
687 
688 	/* add to sendlist, at end */
689 	zone->tcp_send_next = NULL;
690 	zone->tcp_send_prev = tp->tcp_send_last;
691 	zone->in_tcp_send = 1;
692 	if(tp->tcp_send_last)
693 		tp->tcp_send_last->tcp_send_next = zone;
694 	else	tp->tcp_send_first = zone;
695 	tp->tcp_send_last = zone;
696 
697 	/* is it first in line? */
698 	if(tp->tcp_send_first == zone) {
699 		xfrd_tcp_setup_write_packet(tp, zone);
700 		/* add write to event handler */
701 		tcp_pipe_reset_timeout(tp);
702 	}
703 }
704 
705 void
xfrd_tcp_obtain(struct xfrd_tcp_set * set,xfrd_zone_type * zone)706 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
707 {
708 	struct xfrd_tcp_pipeline* tp;
709 	assert(zone->tcp_conn == -1);
710 	assert(zone->tcp_waiting == 0);
711 
712 	if(set->tcp_count < set->tcp_max) {
713 		int i;
714 		assert(!set->tcp_waiting_first);
715 		set->tcp_count ++;
716 		/* find a free tcp_buffer */
717 		for(i=0; i<set->tcp_max; i++) {
718 			if(set->tcp_state[i]->tcp_r->fd == -1) {
719 				zone->tcp_conn = i;
720 				break;
721 			}
722 		}
723 		/** What if there is no free tcp_buffer? return; */
724 		if (zone->tcp_conn < 0) {
725 			return;
726 		}
727 
728 		tp = set->tcp_state[zone->tcp_conn];
729 		zone->tcp_waiting = 0;
730 
731 		/* stop udp use (if any) */
732 		if(zone->zone_handler.ev_fd != -1)
733 			xfrd_udp_release(zone);
734 
735 		if(!xfrd_tcp_open(set, tp, zone)) {
736 			zone->tcp_conn = -1;
737 			set->tcp_count --;
738 			xfrd_set_refresh_now(zone);
739 			return;
740 		}
741 		/* ip and ip_len set by tcp_open */
742 		xfrd_tcp_pipeline_init(tp);
743 
744 		/* insert into tree */
745 		(void)rbtree_insert(set->pipetree, &tp->key.node);
746 		xfrd_deactivate_zone(zone);
747 		xfrd_unset_timer(zone);
748 		pipeline_setup_new_zone(set, tp, zone);
749 		return;
750 	}
751 	/* check for a pipeline to the same master with unused ID */
752 	if((tp = pipeline_find(set, zone))!= NULL) {
753 		int i;
754 		if(zone->zone_handler.ev_fd != -1)
755 			xfrd_udp_release(zone);
756 		for(i=0; i<set->tcp_max; i++) {
757 			if(set->tcp_state[i] == tp)
758 				zone->tcp_conn = i;
759 		}
760 		xfrd_deactivate_zone(zone);
761 		xfrd_unset_timer(zone);
762 		pipeline_setup_new_zone(set, tp, zone);
763 		return;
764 	}
765 
766 	/* wait, at end of line */
767 	DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
768 		"connections (%d) reached.", set->tcp_max));
769 	zone->tcp_waiting_next = 0;
770 	zone->tcp_waiting_prev = set->tcp_waiting_last;
771 	zone->tcp_waiting = 1;
772 	if(!set->tcp_waiting_last) {
773 		set->tcp_waiting_first = zone;
774 		set->tcp_waiting_last = zone;
775 	} else {
776 		set->tcp_waiting_last->tcp_waiting_next = zone;
777 		set->tcp_waiting_last = zone;
778 	}
779 	xfrd_deactivate_zone(zone);
780 	xfrd_unset_timer(zone);
781 }
782 
783 int
xfrd_tcp_open(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)784 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
785 	xfrd_zone_type* zone)
786 {
787 	int fd, family, conn;
788 	struct timeval tv;
789 	assert(zone->tcp_conn != -1);
790 
791 	/* if there is no next master, fallback to use the first one */
792 	/* but there really should be a master set */
793 	if(!zone->master) {
794 		zone->master = zone->zone_options->pattern->request_xfr;
795 		zone->master_num = 0;
796 	}
797 
798 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
799 		zone->apex_str, zone->master->ip_address_spec));
800 	tp->tcp_r->is_reading = 1;
801 	tp->tcp_r->total_bytes = 0;
802 	tp->tcp_r->msglen = 0;
803 	buffer_clear(tp->tcp_r->packet);
804 	tp->tcp_w->is_reading = 0;
805 	tp->tcp_w->total_bytes = 0;
806 	tp->tcp_w->msglen = 0;
807 	tp->connection_established = 0;
808 
809 	if(zone->master->is_ipv6) {
810 #ifdef INET6
811 		family = PF_INET6;
812 #else
813 		xfrd_set_refresh_now(zone);
814 		return 0;
815 #endif
816 	} else {
817 		family = PF_INET;
818 	}
819 	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
820 	if(fd == -1) {
821 		/* squelch 'Address family not supported by protocol' at low
822 		 * verbosity levels */
823 		if(errno != EAFNOSUPPORT || verbosity > 2)
824 		    log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
825 			zone->master->ip_address_spec, strerror(errno));
826 		xfrd_set_refresh_now(zone);
827 		return 0;
828 	}
829 	if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
830 		log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
831 		close(fd);
832 		xfrd_set_refresh_now(zone);
833 		return 0;
834 	}
835 
836 	if(xfrd->nsd->outgoing_tcp_mss > 0) {
837 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
838 		if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
839 			(void*)&xfrd->nsd->outgoing_tcp_mss,
840 			sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
841 			log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
842 					"failed: %s", strerror(errno));
843 		}
844 #else
845 		log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
846 #endif
847 	}
848 
849 	tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
850 
851 	/* bind it */
852 	if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
853 		outgoing_interface, zone->master, 1)) {
854 		close(fd);
855 		xfrd_set_refresh_now(zone);
856 		return 0;
857         }
858 
859 	conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
860 	if (conn == -1 && errno != EINPROGRESS) {
861 		log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
862 			zone->master->ip_address_spec, strerror(errno));
863 		close(fd);
864 		xfrd_set_refresh_now(zone);
865 		return 0;
866 	}
867 	tp->tcp_r->fd = fd;
868 	tp->tcp_w->fd = fd;
869 
870 	/* Check if an tls_auth name is configured which means we should try to
871 	   establish an SSL connection */
872 	if (zone->master->tls_auth_options &&
873 		zone->master->tls_auth_options->auth_domain_name) {
874 #ifdef HAVE_TLS_1_3
875 		if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
876 			log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
877 					zone->apex_str, zone->master->ip_address_spec);
878 			close(fd);
879 			xfrd_set_refresh_now(zone);
880 			return 0;
881 		}
882 
883 		/* Load client certificate (if provided) */
884 		if (zone->master->tls_auth_options->client_cert &&
885 		    zone->master->tls_auth_options->client_key) {
886 			if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
887 			                                       zone->master->tls_auth_options->client_cert) != 1) {
888 				log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
889 			}
890 
891 			if (zone->master->tls_auth_options->client_key_pw) {
892 				SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
893 				SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
894 			}
895 
896 			if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
897 				log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
898 			}
899 		}
900 
901 		tp->handshake_done = 0;
902 		if(!ssl_handshake(tp)) {
903 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
904 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
905 					"for %s to %s: %s", zone->apex_str,
906 					zone->master->ip_address_spec,
907 					strerror(errno));
908 
909 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
910 				char errmsg[1024];
911 				snprintf(errmsg, sizeof(errmsg), "xfrd: "
912 					"TLS handshake failed for %s to %s",
913 					zone->apex_str,
914 					zone->master->ip_address_spec);
915 				log_crypto_err(errmsg);
916 			} else {
917 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
918 					"for %s to %s with %d", zone->apex_str,
919 					zone->master->ip_address_spec,
920 					tp->handshake_want);
921 			}
922 			close(fd);
923 			xfrd_set_refresh_now(zone);
924 			return 0;
925 		}
926 #else
927 		log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
928 						 "not supported for %s to %s",
929 						  zone->apex_str, zone->master->ip_address_spec);
930 		close(fd);
931 		xfrd_set_refresh_now(zone);
932 		return 0;
933 #endif
934 	}
935 
936 	/* set the tcp pipe event */
937 	if(tp->handler_added)
938 		event_del(&tp->handler);
939 	memset(&tp->handler, 0, sizeof(tp->handler));
940 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
941 #ifdef HAVE_TLS_1_3
942 		( !tp->ssl
943 		|| tp->handshake_done
944 		|| tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
945 #else
946 		EV_WRITE,
947 #endif
948 	        xfrd_handle_tcp_pipe, tp);
949 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
950 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
951 	tv.tv_sec = set->tcp_timeout;
952 	tv.tv_usec = 0;
953 	if(event_add(&tp->handler, &tv) != 0)
954 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
955 	tp->handler_added = 1;
956 	return 1;
957 }
958 
959 void
xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)960 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
961 {
962 	struct xfrd_tcp* tcp = tp->tcp_w;
963 	assert(zone->tcp_conn != -1);
964 	assert(zone->tcp_waiting == 0);
965 	/* start AXFR or IXFR for the zone */
966 	if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
967 		zone->master->ixfr_disabled ||
968 		/* if zone expired, after the first round, do not ask for
969 		 * IXFR any more, but full AXFR (of any serial number) */
970 		(zone->state == xfrd_zone_expired && zone->round_num != 0)) {
971 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
972 						"(AXFR) for %s to %s",
973 			zone->apex_str, zone->master->ip_address_spec));
974 
975 		xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
976 			zone->query_id);
977 		xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
978 	} else {
979 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
980 						"transfer (IXFR) for %s to %s",
981 			zone->apex_str, zone->master->ip_address_spec));
982 
983 		xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
984 			zone->query_id);
985 		xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
986 		NSCOUNT_SET(tcp->packet, 1);
987 		xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
988 	}
989 	if(zone->master->key_options && zone->master->key_options->tsig_key) {
990 		xfrd_tsig_sign_request(
991 			tcp->packet, &zone->latest_xfr->tsig, zone->master);
992 	}
993 	buffer_flip(tcp->packet);
994 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
995 	tcp->msglen = buffer_limit(tcp->packet);
996 	tcp->total_bytes = 0;
997 }
998 
999 static void
tcp_conn_ready_for_reading(struct xfrd_tcp * tcp)1000 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
1001 {
1002 	tcp->total_bytes = 0;
1003 	tcp->msglen = 0;
1004 	buffer_clear(tcp->packet);
1005 }
1006 
1007 #ifdef HAVE_TLS_1_3
1008 static int
conn_write_ssl(struct xfrd_tcp * tcp,SSL * ssl)1009 conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
1010 {
1011 	int request_length;
1012 	ssize_t sent;
1013 
1014 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1015 		uint16_t sendlen = htons(tcp->msglen);
1016 		// send
1017 		request_length = sizeof(tcp->msglen) - tcp->total_bytes;
1018 		ERR_clear_error();
1019 		sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
1020 						 request_length);
1021 		switch(SSL_get_error(ssl,sent)) {
1022 			case SSL_ERROR_NONE:
1023 				break;
1024 			default:
1025 				log_msg(LOG_ERR, "xfrd: generic write problem with tls");
1026 		}
1027 
1028 		if(sent == -1) {
1029 			if(errno == EAGAIN || errno == EINTR) {
1030 				/* write would block, try later */
1031 				return 0;
1032 			} else {
1033 				return -1;
1034 			}
1035 		}
1036 
1037 		tcp->total_bytes += sent;
1038 		if(sent > (ssize_t)sizeof(tcp->msglen))
1039 			buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
1040 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1041 			/* incomplete write, resume later */
1042 			return 0;
1043 		}
1044 		assert(tcp->total_bytes >= sizeof(tcp->msglen));
1045 	}
1046 
1047 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
1048 
1049 	request_length = buffer_remaining(tcp->packet);
1050 	ERR_clear_error();
1051 	sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
1052 	switch(SSL_get_error(ssl,sent)) {
1053 		case SSL_ERROR_NONE:
1054 			break;
1055 		default:
1056 			log_msg(LOG_ERR, "xfrd: generic write problem with tls");
1057 	}
1058 	if(sent == -1) {
1059 		if(errno == EAGAIN || errno == EINTR) {
1060 			/* write would block, try later */
1061 			return 0;
1062 		} else {
1063 			return -1;
1064 		}
1065 	}
1066 
1067 	buffer_skip(tcp->packet, sent);
1068 	tcp->total_bytes += sent;
1069 
1070 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
1071 		/* more to write when socket becomes writable again */
1072 		return 0;
1073 	}
1074 
1075 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
1076 	return 1;
1077 }
1078 #endif
1079 
conn_write(struct xfrd_tcp * tcp)1080 int conn_write(struct xfrd_tcp* tcp)
1081 {
1082 	ssize_t sent;
1083 
1084 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1085 		uint16_t sendlen = htons(tcp->msglen);
1086 #ifdef HAVE_WRITEV
1087 		struct iovec iov[2];
1088 		iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
1089 		iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
1090 		iov[1].iov_base = buffer_begin(tcp->packet);
1091 		iov[1].iov_len = buffer_limit(tcp->packet);
1092 		sent = writev(tcp->fd, iov, 2);
1093 #else /* HAVE_WRITEV */
1094 		sent = write(tcp->fd,
1095 			(const char*)&sendlen + tcp->total_bytes,
1096 			sizeof(tcp->msglen) - tcp->total_bytes);
1097 #endif /* HAVE_WRITEV */
1098 
1099 		if(sent == -1) {
1100 			if(errno == EAGAIN || errno == EINTR) {
1101 				/* write would block, try later */
1102 				return 0;
1103 			} else {
1104 				return -1;
1105 			}
1106 		}
1107 
1108 		tcp->total_bytes += sent;
1109 		if(sent > (ssize_t)sizeof(tcp->msglen))
1110 			buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
1111 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1112 			/* incomplete write, resume later */
1113 			return 0;
1114 		}
1115 #ifdef HAVE_WRITEV
1116 		if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
1117 			/* packet done */
1118 			return 1;
1119 		}
1120 #endif
1121 		assert(tcp->total_bytes >= sizeof(tcp->msglen));
1122 	}
1123 
1124 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
1125 
1126 	sent = write(tcp->fd,
1127 		buffer_current(tcp->packet),
1128 		buffer_remaining(tcp->packet));
1129 	if(sent == -1) {
1130 		if(errno == EAGAIN || errno == EINTR) {
1131 			/* write would block, try later */
1132 			return 0;
1133 		} else {
1134 			return -1;
1135 		}
1136 	}
1137 
1138 	buffer_skip(tcp->packet, sent);
1139 	tcp->total_bytes += sent;
1140 
1141 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
1142 		/* more to write when socket becomes writable again */
1143 		return 0;
1144 	}
1145 
1146 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
1147 	return 1;
1148 }
1149 
1150 void
xfrd_tcp_write(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)1151 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
1152 {
1153 	int ret;
1154 	struct xfrd_tcp* tcp = tp->tcp_w;
1155 	assert(zone->tcp_conn != -1);
1156 	assert(zone == tp->tcp_send_first);
1157 	/* see if for non-established connection, there is a connect error */
1158 	if(!tp->connection_established) {
1159 		/* check for pending error from nonblocking connect */
1160 		/* from Stevens, unix network programming, vol1, 3rd ed, p450 */
1161 		int error = 0;
1162 		socklen_t len = sizeof(error);
1163 		if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
1164 			error = errno; /* on solaris errno is error */
1165 		}
1166 		if(error == EINPROGRESS || error == EWOULDBLOCK)
1167 			return; /* try again later */
1168 		if(error != 0) {
1169 			log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
1170 				zone->apex_str, zone->master->ip_address_spec,
1171 				strerror(error));
1172 			xfrd_tcp_pipe_stop(tp);
1173 			return;
1174 		}
1175 	}
1176 #ifdef HAVE_TLS_1_3
1177 	if (tp->ssl) {
1178 		if(tp->handshake_done) {
1179 			ret = conn_write_ssl(tcp, tp->ssl);
1180 
1181 		} else if(ssl_handshake(tp)) {
1182 			tcp_pipe_reset_timeout(tp); /* reschedule */
1183 			return;
1184 
1185 		} else {
1186 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1187 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1188 					strerror(errno));
1189 
1190 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
1191 				log_crypto_err("xfrd: TLS handshake failed");
1192 			} else {
1193 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1194 					"with value: %d", tp->handshake_want);
1195 			}
1196 			xfrd_tcp_pipe_stop(tp);
1197 			return;
1198 		}
1199 	} else
1200 #endif
1201 		ret = conn_write(tcp);
1202 	if(ret == -1) {
1203 		log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1204 		xfrd_tcp_pipe_stop(tp);
1205 		return;
1206 	}
1207 	if(tcp->total_bytes != 0 && !tp->connection_established)
1208 		tp->connection_established = 1;
1209 	if(ret == 0) {
1210 		return; /* write again later */
1211 	}
1212 	/* done writing this message */
1213 
1214 	/* remove first zone from sendlist */
1215 	tcp_pipe_sendlist_popfirst(tp, zone);
1216 
1217 	/* see if other zone wants to write; init; let it write (now) */
1218 	/* and use a loop, because 64k stack calls is a too much */
1219 	while(tp->tcp_send_first) {
1220 		/* setup to write for this zone */
1221 		xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
1222 		/* attempt to write for this zone (if success, continue loop)*/
1223 #ifdef HAVE_TLS_1_3
1224 		if (tp->ssl)
1225 			ret = conn_write_ssl(tcp, tp->ssl);
1226 		else
1227 #endif
1228 			ret = conn_write(tcp);
1229 		if(ret == -1) {
1230 			log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1231 			xfrd_tcp_pipe_stop(tp);
1232 			return;
1233 		}
1234 		if(ret == 0)
1235 			return; /* write again later */
1236 		tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
1237 	}
1238 
1239 	/* if sendlist empty, remove WRITE from event */
1240 
1241 	/* listen to READ, and not WRITE events */
1242 	assert(tp->tcp_send_first == NULL);
1243 	tcp_pipe_reset_timeout(tp);
1244 }
1245 
1246 #ifdef HAVE_TLS_1_3
1247 static int
conn_read_ssl(struct xfrd_tcp * tcp,SSL * ssl)1248 conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
1249 {
1250 	ssize_t received;
1251 	/* receive leading packet length bytes */
1252 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1253 		ERR_clear_error();
1254 		received = SSL_read(ssl,
1255 						(char*) &tcp->msglen + tcp->total_bytes,
1256 						sizeof(tcp->msglen) - tcp->total_bytes);
1257 		if (received <= 0) {
1258 			int err = SSL_get_error(ssl, received);
1259 			if(err == SSL_ERROR_WANT_READ && errno == EAGAIN) {
1260 				return 0;
1261 			}
1262 			if(err == SSL_ERROR_ZERO_RETURN) {
1263 				/* EOF */
1264 				return 0;
1265 			}
1266 			log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
1267 		}
1268 		if(received == -1) {
1269 			if(errno == EAGAIN || errno == EINTR) {
1270 				/* read would block, try later */
1271 				return 0;
1272 			} else {
1273 #ifdef ECONNRESET
1274 				if (verbosity >= 2 || errno != ECONNRESET)
1275 #endif /* ECONNRESET */
1276 					log_msg(LOG_ERR, "tls read sz: %s", strerror(errno));
1277 				return -1;
1278 			}
1279 		} else if(received == 0) {
1280 			/* EOF */
1281 			return -1;
1282 		}
1283 		tcp->total_bytes += received;
1284 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1285 			/* not complete yet, try later */
1286 			return 0;
1287 		}
1288 
1289 		assert(tcp->total_bytes == sizeof(tcp->msglen));
1290 		tcp->msglen = ntohs(tcp->msglen);
1291 
1292 		if(tcp->msglen == 0) {
1293 			buffer_set_limit(tcp->packet, tcp->msglen);
1294 			return 1;
1295 		}
1296 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
1297 			log_msg(LOG_ERR, "buffer too small, dropping connection");
1298 			return 0;
1299 		}
1300 		buffer_set_limit(tcp->packet, tcp->msglen);
1301 	}
1302 
1303 	assert(buffer_remaining(tcp->packet) > 0);
1304 	ERR_clear_error();
1305 
1306 	received = SSL_read(ssl, buffer_current(tcp->packet),
1307 					buffer_remaining(tcp->packet));
1308 
1309 	if (received <= 0) {
1310 		int err = SSL_get_error(ssl, received);
1311 		if(err == SSL_ERROR_ZERO_RETURN) {
1312 			/* EOF */
1313 			return 0;
1314 		}
1315 		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
1316 	}
1317 	if(received == -1) {
1318 		if(errno == EAGAIN || errno == EINTR) {
1319 			/* read would block, try later */
1320 			return 0;
1321 		} else {
1322 #ifdef ECONNRESET
1323 			if (verbosity >= 2 || errno != ECONNRESET)
1324 #endif /* ECONNRESET */
1325 				log_msg(LOG_ERR, "tcp read %s", strerror(errno));
1326 			return -1;
1327 		}
1328 	} else if(received == 0) {
1329 		/* EOF */
1330 		return -1;
1331 	}
1332 
1333 	tcp->total_bytes += received;
1334 	buffer_skip(tcp->packet, received);
1335 
1336 	if(buffer_remaining(tcp->packet) > 0) {
1337 		/* not complete yet, wait for more */
1338 		return 0;
1339 	}
1340 
1341 	/* completed */
1342 	assert(buffer_position(tcp->packet) == tcp->msglen);
1343 	return 1;
1344 }
1345 #endif
1346 
1347 int
conn_read(struct xfrd_tcp * tcp)1348 conn_read(struct xfrd_tcp* tcp)
1349 {
1350 	ssize_t received;
1351 	/* receive leading packet length bytes */
1352 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
1353 		received = read(tcp->fd,
1354 			(char*) &tcp->msglen + tcp->total_bytes,
1355 			sizeof(tcp->msglen) - tcp->total_bytes);
1356 		if(received == -1) {
1357 			if(errno == EAGAIN || errno == EINTR) {
1358 				/* read would block, try later */
1359 				return 0;
1360 			} else {
1361 #ifdef ECONNRESET
1362 				if (verbosity >= 2 || errno != ECONNRESET)
1363 #endif /* ECONNRESET */
1364 				log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
1365 				return -1;
1366 			}
1367 		} else if(received == 0) {
1368 			/* EOF */
1369 			return -1;
1370 		}
1371 		tcp->total_bytes += received;
1372 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
1373 			/* not complete yet, try later */
1374 			return 0;
1375 		}
1376 
1377 		assert(tcp->total_bytes == sizeof(tcp->msglen));
1378 		tcp->msglen = ntohs(tcp->msglen);
1379 
1380 		if(tcp->msglen == 0) {
1381 			buffer_set_limit(tcp->packet, tcp->msglen);
1382 			return 1;
1383 		}
1384 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
1385 			log_msg(LOG_ERR, "buffer too small, dropping connection");
1386 			return 0;
1387 		}
1388 		buffer_set_limit(tcp->packet, tcp->msglen);
1389 	}
1390 
1391 	assert(buffer_remaining(tcp->packet) > 0);
1392 
1393 	received = read(tcp->fd, buffer_current(tcp->packet),
1394 		buffer_remaining(tcp->packet));
1395 	if(received == -1) {
1396 		if(errno == EAGAIN || errno == EINTR) {
1397 			/* read would block, try later */
1398 			return 0;
1399 		} else {
1400 #ifdef ECONNRESET
1401 			if (verbosity >= 2 || errno != ECONNRESET)
1402 #endif /* ECONNRESET */
1403 			log_msg(LOG_ERR, "tcp read %s", strerror(errno));
1404 			return -1;
1405 		}
1406 	} else if(received == 0) {
1407 		/* EOF */
1408 		return -1;
1409 	}
1410 
1411 	tcp->total_bytes += received;
1412 	buffer_skip(tcp->packet, received);
1413 
1414 	if(buffer_remaining(tcp->packet) > 0) {
1415 		/* not complete yet, wait for more */
1416 		return 0;
1417 	}
1418 
1419 	/* completed */
1420 	assert(buffer_position(tcp->packet) == tcp->msglen);
1421 	return 1;
1422 }
1423 
1424 void
xfrd_tcp_read(struct xfrd_tcp_pipeline * tp)1425 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
1426 {
1427 	xfrd_zone_type* zone;
1428 	struct xfrd_tcp* tcp = tp->tcp_r;
1429 	int ret;
1430 	enum xfrd_packet_result pkt_result;
1431 #ifdef HAVE_TLS_1_3
1432 	if(tp->ssl) {
1433 		if(tp->handshake_done) {
1434 			ret = conn_read_ssl(tcp, tp->ssl);
1435 
1436 		} else if(ssl_handshake(tp)) {
1437 			tcp_pipe_reset_timeout(tp); /* reschedule */
1438 			return;
1439 
1440 		} else {
1441 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1442 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1443 					strerror(errno));
1444 
1445 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
1446 				log_crypto_err("xfrd: TLS handshake failed");
1447 			} else {
1448 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1449 					"with value: %d", tp->handshake_want);
1450 			}
1451 			xfrd_tcp_pipe_stop(tp);
1452 			return;
1453 		}
1454 	} else
1455 #endif
1456 		ret = conn_read(tcp);
1457 	if(ret == -1) {
1458 		log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
1459 		xfrd_tcp_pipe_stop(tp);
1460 		return;
1461 	}
1462 	if(ret == 0)
1463 		return;
1464 	/* completed msg */
1465 	buffer_flip(tcp->packet);
1466 	/* see which ID number it is, if skip, handle skip, NULL: warn */
1467 	if(tcp->msglen < QHEADERSZ) {
1468 		/* too short for DNS header, skip it */
1469 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1470 			"xfrd: tcp skip response that is too short"));
1471 		tcp_conn_ready_for_reading(tcp);
1472 		return;
1473 	}
1474 	zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
1475 	if(!zone || zone == TCP_NULL_SKIP) {
1476 		/* no zone for this id? skip it */
1477 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1478 			"xfrd: tcp skip response with %s ID",
1479 			zone?"set-to-skip":"unknown"));
1480 		tcp_conn_ready_for_reading(tcp);
1481 		return;
1482 	}
1483 	assert(zone->tcp_conn != -1);
1484 
1485 	/* handle message for zone */
1486 	pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
1487 	/* setup for reading the next packet on this connection */
1488 	tcp_conn_ready_for_reading(tcp);
1489 	switch(pkt_result) {
1490 		case xfrd_packet_more:
1491 			/* wait for next packet */
1492 			break;
1493 		case xfrd_packet_newlease:
1494 			/* set to skip if more packets with this ID */
1495 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1496 			tp->key.num_skip++;
1497 			/* fall through to remove zone from tp */
1498 			/* fallthrough */
1499 		case xfrd_packet_transfer:
1500 			if(zone->zone_options->pattern->multi_master_check) {
1501 				xfrd_tcp_release(xfrd->tcp_set, zone);
1502 				xfrd_make_request(zone);
1503 				break;
1504 			}
1505 			xfrd_tcp_release(xfrd->tcp_set, zone);
1506 			assert(zone->round_num == -1);
1507 			break;
1508 		case xfrd_packet_notimpl:
1509 			xfrd_disable_ixfr(zone);
1510 			xfrd_tcp_release(xfrd->tcp_set, zone);
1511 			/* query next server */
1512 			xfrd_make_request(zone);
1513 			break;
1514 		case xfrd_packet_bad:
1515 		case xfrd_packet_tcp:
1516 		default:
1517 			/* set to skip if more packets with this ID */
1518 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1519 			tp->key.num_skip++;
1520 			xfrd_tcp_release(xfrd->tcp_set, zone);
1521 			/* query next server */
1522 			xfrd_make_request(zone);
1523 			break;
1524 	}
1525 }
1526 
1527 void
xfrd_tcp_release(struct xfrd_tcp_set * set,xfrd_zone_type * zone)1528 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
1529 {
1530 	int conn = zone->tcp_conn;
1531 	struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
1532 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
1533 		zone->apex_str, zone->master->ip_address_spec));
1534 	assert(zone->tcp_conn != -1);
1535 	assert(zone->tcp_waiting == 0);
1536 	zone->tcp_conn = -1;
1537 	zone->tcp_waiting = 0;
1538 
1539 	/* remove from tcp_send list */
1540 	tcp_pipe_sendlist_remove(tp, zone);
1541 	/* remove it from the ID list */
1542 	if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
1543 		tcp_pipe_id_remove(tp, zone, 1);
1544 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
1545 		tp->key.num_unused));
1546 	/* if pipe was full, but no more, then see if waiting element is
1547 	 * for the same master, and can fill the unused ID */
1548 	if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
1549 #ifdef INET6
1550 		struct sockaddr_storage to;
1551 #else
1552 		struct sockaddr_in to;
1553 #endif
1554 		socklen_t to_len = xfrd_acl_sockaddr_to(
1555 			set->tcp_waiting_first->master, &to);
1556 		if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
1557 			/* use this connection for the waiting zone */
1558 			zone = set->tcp_waiting_first;
1559 			assert(zone->tcp_conn == -1);
1560 			zone->tcp_conn = conn;
1561 			tcp_zone_waiting_list_popfirst(set, zone);
1562 			if(zone->zone_handler.ev_fd != -1)
1563 				xfrd_udp_release(zone);
1564 			xfrd_unset_timer(zone);
1565 			pipeline_setup_new_zone(set, tp, zone);
1566 			return;
1567 		}
1568 		/* waiting zone did not go to same server */
1569 	}
1570 
1571 	/* if all unused, or only skipped leftover, close the pipeline */
1572 	if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
1573 		xfrd_tcp_pipe_release(set, tp, conn);
1574 }
1575 
1576 void
xfrd_tcp_pipe_release(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,int conn)1577 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
1578 	int conn)
1579 {
1580 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
1581 	/* one handler per tcp pipe */
1582 	if(tp->handler_added)
1583 		event_del(&tp->handler);
1584 	tp->handler_added = 0;
1585 
1586 #ifdef HAVE_TLS_1_3
1587 	/* close SSL */
1588 	if (tp->ssl) {
1589 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
1590 		SSL_shutdown(tp->ssl);
1591 		SSL_free(tp->ssl);
1592 		tp->ssl = NULL;
1593 	}
1594 #endif
1595 
1596 	/* fd in tcp_r and tcp_w is the same, close once */
1597 	if(tp->tcp_r->fd != -1)
1598 		close(tp->tcp_r->fd);
1599 	tp->tcp_r->fd = -1;
1600 	tp->tcp_w->fd = -1;
1601 
1602 	/* remove from pipetree */
1603 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
1604 
1605 	/* a waiting zone can use the free tcp slot (to another server) */
1606 	/* if that zone fails to set-up or connect, we try to start the next
1607 	 * waiting zone in the list */
1608 	while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
1609 		/* pop first waiting process */
1610 		xfrd_zone_type* zone = set->tcp_waiting_first;
1611 		/* start it */
1612 		assert(zone->tcp_conn == -1);
1613 		zone->tcp_conn = conn;
1614 		tcp_zone_waiting_list_popfirst(set, zone);
1615 
1616 		/* stop udp (if any) */
1617 		if(zone->zone_handler.ev_fd != -1)
1618 			xfrd_udp_release(zone);
1619 		if(!xfrd_tcp_open(set, tp, zone)) {
1620 			zone->tcp_conn = -1;
1621 			xfrd_set_refresh_now(zone);
1622 			/* try to start the next zone (if any) */
1623 			continue;
1624 		}
1625 		/* re-init this tcppipe */
1626 		/* ip and ip_len set by tcp_open */
1627 		xfrd_tcp_pipeline_init(tp);
1628 
1629 		/* insert into tree */
1630 		(void)rbtree_insert(set->pipetree, &tp->key.node);
1631 		/* setup write */
1632 		xfrd_unset_timer(zone);
1633 		pipeline_setup_new_zone(set, tp, zone);
1634 		/* started a task, no need for cleanups, so return */
1635 		return;
1636 	}
1637 	/* no task to start, cleanup */
1638 	assert(!set->tcp_waiting_first);
1639 	set->tcp_count --;
1640 	assert(set->tcp_count >= 0);
1641 }
1642 
1643