1 /*
2 * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
3 *
4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5 *
6 * See LICENSE for the license.
7 *
8 */
9
10 #include "config.h"
11 #include <assert.h>
12 #include <errno.h>
13 #include <fcntl.h>
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include <sys/uio.h>
17 #include "nsd.h"
18 #include "xfrd-tcp.h"
19 #include "buffer.h"
20 #include "packet.h"
21 #include "dname.h"
22 #include "options.h"
23 #include "namedb.h"
24 #include "xfrd.h"
25 #include "xfrd-disk.h"
26 #include "util.h"
27 #ifdef HAVE_TLS_1_3
28 #include <openssl/ssl.h>
29 #include <openssl/err.h>
30 #endif
31
32 #ifdef HAVE_TLS_1_3
33 void log_crypto_err(const char* str); /* in server.c */
34
35 static SSL_CTX*
create_ssl_context()36 create_ssl_context()
37 {
38 SSL_CTX *ctx;
39 unsigned char protos[] = { 3, 'd', 'o', 't' };
40 ctx = SSL_CTX_new(TLS_client_method());
41 if (!ctx) {
42 log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
43 }
44 else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
45 SSL_CTX_free(ctx);
46 log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
47 return NULL;
48 }
49 /* Only trust 1.3 as per the specification */
50 else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
51 SSL_CTX_free(ctx);
52 log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
53 return NULL;
54 }
55
56 if (SSL_CTX_set_alpn_protos(ctx, protos, sizeof(protos)) != 0) {
57 SSL_CTX_free(ctx);
58 log_msg(LOG_ERR, "xfrd tls: Unable to set ALPN protocols");
59 return NULL;
60 }
61 return ctx;
62 }
63
64 static int
tls_verify_callback(int preverify_ok,X509_STORE_CTX * ctx)65 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
66 {
67 int err = X509_STORE_CTX_get_error(ctx);
68 int depth = X509_STORE_CTX_get_error_depth(ctx);
69
70 // report the specific cert error here - will need custom verify code if
71 // SPKI pins are supported
72 if (!preverify_ok)
73 log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
74 err,
75 depth,
76 X509_verify_cert_error_string(err));
77 return preverify_ok;
78 }
79
80 static int
setup_ssl(struct xfrd_tcp_pipeline * tp,struct xfrd_tcp_set * tcp_set,const char * auth_domain_name)81 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
82 const char* auth_domain_name)
83 {
84 if (!tcp_set->ssl_ctx) {
85 log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
86 return 0;
87 }
88 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
89 auth_domain_name));
90 tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
91 if(!tp->ssl) {
92 log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
93 return 0;
94 }
95 SSL_set_connect_state(tp->ssl);
96 (void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
97 if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
98 log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
99 SSL_free(tp->ssl);
100 tp->ssl = NULL;
101 return 0;
102 }
103
104 SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
105 if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
106 log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
107 auth_domain_name);
108 SSL_free(tp->ssl);
109 tp->ssl = NULL;
110 return 0;
111 }
112 return 1;
113 }
114
115 static int
ssl_handshake(struct xfrd_tcp_pipeline * tp)116 ssl_handshake(struct xfrd_tcp_pipeline* tp)
117 {
118 int ret;
119
120 ERR_clear_error();
121 ret = SSL_do_handshake(tp->ssl);
122 if(ret == 1) {
123 DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
124 tp->handshake_done = 1;
125 return 1;
126 }
127 tp->handshake_want = SSL_get_error(tp->ssl, ret);
128 if(tp->handshake_want == SSL_ERROR_WANT_READ
129 || tp->handshake_want == SSL_ERROR_WANT_WRITE)
130 return 1;
131
132 return 0;
133 }
134
/*
 * Password callback for loading an encrypted private key: copies the
 * NUL-terminated password that u points to into buf (at most size bytes,
 * including the terminator) and returns the length of the copied string.
 * Returns 0 when there is no usable buffer or no password to hand out.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	/* a non-positive size would turn into a huge size_t below */
	if(buf == NULL || size <= 0)
		return 0;
	if(u == NULL) {
		buf[0] = 0;
		return 0;
	}
	strlcpy(buf, (char*)u, (size_t)size);
	return (int)strlen(buf);
}
140
141 #endif
142
143 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
144 static int
xfrd_pipe_cmp(const void * a,const void * b)145 xfrd_pipe_cmp(const void* a, const void* b)
146 {
147 const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
148 const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
149 int r;
150 if(x == y)
151 return 0;
152 if(y->key.ip_len != x->key.ip_len)
153 /* subtraction works because nonnegative and small numbers */
154 return (int)y->key.ip_len - (int)x->key.ip_len;
155 r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
156 if(r != 0)
157 return r;
158 /* sort that num_unused is sorted ascending, */
159 if(x->key.num_unused != y->key.num_unused) {
160 return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
161 }
162 /* different pipelines are different still, even with same numunused*/
163 return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
164 }
165
xfrd_tcp_set_create(struct region * region,const char * tls_cert_bundle,int tcp_max,int tcp_pipeline)166 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
167 {
168 int i;
169 struct xfrd_tcp_set* tcp_set = region_alloc(region,
170 sizeof(struct xfrd_tcp_set));
171 memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
172 tcp_set->tcp_state = NULL;
173 tcp_set->tcp_max = tcp_max;
174 tcp_set->tcp_pipeline = tcp_pipeline;
175 tcp_set->tcp_count = 0;
176 tcp_set->tcp_waiting_first = 0;
177 tcp_set->tcp_waiting_last = 0;
178 #ifdef HAVE_TLS_1_3
179 /* Set up SSL context */
180 tcp_set->ssl_ctx = create_ssl_context();
181 if (tcp_set->ssl_ctx == NULL)
182 log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
183
184 else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
185 tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
186 log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
187 tls_cert_bundle);
188 }
189 #else
190 (void)tls_cert_bundle;
191 log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
192 #endif
193 tcp_set->tcp_state = region_alloc(region,
194 sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
195 for(i=0; i<tcp_set->tcp_max; i++)
196 tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
197 tcp_pipeline);
198 tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
199 return tcp_set;
200 }
201
pipeline_id_compare(const void * x,const void * y)202 static int pipeline_id_compare(const void* x, const void* y)
203 {
204 struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
205 struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
206 if(a->id < b->id)
207 return -1;
208 if(a->id > b->id)
209 return 1;
210 return 0;
211 }
212
/*
 * Fill array with num distinct values out of the range 0..max-1.
 * When num is 65536 the array simply receives 0..65535 in order.
 * Otherwise the values are drawn with the Robert Floyd sampling
 * algorithm, using random_generate for the random choices.
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	/* one byte per candidate value, nonzero when already handed out */
	uint8_t taken[65536];
	int upper, count;

	if(num == 65536) {
		/* every value is wanted, no need to sample */
		int v;
		for(v = 0; v < num; v++)
			array[v] = (uint16_t)v;
		return;
	}
	assert(max <= 65536);
	memset(taken, 0, sizeof(taken[0])*max);
	count = 0;
	for(upper = max-num; upper < max; upper++) {
		/* random_generate(x) yields a value from 0..x-1 */
		int pick;
		if(upper+1 <= 1)
			pick = 0;
		else
			pick = random_generate(upper+1);
		/* when the drawn value is in use, take the upper bound instead */
		if(taken[pick])
			pick = upper;
		array[count++] = pick;
		taken[pick] = 1;
	}
}
245
246 static void
clear_pipeline_entry(struct xfrd_tcp_pipeline * tp,rbnode_type * node)247 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
248 {
249 struct xfrd_tcp_pipeline_id *n;
250 if(node == NULL || node == RBTREE_NULL)
251 return;
252 clear_pipeline_entry(tp, node->left);
253 node->left = NULL;
254 clear_pipeline_entry(tp, node->right);
255 node->right = NULL;
256 /* move the node into the free list */
257 n = (struct xfrd_tcp_pipeline_id*)node;
258 n->next_free = tp->pipe_id_free_list;
259 tp->pipe_id_free_list = n;
260 }
261
262 static void
xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline * tp)263 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
264 {
265 /* move entries into free list */
266 clear_pipeline_entry(tp, tp->zone_per_id->root);
267 /* clear the tree */
268 tp->zone_per_id->count = 0;
269 tp->zone_per_id->root = RBTREE_NULL;
270 }
271
272 static void
xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline * tp)273 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
274 {
275 tp->key.node.key = tp;
276 tp->key.num_unused = tp->pipe_num;
277 tp->key.num_skip = 0;
278 tp->tcp_send_first = NULL;
279 tp->tcp_send_last = NULL;
280 xfrd_tcp_pipeline_cleanup(tp);
281 pick_id_values(tp->unused, tp->pipe_num, 65536);
282 }
283
284 struct xfrd_tcp_pipeline*
xfrd_tcp_pipeline_create(region_type * region,int tcp_pipeline)285 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
286 {
287 int i;
288 struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
289 region_alloc_zero(region, sizeof(*tp));
290 if(tcp_pipeline < 0)
291 tcp_pipeline = 0;
292 if(tcp_pipeline > 65536)
293 tcp_pipeline = 65536; /* max 16 bit ID numbers */
294 tp->pipe_num = tcp_pipeline;
295 tp->key.num_unused = tp->pipe_num;
296 tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
297 tp->pipe_id_free_list = NULL;
298 for(i=0; i<tp->pipe_num; i++) {
299 struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
300 region_alloc_zero(region, sizeof(*n));
301 n->next_free = tp->pipe_id_free_list;
302 tp->pipe_id_free_list = n;
303 }
304 tp->unused = (uint16_t*)region_alloc_zero(region,
305 sizeof(tp->unused[0])*tp->pipe_num);
306 tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
307 tp->tcp_w = xfrd_tcp_create(region, 512);
308 xfrd_tcp_pipeline_init(tp);
309 return tp;
310 }
311
312 static struct xfrd_zone*
xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline * tp,uint16_t id)313 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
314 {
315 struct xfrd_tcp_pipeline_id key;
316 rbnode_type* n;
317 memset(&key, 0, sizeof(key));
318 key.node.key = &key;
319 key.id = id;
320 n = rbtree_search(tp->zone_per_id, &key);
321 if(n && n != RBTREE_NULL) {
322 return ((struct xfrd_tcp_pipeline_id*)n)->zone;
323 }
324 return NULL;
325 }
326
327 static void
xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline * tp,uint16_t id,struct xfrd_zone * zone)328 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
329 struct xfrd_zone* zone)
330 {
331 struct xfrd_tcp_pipeline_id* n;
332 /* because there are tp->pipe_num preallocated entries, and we have
333 * only tp->pipe_num id values, the list cannot be empty now. */
334 assert(tp->pipe_id_free_list != NULL);
335 /* pick up next free xfrd_tcp_pipeline_id node */
336 n = tp->pipe_id_free_list;
337 tp->pipe_id_free_list = n->next_free;
338 n->next_free = NULL;
339 memset(&n->node, 0, sizeof(n->node));
340 n->node.key = n;
341 n->id = id;
342 n->zone = zone;
343 rbtree_insert(tp->zone_per_id, &n->node);
344 }
345
346 static void
xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline * tp,uint16_t id)347 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
348 {
349 struct xfrd_tcp_pipeline_id key;
350 rbnode_type* node;
351 memset(&key, 0, sizeof(key));
352 key.node.key = &key;
353 key.id = id;
354 node = rbtree_delete(tp->zone_per_id, &key);
355 if(node && node != RBTREE_NULL) {
356 struct xfrd_tcp_pipeline_id* n =
357 (struct xfrd_tcp_pipeline_id*)node;
358 n->next_free = tp->pipe_id_free_list;
359 tp->pipe_id_free_list = n;
360 }
361 }
362
363 static void
xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline * tp,uint16_t id)364 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
365 {
366 struct xfrd_tcp_pipeline_id key;
367 rbnode_type* n;
368 memset(&key, 0, sizeof(key));
369 key.node.key = &key;
370 key.id = id;
371 n = rbtree_search(tp->zone_per_id, &key);
372 if(n && n != RBTREE_NULL) {
373 struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
374 zid->zone = TCP_NULL_SKIP;
375 }
376 }
377
378 void
xfrd_setup_packet(buffer_type * packet,uint16_t type,uint16_t klass,const dname_type * dname,uint16_t qid)379 xfrd_setup_packet(buffer_type* packet,
380 uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid)
381 {
382 /* Set up the header */
383 buffer_clear(packet);
384 ID_SET(packet, qid);
385 FLAGS_SET(packet, 0);
386 OPCODE_SET(packet, OPCODE_QUERY);
387 QDCOUNT_SET(packet, 1);
388 ANCOUNT_SET(packet, 0);
389 NSCOUNT_SET(packet, 0);
390 ARCOUNT_SET(packet, 0);
391 buffer_skip(packet, QHEADERSZ);
392
393 /* The question record. */
394 buffer_write(packet, dname_name(dname), dname->name_size);
395 buffer_write_u16(packet, type);
396 buffer_write_u16(packet, klass);
397 }
398
399 static socklen_t
400 #ifdef INET6
xfrd_acl_sockaddr(acl_options_type * acl,unsigned int port,struct sockaddr_storage * sck)401 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
402 struct sockaddr_storage *sck)
403 #else
404 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
405 struct sockaddr_in *sck, const char* fromto)
406 #endif /* INET6 */
407 {
408 /* setup address structure */
409 #ifdef INET6
410 memset(sck, 0, sizeof(struct sockaddr_storage));
411 #else
412 memset(sck, 0, sizeof(struct sockaddr_in));
413 #endif
414 if(acl->is_ipv6) {
415 #ifdef INET6
416 struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
417 sa->sin6_family = AF_INET6;
418 sa->sin6_port = htons(port);
419 sa->sin6_addr = acl->addr.addr6;
420 return sizeof(struct sockaddr_in6);
421 #else
422 log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
423 INET6.", fromto, acl->ip_address_spec);
424 return 0;
425 #endif
426 } else {
427 struct sockaddr_in* sa = (struct sockaddr_in*)sck;
428 sa->sin_family = AF_INET;
429 sa->sin_port = htons(port);
430 sa->sin_addr = acl->addr.addr;
431 return sizeof(struct sockaddr_in);
432 }
433 }
434
435 socklen_t
436 #ifdef INET6
xfrd_acl_sockaddr_to(acl_options_type * acl,struct sockaddr_storage * to)437 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
438 #else
439 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
440 #endif /* INET6 */
441 {
442 #ifdef HAVE_TLS_1_3
443 unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
444 (unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
445 #else
446 unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
447 #endif
448 #ifdef INET6
449 return xfrd_acl_sockaddr(acl, port, to);
450 #else
451 return xfrd_acl_sockaddr(acl, port, to, "to");
452 #endif /* INET6 */
453 }
454
455 socklen_t
456 #ifdef INET6
xfrd_acl_sockaddr_frm(acl_options_type * acl,struct sockaddr_storage * frm)457 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
458 #else
459 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
460 #endif /* INET6 */
461 {
462 unsigned int port = acl->port?acl->port:0;
463 #ifdef INET6
464 return xfrd_acl_sockaddr(acl, port, frm);
465 #else
466 return xfrd_acl_sockaddr(acl, port, frm, "from");
467 #endif /* INET6 */
468 }
469
470 void
xfrd_write_soa_buffer(struct buffer * packet,const dname_type * apex,struct xfrd_soa * soa)471 xfrd_write_soa_buffer(struct buffer* packet,
472 const dname_type* apex, struct xfrd_soa* soa)
473 {
474 size_t rdlength_pos;
475 uint16_t rdlength;
476 buffer_write(packet, dname_name(apex), apex->name_size);
477
478 /* already in network order */
479 buffer_write(packet, &soa->type, sizeof(soa->type));
480 buffer_write(packet, &soa->klass, sizeof(soa->klass));
481 buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
482 rdlength_pos = buffer_position(packet);
483 buffer_skip(packet, sizeof(rdlength));
484
485 /* uncompressed dnames */
486 buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
487 buffer_write(packet, soa->email+1, soa->email[0]);
488
489 buffer_write(packet, &soa->serial, sizeof(uint32_t));
490 buffer_write(packet, &soa->refresh, sizeof(uint32_t));
491 buffer_write(packet, &soa->retry, sizeof(uint32_t));
492 buffer_write(packet, &soa->expire, sizeof(uint32_t));
493 buffer_write(packet, &soa->minimum, sizeof(uint32_t));
494
495 /* write length of RR */
496 rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
497 buffer_write_u16_at(packet, rdlength_pos, rdlength);
498 }
499
500 struct xfrd_tcp*
xfrd_tcp_create(region_type * region,size_t bufsize)501 xfrd_tcp_create(region_type* region, size_t bufsize)
502 {
503 struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
504 region, sizeof(struct xfrd_tcp));
505 memset(tcp_state, 0, sizeof(struct xfrd_tcp));
506 tcp_state->packet = buffer_create(region, bufsize);
507 tcp_state->fd = -1;
508
509 return tcp_state;
510 }
511
512 static struct xfrd_tcp_pipeline*
pipeline_find(struct xfrd_tcp_set * set,xfrd_zone_type * zone)513 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
514 {
515 rbnode_type* sme = NULL;
516 struct xfrd_tcp_pipeline* r;
517 /* smaller buf than a full pipeline with 64kb ID array, only need
518 * the front part with the key info, this front part contains the
519 * members that the compare function uses. */
520 struct xfrd_tcp_pipeline_key k, *key=&k;
521 key->node.key = key;
522 key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
523 key->num_unused = set->tcp_pipeline;
524 /* lookup existing tcp transfer to the master with highest unused */
525 if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
526 /* exact match, strange, fully unused tcp cannot be open */
527 assert(0);
528 }
529 if(!sme)
530 return NULL;
531 r = (struct xfrd_tcp_pipeline*)sme->key;
532 /* <= key pointed at, is the master correct ? */
533 if(r->key.ip_len != key->ip_len)
534 return NULL;
535 if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
536 return NULL;
537 /* correct master, is there a slot free for this transfer? */
538 if(r->key.num_unused == 0)
539 return NULL;
540 return r;
541 }
542
543 /* remove zone from tcp waiting list */
544 static void
tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set * set,xfrd_zone_type * zone)545 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
546 {
547 assert(zone->tcp_waiting);
548 set->tcp_waiting_first = zone->tcp_waiting_next;
549 if(zone->tcp_waiting_next)
550 zone->tcp_waiting_next->tcp_waiting_prev = NULL;
551 else set->tcp_waiting_last = 0;
552 zone->tcp_waiting_next = 0;
553 zone->tcp_waiting = 0;
554 }
555
556 /* remove zone from tcp pipe write-wait list */
557 static void
tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)558 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
559 {
560 if(zone->in_tcp_send) {
561 if(zone->tcp_send_prev)
562 zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
563 else tp->tcp_send_first=zone->tcp_send_next;
564 if(zone->tcp_send_next)
565 zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
566 else tp->tcp_send_last=zone->tcp_send_prev;
567 zone->in_tcp_send = 0;
568 }
569 }
570
571 /* remove first from write-wait list */
572 static void
tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)573 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
574 {
575 tp->tcp_send_first = zone->tcp_send_next;
576 if(tp->tcp_send_first)
577 tp->tcp_send_first->tcp_send_prev = NULL;
578 else tp->tcp_send_last = NULL;
579 zone->in_tcp_send = 0;
580 }
581
582 /* remove zone from tcp pipe ID map */
583 static void
tcp_pipe_id_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone,int alsotree)584 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
585 int alsotree)
586 {
587 assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
588 if(alsotree)
589 xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
590 tp->unused[tp->key.num_unused] = zone->query_id;
591 /* must remove and re-add for sort order in tree */
592 (void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
593 tp->key.num_unused++;
594 (void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
595 }
596
597 /* stop the tcp pipe (and all its zones need to retry) */
598 static void
xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline * tp)599 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
600 {
601 struct xfrd_tcp_pipeline_id* zid;
602 int conn = -1;
603 assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
604 assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
605 /* need to retry for all the zones connected to it */
606 /* these could use different lists and go to a different nextmaster*/
607 RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
608 xfrd_zone_type* zone = zid->zone;
609 if(zone && zone != TCP_NULL_SKIP) {
610 assert(zone->query_id == zid->id);
611 conn = zone->tcp_conn;
612 zone->tcp_conn = -1;
613 zone->tcp_waiting = 0;
614 tcp_pipe_sendlist_remove(tp, zone);
615 tcp_pipe_id_remove(tp, zone, 0);
616 xfrd_set_refresh_now(zone);
617 }
618 }
619 xfrd_tcp_pipeline_cleanup(tp);
620 assert(conn != -1);
621 /* now release the entire tcp pipe */
622 xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
623 }
624
625 static void
tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline * tp)626 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
627 {
628 int fd = tp->handler.ev_fd;
629 struct timeval tv;
630 tv.tv_sec = xfrd->tcp_set->tcp_timeout;
631 tv.tv_usec = 0;
632 if(tp->handler_added)
633 event_del(&tp->handler);
634 memset(&tp->handler, 0, sizeof(tp->handler));
635 event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
636 #ifdef HAVE_TLS_1_3
637 ( tp->ssl
638 ? ( tp->handshake_done ? ( tp->tcp_send_first ? EV_WRITE : 0 )
639 : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
640 : tp->tcp_send_first ? EV_WRITE : 0 ),
641 #else
642 ( tp->tcp_send_first ? EV_WRITE : 0 ),
643 #endif
644 xfrd_handle_tcp_pipe, tp);
645 if(event_base_set(xfrd->event_base, &tp->handler) != 0)
646 log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
647 if(event_add(&tp->handler, &tv) != 0)
648 log_msg(LOG_ERR, "xfrd tcp: event_add failed");
649 tp->handler_added = 1;
650 }
651
652 /* handle event from fd of tcp pipe */
653 void
xfrd_handle_tcp_pipe(int ATTR_UNUSED (fd),short event,void * arg)654 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
655 {
656 struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
657 if((event & EV_WRITE)) {
658 tcp_pipe_reset_timeout(tp);
659 if(tp->tcp_send_first) {
660 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
661 tp->tcp_send_first->apex_str));
662 xfrd_tcp_write(tp, tp->tcp_send_first);
663 }
664 }
665 if((event & EV_READ) && tp->handler_added) {
666 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
667 tcp_pipe_reset_timeout(tp);
668 xfrd_tcp_read(tp);
669 }
670 if((event & EV_TIMEOUT) && tp->handler_added) {
671 /* tcp connection timed out */
672 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
673 xfrd_tcp_pipe_stop(tp);
674 }
675 }
676
677 /* add a zone to the pipeline, it starts to want to write its query */
678 static void
pipeline_setup_new_zone(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)679 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
680 xfrd_zone_type* zone)
681 {
682 /* assign the ID */
683 int idx;
684 assert(tp->key.num_unused > 0);
685 /* we pick a random ID, even though it is TCP anyway */
686 idx = random_generate(tp->key.num_unused);
687 zone->query_id = tp->unused[idx];
688 tp->unused[idx] = tp->unused[tp->key.num_unused-1];
689 xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
690 /* decrement unused counter, and fixup tree */
691 (void)rbtree_delete(set->pipetree, &tp->key.node);
692 tp->key.num_unused--;
693 (void)rbtree_insert(set->pipetree, &tp->key.node);
694
695 /* add to sendlist, at end */
696 zone->tcp_send_next = NULL;
697 zone->tcp_send_prev = tp->tcp_send_last;
698 zone->in_tcp_send = 1;
699 if(tp->tcp_send_last)
700 tp->tcp_send_last->tcp_send_next = zone;
701 else tp->tcp_send_first = zone;
702 tp->tcp_send_last = zone;
703
704 /* is it first in line? */
705 if(tp->tcp_send_first == zone) {
706 xfrd_tcp_setup_write_packet(tp, zone);
707 /* add write to event handler */
708 tcp_pipe_reset_timeout(tp);
709 }
710 }
711
712 void
xfrd_tcp_obtain(struct xfrd_tcp_set * set,xfrd_zone_type * zone)713 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
714 {
715 struct xfrd_tcp_pipeline* tp;
716 assert(zone->tcp_conn == -1);
717 assert(zone->tcp_waiting == 0);
718
719 if(set->tcp_count < set->tcp_max) {
720 int i;
721 assert(!set->tcp_waiting_first);
722 set->tcp_count ++;
723 /* find a free tcp_buffer */
724 for(i=0; i<set->tcp_max; i++) {
725 if(set->tcp_state[i]->tcp_r->fd == -1) {
726 zone->tcp_conn = i;
727 break;
728 }
729 }
730 /** What if there is no free tcp_buffer? return; */
731 if (zone->tcp_conn < 0) {
732 return;
733 }
734
735 tp = set->tcp_state[zone->tcp_conn];
736 zone->tcp_waiting = 0;
737
738 /* stop udp use (if any) */
739 if(zone->zone_handler.ev_fd != -1)
740 xfrd_udp_release(zone);
741
742 if(!xfrd_tcp_open(set, tp, zone)) {
743 zone->tcp_conn = -1;
744 set->tcp_count --;
745 xfrd_set_refresh_now(zone);
746 return;
747 }
748 /* ip and ip_len set by tcp_open */
749 xfrd_tcp_pipeline_init(tp);
750
751 /* insert into tree */
752 (void)rbtree_insert(set->pipetree, &tp->key.node);
753 xfrd_deactivate_zone(zone);
754 xfrd_unset_timer(zone);
755 pipeline_setup_new_zone(set, tp, zone);
756 return;
757 }
758 /* check for a pipeline to the same master with unused ID */
759 if((tp = pipeline_find(set, zone))!= NULL) {
760 int i;
761 if(zone->zone_handler.ev_fd != -1)
762 xfrd_udp_release(zone);
763 for(i=0; i<set->tcp_max; i++) {
764 if(set->tcp_state[i] == tp)
765 zone->tcp_conn = i;
766 }
767 xfrd_deactivate_zone(zone);
768 xfrd_unset_timer(zone);
769 pipeline_setup_new_zone(set, tp, zone);
770 return;
771 }
772
773 /* wait, at end of line */
774 DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
775 "connections (%d) reached.", set->tcp_max));
776 zone->tcp_waiting_next = 0;
777 zone->tcp_waiting_prev = set->tcp_waiting_last;
778 zone->tcp_waiting = 1;
779 if(!set->tcp_waiting_last) {
780 set->tcp_waiting_first = zone;
781 set->tcp_waiting_last = zone;
782 } else {
783 set->tcp_waiting_last->tcp_waiting_next = zone;
784 set->tcp_waiting_last = zone;
785 }
786 xfrd_deactivate_zone(zone);
787 xfrd_unset_timer(zone);
788 }
789
790 int
xfrd_tcp_open(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)791 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
792 xfrd_zone_type* zone)
793 {
794 int fd, family, conn;
795 struct timeval tv;
796 assert(zone->tcp_conn != -1);
797
798 /* if there is no next master, fallback to use the first one */
799 /* but there really should be a master set */
800 if(!zone->master) {
801 zone->master = zone->zone_options->pattern->request_xfr;
802 zone->master_num = 0;
803 }
804
805 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
806 zone->apex_str, zone->master->ip_address_spec));
807 tp->tcp_r->is_reading = 1;
808 tp->tcp_r->total_bytes = 0;
809 tp->tcp_r->msglen = 0;
810 buffer_clear(tp->tcp_r->packet);
811 tp->tcp_w->is_reading = 0;
812 tp->tcp_w->total_bytes = 0;
813 tp->tcp_w->msglen = 0;
814 tp->connection_established = 0;
815
816 if(zone->master->is_ipv6) {
817 #ifdef INET6
818 family = PF_INET6;
819 #else
820 xfrd_set_refresh_now(zone);
821 return 0;
822 #endif
823 } else {
824 family = PF_INET;
825 }
826 fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
827 if(fd == -1) {
828 /* squelch 'Address family not supported by protocol' at low
829 * verbosity levels */
830 if(errno != EAFNOSUPPORT || verbosity > 2)
831 log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
832 zone->master->ip_address_spec, strerror(errno));
833 xfrd_set_refresh_now(zone);
834 return 0;
835 }
836 if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
837 log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
838 close(fd);
839 xfrd_set_refresh_now(zone);
840 return 0;
841 }
842
843 if(xfrd->nsd->outgoing_tcp_mss > 0) {
844 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
845 if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
846 (void*)&xfrd->nsd->outgoing_tcp_mss,
847 sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
848 log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
849 "failed: %s", strerror(errno));
850 }
851 #else
852 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
853 #endif
854 }
855
856 tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
857
858 /* bind it */
859 if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
860 outgoing_interface, zone->master, 1)) {
861 close(fd);
862 xfrd_set_refresh_now(zone);
863 return 0;
864 }
865
866 conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
867 if (conn == -1 && errno != EINPROGRESS) {
868 log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
869 zone->master->ip_address_spec, strerror(errno));
870 close(fd);
871 xfrd_set_refresh_now(zone);
872 return 0;
873 }
874 tp->tcp_r->fd = fd;
875 tp->tcp_w->fd = fd;
876
877 /* Check if an tls_auth name is configured which means we should try to
878 establish an SSL connection */
879 if (zone->master->tls_auth_options &&
880 zone->master->tls_auth_options->auth_domain_name) {
881 #ifdef HAVE_TLS_1_3
882 if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
883 log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
884 zone->apex_str, zone->master->ip_address_spec);
885 close(fd);
886 xfrd_set_refresh_now(zone);
887 return 0;
888 }
889
890 /* Load client certificate (if provided) */
891 if (zone->master->tls_auth_options->client_cert &&
892 zone->master->tls_auth_options->client_key) {
893 if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
894 zone->master->tls_auth_options->client_cert) != 1) {
895 log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
896 }
897
898 if (zone->master->tls_auth_options->client_key_pw) {
899 SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
900 SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
901 }
902
903 if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
904 log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
905 }
906 }
907
908 tp->handshake_done = 0;
909 if(!ssl_handshake(tp)) {
910 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
911 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
912 "for %s to %s: %s", zone->apex_str,
913 zone->master->ip_address_spec,
914 strerror(errno));
915
916 } else if(tp->handshake_want == SSL_ERROR_SSL) {
917 char errmsg[1024];
918 snprintf(errmsg, sizeof(errmsg), "xfrd: "
919 "TLS handshake failed for %s to %s",
920 zone->apex_str,
921 zone->master->ip_address_spec);
922 log_crypto_err(errmsg);
923 } else {
924 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
925 "for %s to %s with %d", zone->apex_str,
926 zone->master->ip_address_spec,
927 tp->handshake_want);
928 }
929 close(fd);
930 xfrd_set_refresh_now(zone);
931 return 0;
932 }
933 #else
934 log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
935 "not supported for %s to %s",
936 zone->apex_str, zone->master->ip_address_spec);
937 close(fd);
938 xfrd_set_refresh_now(zone);
939 return 0;
940 #endif
941 }
942
943 /* set the tcp pipe event */
944 if(tp->handler_added)
945 event_del(&tp->handler);
946 memset(&tp->handler, 0, sizeof(tp->handler));
947 event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
948 #ifdef HAVE_TLS_1_3
949 ( !tp->ssl
950 || tp->handshake_done
951 || tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
952 #else
953 EV_WRITE,
954 #endif
955 xfrd_handle_tcp_pipe, tp);
956 if(event_base_set(xfrd->event_base, &tp->handler) != 0)
957 log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
958 tv.tv_sec = set->tcp_timeout;
959 tv.tv_usec = 0;
960 if(event_add(&tp->handler, &tv) != 0)
961 log_msg(LOG_ERR, "xfrd tcp: event_add failed");
962 tp->handler_added = 1;
963 return 1;
964 }
965
966 void
xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)967 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
968 {
969 struct xfrd_tcp* tcp = tp->tcp_w;
970 assert(zone->tcp_conn != -1);
971 assert(zone->tcp_waiting == 0);
972 /* start AXFR or IXFR for the zone */
973 if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
974 zone->master->ixfr_disabled ||
975 /* if zone expired, after the first round, do not ask for
976 * IXFR any more, but full AXFR (of any serial number) */
977 (zone->state == xfrd_zone_expired && zone->round_num != 0)) {
978 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
979 "(AXFR) for %s to %s",
980 zone->apex_str, zone->master->ip_address_spec));
981
982 xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
983 zone->query_id);
984 xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
985 } else {
986 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
987 "transfer (IXFR) for %s to %s",
988 zone->apex_str, zone->master->ip_address_spec));
989
990 xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
991 zone->query_id);
992 xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
993 NSCOUNT_SET(tcp->packet, 1);
994 xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
995 }
996 if(zone->master->key_options && zone->master->key_options->tsig_key) {
997 xfrd_tsig_sign_request(
998 tcp->packet, &zone->latest_xfr->tsig, zone->master);
999 }
1000 buffer_flip(tcp->packet);
1001 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
1002 tcp->msglen = buffer_limit(tcp->packet);
1003 tcp->total_bytes = 0;
1004 }
1005
1006 static void
tcp_conn_ready_for_reading(struct xfrd_tcp * tcp)1007 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
1008 {
1009 tcp->total_bytes = 0;
1010 tcp->msglen = 0;
1011 buffer_clear(tcp->packet);
1012 }
1013
#ifdef HAVE_TLS_1_3
/*
 * Attempt one SSL_write of len bytes starting at data.
 *
 * Returns the number of bytes written (> 0) on success, 0 when the
 * operation has to be retried later (the TLS layer wants to read or
 * write first, or the underlying syscall was interrupted or would
 * block), and -1 on any other failure, which is logged.
 *
 * The outcome is classified with SSL_get_error(); errno is consulted
 * only for SSL_ERROR_SYSCALL, because for other TLS results it is not
 * set by the failing operation and may be stale.
 */
static ssize_t
conn_write_ssl_chunk(SSL* ssl, const void* data, int len)
{
	int sent, err;

	ERR_clear_error();
	sent = SSL_write(ssl, data, len);
	if(sent > 0)
		return sent;

	/* a return value <= 0 is never a successful write */
	err = SSL_get_error(ssl, sent);
	if(err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) {
		/* not an error: retry when the socket is ready again */
		return 0;
	}
	if(err == SSL_ERROR_SYSCALL && (errno == EAGAIN || errno == EINTR)) {
		/* write would block, try later */
		return 0;
	}
	log_msg(LOG_ERR, "xfrd: generic write problem with tls");
	return -1;
}

/*
 * Write the pending message of tcp over the TLS connection ssl: first
 * the two byte length prefix (msglen in network byte order), then the
 * contents of the packet buffer from its current position.
 *
 * tcp->total_bytes counts prefix plus payload bytes written so far and
 * the packet buffer position is advanced by the payload bytes written,
 * so the function can be called again to resume.
 *
 * Returns 1 when the whole message has been written, 0 when more has
 * to be written later, -1 on failure.
 */
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t sent;

	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		uint16_t sendlen = htons(tcp->msglen);

		/* only the unsent part of the length prefix is written here,
		 * so no payload bytes can be consumed in this step */
		sent = conn_write_ssl_chunk(ssl,
			(const char*)&sendlen + tcp->total_bytes,
			(int)(sizeof(tcp->msglen) - tcp->total_bytes));
		if(sent <= 0)
			return (int)sent;

		tcp->total_bytes += sent;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* incomplete write, resume later */
			return 0;
		}
		assert(tcp->total_bytes >= sizeof(tcp->msglen));
	}

	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));

	sent = conn_write_ssl_chunk(ssl, buffer_current(tcp->packet),
		(int)buffer_remaining(tcp->packet));
	if(sent <= 0)
		return (int)sent;

	buffer_skip(tcp->packet, sent);
	tcp->total_bytes += sent;

	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
		/* more to write when socket becomes writable again */
		return 0;
	}

	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
	return 1;
}
#endif
1086
conn_write(struct xfrd_tcp * tcp)1087 int conn_write(struct xfrd_tcp* tcp)
1088 {
1089 ssize_t sent;
1090
1091 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1092 uint16_t sendlen = htons(tcp->msglen);
1093 #ifdef HAVE_WRITEV
1094 struct iovec iov[2];
1095 iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
1096 iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
1097 iov[1].iov_base = buffer_begin(tcp->packet);
1098 iov[1].iov_len = buffer_limit(tcp->packet);
1099 sent = writev(tcp->fd, iov, 2);
1100 #else /* HAVE_WRITEV */
1101 sent = write(tcp->fd,
1102 (const char*)&sendlen + tcp->total_bytes,
1103 sizeof(tcp->msglen) - tcp->total_bytes);
1104 #endif /* HAVE_WRITEV */
1105
1106 if(sent == -1) {
1107 if(errno == EAGAIN || errno == EINTR) {
1108 /* write would block, try later */
1109 return 0;
1110 } else {
1111 return -1;
1112 }
1113 }
1114
1115 tcp->total_bytes += sent;
1116 if(sent > (ssize_t)sizeof(tcp->msglen))
1117 buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
1118 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1119 /* incomplete write, resume later */
1120 return 0;
1121 }
1122 #ifdef HAVE_WRITEV
1123 if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
1124 /* packet done */
1125 return 1;
1126 }
1127 #endif
1128 assert(tcp->total_bytes >= sizeof(tcp->msglen));
1129 }
1130
1131 assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
1132
1133 sent = write(tcp->fd,
1134 buffer_current(tcp->packet),
1135 buffer_remaining(tcp->packet));
1136 if(sent == -1) {
1137 if(errno == EAGAIN || errno == EINTR) {
1138 /* write would block, try later */
1139 return 0;
1140 } else {
1141 return -1;
1142 }
1143 }
1144
1145 buffer_skip(tcp->packet, sent);
1146 tcp->total_bytes += sent;
1147
1148 if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
1149 /* more to write when socket becomes writable again */
1150 return 0;
1151 }
1152
1153 assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
1154 return 1;
1155 }
1156
1157 void
xfrd_tcp_write(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)1158 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
1159 {
1160 int ret;
1161 struct xfrd_tcp* tcp = tp->tcp_w;
1162 assert(zone->tcp_conn != -1);
1163 assert(zone == tp->tcp_send_first);
1164 /* see if for non-established connection, there is a connect error */
1165 if(!tp->connection_established) {
1166 /* check for pending error from nonblocking connect */
1167 /* from Stevens, unix network programming, vol1, 3rd ed, p450 */
1168 int error = 0;
1169 socklen_t len = sizeof(error);
1170 if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
1171 error = errno; /* on solaris errno is error */
1172 }
1173 if(error == EINPROGRESS || error == EWOULDBLOCK)
1174 return; /* try again later */
1175 if(error != 0) {
1176 log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
1177 zone->apex_str, zone->master->ip_address_spec,
1178 strerror(error));
1179 xfrd_tcp_pipe_stop(tp);
1180 return;
1181 }
1182 }
1183 #ifdef HAVE_TLS_1_3
1184 if (tp->ssl) {
1185 if(tp->handshake_done) {
1186 ret = conn_write_ssl(tcp, tp->ssl);
1187
1188 } else if(ssl_handshake(tp)) {
1189 tcp_pipe_reset_timeout(tp); /* reschedule */
1190 return;
1191
1192 } else {
1193 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1194 log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1195 strerror(errno));
1196
1197 } else if(tp->handshake_want == SSL_ERROR_SSL) {
1198 log_crypto_err("xfrd: TLS handshake failed");
1199 } else {
1200 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1201 "with value: %d", tp->handshake_want);
1202 }
1203 xfrd_tcp_pipe_stop(tp);
1204 return;
1205 }
1206 } else
1207 #endif
1208 ret = conn_write(tcp);
1209 if(ret == -1) {
1210 log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1211 xfrd_tcp_pipe_stop(tp);
1212 return;
1213 }
1214 if(tcp->total_bytes != 0 && !tp->connection_established)
1215 tp->connection_established = 1;
1216 if(ret == 0) {
1217 return; /* write again later */
1218 }
1219 /* done writing this message */
1220
1221 /* remove first zone from sendlist */
1222 tcp_pipe_sendlist_popfirst(tp, zone);
1223
1224 /* see if other zone wants to write; init; let it write (now) */
1225 /* and use a loop, because 64k stack calls is a too much */
1226 while(tp->tcp_send_first) {
1227 /* setup to write for this zone */
1228 xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
1229 /* attempt to write for this zone (if success, continue loop)*/
1230 #ifdef HAVE_TLS_1_3
1231 if (tp->ssl)
1232 ret = conn_write_ssl(tcp, tp->ssl);
1233 else
1234 #endif
1235 ret = conn_write(tcp);
1236 if(ret == -1) {
1237 log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1238 xfrd_tcp_pipe_stop(tp);
1239 return;
1240 }
1241 if(ret == 0)
1242 return; /* write again later */
1243 tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
1244 }
1245
1246 /* if sendlist empty, remove WRITE from event */
1247
1248 /* listen to READ, and not WRITE events */
1249 assert(tp->tcp_send_first == NULL);
1250 tcp_pipe_reset_timeout(tp);
1251 }
1252
#ifdef HAVE_TLS_1_3
/*
 * Attempt one SSL_read of at most len bytes into dst.
 *
 * Returns the number of bytes read (> 0) on success, 0 when the read
 * has to be retried later (the TLS layer wants to read or write first,
 * or the underlying syscall was interrupted or would block), and -1 on
 * end of stream or any other failure.
 *
 * The outcome is classified with SSL_get_error(); errno is consulted
 * only for SSL_ERROR_SYSCALL, because for other TLS results it is not
 * set by the failing operation and may be stale.
 */
static ssize_t
conn_read_ssl_chunk(SSL* ssl, void* dst, int len)
{
	ssize_t received;
	int err;

	ERR_clear_error();
	received = SSL_read(ssl, dst, len);
	if(received > 0)
		return received;

	/* a return value <= 0 is never a successful read */
	err = SSL_get_error(ssl, (int)received);
	switch(err) {
	case SSL_ERROR_WANT_READ:
	case SSL_ERROR_WANT_WRITE:
		/* not an error: retry when the socket is ready again */
		return 0;
	case SSL_ERROR_ZERO_RETURN:
		/* EOF */
		return -1;
	case SSL_ERROR_SYSCALL:
		if(errno == EAGAIN || errno == EINTR) {
			/* read would block, try later */
			return 0;
		}
#ifdef ECONNRESET
		if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
			log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(errno));
		return -1;
	default:
		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
		return -1;
	}
}

/*
 * Read (part of) one length-prefixed message from the TLS connection
 * ssl into tcp: first the two byte length, which is converted to host
 * byte order and stored in tcp->msglen, then msglen bytes of payload
 * into the packet buffer.  tcp->total_bytes and the buffer position
 * keep the progress, so the function can be called again to resume.
 *
 * Returns 1 when a complete message is in the packet buffer (also for
 * a zero length message), 0 when more data is needed, and -1 on end of
 * stream, on a read failure, or when the announced length does not fit
 * in the packet buffer.
 */
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t received;

	/* receive leading packet length bytes */
	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		received = conn_read_ssl_chunk(ssl,
			(char*) &tcp->msglen + tcp->total_bytes,
			(int)(sizeof(tcp->msglen) - tcp->total_bytes));
		if(received <= 0)
			return (int)received;

		tcp->total_bytes += received;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* not complete yet, try later */
			return 0;
		}

		assert(tcp->total_bytes == sizeof(tcp->msglen));
		tcp->msglen = ntohs(tcp->msglen);

		if(tcp->msglen == 0) {
			buffer_set_limit(tcp->packet, tcp->msglen);
			return 1;
		}
		if(tcp->msglen > buffer_capacity(tcp->packet)) {
			/* Report failure so the caller really drops the
			 * connection; returning 0 here would make the next
			 * call read payload without a matching buffer limit. */
			log_msg(LOG_ERR, "buffer too small, dropping connection");
			return -1;
		}
		buffer_set_limit(tcp->packet, tcp->msglen);
	}

	assert(buffer_remaining(tcp->packet) > 0);

	received = conn_read_ssl_chunk(ssl, buffer_current(tcp->packet),
		(int)buffer_remaining(tcp->packet));
	if(received <= 0)
		return (int)received;

	tcp->total_bytes += received;
	buffer_skip(tcp->packet, received);

	if(buffer_remaining(tcp->packet) > 0) {
		/* not complete yet, wait for more */
		return 0;
	}

	/* completed */
	assert(buffer_position(tcp->packet) == tcp->msglen);
	return 1;
}
#endif
1359
1360 int
conn_read(struct xfrd_tcp * tcp)1361 conn_read(struct xfrd_tcp* tcp)
1362 {
1363 ssize_t received;
1364 /* receive leading packet length bytes */
1365 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1366 received = read(tcp->fd,
1367 (char*) &tcp->msglen + tcp->total_bytes,
1368 sizeof(tcp->msglen) - tcp->total_bytes);
1369 if(received == -1) {
1370 if(errno == EAGAIN || errno == EINTR) {
1371 /* read would block, try later */
1372 return 0;
1373 } else {
1374 #ifdef ECONNRESET
1375 if (verbosity >= 2 || errno != ECONNRESET)
1376 #endif /* ECONNRESET */
1377 log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
1378 return -1;
1379 }
1380 } else if(received == 0) {
1381 /* EOF */
1382 return -1;
1383 }
1384 tcp->total_bytes += received;
1385 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1386 /* not complete yet, try later */
1387 return 0;
1388 }
1389
1390 assert(tcp->total_bytes == sizeof(tcp->msglen));
1391 tcp->msglen = ntohs(tcp->msglen);
1392
1393 if(tcp->msglen == 0) {
1394 buffer_set_limit(tcp->packet, tcp->msglen);
1395 return 1;
1396 }
1397 if(tcp->msglen > buffer_capacity(tcp->packet)) {
1398 log_msg(LOG_ERR, "buffer too small, dropping connection");
1399 return 0;
1400 }
1401 buffer_set_limit(tcp->packet, tcp->msglen);
1402 }
1403
1404 assert(buffer_remaining(tcp->packet) > 0);
1405
1406 received = read(tcp->fd, buffer_current(tcp->packet),
1407 buffer_remaining(tcp->packet));
1408 if(received == -1) {
1409 if(errno == EAGAIN || errno == EINTR) {
1410 /* read would block, try later */
1411 return 0;
1412 } else {
1413 #ifdef ECONNRESET
1414 if (verbosity >= 2 || errno != ECONNRESET)
1415 #endif /* ECONNRESET */
1416 log_msg(LOG_ERR, "tcp read %s", strerror(errno));
1417 return -1;
1418 }
1419 } else if(received == 0) {
1420 /* EOF */
1421 return -1;
1422 }
1423
1424 tcp->total_bytes += received;
1425 buffer_skip(tcp->packet, received);
1426
1427 if(buffer_remaining(tcp->packet) > 0) {
1428 /* not complete yet, wait for more */
1429 return 0;
1430 }
1431
1432 /* completed */
1433 assert(buffer_position(tcp->packet) == tcp->msglen);
1434 return 1;
1435 }
1436
1437 void
xfrd_tcp_read(struct xfrd_tcp_pipeline * tp)1438 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
1439 {
1440 xfrd_zone_type* zone;
1441 struct xfrd_tcp* tcp = tp->tcp_r;
1442 int ret;
1443 enum xfrd_packet_result pkt_result;
1444 #ifdef HAVE_TLS_1_3
1445 if(tp->ssl) {
1446 if(tp->handshake_done) {
1447 ret = conn_read_ssl(tcp, tp->ssl);
1448
1449 } else if(ssl_handshake(tp)) {
1450 tcp_pipe_reset_timeout(tp); /* reschedule */
1451 return;
1452
1453 } else {
1454 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1455 log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1456 strerror(errno));
1457
1458 } else if(tp->handshake_want == SSL_ERROR_SSL) {
1459 log_crypto_err("xfrd: TLS handshake failed");
1460 } else {
1461 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1462 "with value: %d", tp->handshake_want);
1463 }
1464 xfrd_tcp_pipe_stop(tp);
1465 return;
1466 }
1467 } else
1468 #endif
1469 ret = conn_read(tcp);
1470 if(ret == -1) {
1471 if(errno != 0)
1472 log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
1473 else
1474 log_msg(LOG_ERR, "xfrd: failed reading tcp: closed");
1475 xfrd_tcp_pipe_stop(tp);
1476 return;
1477 }
1478 if(ret == 0)
1479 return;
1480 /* completed msg */
1481 buffer_flip(tcp->packet);
1482 /* see which ID number it is, if skip, handle skip, NULL: warn */
1483 if(tcp->msglen < QHEADERSZ) {
1484 /* too short for DNS header, skip it */
1485 DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1486 "xfrd: tcp skip response that is too short"));
1487 tcp_conn_ready_for_reading(tcp);
1488 return;
1489 }
1490 zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
1491 if(!zone || zone == TCP_NULL_SKIP) {
1492 /* no zone for this id? skip it */
1493 DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1494 "xfrd: tcp skip response with %s ID",
1495 zone?"set-to-skip":"unknown"));
1496 tcp_conn_ready_for_reading(tcp);
1497 return;
1498 }
1499 assert(zone->tcp_conn != -1);
1500
1501 /* handle message for zone */
1502 pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
1503 /* setup for reading the next packet on this connection */
1504 tcp_conn_ready_for_reading(tcp);
1505 switch(pkt_result) {
1506 case xfrd_packet_more:
1507 /* wait for next packet */
1508 break;
1509 case xfrd_packet_newlease:
1510 /* set to skip if more packets with this ID */
1511 xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1512 tp->key.num_skip++;
1513 /* fall through to remove zone from tp */
1514 /* fallthrough */
1515 case xfrd_packet_transfer:
1516 if(zone->zone_options->pattern->multi_primary_check) {
1517 xfrd_tcp_release(xfrd->tcp_set, zone);
1518 xfrd_make_request(zone);
1519 break;
1520 }
1521 xfrd_tcp_release(xfrd->tcp_set, zone);
1522 assert(zone->round_num == -1);
1523 break;
1524 case xfrd_packet_notimpl:
1525 xfrd_disable_ixfr(zone);
1526 xfrd_tcp_release(xfrd->tcp_set, zone);
1527 /* query next server */
1528 xfrd_make_request(zone);
1529 break;
1530 case xfrd_packet_bad:
1531 case xfrd_packet_tcp:
1532 default:
1533 /* set to skip if more packets with this ID */
1534 xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1535 tp->key.num_skip++;
1536 xfrd_tcp_release(xfrd->tcp_set, zone);
1537 /* query next server */
1538 xfrd_make_request(zone);
1539 break;
1540 }
1541 }
1542
1543 void
xfrd_tcp_release(struct xfrd_tcp_set * set,xfrd_zone_type * zone)1544 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
1545 {
1546 int conn = zone->tcp_conn;
1547 struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
1548 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
1549 zone->apex_str, zone->master->ip_address_spec));
1550 assert(zone->tcp_conn != -1);
1551 assert(zone->tcp_waiting == 0);
1552 zone->tcp_conn = -1;
1553 zone->tcp_waiting = 0;
1554
1555 /* remove from tcp_send list */
1556 tcp_pipe_sendlist_remove(tp, zone);
1557 /* remove it from the ID list */
1558 if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
1559 tcp_pipe_id_remove(tp, zone, 1);
1560 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
1561 tp->key.num_unused));
1562 /* if pipe was full, but no more, then see if waiting element is
1563 * for the same master, and can fill the unused ID */
1564 if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
1565 #ifdef INET6
1566 struct sockaddr_storage to;
1567 #else
1568 struct sockaddr_in to;
1569 #endif
1570 socklen_t to_len = xfrd_acl_sockaddr_to(
1571 set->tcp_waiting_first->master, &to);
1572 if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
1573 /* use this connection for the waiting zone */
1574 zone = set->tcp_waiting_first;
1575 assert(zone->tcp_conn == -1);
1576 zone->tcp_conn = conn;
1577 tcp_zone_waiting_list_popfirst(set, zone);
1578 if(zone->zone_handler.ev_fd != -1)
1579 xfrd_udp_release(zone);
1580 xfrd_unset_timer(zone);
1581 pipeline_setup_new_zone(set, tp, zone);
1582 return;
1583 }
1584 /* waiting zone did not go to same server */
1585 }
1586
1587 /* if all unused, or only skipped leftover, close the pipeline */
1588 if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
1589 xfrd_tcp_pipe_release(set, tp, conn);
1590 }
1591
1592 void
xfrd_tcp_pipe_release(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,int conn)1593 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
1594 int conn)
1595 {
1596 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
1597 /* one handler per tcp pipe */
1598 if(tp->handler_added)
1599 event_del(&tp->handler);
1600 tp->handler_added = 0;
1601
1602 #ifdef HAVE_TLS_1_3
1603 /* close SSL */
1604 if (tp->ssl) {
1605 DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
1606 SSL_shutdown(tp->ssl);
1607 SSL_free(tp->ssl);
1608 tp->ssl = NULL;
1609 }
1610 #endif
1611
1612 /* fd in tcp_r and tcp_w is the same, close once */
1613 if(tp->tcp_r->fd != -1)
1614 close(tp->tcp_r->fd);
1615 tp->tcp_r->fd = -1;
1616 tp->tcp_w->fd = -1;
1617
1618 /* remove from pipetree */
1619 (void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
1620
1621 /* a waiting zone can use the free tcp slot (to another server) */
1622 /* if that zone fails to set-up or connect, we try to start the next
1623 * waiting zone in the list */
1624 while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
1625 /* pop first waiting process */
1626 xfrd_zone_type* zone = set->tcp_waiting_first;
1627 /* start it */
1628 assert(zone->tcp_conn == -1);
1629 zone->tcp_conn = conn;
1630 tcp_zone_waiting_list_popfirst(set, zone);
1631
1632 /* stop udp (if any) */
1633 if(zone->zone_handler.ev_fd != -1)
1634 xfrd_udp_release(zone);
1635 if(!xfrd_tcp_open(set, tp, zone)) {
1636 zone->tcp_conn = -1;
1637 xfrd_set_refresh_now(zone);
1638 /* try to start the next zone (if any) */
1639 continue;
1640 }
1641 /* re-init this tcppipe */
1642 /* ip and ip_len set by tcp_open */
1643 xfrd_tcp_pipeline_init(tp);
1644
1645 /* insert into tree */
1646 (void)rbtree_insert(set->pipetree, &tp->key.node);
1647 /* setup write */
1648 xfrd_unset_timer(zone);
1649 pipeline_setup_new_zone(set, tp, zone);
1650 /* started a task, no need for cleanups, so return */
1651 return;
1652 }
1653 /* no task to start, cleanup */
1654 assert(!set->tcp_waiting_first);
1655 set->tcp_count --;
1656 assert(set->tcp_count >= 0);
1657 }
1658
1659