1 /*
2 * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
3 *
4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5 *
6 * See LICENSE for the license.
7 *
8 */
9
10 #include "config.h"
11 #include <assert.h>
12 #include <errno.h>
13 #include <fcntl.h>
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include <sys/uio.h>
17 #include "nsd.h"
18 #include "xfrd-tcp.h"
19 #include "buffer.h"
20 #include "packet.h"
21 #include "dname.h"
22 #include "options.h"
23 #include "namedb.h"
24 #include "xfrd.h"
25 #include "xfrd-disk.h"
26 #include "util.h"
27 #ifdef HAVE_TLS_1_3
28 #include <openssl/ssl.h>
29 #include <openssl/err.h>
30 #endif
31
32 #ifdef HAVE_TLS_1_3
33 void log_crypto_err(const char* str); /* in server.c */
34
35 static SSL_CTX*
create_ssl_context()36 create_ssl_context()
37 {
38 SSL_CTX *ctx;
39 ctx = SSL_CTX_new(TLS_client_method());
40 if (!ctx) {
41 log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
42 }
43 else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
44 SSL_CTX_free(ctx);
45 log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
46 return NULL;
47 }
48 /* Only trust 1.3 as per the specification */
49 else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
50 SSL_CTX_free(ctx);
51 log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
52 return NULL;
53 }
54 return ctx;
55 }
56
57 static int
tls_verify_callback(int preverify_ok,X509_STORE_CTX * ctx)58 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
59 {
60 int err = X509_STORE_CTX_get_error(ctx);
61 int depth = X509_STORE_CTX_get_error_depth(ctx);
62
63 // report the specific cert error here - will need custom verify code if
64 // SPKI pins are supported
65 if (!preverify_ok)
66 log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
67 err,
68 depth,
69 X509_verify_cert_error_string(err));
70 return preverify_ok;
71 }
72
73 static int
setup_ssl(struct xfrd_tcp_pipeline * tp,struct xfrd_tcp_set * tcp_set,const char * auth_domain_name)74 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
75 const char* auth_domain_name)
76 {
77 if (!tcp_set->ssl_ctx) {
78 log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
79 return 0;
80 }
81 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
82 auth_domain_name));
83 tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
84 if(!tp->ssl) {
85 log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
86 return 0;
87 }
88 SSL_set_connect_state(tp->ssl);
89 (void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
90 if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
91 log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
92 SSL_free(tp->ssl);
93 tp->ssl = NULL;
94 return 0;
95 }
96
97 SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
98 if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
99 log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
100 auth_domain_name);
101 SSL_free(tp->ssl);
102 tp->ssl = NULL;
103 return 0;
104 }
105 return 1;
106 }
107
108 static int
ssl_handshake(struct xfrd_tcp_pipeline * tp)109 ssl_handshake(struct xfrd_tcp_pipeline* tp)
110 {
111 int ret;
112
113 ERR_clear_error();
114 ret = SSL_do_handshake(tp->ssl);
115 if(ret == 1) {
116 DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
117 tp->handshake_done = 1;
118 return 1;
119 }
120 tp->handshake_want = SSL_get_error(tp->ssl, ret);
121 if(tp->handshake_want == SSL_ERROR_WANT_READ
122 || tp->handshake_want == SSL_ERROR_WANT_WRITE)
123 return 1;
124
125 return 0;
126 }
127
/*
 * Passphrase callback for loading an encrypted private key.
 * buf:  destination buffer for the passphrase.
 * size: capacity of buf in bytes.
 * u:    user data, the NUL terminated passphrase string.
 * Returns the length of the passphrase placed in buf (truncated to fit),
 * or -1 when there is no usable buffer or no passphrase to copy.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	/* refuse to copy into a missing or empty buffer, or from no source;
	 * a negative size would otherwise become a huge size_t bound */
	if(buf == NULL || size <= 0 || u == NULL)
		return -1;
	strlcpy(buf, (const char*)u, (size_t)size);
	return (int)strlen(buf);
}
133
134 #endif
135
136 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
137 static int
xfrd_pipe_cmp(const void * a,const void * b)138 xfrd_pipe_cmp(const void* a, const void* b)
139 {
140 const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
141 const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
142 int r;
143 if(x == y)
144 return 0;
145 if(y->key.ip_len != x->key.ip_len)
146 /* subtraction works because nonnegative and small numbers */
147 return (int)y->key.ip_len - (int)x->key.ip_len;
148 r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
149 if(r != 0)
150 return r;
151 /* sort that num_unused is sorted ascending, */
152 if(x->key.num_unused != y->key.num_unused) {
153 return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
154 }
155 /* different pipelines are different still, even with same numunused*/
156 return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
157 }
158
xfrd_tcp_set_create(struct region * region,const char * tls_cert_bundle,int tcp_max,int tcp_pipeline)159 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
160 {
161 int i;
162 struct xfrd_tcp_set* tcp_set = region_alloc(region,
163 sizeof(struct xfrd_tcp_set));
164 memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
165 tcp_set->tcp_state = NULL;
166 tcp_set->tcp_max = tcp_max;
167 tcp_set->tcp_pipeline = tcp_pipeline;
168 tcp_set->tcp_count = 0;
169 tcp_set->tcp_waiting_first = 0;
170 tcp_set->tcp_waiting_last = 0;
171 #ifdef HAVE_TLS_1_3
172 /* Set up SSL context */
173 tcp_set->ssl_ctx = create_ssl_context();
174 if (tcp_set->ssl_ctx == NULL)
175 log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
176
177 else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
178 tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
179 log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
180 tls_cert_bundle);
181 }
182 #else
183 (void)tls_cert_bundle;
184 log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
185 #endif
186 tcp_set->tcp_state = region_alloc(region,
187 sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
188 for(i=0; i<tcp_set->tcp_max; i++)
189 tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
190 tcp_pipeline);
191 tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
192 return tcp_set;
193 }
194
pipeline_id_compare(const void * x,const void * y)195 static int pipeline_id_compare(const void* x, const void* y)
196 {
197 struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
198 struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
199 if(a->id < b->id)
200 return -1;
201 if(a->id > b->id)
202 return 1;
203 return 0;
204 }
205
/*
 * Fill array with num distinct values drawn from 0..max-1.
 * When num is 65536 the array simply receives 0..65535 in order.
 * Otherwise the values are picked with random_generate.
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	uint8_t taken[65536];
	int upper;
	int filled = 0;

	if(num == 65536) {
		/* all of them, loop and insert */
		int v;
		for(v = 0; v < num; v++)
			array[v] = (uint16_t)v;
		return;
	}
	assert(max <= 65536);
	/* This uses the Robert Floyd sampling algorithm */
	/* taken[] records which values have been handed out already */
	memset(taken, 0, sizeof(taken[0])*max);
	for(upper = max-num; upper < max; upper++) {
		/* random generate creates from 0..arg-1 */
		int pick = (upper+1 <= 1) ? 0 : random_generate(upper+1);

		/* a value that was used before is replaced by the upper
		 * bound of this round */
		if(taken[pick])
			pick = upper;
		array[filled++] = pick;
		taken[pick] = 1;
	}
}
238
239 static void
clear_pipeline_entry(struct xfrd_tcp_pipeline * tp,rbnode_type * node)240 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
241 {
242 struct xfrd_tcp_pipeline_id *n;
243 if(node == NULL || node == RBTREE_NULL)
244 return;
245 clear_pipeline_entry(tp, node->left);
246 node->left = NULL;
247 clear_pipeline_entry(tp, node->right);
248 node->right = NULL;
249 /* move the node into the free list */
250 n = (struct xfrd_tcp_pipeline_id*)node;
251 n->next_free = tp->pipe_id_free_list;
252 tp->pipe_id_free_list = n;
253 }
254
255 static void
xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline * tp)256 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
257 {
258 /* move entries into free list */
259 clear_pipeline_entry(tp, tp->zone_per_id->root);
260 /* clear the tree */
261 tp->zone_per_id->count = 0;
262 tp->zone_per_id->root = RBTREE_NULL;
263 }
264
265 static void
xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline * tp)266 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
267 {
268 tp->key.node.key = tp;
269 tp->key.num_unused = tp->pipe_num;
270 tp->key.num_skip = 0;
271 tp->tcp_send_first = NULL;
272 tp->tcp_send_last = NULL;
273 xfrd_tcp_pipeline_cleanup(tp);
274 pick_id_values(tp->unused, tp->pipe_num, 65536);
275 }
276
277 struct xfrd_tcp_pipeline*
xfrd_tcp_pipeline_create(region_type * region,int tcp_pipeline)278 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
279 {
280 int i;
281 struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
282 region_alloc_zero(region, sizeof(*tp));
283 if(tcp_pipeline < 0)
284 tcp_pipeline = 0;
285 if(tcp_pipeline > 65536)
286 tcp_pipeline = 65536; /* max 16 bit ID numbers */
287 tp->pipe_num = tcp_pipeline;
288 tp->key.num_unused = tp->pipe_num;
289 tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
290 tp->pipe_id_free_list = NULL;
291 for(i=0; i<tp->pipe_num; i++) {
292 struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
293 region_alloc_zero(region, sizeof(*n));
294 n->next_free = tp->pipe_id_free_list;
295 tp->pipe_id_free_list = n;
296 }
297 tp->unused = (uint16_t*)region_alloc_zero(region,
298 sizeof(tp->unused[0])*tp->pipe_num);
299 tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
300 tp->tcp_w = xfrd_tcp_create(region, 512);
301 xfrd_tcp_pipeline_init(tp);
302 return tp;
303 }
304
305 static struct xfrd_zone*
xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline * tp,uint16_t id)306 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
307 {
308 struct xfrd_tcp_pipeline_id key;
309 rbnode_type* n;
310 memset(&key, 0, sizeof(key));
311 key.node.key = &key;
312 key.id = id;
313 n = rbtree_search(tp->zone_per_id, &key);
314 if(n && n != RBTREE_NULL) {
315 return ((struct xfrd_tcp_pipeline_id*)n)->zone;
316 }
317 return NULL;
318 }
319
320 static void
xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline * tp,uint16_t id,struct xfrd_zone * zone)321 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
322 struct xfrd_zone* zone)
323 {
324 struct xfrd_tcp_pipeline_id* n;
325 /* because there are tp->pipe_num preallocated entries, and we have
326 * only tp->pipe_num id values, the list cannot be empty now. */
327 assert(tp->pipe_id_free_list != NULL);
328 /* pick up next free xfrd_tcp_pipeline_id node */
329 n = tp->pipe_id_free_list;
330 tp->pipe_id_free_list = n->next_free;
331 n->next_free = NULL;
332 memset(&n->node, 0, sizeof(n->node));
333 n->node.key = n;
334 n->id = id;
335 n->zone = zone;
336 rbtree_insert(tp->zone_per_id, &n->node);
337 }
338
339 static void
xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline * tp,uint16_t id)340 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
341 {
342 struct xfrd_tcp_pipeline_id key;
343 rbnode_type* node;
344 memset(&key, 0, sizeof(key));
345 key.node.key = &key;
346 key.id = id;
347 node = rbtree_delete(tp->zone_per_id, &key);
348 if(node && node != RBTREE_NULL) {
349 struct xfrd_tcp_pipeline_id* n =
350 (struct xfrd_tcp_pipeline_id*)node;
351 n->next_free = tp->pipe_id_free_list;
352 tp->pipe_id_free_list = n;
353 }
354 }
355
356 static void
xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline * tp,uint16_t id)357 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
358 {
359 struct xfrd_tcp_pipeline_id key;
360 rbnode_type* n;
361 memset(&key, 0, sizeof(key));
362 key.node.key = &key;
363 key.id = id;
364 n = rbtree_search(tp->zone_per_id, &key);
365 if(n && n != RBTREE_NULL) {
366 struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
367 zid->zone = TCP_NULL_SKIP;
368 }
369 }
370
371 void
xfrd_setup_packet(buffer_type * packet,uint16_t type,uint16_t klass,const dname_type * dname,uint16_t qid)372 xfrd_setup_packet(buffer_type* packet,
373 uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid)
374 {
375 /* Set up the header */
376 buffer_clear(packet);
377 ID_SET(packet, qid);
378 FLAGS_SET(packet, 0);
379 OPCODE_SET(packet, OPCODE_QUERY);
380 QDCOUNT_SET(packet, 1);
381 ANCOUNT_SET(packet, 0);
382 NSCOUNT_SET(packet, 0);
383 ARCOUNT_SET(packet, 0);
384 buffer_skip(packet, QHEADERSZ);
385
386 /* The question record. */
387 buffer_write(packet, dname_name(dname), dname->name_size);
388 buffer_write_u16(packet, type);
389 buffer_write_u16(packet, klass);
390 }
391
392 static socklen_t
393 #ifdef INET6
xfrd_acl_sockaddr(acl_options_type * acl,unsigned int port,struct sockaddr_storage * sck)394 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
395 struct sockaddr_storage *sck)
396 #else
397 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
398 struct sockaddr_in *sck, const char* fromto)
399 #endif /* INET6 */
400 {
401 /* setup address structure */
402 #ifdef INET6
403 memset(sck, 0, sizeof(struct sockaddr_storage));
404 #else
405 memset(sck, 0, sizeof(struct sockaddr_in));
406 #endif
407 if(acl->is_ipv6) {
408 #ifdef INET6
409 struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
410 sa->sin6_family = AF_INET6;
411 sa->sin6_port = htons(port);
412 sa->sin6_addr = acl->addr.addr6;
413 return sizeof(struct sockaddr_in6);
414 #else
415 log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
416 INET6.", fromto, acl->ip_address_spec);
417 return 0;
418 #endif
419 } else {
420 struct sockaddr_in* sa = (struct sockaddr_in*)sck;
421 sa->sin_family = AF_INET;
422 sa->sin_port = htons(port);
423 sa->sin_addr = acl->addr.addr;
424 return sizeof(struct sockaddr_in);
425 }
426 }
427
428 socklen_t
429 #ifdef INET6
xfrd_acl_sockaddr_to(acl_options_type * acl,struct sockaddr_storage * to)430 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
431 #else
432 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
433 #endif /* INET6 */
434 {
435 #ifdef HAVE_TLS_1_3
436 unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
437 (unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
438 #else
439 unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
440 #endif
441 #ifdef INET6
442 return xfrd_acl_sockaddr(acl, port, to);
443 #else
444 return xfrd_acl_sockaddr(acl, port, to, "to");
445 #endif /* INET6 */
446 }
447
448 socklen_t
449 #ifdef INET6
xfrd_acl_sockaddr_frm(acl_options_type * acl,struct sockaddr_storage * frm)450 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
451 #else
452 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
453 #endif /* INET6 */
454 {
455 unsigned int port = acl->port?acl->port:0;
456 #ifdef INET6
457 return xfrd_acl_sockaddr(acl, port, frm);
458 #else
459 return xfrd_acl_sockaddr(acl, port, frm, "from");
460 #endif /* INET6 */
461 }
462
463 void
xfrd_write_soa_buffer(struct buffer * packet,const dname_type * apex,struct xfrd_soa * soa)464 xfrd_write_soa_buffer(struct buffer* packet,
465 const dname_type* apex, struct xfrd_soa* soa)
466 {
467 size_t rdlength_pos;
468 uint16_t rdlength;
469 buffer_write(packet, dname_name(apex), apex->name_size);
470
471 /* already in network order */
472 buffer_write(packet, &soa->type, sizeof(soa->type));
473 buffer_write(packet, &soa->klass, sizeof(soa->klass));
474 buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
475 rdlength_pos = buffer_position(packet);
476 buffer_skip(packet, sizeof(rdlength));
477
478 /* uncompressed dnames */
479 buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
480 buffer_write(packet, soa->email+1, soa->email[0]);
481
482 buffer_write(packet, &soa->serial, sizeof(uint32_t));
483 buffer_write(packet, &soa->refresh, sizeof(uint32_t));
484 buffer_write(packet, &soa->retry, sizeof(uint32_t));
485 buffer_write(packet, &soa->expire, sizeof(uint32_t));
486 buffer_write(packet, &soa->minimum, sizeof(uint32_t));
487
488 /* write length of RR */
489 rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
490 buffer_write_u16_at(packet, rdlength_pos, rdlength);
491 }
492
493 struct xfrd_tcp*
xfrd_tcp_create(region_type * region,size_t bufsize)494 xfrd_tcp_create(region_type* region, size_t bufsize)
495 {
496 struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
497 region, sizeof(struct xfrd_tcp));
498 memset(tcp_state, 0, sizeof(struct xfrd_tcp));
499 tcp_state->packet = buffer_create(region, bufsize);
500 tcp_state->fd = -1;
501
502 return tcp_state;
503 }
504
505 static struct xfrd_tcp_pipeline*
pipeline_find(struct xfrd_tcp_set * set,xfrd_zone_type * zone)506 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
507 {
508 rbnode_type* sme = NULL;
509 struct xfrd_tcp_pipeline* r;
510 /* smaller buf than a full pipeline with 64kb ID array, only need
511 * the front part with the key info, this front part contains the
512 * members that the compare function uses. */
513 struct xfrd_tcp_pipeline_key k, *key=&k;
514 key->node.key = key;
515 key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
516 key->num_unused = set->tcp_pipeline;
517 /* lookup existing tcp transfer to the master with highest unused */
518 if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
519 /* exact match, strange, fully unused tcp cannot be open */
520 assert(0);
521 }
522 if(!sme)
523 return NULL;
524 r = (struct xfrd_tcp_pipeline*)sme->key;
525 /* <= key pointed at, is the master correct ? */
526 if(r->key.ip_len != key->ip_len)
527 return NULL;
528 if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
529 return NULL;
530 /* correct master, is there a slot free for this transfer? */
531 if(r->key.num_unused == 0)
532 return NULL;
533 return r;
534 }
535
536 /* remove zone from tcp waiting list */
537 static void
tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set * set,xfrd_zone_type * zone)538 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
539 {
540 assert(zone->tcp_waiting);
541 set->tcp_waiting_first = zone->tcp_waiting_next;
542 if(zone->tcp_waiting_next)
543 zone->tcp_waiting_next->tcp_waiting_prev = NULL;
544 else set->tcp_waiting_last = 0;
545 zone->tcp_waiting_next = 0;
546 zone->tcp_waiting = 0;
547 }
548
549 /* remove zone from tcp pipe write-wait list */
550 static void
tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)551 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
552 {
553 if(zone->in_tcp_send) {
554 if(zone->tcp_send_prev)
555 zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
556 else tp->tcp_send_first=zone->tcp_send_next;
557 if(zone->tcp_send_next)
558 zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
559 else tp->tcp_send_last=zone->tcp_send_prev;
560 zone->in_tcp_send = 0;
561 }
562 }
563
564 /* remove first from write-wait list */
565 static void
tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)566 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
567 {
568 tp->tcp_send_first = zone->tcp_send_next;
569 if(tp->tcp_send_first)
570 tp->tcp_send_first->tcp_send_prev = NULL;
571 else tp->tcp_send_last = NULL;
572 zone->in_tcp_send = 0;
573 }
574
575 /* remove zone from tcp pipe ID map */
576 static void
tcp_pipe_id_remove(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone,int alsotree)577 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
578 int alsotree)
579 {
580 assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
581 if(alsotree)
582 xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
583 tp->unused[tp->key.num_unused] = zone->query_id;
584 /* must remove and re-add for sort order in tree */
585 (void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
586 tp->key.num_unused++;
587 (void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
588 }
589
590 /* stop the tcp pipe (and all its zones need to retry) */
591 static void
xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline * tp)592 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
593 {
594 struct xfrd_tcp_pipeline_id* zid;
595 int conn = -1;
596 assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
597 assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
598 /* need to retry for all the zones connected to it */
599 /* these could use different lists and go to a different nextmaster*/
600 RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
601 xfrd_zone_type* zone = zid->zone;
602 if(zone && zone != TCP_NULL_SKIP) {
603 assert(zone->query_id == zid->id);
604 conn = zone->tcp_conn;
605 zone->tcp_conn = -1;
606 zone->tcp_waiting = 0;
607 tcp_pipe_sendlist_remove(tp, zone);
608 tcp_pipe_id_remove(tp, zone, 0);
609 xfrd_set_refresh_now(zone);
610 }
611 }
612 xfrd_tcp_pipeline_cleanup(tp);
613 assert(conn != -1);
614 /* now release the entire tcp pipe */
615 xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
616 }
617
618 static void
tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline * tp)619 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
620 {
621 int fd = tp->handler.ev_fd;
622 struct timeval tv;
623 tv.tv_sec = xfrd->tcp_set->tcp_timeout;
624 tv.tv_usec = 0;
625 if(tp->handler_added)
626 event_del(&tp->handler);
627 memset(&tp->handler, 0, sizeof(tp->handler));
628 event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
629 #ifdef HAVE_TLS_1_3
630 ( tp->ssl
631 ? ( tp->handshake_done ? ( tp->tcp_send_first ? EV_WRITE : 0 )
632 : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
633 : tp->tcp_send_first ? EV_WRITE : 0 ),
634 #else
635 ( tp->tcp_send_first ? EV_WRITE : 0 ),
636 #endif
637 xfrd_handle_tcp_pipe, tp);
638 if(event_base_set(xfrd->event_base, &tp->handler) != 0)
639 log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
640 if(event_add(&tp->handler, &tv) != 0)
641 log_msg(LOG_ERR, "xfrd tcp: event_add failed");
642 tp->handler_added = 1;
643 }
644
645 /* handle event from fd of tcp pipe */
646 void
xfrd_handle_tcp_pipe(int ATTR_UNUSED (fd),short event,void * arg)647 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
648 {
649 struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
650 if((event & EV_WRITE)) {
651 tcp_pipe_reset_timeout(tp);
652 if(tp->tcp_send_first) {
653 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
654 tp->tcp_send_first->apex_str));
655 xfrd_tcp_write(tp, tp->tcp_send_first);
656 }
657 }
658 if((event & EV_READ) && tp->handler_added) {
659 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
660 tcp_pipe_reset_timeout(tp);
661 xfrd_tcp_read(tp);
662 }
663 if((event & EV_TIMEOUT) && tp->handler_added) {
664 /* tcp connection timed out */
665 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
666 xfrd_tcp_pipe_stop(tp);
667 }
668 }
669
670 /* add a zone to the pipeline, it starts to want to write its query */
671 static void
pipeline_setup_new_zone(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)672 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
673 xfrd_zone_type* zone)
674 {
675 /* assign the ID */
676 int idx;
677 assert(tp->key.num_unused > 0);
678 /* we pick a random ID, even though it is TCP anyway */
679 idx = random_generate(tp->key.num_unused);
680 zone->query_id = tp->unused[idx];
681 tp->unused[idx] = tp->unused[tp->key.num_unused-1];
682 xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
683 /* decrement unused counter, and fixup tree */
684 (void)rbtree_delete(set->pipetree, &tp->key.node);
685 tp->key.num_unused--;
686 (void)rbtree_insert(set->pipetree, &tp->key.node);
687
688 /* add to sendlist, at end */
689 zone->tcp_send_next = NULL;
690 zone->tcp_send_prev = tp->tcp_send_last;
691 zone->in_tcp_send = 1;
692 if(tp->tcp_send_last)
693 tp->tcp_send_last->tcp_send_next = zone;
694 else tp->tcp_send_first = zone;
695 tp->tcp_send_last = zone;
696
697 /* is it first in line? */
698 if(tp->tcp_send_first == zone) {
699 xfrd_tcp_setup_write_packet(tp, zone);
700 /* add write to event handler */
701 tcp_pipe_reset_timeout(tp);
702 }
703 }
704
705 void
xfrd_tcp_obtain(struct xfrd_tcp_set * set,xfrd_zone_type * zone)706 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
707 {
708 struct xfrd_tcp_pipeline* tp;
709 assert(zone->tcp_conn == -1);
710 assert(zone->tcp_waiting == 0);
711
712 if(set->tcp_count < set->tcp_max) {
713 int i;
714 assert(!set->tcp_waiting_first);
715 set->tcp_count ++;
716 /* find a free tcp_buffer */
717 for(i=0; i<set->tcp_max; i++) {
718 if(set->tcp_state[i]->tcp_r->fd == -1) {
719 zone->tcp_conn = i;
720 break;
721 }
722 }
723 /** What if there is no free tcp_buffer? return; */
724 if (zone->tcp_conn < 0) {
725 return;
726 }
727
728 tp = set->tcp_state[zone->tcp_conn];
729 zone->tcp_waiting = 0;
730
731 /* stop udp use (if any) */
732 if(zone->zone_handler.ev_fd != -1)
733 xfrd_udp_release(zone);
734
735 if(!xfrd_tcp_open(set, tp, zone)) {
736 zone->tcp_conn = -1;
737 set->tcp_count --;
738 xfrd_set_refresh_now(zone);
739 return;
740 }
741 /* ip and ip_len set by tcp_open */
742 xfrd_tcp_pipeline_init(tp);
743
744 /* insert into tree */
745 (void)rbtree_insert(set->pipetree, &tp->key.node);
746 xfrd_deactivate_zone(zone);
747 xfrd_unset_timer(zone);
748 pipeline_setup_new_zone(set, tp, zone);
749 return;
750 }
751 /* check for a pipeline to the same master with unused ID */
752 if((tp = pipeline_find(set, zone))!= NULL) {
753 int i;
754 if(zone->zone_handler.ev_fd != -1)
755 xfrd_udp_release(zone);
756 for(i=0; i<set->tcp_max; i++) {
757 if(set->tcp_state[i] == tp)
758 zone->tcp_conn = i;
759 }
760 xfrd_deactivate_zone(zone);
761 xfrd_unset_timer(zone);
762 pipeline_setup_new_zone(set, tp, zone);
763 return;
764 }
765
766 /* wait, at end of line */
767 DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
768 "connections (%d) reached.", set->tcp_max));
769 zone->tcp_waiting_next = 0;
770 zone->tcp_waiting_prev = set->tcp_waiting_last;
771 zone->tcp_waiting = 1;
772 if(!set->tcp_waiting_last) {
773 set->tcp_waiting_first = zone;
774 set->tcp_waiting_last = zone;
775 } else {
776 set->tcp_waiting_last->tcp_waiting_next = zone;
777 set->tcp_waiting_last = zone;
778 }
779 xfrd_deactivate_zone(zone);
780 xfrd_unset_timer(zone);
781 }
782
783 int
xfrd_tcp_open(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)784 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
785 xfrd_zone_type* zone)
786 {
787 int fd, family, conn;
788 struct timeval tv;
789 assert(zone->tcp_conn != -1);
790
791 /* if there is no next master, fallback to use the first one */
792 /* but there really should be a master set */
793 if(!zone->master) {
794 zone->master = zone->zone_options->pattern->request_xfr;
795 zone->master_num = 0;
796 }
797
798 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
799 zone->apex_str, zone->master->ip_address_spec));
800 tp->tcp_r->is_reading = 1;
801 tp->tcp_r->total_bytes = 0;
802 tp->tcp_r->msglen = 0;
803 buffer_clear(tp->tcp_r->packet);
804 tp->tcp_w->is_reading = 0;
805 tp->tcp_w->total_bytes = 0;
806 tp->tcp_w->msglen = 0;
807 tp->connection_established = 0;
808
809 if(zone->master->is_ipv6) {
810 #ifdef INET6
811 family = PF_INET6;
812 #else
813 xfrd_set_refresh_now(zone);
814 return 0;
815 #endif
816 } else {
817 family = PF_INET;
818 }
819 fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
820 if(fd == -1) {
821 /* squelch 'Address family not supported by protocol' at low
822 * verbosity levels */
823 if(errno != EAFNOSUPPORT || verbosity > 2)
824 log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
825 zone->master->ip_address_spec, strerror(errno));
826 xfrd_set_refresh_now(zone);
827 return 0;
828 }
829 if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
830 log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
831 close(fd);
832 xfrd_set_refresh_now(zone);
833 return 0;
834 }
835
836 if(xfrd->nsd->outgoing_tcp_mss > 0) {
837 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
838 if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
839 (void*)&xfrd->nsd->outgoing_tcp_mss,
840 sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
841 log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
842 "failed: %s", strerror(errno));
843 }
844 #else
845 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
846 #endif
847 }
848
849 tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
850
851 /* bind it */
852 if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
853 outgoing_interface, zone->master, 1)) {
854 close(fd);
855 xfrd_set_refresh_now(zone);
856 return 0;
857 }
858
859 conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
860 if (conn == -1 && errno != EINPROGRESS) {
861 log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
862 zone->master->ip_address_spec, strerror(errno));
863 close(fd);
864 xfrd_set_refresh_now(zone);
865 return 0;
866 }
867 tp->tcp_r->fd = fd;
868 tp->tcp_w->fd = fd;
869
870 /* Check if an tls_auth name is configured which means we should try to
871 establish an SSL connection */
872 if (zone->master->tls_auth_options &&
873 zone->master->tls_auth_options->auth_domain_name) {
874 #ifdef HAVE_TLS_1_3
875 if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
876 log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
877 zone->apex_str, zone->master->ip_address_spec);
878 close(fd);
879 xfrd_set_refresh_now(zone);
880 return 0;
881 }
882
883 /* Load client certificate (if provided) */
884 if (zone->master->tls_auth_options->client_cert &&
885 zone->master->tls_auth_options->client_key) {
886 if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
887 zone->master->tls_auth_options->client_cert) != 1) {
888 log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
889 }
890
891 if (zone->master->tls_auth_options->client_key_pw) {
892 SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
893 SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
894 }
895
896 if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
897 log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
898 }
899 }
900
901 tp->handshake_done = 0;
902 if(!ssl_handshake(tp)) {
903 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
904 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
905 "for %s to %s: %s", zone->apex_str,
906 zone->master->ip_address_spec,
907 strerror(errno));
908
909 } else if(tp->handshake_want == SSL_ERROR_SSL) {
910 char errmsg[1024];
911 snprintf(errmsg, sizeof(errmsg), "xfrd: "
912 "TLS handshake failed for %s to %s",
913 zone->apex_str,
914 zone->master->ip_address_spec);
915 log_crypto_err(errmsg);
916 } else {
917 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
918 "for %s to %s with %d", zone->apex_str,
919 zone->master->ip_address_spec,
920 tp->handshake_want);
921 }
922 close(fd);
923 xfrd_set_refresh_now(zone);
924 return 0;
925 }
926 #else
927 log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
928 "not supported for %s to %s",
929 zone->apex_str, zone->master->ip_address_spec);
930 close(fd);
931 xfrd_set_refresh_now(zone);
932 return 0;
933 #endif
934 }
935
936 /* set the tcp pipe event */
937 if(tp->handler_added)
938 event_del(&tp->handler);
939 memset(&tp->handler, 0, sizeof(tp->handler));
940 event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
941 #ifdef HAVE_TLS_1_3
942 ( !tp->ssl
943 || tp->handshake_done
944 || tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
945 #else
946 EV_WRITE,
947 #endif
948 xfrd_handle_tcp_pipe, tp);
949 if(event_base_set(xfrd->event_base, &tp->handler) != 0)
950 log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
951 tv.tv_sec = set->tcp_timeout;
952 tv.tv_usec = 0;
953 if(event_add(&tp->handler, &tv) != 0)
954 log_msg(LOG_ERR, "xfrd tcp: event_add failed");
955 tp->handler_added = 1;
956 return 1;
957 }
958
959 void
xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)960 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
961 {
962 struct xfrd_tcp* tcp = tp->tcp_w;
963 assert(zone->tcp_conn != -1);
964 assert(zone->tcp_waiting == 0);
965 /* start AXFR or IXFR for the zone */
966 if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
967 zone->master->ixfr_disabled ||
968 /* if zone expired, after the first round, do not ask for
969 * IXFR any more, but full AXFR (of any serial number) */
970 (zone->state == xfrd_zone_expired && zone->round_num != 0)) {
971 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
972 "(AXFR) for %s to %s",
973 zone->apex_str, zone->master->ip_address_spec));
974
975 xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
976 zone->query_id);
977 xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
978 } else {
979 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
980 "transfer (IXFR) for %s to %s",
981 zone->apex_str, zone->master->ip_address_spec));
982
983 xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
984 zone->query_id);
985 xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
986 NSCOUNT_SET(tcp->packet, 1);
987 xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
988 }
989 if(zone->master->key_options && zone->master->key_options->tsig_key) {
990 xfrd_tsig_sign_request(
991 tcp->packet, &zone->latest_xfr->tsig, zone->master);
992 }
993 buffer_flip(tcp->packet);
994 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
995 tcp->msglen = buffer_limit(tcp->packet);
996 tcp->total_bytes = 0;
997 }
998
999 static void
tcp_conn_ready_for_reading(struct xfrd_tcp * tcp)1000 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
1001 {
1002 tcp->total_bytes = 0;
1003 tcp->msglen = 0;
1004 buffer_clear(tcp->packet);
1005 }
1006
#ifdef HAVE_TLS_1_3
/*
 * Classify the outcome of an SSL_write() call.
 * Must be called directly after SSL_write(), with its return value.
 *
 * Returns 1 if bytes were written, 0 if the write has to be retried later
 * (TLS layer wants more I/O, or the socket would block), and -1 on failure.
 * Failures are logged here; errno is left as SSL_write() set it so the
 * caller can still report it.
 */
static int
conn_write_ssl_status(SSL* ssl, ssize_t sent)
{
	int saved_errno = errno;
	int err;

	if(sent > 0)
		return 1;

	err = SSL_get_error(ssl, (int)sent);
	switch(err) {
	case SSL_ERROR_WANT_READ:
	case SSL_ERROR_WANT_WRITE:
		/* not an error on a nonblocking socket, try later */
		return 0;
	case SSL_ERROR_SYSCALL:
		if(saved_errno == EAGAIN || saved_errno == EINTR) {
			/* write would block, try later */
			return 0;
		}
		break;
	case SSL_ERROR_SSL:
		/* protocol failure, details are in the OpenSSL error queue */
		log_crypto_err("xfrd: tls write failed");
		errno = saved_errno;
		return -1;
	default:
		/* includes SSL_ERROR_ZERO_RETURN: peer closed the session */
		break;
	}
	log_msg(LOG_ERR, "xfrd: generic write problem with tls");
	errno = saved_errno;
	return -1;
}

/*
 * Write the pending message of a tcp connection over TLS.
 *
 * The message is the two byte length prefix (tcp->msglen in network order)
 * followed by the contents of tcp->packet. tcp->total_bytes counts how much
 * of prefix plus payload has been written, so the call can be repeated
 * until the message is complete.
 *
 * Returns 1 when the whole message has been written, 0 when the write has
 * to be continued later, and -1 on error.
 */
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	int request_length;
	int status;
	ssize_t sent;

	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		/* (rest of) the length prefix */
		/* NOTE(review): sendlen lives on the stack; OpenSSL expects
		 * a retried SSL_write to get the same buffer unless
		 * SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER is set - confirm the
		 * mode configured on the SSL context. */
		uint16_t sendlen = htons(tcp->msglen);
		request_length = (int)(sizeof(tcp->msglen) - tcp->total_bytes);
		ERR_clear_error();
		sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
			request_length);
		status = conn_write_ssl_status(ssl, sent);
		if(status != 1)
			return status;

		/* at most the prefix was written here, so the packet
		 * buffer position does not move */
		tcp->total_bytes += sent;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* incomplete write, resume later */
			return 0;
		}
		assert(tcp->total_bytes >= sizeof(tcp->msglen));
	}

	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));

	/* (rest of) the payload */
	request_length = buffer_remaining(tcp->packet);
	ERR_clear_error();
	sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
	status = conn_write_ssl_status(ssl, sent);
	if(status != 1)
		return status;

	buffer_skip(tcp->packet, sent);
	tcp->total_bytes += sent;

	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
		/* more to write when socket becomes writable again */
		return 0;
	}

	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
	return 1;
}
#endif
1079
conn_write(struct xfrd_tcp * tcp)1080 int conn_write(struct xfrd_tcp* tcp)
1081 {
1082 ssize_t sent;
1083
1084 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1085 uint16_t sendlen = htons(tcp->msglen);
1086 #ifdef HAVE_WRITEV
1087 struct iovec iov[2];
1088 iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
1089 iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
1090 iov[1].iov_base = buffer_begin(tcp->packet);
1091 iov[1].iov_len = buffer_limit(tcp->packet);
1092 sent = writev(tcp->fd, iov, 2);
1093 #else /* HAVE_WRITEV */
1094 sent = write(tcp->fd,
1095 (const char*)&sendlen + tcp->total_bytes,
1096 sizeof(tcp->msglen) - tcp->total_bytes);
1097 #endif /* HAVE_WRITEV */
1098
1099 if(sent == -1) {
1100 if(errno == EAGAIN || errno == EINTR) {
1101 /* write would block, try later */
1102 return 0;
1103 } else {
1104 return -1;
1105 }
1106 }
1107
1108 tcp->total_bytes += sent;
1109 if(sent > (ssize_t)sizeof(tcp->msglen))
1110 buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
1111 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1112 /* incomplete write, resume later */
1113 return 0;
1114 }
1115 #ifdef HAVE_WRITEV
1116 if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
1117 /* packet done */
1118 return 1;
1119 }
1120 #endif
1121 assert(tcp->total_bytes >= sizeof(tcp->msglen));
1122 }
1123
1124 assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
1125
1126 sent = write(tcp->fd,
1127 buffer_current(tcp->packet),
1128 buffer_remaining(tcp->packet));
1129 if(sent == -1) {
1130 if(errno == EAGAIN || errno == EINTR) {
1131 /* write would block, try later */
1132 return 0;
1133 } else {
1134 return -1;
1135 }
1136 }
1137
1138 buffer_skip(tcp->packet, sent);
1139 tcp->total_bytes += sent;
1140
1141 if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
1142 /* more to write when socket becomes writable again */
1143 return 0;
1144 }
1145
1146 assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
1147 return 1;
1148 }
1149
1150 void
xfrd_tcp_write(struct xfrd_tcp_pipeline * tp,xfrd_zone_type * zone)1151 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
1152 {
1153 int ret;
1154 struct xfrd_tcp* tcp = tp->tcp_w;
1155 assert(zone->tcp_conn != -1);
1156 assert(zone == tp->tcp_send_first);
1157 /* see if for non-established connection, there is a connect error */
1158 if(!tp->connection_established) {
1159 /* check for pending error from nonblocking connect */
1160 /* from Stevens, unix network programming, vol1, 3rd ed, p450 */
1161 int error = 0;
1162 socklen_t len = sizeof(error);
1163 if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
1164 error = errno; /* on solaris errno is error */
1165 }
1166 if(error == EINPROGRESS || error == EWOULDBLOCK)
1167 return; /* try again later */
1168 if(error != 0) {
1169 log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
1170 zone->apex_str, zone->master->ip_address_spec,
1171 strerror(error));
1172 xfrd_tcp_pipe_stop(tp);
1173 return;
1174 }
1175 }
1176 #ifdef HAVE_TLS_1_3
1177 if (tp->ssl) {
1178 if(tp->handshake_done) {
1179 ret = conn_write_ssl(tcp, tp->ssl);
1180
1181 } else if(ssl_handshake(tp)) {
1182 tcp_pipe_reset_timeout(tp); /* reschedule */
1183 return;
1184
1185 } else {
1186 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1187 log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1188 strerror(errno));
1189
1190 } else if(tp->handshake_want == SSL_ERROR_SSL) {
1191 log_crypto_err("xfrd: TLS handshake failed");
1192 } else {
1193 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1194 "with value: %d", tp->handshake_want);
1195 }
1196 xfrd_tcp_pipe_stop(tp);
1197 return;
1198 }
1199 } else
1200 #endif
1201 ret = conn_write(tcp);
1202 if(ret == -1) {
1203 log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1204 xfrd_tcp_pipe_stop(tp);
1205 return;
1206 }
1207 if(tcp->total_bytes != 0 && !tp->connection_established)
1208 tp->connection_established = 1;
1209 if(ret == 0) {
1210 return; /* write again later */
1211 }
1212 /* done writing this message */
1213
1214 /* remove first zone from sendlist */
1215 tcp_pipe_sendlist_popfirst(tp, zone);
1216
1217 /* see if other zone wants to write; init; let it write (now) */
1218 /* and use a loop, because 64k stack calls is a too much */
1219 while(tp->tcp_send_first) {
1220 /* setup to write for this zone */
1221 xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
1222 /* attempt to write for this zone (if success, continue loop)*/
1223 #ifdef HAVE_TLS_1_3
1224 if (tp->ssl)
1225 ret = conn_write_ssl(tcp, tp->ssl);
1226 else
1227 #endif
1228 ret = conn_write(tcp);
1229 if(ret == -1) {
1230 log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
1231 xfrd_tcp_pipe_stop(tp);
1232 return;
1233 }
1234 if(ret == 0)
1235 return; /* write again later */
1236 tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
1237 }
1238
1239 /* if sendlist empty, remove WRITE from event */
1240
1241 /* listen to READ, and not WRITE events */
1242 assert(tp->tcp_send_first == NULL);
1243 tcp_pipe_reset_timeout(tp);
1244 }
1245
#ifdef HAVE_TLS_1_3
/*
 * Classify the outcome of an SSL_read() call.
 * Must be called directly after SSL_read(), with its return value.
 *
 * errprefix is put in front of the system error text when a system call
 * failed underneath the TLS layer.
 *
 * Returns 1 if bytes were received, 0 if the read has to be retried later
 * (TLS layer wants more I/O, or the socket would block), and -1 on EOF or
 * failure. errno is left as SSL_read() set it so the caller can still
 * report it.
 */
static int
conn_read_ssl_status(SSL* ssl, ssize_t received, const char* errprefix)
{
	int saved_errno = errno;
	int err;

	if(received > 0)
		return 1;

	err = SSL_get_error(ssl, (int)received);
	switch(err) {
	case SSL_ERROR_WANT_READ:
	case SSL_ERROR_WANT_WRITE:
		/* not an error on a nonblocking socket, try later */
		return 0;
	case SSL_ERROR_ZERO_RETURN:
		/* EOF: the peer closed the TLS session; report it like
		 * conn_read() reports EOF on a plain tcp connection */
		return -1;
	case SSL_ERROR_SYSCALL:
		if(saved_errno == EAGAIN || saved_errno == EINTR) {
			/* read would block, try later */
			return 0;
		}
		if(saved_errno != 0) {
#ifdef ECONNRESET
			if (verbosity >= 2 || saved_errno != ECONNRESET)
#endif /* ECONNRESET */
				log_msg(LOG_ERR, "%s %s", errprefix,
					strerror(saved_errno));
			break;
		}
		/* no system error to report, log the TLS result instead */
		/* fallthrough */
	default:
		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
		break;
	}
	errno = saved_errno;
	return -1;
}

/*
 * Read (part of) a length prefixed message from a TLS connection.
 *
 * First the two byte length prefix is read into tcp->msglen, then the
 * message itself into tcp->packet. tcp->total_bytes tracks the progress,
 * so the call can be repeated until the message is complete.
 *
 * Returns 1 when a complete message is in tcp->packet (position at the
 * end of the message), 0 when more data has to be awaited, and -1 on EOF
 * or error.
 */
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t received;
	int status;

	/* receive leading packet length bytes */
	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		ERR_clear_error();
		received = SSL_read(ssl,
			(char*) &tcp->msglen + tcp->total_bytes,
			sizeof(tcp->msglen) - tcp->total_bytes);
		status = conn_read_ssl_status(ssl, received, "tls read sz:");
		if(status != 1)
			return status;

		tcp->total_bytes += received;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* not complete yet, try later */
			return 0;
		}

		assert(tcp->total_bytes == sizeof(tcp->msglen));
		tcp->msglen = ntohs(tcp->msglen);

		if(tcp->msglen == 0) {
			/* empty message, complete right away */
			buffer_set_limit(tcp->packet, tcp->msglen);
			return 1;
		}
		if(tcp->msglen > buffer_capacity(tcp->packet)) {
			/* the message cannot be stored; fail so that the
			 * caller really drops the connection */
			log_msg(LOG_ERR, "buffer too small, dropping connection");
			return -1;
		}
		buffer_set_limit(tcp->packet, tcp->msglen);
	}

	assert(buffer_remaining(tcp->packet) > 0);

	/* receive (more of) the message itself */
	ERR_clear_error();
	received = SSL_read(ssl, buffer_current(tcp->packet),
		buffer_remaining(tcp->packet));
	status = conn_read_ssl_status(ssl, received, "tcp read");
	if(status != 1)
		return status;

	tcp->total_bytes += received;
	buffer_skip(tcp->packet, received);

	if(buffer_remaining(tcp->packet) > 0) {
		/* not complete yet, wait for more */
		return 0;
	}

	/* completed */
	assert(buffer_position(tcp->packet) == tcp->msglen);
	return 1;
}
#endif
1346
1347 int
conn_read(struct xfrd_tcp * tcp)1348 conn_read(struct xfrd_tcp* tcp)
1349 {
1350 ssize_t received;
1351 /* receive leading packet length bytes */
1352 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1353 received = read(tcp->fd,
1354 (char*) &tcp->msglen + tcp->total_bytes,
1355 sizeof(tcp->msglen) - tcp->total_bytes);
1356 if(received == -1) {
1357 if(errno == EAGAIN || errno == EINTR) {
1358 /* read would block, try later */
1359 return 0;
1360 } else {
1361 #ifdef ECONNRESET
1362 if (verbosity >= 2 || errno != ECONNRESET)
1363 #endif /* ECONNRESET */
1364 log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
1365 return -1;
1366 }
1367 } else if(received == 0) {
1368 /* EOF */
1369 return -1;
1370 }
1371 tcp->total_bytes += received;
1372 if(tcp->total_bytes < sizeof(tcp->msglen)) {
1373 /* not complete yet, try later */
1374 return 0;
1375 }
1376
1377 assert(tcp->total_bytes == sizeof(tcp->msglen));
1378 tcp->msglen = ntohs(tcp->msglen);
1379
1380 if(tcp->msglen == 0) {
1381 buffer_set_limit(tcp->packet, tcp->msglen);
1382 return 1;
1383 }
1384 if(tcp->msglen > buffer_capacity(tcp->packet)) {
1385 log_msg(LOG_ERR, "buffer too small, dropping connection");
1386 return 0;
1387 }
1388 buffer_set_limit(tcp->packet, tcp->msglen);
1389 }
1390
1391 assert(buffer_remaining(tcp->packet) > 0);
1392
1393 received = read(tcp->fd, buffer_current(tcp->packet),
1394 buffer_remaining(tcp->packet));
1395 if(received == -1) {
1396 if(errno == EAGAIN || errno == EINTR) {
1397 /* read would block, try later */
1398 return 0;
1399 } else {
1400 #ifdef ECONNRESET
1401 if (verbosity >= 2 || errno != ECONNRESET)
1402 #endif /* ECONNRESET */
1403 log_msg(LOG_ERR, "tcp read %s", strerror(errno));
1404 return -1;
1405 }
1406 } else if(received == 0) {
1407 /* EOF */
1408 return -1;
1409 }
1410
1411 tcp->total_bytes += received;
1412 buffer_skip(tcp->packet, received);
1413
1414 if(buffer_remaining(tcp->packet) > 0) {
1415 /* not complete yet, wait for more */
1416 return 0;
1417 }
1418
1419 /* completed */
1420 assert(buffer_position(tcp->packet) == tcp->msglen);
1421 return 1;
1422 }
1423
1424 void
xfrd_tcp_read(struct xfrd_tcp_pipeline * tp)1425 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
1426 {
1427 xfrd_zone_type* zone;
1428 struct xfrd_tcp* tcp = tp->tcp_r;
1429 int ret;
1430 enum xfrd_packet_result pkt_result;
1431 #ifdef HAVE_TLS_1_3
1432 if(tp->ssl) {
1433 if(tp->handshake_done) {
1434 ret = conn_read_ssl(tcp, tp->ssl);
1435
1436 } else if(ssl_handshake(tp)) {
1437 tcp_pipe_reset_timeout(tp); /* reschedule */
1438 return;
1439
1440 } else {
1441 if(tp->handshake_want == SSL_ERROR_SYSCALL) {
1442 log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
1443 strerror(errno));
1444
1445 } else if(tp->handshake_want == SSL_ERROR_SSL) {
1446 log_crypto_err("xfrd: TLS handshake failed");
1447 } else {
1448 log_msg(LOG_ERR, "xfrd: TLS handshake failed "
1449 "with value: %d", tp->handshake_want);
1450 }
1451 xfrd_tcp_pipe_stop(tp);
1452 return;
1453 }
1454 } else
1455 #endif
1456 ret = conn_read(tcp);
1457 if(ret == -1) {
1458 log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
1459 xfrd_tcp_pipe_stop(tp);
1460 return;
1461 }
1462 if(ret == 0)
1463 return;
1464 /* completed msg */
1465 buffer_flip(tcp->packet);
1466 /* see which ID number it is, if skip, handle skip, NULL: warn */
1467 if(tcp->msglen < QHEADERSZ) {
1468 /* too short for DNS header, skip it */
1469 DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1470 "xfrd: tcp skip response that is too short"));
1471 tcp_conn_ready_for_reading(tcp);
1472 return;
1473 }
1474 zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
1475 if(!zone || zone == TCP_NULL_SKIP) {
1476 /* no zone for this id? skip it */
1477 DEBUG(DEBUG_XFRD,1, (LOG_INFO,
1478 "xfrd: tcp skip response with %s ID",
1479 zone?"set-to-skip":"unknown"));
1480 tcp_conn_ready_for_reading(tcp);
1481 return;
1482 }
1483 assert(zone->tcp_conn != -1);
1484
1485 /* handle message for zone */
1486 pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
1487 /* setup for reading the next packet on this connection */
1488 tcp_conn_ready_for_reading(tcp);
1489 switch(pkt_result) {
1490 case xfrd_packet_more:
1491 /* wait for next packet */
1492 break;
1493 case xfrd_packet_newlease:
1494 /* set to skip if more packets with this ID */
1495 xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1496 tp->key.num_skip++;
1497 /* fall through to remove zone from tp */
1498 /* fallthrough */
1499 case xfrd_packet_transfer:
1500 if(zone->zone_options->pattern->multi_master_check) {
1501 xfrd_tcp_release(xfrd->tcp_set, zone);
1502 xfrd_make_request(zone);
1503 break;
1504 }
1505 xfrd_tcp_release(xfrd->tcp_set, zone);
1506 assert(zone->round_num == -1);
1507 break;
1508 case xfrd_packet_notimpl:
1509 xfrd_disable_ixfr(zone);
1510 xfrd_tcp_release(xfrd->tcp_set, zone);
1511 /* query next server */
1512 xfrd_make_request(zone);
1513 break;
1514 case xfrd_packet_bad:
1515 case xfrd_packet_tcp:
1516 default:
1517 /* set to skip if more packets with this ID */
1518 xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
1519 tp->key.num_skip++;
1520 xfrd_tcp_release(xfrd->tcp_set, zone);
1521 /* query next server */
1522 xfrd_make_request(zone);
1523 break;
1524 }
1525 }
1526
1527 void
xfrd_tcp_release(struct xfrd_tcp_set * set,xfrd_zone_type * zone)1528 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
1529 {
1530 int conn = zone->tcp_conn;
1531 struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
1532 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
1533 zone->apex_str, zone->master->ip_address_spec));
1534 assert(zone->tcp_conn != -1);
1535 assert(zone->tcp_waiting == 0);
1536 zone->tcp_conn = -1;
1537 zone->tcp_waiting = 0;
1538
1539 /* remove from tcp_send list */
1540 tcp_pipe_sendlist_remove(tp, zone);
1541 /* remove it from the ID list */
1542 if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
1543 tcp_pipe_id_remove(tp, zone, 1);
1544 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
1545 tp->key.num_unused));
1546 /* if pipe was full, but no more, then see if waiting element is
1547 * for the same master, and can fill the unused ID */
1548 if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
1549 #ifdef INET6
1550 struct sockaddr_storage to;
1551 #else
1552 struct sockaddr_in to;
1553 #endif
1554 socklen_t to_len = xfrd_acl_sockaddr_to(
1555 set->tcp_waiting_first->master, &to);
1556 if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
1557 /* use this connection for the waiting zone */
1558 zone = set->tcp_waiting_first;
1559 assert(zone->tcp_conn == -1);
1560 zone->tcp_conn = conn;
1561 tcp_zone_waiting_list_popfirst(set, zone);
1562 if(zone->zone_handler.ev_fd != -1)
1563 xfrd_udp_release(zone);
1564 xfrd_unset_timer(zone);
1565 pipeline_setup_new_zone(set, tp, zone);
1566 return;
1567 }
1568 /* waiting zone did not go to same server */
1569 }
1570
1571 /* if all unused, or only skipped leftover, close the pipeline */
1572 if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
1573 xfrd_tcp_pipe_release(set, tp, conn);
1574 }
1575
1576 void
xfrd_tcp_pipe_release(struct xfrd_tcp_set * set,struct xfrd_tcp_pipeline * tp,int conn)1577 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
1578 int conn)
1579 {
1580 DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
1581 /* one handler per tcp pipe */
1582 if(tp->handler_added)
1583 event_del(&tp->handler);
1584 tp->handler_added = 0;
1585
1586 #ifdef HAVE_TLS_1_3
1587 /* close SSL */
1588 if (tp->ssl) {
1589 DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
1590 SSL_shutdown(tp->ssl);
1591 SSL_free(tp->ssl);
1592 tp->ssl = NULL;
1593 }
1594 #endif
1595
1596 /* fd in tcp_r and tcp_w is the same, close once */
1597 if(tp->tcp_r->fd != -1)
1598 close(tp->tcp_r->fd);
1599 tp->tcp_r->fd = -1;
1600 tp->tcp_w->fd = -1;
1601
1602 /* remove from pipetree */
1603 (void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
1604
1605 /* a waiting zone can use the free tcp slot (to another server) */
1606 /* if that zone fails to set-up or connect, we try to start the next
1607 * waiting zone in the list */
1608 while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
1609 /* pop first waiting process */
1610 xfrd_zone_type* zone = set->tcp_waiting_first;
1611 /* start it */
1612 assert(zone->tcp_conn == -1);
1613 zone->tcp_conn = conn;
1614 tcp_zone_waiting_list_popfirst(set, zone);
1615
1616 /* stop udp (if any) */
1617 if(zone->zone_handler.ev_fd != -1)
1618 xfrd_udp_release(zone);
1619 if(!xfrd_tcp_open(set, tp, zone)) {
1620 zone->tcp_conn = -1;
1621 xfrd_set_refresh_now(zone);
1622 /* try to start the next zone (if any) */
1623 continue;
1624 }
1625 /* re-init this tcppipe */
1626 /* ip and ip_len set by tcp_open */
1627 xfrd_tcp_pipeline_init(tp);
1628
1629 /* insert into tree */
1630 (void)rbtree_insert(set->pipetree, &tp->key.node);
1631 /* setup write */
1632 xfrd_unset_timer(zone);
1633 pipeline_setup_new_zone(set, tp, zone);
1634 /* started a task, no need for cleanups, so return */
1635 return;
1636 }
1637 /* no task to start, cleanup */
1638 assert(!set->tcp_waiting_first);
1639 set->tcp_count --;
1640 assert(set->tcp_count >= 0);
1641 }
1642
1643