/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "uv-compat.h"

static isc_result_t
udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
                isc_sockaddr_t *peer);

static void
udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
            const struct sockaddr *addr, unsigned flags);

static void
udp_send_cb(uv_udp_send_t *req, int status);

static void
udp_close_cb(uv_handle_t *handle);

static void
read_timer_close_cb(uv_handle_t *handle);

static void
write_timer_close_cb(uv_handle_t *handle);

static void
udp_close_direct(isc_nmsocket_t *sock);

static void
stop_udp_parent(isc_nmsocket_t *sock);
static void
stop_udp_child(isc_nmsocket_t *sock);

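/*
 * Create a load-balanced UDP socket for the given address family,
 * with PMTUD disabled and incoming-CPU affinity enabled where the
 * platform supports them, and address/port reuse set so that every
 * worker thread can bind to the same address.
 */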
static uv_os_sock_t
isc__nm_udp_lb_socket(sa_family_t sa_family) {
        isc_result_t result;
        uv_os_sock_t sock;

        result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &sock);
        RUNTIME_CHECK(result == ISC_R_SUCCESS);

        (void)isc__nm_socket_incoming_cpu(sock);
        (void)isc__nm_socket_disable_pmtud(sock, sa_family);

        result = isc__nm_socket_reuse(sock);
        RUNTIME_CHECK(result == ISC_R_SUCCESS);

#if HAVE_SO_REUSEPORT_LB
        result = isc__nm_socket_reuse_lb(sock);
        RUNTIME_CHECK(result == ISC_R_SUCCESS);
#endif

        return (sock);
}

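/*
 * Initialize a per-worker child socket and enqueue a 'udplisten'
 * event on worker thread 'tid'. With SO_REUSEPORT_LB (or on Windows)
 * each child gets its own load-balanced socket; otherwise all
 * children share dup()ed copies of a single file descriptor.
 */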
static void
start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
                uv_os_sock_t fd, int tid) {
        isc_nmsocket_t *csock;
        isc__netievent_udplisten_t *ievent = NULL;

        csock = &sock->children[tid];

        isc__nmsocket_init(csock, mgr, isc_nm_udpsocket, iface);
        csock->parent = sock;
        csock->iface = sock->iface;
        csock->reading = true;
        csock->recv_cb = sock->recv_cb;
        csock->recv_cbarg = sock->recv_cbarg;
        csock->extrahandlesize = sock->extrahandlesize;
        csock->tid = tid;

#if HAVE_SO_REUSEPORT_LB || defined(WIN32)
        UNUSED(fd);
        csock->fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family);
#else
        csock->fd = dup(fd);
#endif
        REQUIRE(csock->fd >= 0);

        ievent = isc__nm_get_netievent_udplisten(mgr, csock);
        isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
                                     (isc__netievent_t *)ievent);
}

static void
enqueue_stoplistening(isc_nmsocket_t *sock) {
        isc__netievent_udpstop_t *ievent =
                isc__nm_get_netievent_udpstop(sock->mgr, sock);
        isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
                               (isc__netievent_t *)ievent);
}

isc_result_t
isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb,
                 void *cbarg, size_t extrahandlesize, isc_nmsocket_t **sockp) {
        isc_result_t result = ISC_R_SUCCESS;
        isc_nmsocket_t *sock = NULL;
        size_t children_size = 0;
        REQUIRE(VALID_NM(mgr));
        uv_os_sock_t fd = -1;

        /*
         * We are creating mgr->nworkers duplicated sockets, one
         * socket for each worker thread.
         */
        sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t));
        isc__nmsocket_init(sock, mgr, isc_nm_udplistener, iface);

        atomic_init(&sock->rchildren, 0);
#if defined(WIN32)
        sock->nchildren = 1;
#else
        sock->nchildren = mgr->nworkers;
#endif

        children_size = sock->nchildren * sizeof(sock->children[0]);
        sock->children = isc_mem_get(mgr->mctx, children_size);
        memset(sock->children, 0, children_size);

        sock->recv_cb = cb;
        sock->recv_cbarg = cbarg;
        sock->extrahandlesize = extrahandlesize;
        sock->result = ISC_R_UNSET;

        sock->tid = 0;
        sock->fd = -1;

#if !HAVE_SO_REUSEPORT_LB && !defined(WIN32)
        fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family);
#endif

        isc_barrier_init(&sock->startlistening, sock->nchildren);

        for (size_t i = 0; i < sock->nchildren; i++) {
                if ((int)i == isc_nm_tid()) {
                        continue;
                }
                start_udp_child(mgr, iface, sock, fd, i);
        }

        if (isc__nm_in_netthread()) {
                start_udp_child(mgr, iface, sock, fd, isc_nm_tid());
        }

#if !HAVE_SO_REUSEPORT_LB && !defined(WIN32)
        isc__nm_closesocket(fd);
#endif

        LOCK(&sock->lock);
        while (atomic_load(&sock->rchildren) != sock->nchildren) {
                WAIT(&sock->cond, &sock->lock);
        }
        result = sock->result;
        atomic_store(&sock->active, true);
        UNLOCK(&sock->lock);

        INSIST(result != ISC_R_UNSET);

        if (result == ISC_R_SUCCESS) {
                REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
                *sockp = sock;
        } else {
                atomic_store(&sock->active, false);
                enqueue_stoplistening(sock);
                isc_nmsocket_close(&sock);
        }

        return (result);
}

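/*
 * A minimal caller sketch for isc_nm_listenudp(). The callback
 * signature follows isc_nm_recv_cb_t from <isc/netmgr.h>; 'netmgr',
 * 'addr', 'echo_read' and 'echo_sent' are illustrative caller-side
 * names, not part of this file:
 *
 *      static void
 *      echo_read(isc_nmhandle_t *handle, isc_result_t eresult,
 *                isc_region_t *region, void *cbarg) {
 *              if (eresult == ISC_R_SUCCESS) {
 *                      isc_nm_send(handle, region, echo_sent, cbarg);
 *              }
 *      }
 *      ...
 *      isc_nmsocket_t *listener = NULL;
 *      result = isc_nm_listenudp(netmgr, &addr, echo_read, NULL, 0,
 *                                &listener);
 */
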
/*
 * Asynchronous 'udplisten' call handler: start listening on a UDP socket.
 */
void
isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udplisten_t *ievent = (isc__netievent_udplisten_t *)ev0;
        isc_nmsocket_t *sock = NULL;
        int r, uv_bind_flags = 0;
        int uv_init_flags = 0;
        sa_family_t sa_family;
        isc_result_t result = ISC_R_UNSET;

        REQUIRE(VALID_NMSOCK(ievent->sock));
        REQUIRE(ievent->sock->tid == isc_nm_tid());
        REQUIRE(VALID_NMSOCK(ievent->sock->parent));

        sock = ievent->sock;
        sa_family = sock->iface.type.sa.sa_family;

        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(sock->parent != NULL);
        REQUIRE(sock->tid == isc_nm_tid());

#if HAVE_DECL_UV_UDP_RECVMMSG
        uv_init_flags |= UV_UDP_RECVMMSG;
#endif
        r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
        UV_RUNTIME_CHECK(uv_udp_init_ex, r);
        uv_handle_set_data(&sock->uv_handle.handle, sock);
        /* This keeps the socket alive after everything else is gone */
        isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

        r = uv_timer_init(&worker->loop, &sock->read_timer);
        UV_RUNTIME_CHECK(uv_timer_init, r);
        uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

        r = uv_timer_init(&worker->loop, &sock->write_timer);
        UV_RUNTIME_CHECK(uv_timer_init, r);
        uv_handle_set_data((uv_handle_t *)&sock->write_timer, sock);

        LOCK(&sock->parent->lock);

        r = uv_udp_open(&sock->uv_handle.udp, sock->fd);
        if (r < 0) {
                isc__nm_closesocket(sock->fd);
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
                goto done;
        }
        isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

        if (sa_family == AF_INET6) {
                uv_bind_flags |= UV_UDP_IPV6ONLY;
        }

#if HAVE_SO_REUSEPORT_LB || defined(WIN32)
        r = isc_uv_udp_freebind(&sock->uv_handle.udp,
                                &sock->parent->iface.type.sa, uv_bind_flags);
        if (r < 0) {
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
                goto done;
        }
#else
        if (sock->parent->fd == -1) {
                /* This thread is first, bind the socket */
                r = isc_uv_udp_freebind(&sock->uv_handle.udp,
                                        &sock->parent->iface.type.sa,
                                        uv_bind_flags);
                if (r < 0) {
                        isc__nm_incstats(sock->mgr,
                                         sock->statsindex[STATID_BINDFAIL]);
                        goto done;
                }
                sock->parent->uv_handle.udp.flags = sock->uv_handle.udp.flags;
                sock->parent->fd = sock->fd;
        } else {
                /* The socket is already bound, just copy the flags */
                sock->uv_handle.udp.flags = sock->parent->uv_handle.udp.flags;
        }
#endif

#ifdef ISC_RECV_BUFFER_SIZE
        uv_recv_buffer_size(&sock->uv_handle.handle,
                            &(int){ ISC_RECV_BUFFER_SIZE });
#endif
#ifdef ISC_SEND_BUFFER_SIZE
        uv_send_buffer_size(&sock->uv_handle.handle,
                            &(int){ ISC_SEND_BUFFER_SIZE });
#endif
        r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
                              udp_recv_cb);
        if (r != 0) {
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
                goto done;
        }

        atomic_store(&sock->listening, true);

done:
        result = isc__nm_uverr2result(r);
        atomic_fetch_add(&sock->parent->rchildren, 1);
        if (sock->parent->result == ISC_R_UNSET) {
                sock->parent->result = result;
        }
        SIGNAL(&sock->parent->cond);
        UNLOCK(&sock->parent->lock);

        isc_barrier_wait(&sock->parent->startlistening);
}

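/*
 * Stop a UDP listener socket. When called from outside a network
 * thread the request is enqueued to the listener's thread; stopping
 * the same listener twice is a fatal error.
 */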
void
isc__nm_udp_stoplistening(isc_nmsocket_t *sock) {
        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->type == isc_nm_udplistener);

        if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
                                            true)) {
                INSIST(0);
                ISC_UNREACHABLE();
        }

        if (!isc__nm_in_netthread()) {
                enqueue_stoplistening(sock);
        } else {
                stop_udp_parent(sock);
        }
}

/*
 * Asynchronous 'udpstop' call handler: stop listening on a UDP socket.
 */
void
isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udpstop_t *ievent = (isc__netievent_udpstop_t *)ev0;
        isc_nmsocket_t *sock = ievent->sock;

        UNUSED(worker);

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());

        if (sock->parent != NULL) {
                stop_udp_child(sock);
                return;
        }

        stop_udp_parent(sock);
}

/*
 * udp_recv_cb handles an incoming UDP packet from libuv. The buffer
 * passed in here is reused for a series of packets, so we need to
 * allocate a new one; the new buffer can then be reused to send the
 * response.
 */
static void
udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
            const struct sockaddr *addr, unsigned flags) {
        isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
        isc__nm_uvreq_t *req = NULL;
        uint32_t maxudp;
        isc_sockaddr_t sockaddr;
        isc_result_t result;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(sock->reading);

        /*
         * When using recvmmsg(2), if no errors occur, there will be a final
         * callback with nrecv set to 0, addr set to NULL and the buffer
         * pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
         * flag cleared and the UV_UDP_MMSG_FREE flag set.
         */
#if HAVE_DECL_UV_UDP_MMSG_FREE
        if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
                INSIST(nrecv == 0);
                INSIST(addr == NULL);
                goto free;
        }
#else
        UNUSED(flags);
#endif

        /*
         * Drop the packet if we're simulating a firewall that blocks
         * UDP packets bigger than 'maxudp' bytes, for testing purposes.
         */
        maxudp = atomic_load(&sock->mgr->maxudp);
        if (maxudp != 0 && (uint32_t)nrecv > maxudp) {
                /*
                 * Keep the read_cb intact here, so that readtimeout_cb
                 * can still trigger without crashing on a missing
                 * read_req.
                 */
                goto free;
        }

        /*
         * If addr == NULL, we've reached the end of the stream; free
         * the buffer and bail.
         */
        if (addr == NULL) {
                isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
                goto free;
        }

        /*
         * Fail the read if the socket is no longer active.
         */
        if (!isc__nmsocket_active(sock)) {
                isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
                goto free;
        }

        if (nrecv < 0) {
                isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nrecv),
                                       false);
                goto free;
        }

        result = isc_sockaddr_fromsockaddr(&sockaddr, addr);
        RUNTIME_CHECK(result == ISC_R_SUCCESS);

        req = isc__nm_get_read_req(sock, &sockaddr);

        /*
         * The callback will be called synchronously because the result
         * is ISC_R_SUCCESS, so it is safe to pass buf directly.
         */
        req->uvbuf.base = buf->base;
        req->uvbuf.len = nrecv;

        sock->recv_read = false;

        REQUIRE(!sock->processing);
        sock->processing = true;
        isc__nm_readcb(sock, req, ISC_R_SUCCESS);
        sock->processing = false;

free:
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
        /*
         * When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK
         * flag set; those must not be freed.
         */
        if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
                return;
        }
#endif

        /*
         * When using recvmmsg(2), nrecv will be < 0 if a UDP socket
         * error occurs. In either case it is now safe to free the
         * provided buffer.
         */
        if (nrecv < 0) {
                /*
                 * The buffer may be a null buffer on error.
                 */
                if (buf->base == NULL && buf->len == 0) {
                        return;
                }
        }

        isc__nm_free_uvbuf(sock, buf);
}

/*
 * Send the data in 'region' to a peer via a UDP socket. We try to find
 * a proper sibling/child socket so that we won't have to jump to
 * another thread.
 */
void
isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
                 isc_nm_cb_t cb, void *cbarg) {
        isc_nmsocket_t *sock = handle->sock;
        isc_nmsocket_t *rsock = NULL;
        isc_sockaddr_t *peer = &handle->peer;
        isc__nm_uvreq_t *uvreq = NULL;
        uint32_t maxudp = atomic_load(&sock->mgr->maxudp);
        int ntid;

        INSIST(sock->type == isc_nm_udpsocket);

        /*
         * We're simulating a firewall blocking UDP packets bigger than
         * 'maxudp' bytes, for testing purposes.
         *
         * The client would ordinarily have unreferenced the handle
         * in the callback, but that won't happen in this case, so
         * we need to do so here.
         */
        if (maxudp != 0 && region->length > maxudp) {
                isc_nmhandle_detach(&handle);
                return;
        }

        if (atomic_load(&sock->client)) {
                /*
                 * When we are sending from the client socket, we directly
                 * use the socket provided.
                 */
                rsock = sock;
                goto send;
        } else {
                /*
                 * When we are sending from the server socket, we use either
                 * the socket associated with the network thread we are in,
                 * or the thread from the socket associated with the handle.
                 */
                INSIST(sock->parent != NULL);

#if defined(WIN32)
                /* On Windows, we have only a single listening socket */
                rsock = sock;
#else
                if (isc__nm_in_netthread()) {
                        ntid = isc_nm_tid();
                } else {
                        ntid = sock->tid;
                }
                rsock = &sock->parent->children[ntid];
#endif
        }

send:
        uvreq = isc__nm_uvreq_get(rsock->mgr, rsock);
        uvreq->uvbuf.base = (char *)region->base;
        uvreq->uvbuf.len = region->length;

        isc_nmhandle_attach(handle, &uvreq->handle);

        uvreq->cb.send = cb;
        uvreq->cbarg = cbarg;

        if (isc_nm_tid() == rsock->tid) {
                REQUIRE(rsock->tid == isc_nm_tid());
                isc__netievent_udpsend_t ievent = { .sock = rsock,
                                                    .req = uvreq,
                                                    .peer = *peer };

                isc__nm_async_udpsend(NULL, (isc__netievent_t *)&ievent);
        } else {
                isc__netievent_udpsend_t *ievent =
                        isc__nm_get_netievent_udpsend(sock->mgr, rsock);
                ievent->peer = *peer;
                ievent->req = uvreq;

                isc__nm_enqueue_ievent(&sock->mgr->workers[rsock->tid],
                                       (isc__netievent_t *)ievent);
        }
}

/*
 * Asynchronous 'udpsend' event handler: send a packet on a UDP socket.
 */
void
isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc_result_t result;
        isc__netievent_udpsend_t *ievent = (isc__netievent_udpsend_t *)ev0;
        isc_nmsocket_t *sock = ievent->sock;
        isc__nm_uvreq_t *uvreq = ievent->req;

        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(sock->tid == isc_nm_tid());
        UNUSED(worker);

        if (isc__nmsocket_closing(sock)) {
                isc__nm_failed_send_cb(sock, uvreq, ISC_R_CANCELED);
                return;
        }

        result = udp_send_direct(sock, uvreq, &ievent->peer);
        if (result != ISC_R_SUCCESS) {
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
                isc__nm_failed_send_cb(sock, uvreq, result);
        }
}

static void
udp_send_cb(uv_udp_send_t *req, int status) {
        isc_result_t result = ISC_R_SUCCESS;
        isc__nm_uvreq_t *uvreq = uv_handle_get_data((uv_handle_t *)req);
        isc_nmsocket_t *sock = NULL;

        REQUIRE(VALID_UVREQ(uvreq));
        REQUIRE(VALID_NMHANDLE(uvreq->handle));

        sock = uvreq->sock;

        REQUIRE(sock->tid == isc_nm_tid());

        if (status < 0) {
                result = isc__nm_uverr2result(status);
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
        }

        isc__nm_sendcb(sock, uvreq, result, false);
}

/*
 * udp_send_direct sends buf to a peer on a socket. The socket has to
 * be in the same thread as the caller.
 */
static isc_result_t
udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
                isc_sockaddr_t *peer) {
        const struct sockaddr *sa = &peer->type.sa;
        int r;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(VALID_UVREQ(req));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(sock->type == isc_nm_udpsocket);

        if (isc__nmsocket_closing(sock)) {
                return (ISC_R_CANCELED);
        }

#if UV_VERSION_HEX >= UV_VERSION(1, 27, 0)
        /*
         * If we used uv_udp_connect() (and not the shim version for
         * older versions of libuv), then the peer address has to be
         * set to NULL or else uv_udp_send() could fail or assert,
         * depending on the libuv version.
         */
        if (atomic_load(&sock->connected)) {
                sa = NULL;
        }
#endif

        r = uv_udp_send(&req->uv_req.udp_send, &sock->uv_handle.udp,
                        &req->uvbuf, 1, sa, udp_send_cb);
        if (r < 0) {
                return (isc__nm_uverr2result(r));
        }

        return (ISC_R_SUCCESS);
}

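/*
 * Open, bind and connect a client UDP socket on the current network
 * thread, then signal the caller waiting in isc_nm_udpconnect() with
 * the result via sock->cond.
 */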
static isc_result_t
udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
        isc__networker_t *worker = NULL;
        int uv_bind_flags = UV_UDP_REUSEADDR;
        isc_result_t result = ISC_R_UNSET;
        int tries = 3;
        int r;

        REQUIRE(isc__nm_in_netthread());
        REQUIRE(sock->tid == isc_nm_tid());

        worker = &sock->mgr->workers[isc_nm_tid()];

        atomic_store(&sock->connecting, true);

        r = uv_udp_init(&worker->loop, &sock->uv_handle.udp);
        UV_RUNTIME_CHECK(uv_udp_init, r);
        uv_handle_set_data(&sock->uv_handle.handle, sock);

        r = uv_timer_init(&worker->loop, &sock->read_timer);
        UV_RUNTIME_CHECK(uv_timer_init, r);
        uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

        r = uv_timer_init(&worker->loop, &sock->write_timer);
        UV_RUNTIME_CHECK(uv_timer_init, r);
        uv_handle_set_data((uv_handle_t *)&sock->write_timer, sock);

        r = uv_udp_open(&sock->uv_handle.udp, sock->fd);
        if (r != 0) {
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
                goto done;
        }
        isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

        if (sock->iface.type.sa.sa_family == AF_INET6) {
                uv_bind_flags |= UV_UDP_IPV6ONLY;
        }

        r = uv_udp_bind(&sock->uv_handle.udp, &sock->iface.type.sa,
                        uv_bind_flags);
        if (r != 0) {
                isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
                goto done;
        }

#ifdef ISC_RECV_BUFFER_SIZE
        uv_recv_buffer_size(&sock->uv_handle.handle,
                            &(int){ ISC_RECV_BUFFER_SIZE });
#endif
#ifdef ISC_SEND_BUFFER_SIZE
        uv_send_buffer_size(&sock->uv_handle.handle,
                            &(int){ ISC_SEND_BUFFER_SIZE });
#endif

        /*
         * On FreeBSD the UDP connect() call sometimes results in a
         * spurious transient EADDRINUSE. Try a few more times before
         * giving up.
         */
        do {
                r = isc_uv_udp_connect(&sock->uv_handle.udp,
                                       &req->peer.type.sa);
        } while (r == UV_EADDRINUSE && --tries > 0);
        if (r != 0) {
                isc__nm_incstats(sock->mgr,
                                 sock->statsindex[STATID_CONNECTFAIL]);
                goto done;
        }
        isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]);

        atomic_store(&sock->connecting, false);
        atomic_store(&sock->connected, true);

done:
        result = isc__nm_uverr2result(r);

        LOCK(&sock->lock);
        sock->result = result;
        SIGNAL(&sock->cond);
        if (!atomic_load(&sock->active)) {
                WAIT(&sock->scond, &sock->lock);
        }
        INSIST(atomic_load(&sock->active));
        UNLOCK(&sock->lock);

        return (result);
}

/*
 * Asynchronous 'udpconnect' call handler: open a new UDP socket and
 * call the connect callback with a handle.
 */
void
isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udpconnect_t *ievent =
                (isc__netievent_udpconnect_t *)ev0;
        isc_nmsocket_t *sock = ievent->sock;
        isc__nm_uvreq_t *req = ievent->req;
        isc_result_t result;

        UNUSED(worker);

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(sock->parent == NULL);
        REQUIRE(sock->tid == isc_nm_tid());

        result = udp_connect_direct(sock, req);
        if (result != ISC_R_SUCCESS) {
                atomic_store(&sock->active, false);
                isc__nm_udp_close(sock);
                isc__nm_connectcb(sock, req, result, true);
        } else {
                /*
                 * The callback has to be called after the socket has
                 * been initialized.
                 */
                isc__nm_connectcb(sock, req, ISC_R_SUCCESS, true);
        }

        /*
         * The sock is now attached to the handle.
         */
        isc__nmsocket_detach(&sock);
}

void
isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
                  isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
                  size_t extrahandlesize) {
        isc_result_t result = ISC_R_SUCCESS;
        isc_nmsocket_t *sock = NULL;
        isc__netievent_udpconnect_t *event = NULL;
        isc__nm_uvreq_t *req = NULL;
        sa_family_t sa_family;

        REQUIRE(VALID_NM(mgr));
        REQUIRE(local != NULL);
        REQUIRE(peer != NULL);

        sa_family = peer->type.sa.sa_family;

        sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t));
        isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, local);

        sock->connect_cb = cb;
        sock->connect_cbarg = cbarg;
        sock->read_timeout = timeout;
        sock->extrahandlesize = extrahandlesize;
        sock->peer = *peer;
        sock->result = ISC_R_UNSET;
        atomic_init(&sock->client, true);

        req = isc__nm_uvreq_get(mgr, sock);
        req->cb.connect = cb;
        req->cbarg = cbarg;
        req->peer = *peer;
        req->local = *local;
        req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

        result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &sock->fd);
        if (result != ISC_R_SUCCESS) {
                if (isc__nm_in_netthread()) {
                        sock->tid = isc_nm_tid();
                }
                isc__nmsocket_clearcb(sock);
                isc__nm_connectcb(sock, req, result, true);
                atomic_store(&sock->closed, true);
                isc__nmsocket_detach(&sock);
                return;
        }

        result = isc__nm_socket_reuse(sock->fd);
        RUNTIME_CHECK(result == ISC_R_SUCCESS ||
                      result == ISC_R_NOTIMPLEMENTED);

        result = isc__nm_socket_reuse_lb(sock->fd);
        RUNTIME_CHECK(result == ISC_R_SUCCESS ||
                      result == ISC_R_NOTIMPLEMENTED);

        (void)isc__nm_socket_incoming_cpu(sock->fd);

        (void)isc__nm_socket_disable_pmtud(sock->fd, sa_family);

        event = isc__nm_get_netievent_udpconnect(mgr, sock, req);

        if (isc__nm_in_netthread()) {
                atomic_store(&sock->active, true);
                sock->tid = isc_nm_tid();
                isc__nm_async_udpconnect(&mgr->workers[sock->tid],
                                         (isc__netievent_t *)event);
                isc__nm_put_netievent_udpconnect(mgr, event);
        } else {
                atomic_init(&sock->active, false);
                sock->tid = isc_random_uniform(mgr->nworkers);
                isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
                                       (isc__netievent_t *)event);
        }
        LOCK(&sock->lock);
        while (sock->result == ISC_R_UNSET) {
                WAIT(&sock->cond, &sock->lock);
        }
        atomic_store(&sock->active, true);
        BROADCAST(&sock->scond);
        UNLOCK(&sock->lock);
}
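
/*
 * A minimal caller sketch for isc_nm_udpconnect(). The callback
 * signature follows isc_nm_cb_t from <isc/netmgr.h>; 'netmgr',
 * 'local', 'peer', 'connected', 'request' and 'sent_cb' are
 * illustrative caller-side names, not part of this file:
 *
 *      static void
 *      connected(isc_nmhandle_t *handle, isc_result_t eresult,
 *                void *cbarg) {
 *              if (eresult == ISC_R_SUCCESS) {
 *                      isc_nm_send(handle, &request, sent_cb, cbarg);
 *              }
 *      }
 *      ...
 *      isc_nm_udpconnect(netmgr, &local, &peer, connected, NULL,
 *                        5000, 0);
 *
 * Note that isc_nm_udpconnect() blocks the calling thread until the
 * connect result is known; the result itself is reported through the
 * callback.
 */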

void
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
                    const struct sockaddr *addr, unsigned flags) {
        isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
        REQUIRE(VALID_NMSOCK(sock));

        udp_recv_cb(handle, nrecv, buf, addr, flags);
        /*
         * If a caller calls isc_nm_read() on a listening socket, we can
         * get here, but we MUST NOT stop reading from the listener
         * socket. The only difference between listener and connected
         * sockets is that the former has sock->parent set and the
         * latter does not.
         */
        if (!sock->parent) {
                isc__nm_stop_reading(sock);
        }
}

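/*
 * Report a failed read on 'sock'. For a client socket this stops
 * reading and schedules destruction of the socket; for a server
 * (listener child) socket we keep reading and keep the callbacks,
 * and only report the failure if a read was pending.
 */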
void
isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(result != ISC_R_SUCCESS);

        if (atomic_load(&sock->client)) {
                isc__nmsocket_timer_stop(sock);
                isc__nm_stop_reading(sock);

                if (!sock->recv_read) {
                        goto destroy;
                }
                sock->recv_read = false;

                if (sock->recv_cb != NULL) {
                        isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
                        isc__nmsocket_clearcb(sock);
                        isc__nm_readcb(sock, req, result);
                }

        destroy:
                isc__nmsocket_prep_destroy(sock);
                return;
        }

        /*
         * A UDP server socket has no child socket created by "accept",
         * so we:
         * - continue to read;
         * - don't clear the callbacks;
         * - don't destroy the socket (only stoplistening can do that).
         */
        if (!sock->recv_read) {
                return;
        }
        sock->recv_read = false;

        if (sock->recv_cb != NULL) {
                isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
                isc__nm_readcb(sock, req, result);
        }
}

/*
 * Asynchronous 'udpread' call handler: start or resume reading on a
 * socket; pause reading and call the 'recv' callback after each
 * datagram.
 */
void
isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udpread_t *ievent = (isc__netievent_udpread_t *)ev0;
        isc_nmsocket_t *sock = ievent->sock;

        UNUSED(worker);

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());

        if (isc__nmsocket_closing(sock)) {
                sock->reading = true;
                isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
                return;
        }

        isc__nm_start_reading(sock);
        isc__nmsocket_timer_start(sock);
}

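/*
 * Start reading one datagram on a connected client socket: register
 * the 'recv' callback and either start reading directly (when we are
 * not yet reading and already on the socket's thread) or enqueue a
 * 'udpread' event for it.
 */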
void
isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
        REQUIRE(VALID_NMHANDLE(handle));
        REQUIRE(VALID_NMSOCK(handle->sock));

        isc_nmsocket_t *sock = handle->sock;

        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(sock->statichandle == handle);
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(!sock->recv_read);

        sock->recv_cb = cb;
        sock->recv_cbarg = cbarg;
        sock->recv_read = true;

        if (!sock->reading && sock->tid == isc_nm_tid()) {
                isc__netievent_udpread_t ievent = { .sock = sock };
                isc__nm_async_udpread(NULL, (isc__netievent_t *)&ievent);
        } else {
                isc__netievent_udpread_t *ievent =
                        isc__nm_get_netievent_udpread(sock->mgr, sock);
                isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
                                       (isc__netievent_t *)ievent);
        }
}

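/*
 * uv_close() callback for a listener child socket: mark it closed
 * and drop the reference taken in isc__nm_async_udplisten().
 */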
static void
udp_stop_cb(uv_handle_t *handle) {
        isc_nmsocket_t *sock = uv_handle_get_data(handle);
        uv_handle_set_data(handle, NULL);

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(atomic_load(&sock->closing));

        if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
                                            true)) {
                INSIST(0);
                ISC_UNREACHABLE();
        }

        isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

        atomic_store(&sock->listening, false);

        isc__nmsocket_detach(&sock);
}

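/*
 * uv_close() callback for a connected (client) socket: mark it
 * closed, detach the server socket if one is attached, and schedule
 * the socket for destruction.
 */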
static void
udp_close_cb(uv_handle_t *handle) {
        isc_nmsocket_t *sock = uv_handle_get_data(handle);
        uv_handle_set_data(handle, NULL);

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(atomic_load(&sock->closing));

        if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
                                            true)) {
                INSIST(0);
                ISC_UNREACHABLE();
        }

        isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

        if (sock->server != NULL) {
                isc__nmsocket_detach(&sock->server);
        }

        atomic_store(&sock->connected, false);
        atomic_store(&sock->listening, false);

        isc__nmsocket_prep_destroy(sock);
}

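/*
 * Closing a UDP socket happens as a chain of uv_close() callbacks:
 * udp_close_direct() closes the write timer, write_timer_close_cb()
 * closes the read timer, and read_timer_close_cb() finally closes the
 * UDP handle itself (via udp_stop_cb() for listener children,
 * udp_close_cb() otherwise).
 */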
static void
read_timer_close_cb(uv_handle_t *handle) {
        isc_nmsocket_t *sock = uv_handle_get_data(handle);
        uv_handle_set_data(handle, NULL);

        if (sock->parent) {
                uv_close(&sock->uv_handle.handle, udp_stop_cb);
        } else {
                uv_close(&sock->uv_handle.handle, udp_close_cb);
        }
}

static void
write_timer_close_cb(uv_handle_t *timer) {
        isc_nmsocket_t *sock = uv_handle_get_data(timer);
        uv_handle_set_data(timer, NULL);

        REQUIRE(VALID_NMSOCK(sock));

        uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
        uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
}

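/*
 * Close a listener child socket on its own thread, decrement the
 * parent's count of running children, and wait on the parent's
 * stoplistening barrier so that all children finish together.
 */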
static void
stop_udp_child(isc_nmsocket_t *sock) {
        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(sock->tid == isc_nm_tid());

        if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
                                            true)) {
                return;
        }

        udp_close_direct(sock);

        atomic_fetch_sub(&sock->parent->rchildren, 1);

        isc_barrier_wait(&sock->parent->stoplistening);
}

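/*
 * Stop all children of a listener socket: enqueue 'udpstop' events
 * for the children on other threads, stop the child belonging to the
 * current thread directly, then mark the listener closed and schedule
 * it for destruction.
 */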
static void
stop_udp_parent(isc_nmsocket_t *sock) {
        isc_nmsocket_t *csock = NULL;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(sock->type == isc_nm_udplistener);

        isc_barrier_init(&sock->stoplistening, sock->nchildren);

        for (size_t i = 0; i < sock->nchildren; i++) {
                csock = &sock->children[i];
                REQUIRE(VALID_NMSOCK(csock));

                if ((int)i == isc_nm_tid()) {
                        /*
                         * We need to schedule closing the other sockets first
                         */
                        continue;
                }

                atomic_store(&csock->active, false);
                enqueue_stoplistening(csock);
        }

        csock = &sock->children[isc_nm_tid()];
        atomic_store(&csock->active, false);
        stop_udp_child(csock);

        atomic_store(&sock->closed, true);
        isc__nmsocket_prep_destroy(sock);
}

static void
udp_close_direct(isc_nmsocket_t *sock) {
        int r;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());

        r = uv_timer_stop(&sock->write_timer);
        UV_RUNTIME_CHECK(uv_timer_stop, r);
        uv_handle_set_data((uv_handle_t *)&sock->write_timer, sock);
        uv_close((uv_handle_t *)&sock->write_timer, write_timer_close_cb);
}

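/*
 * Asynchronous 'udpclose' call handler: close a UDP socket.
 */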
void
isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udpclose_t *ievent = (isc__netievent_udpclose_t *)ev0;
        isc_nmsocket_t *sock = ievent->sock;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        UNUSED(worker);

        udp_close_direct(sock);
}

void
isc__nm_udp_close(isc_nmsocket_t *sock) {
        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->type == isc_nm_udpsocket);
        REQUIRE(!isc__nmsocket_active(sock));

        if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
                                            true)) {
                return;
        }

        if (sock->tid == isc_nm_tid()) {
                udp_close_direct(sock);
        } else {
                isc__netievent_udpclose_t *ievent =
                        isc__nm_get_netievent_udpclose(sock->mgr, sock);
                isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
                                       (isc__netievent_t *)ievent);
        }
}

void
isc__nm_udp_shutdown(isc_nmsocket_t *sock) {
        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(sock->type == isc_nm_udpsocket);

        /*
         * If the socket is active, mark it inactive and
         * continue. If it isn't active, stop now.
         */
        if (!isc__nmsocket_deactivate(sock)) {
                return;
        }

        /*
         * If the socket is connecting, the cancellation will happen in
         * async_udpconnect() because the socket is now inactive.
         */
        if (atomic_load(&sock->connecting)) {
                return;
        }

        /*
         * When the client detaches the last handle, sock->statichandle
         * is NULL; in that case nobody is interested in the callback.
         */
        if (sock->statichandle != NULL) {
                isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
                return;
        }

        /*
         * Otherwise, we just send the socket to abyss...
         */
        if (sock->parent == NULL) {
                isc__nmsocket_prep_destroy(sock);
        }
}

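/*
 * Cancel a pending read on a client socket. The 'udpcancel' event is
 * always enqueued to the socket's thread, where the pending read is
 * failed with ISC_R_EOF.
 */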
void
isc__nm_udp_cancelread(isc_nmhandle_t *handle) {
        isc_nmsocket_t *sock = NULL;
        isc__netievent_udpcancel_t *ievent = NULL;

        REQUIRE(VALID_NMHANDLE(handle));

        sock = handle->sock;

        REQUIRE(VALID_NMSOCK(sock));
        REQUIRE(sock->type == isc_nm_udpsocket);

        ievent = isc__nm_get_netievent_udpcancel(sock->mgr, sock, handle);

        isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
                               (isc__netievent_t *)ievent);
}

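/*
 * Asynchronous 'udpcancel' call handler: cancel a pending read on a
 * client UDP socket.
 */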
void
isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0) {
        isc__netievent_udpcancel_t *ievent = (isc__netievent_udpcancel_t *)ev0;
        isc_nmsocket_t *sock = NULL;

        UNUSED(worker);

        REQUIRE(VALID_NMSOCK(ievent->sock));

        sock = ievent->sock;

        REQUIRE(sock->tid == isc_nm_tid());
        REQUIRE(atomic_load(&sock->client));

        isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
}
