/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <inttypes.h>
#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/backtrace.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/print.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stats.h>
#include <isc/strerr.h>
#include <isc/task.h>
#include <isc/thread.h>
#include <isc/tls.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "netmgr_p.h"
#include "openssl_shim.h"
#include "trampoline_p.h"
#include "uv-compat.h"

/*%
 * The number of isc_nmhandle and isc_nm_uvreq objects cached for
 * reuse in a socket.
 */
#define ISC_NM_HANDLES_STACK_SIZE 600
#define ISC_NM_REQS_STACK_SIZE	  600
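
/*
 * If a freelist is full when a handle is released, the handle is
 * simply freed back to the memory context instead (see the
 * isc_astack_trypush() fallback in nmhandle_deactivate() below), so
 * these limits bound how much memory is cached for reuse, not how
 * many objects can be live at once.
 */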

/*%
 * Shortcut index arrays to get access to statistics counters.
 */

static const isc_statscounter_t udp4statsindex[] = {
	isc_sockstatscounter_udp4open,
	isc_sockstatscounter_udp4openfail,
	isc_sockstatscounter_udp4close,
	isc_sockstatscounter_udp4bindfail,
	isc_sockstatscounter_udp4connectfail,
	isc_sockstatscounter_udp4connect,
	-1,
	-1,
	isc_sockstatscounter_udp4sendfail,
	isc_sockstatscounter_udp4recvfail,
	isc_sockstatscounter_udp4active
};

static const isc_statscounter_t udp6statsindex[] = {
	isc_sockstatscounter_udp6open,
	isc_sockstatscounter_udp6openfail,
	isc_sockstatscounter_udp6close,
	isc_sockstatscounter_udp6bindfail,
	isc_sockstatscounter_udp6connectfail,
	isc_sockstatscounter_udp6connect,
	-1,
	-1,
	isc_sockstatscounter_udp6sendfail,
	isc_sockstatscounter_udp6recvfail,
	isc_sockstatscounter_udp6active
};

static const isc_statscounter_t tcp4statsindex[] = {
	isc_sockstatscounter_tcp4open, isc_sockstatscounter_tcp4openfail,
	isc_sockstatscounter_tcp4close, isc_sockstatscounter_tcp4bindfail,
	isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect,
	isc_sockstatscounter_tcp4acceptfail, isc_sockstatscounter_tcp4accept,
	isc_sockstatscounter_tcp4sendfail, isc_sockstatscounter_tcp4recvfail,
	isc_sockstatscounter_tcp4active
};

static const isc_statscounter_t tcp6statsindex[] = {
	isc_sockstatscounter_tcp6open, isc_sockstatscounter_tcp6openfail,
	isc_sockstatscounter_tcp6close, isc_sockstatscounter_tcp6bindfail,
	isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect,
	isc_sockstatscounter_tcp6acceptfail, isc_sockstatscounter_tcp6accept,
	isc_sockstatscounter_tcp6sendfail, isc_sockstatscounter_tcp6recvfail,
	isc_sockstatscounter_tcp6active
};

#if 0
/* XXX: not currently used */
static const isc_statscounter_t unixstatsindex[] = {
	isc_sockstatscounter_unixopen,
	isc_sockstatscounter_unixopenfail,
	isc_sockstatscounter_unixclose,
	isc_sockstatscounter_unixbindfail,
	isc_sockstatscounter_unixconnectfail,
	isc_sockstatscounter_unixconnect,
	isc_sockstatscounter_unixacceptfail,
	isc_sockstatscounter_unixaccept,
	isc_sockstatscounter_unixsendfail,
	isc_sockstatscounter_unixrecvfail,
	isc_sockstatscounter_unixactive
};
#endif /* if 0 */

/*
 * libuv is not thread safe, but has mechanisms to pass messages
 * between threads. Each socket is owned by a thread. For UDP
 * sockets we have a set of sockets for each interface and we can
 * choose a sibling and send the message directly. For TCP, or if
 * we're calling from a non-networking thread, we need to pass the
 * request using async_cb.
 */
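
/*
 * In outline, a cross-thread request travels like this (see
 * isc__nm_enqueue_ievent() below): the sending thread allocates an
 * isc__netievent_t, pushes it onto the owning worker's queue, and
 * wakes that worker's loop with uv_async_send(); async_cb() then runs
 * on the owning thread and processes the queued event there.
 */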

static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;

static void
nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG);
static void
nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle);
static isc_threadresult_t
nm_thread(isc_threadarg_t worker0);
static void
async_cb(uv_async_t *handle);
static bool
process_netievent(isc__networker_t *worker, isc__netievent_t *ievent);
static isc_result_t
process_queue(isc__networker_t *worker, netievent_type_t type);
static void
wait_for_priority_queue(isc__networker_t *worker);
static void
drain_queue(isc__networker_t *worker, netievent_type_t type);

#define ENQUEUE_NETIEVENT(worker, queue, event) \
	isc_queue_enqueue(worker->ievents[queue], (uintptr_t)event)
#define DEQUEUE_NETIEVENT(worker, queue) \
	(isc__netievent_t *)isc_queue_dequeue(worker->ievents[queue])

#define ENQUEUE_PRIORITY_NETIEVENT(worker, event) \
	ENQUEUE_NETIEVENT(worker, NETIEVENT_PRIORITY, event)
#define ENQUEUE_PRIVILEGED_NETIEVENT(worker, event) \
	ENQUEUE_NETIEVENT(worker, NETIEVENT_PRIVILEGED, event)
#define ENQUEUE_TASK_NETIEVENT(worker, event) \
	ENQUEUE_NETIEVENT(worker, NETIEVENT_TASK, event)
#define ENQUEUE_NORMAL_NETIEVENT(worker, event) \
	ENQUEUE_NETIEVENT(worker, NETIEVENT_NORMAL, event)

#define DEQUEUE_PRIORITY_NETIEVENT(worker) \
	DEQUEUE_NETIEVENT(worker, NETIEVENT_PRIORITY)
#define DEQUEUE_PRIVILEGED_NETIEVENT(worker) \
	DEQUEUE_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
#define DEQUEUE_TASK_NETIEVENT(worker) DEQUEUE_NETIEVENT(worker, NETIEVENT_TASK)
#define DEQUEUE_NORMAL_NETIEVENT(worker) \
	DEQUEUE_NETIEVENT(worker, NETIEVENT_NORMAL)

#define INCREMENT_NETIEVENT(worker, queue) \
	atomic_fetch_add_release(&worker->nievents[queue], 1)
#define DECREMENT_NETIEVENT(worker, queue) \
	atomic_fetch_sub_release(&worker->nievents[queue], 1)

#define INCREMENT_PRIORITY_NETIEVENT(worker) \
	INCREMENT_NETIEVENT(worker, NETIEVENT_PRIORITY)
#define INCREMENT_PRIVILEGED_NETIEVENT(worker) \
	INCREMENT_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
#define INCREMENT_TASK_NETIEVENT(worker) \
	INCREMENT_NETIEVENT(worker, NETIEVENT_TASK)
#define INCREMENT_NORMAL_NETIEVENT(worker) \
	INCREMENT_NETIEVENT(worker, NETIEVENT_NORMAL)

#define DECREMENT_PRIORITY_NETIEVENT(worker) \
	DECREMENT_NETIEVENT(worker, NETIEVENT_PRIORITY)
#define DECREMENT_PRIVILEGED_NETIEVENT(worker) \
	DECREMENT_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
#define DECREMENT_TASK_NETIEVENT(worker) \
	DECREMENT_NETIEVENT(worker, NETIEVENT_TASK)
#define DECREMENT_NORMAL_NETIEVENT(worker) \
	DECREMENT_NETIEVENT(worker, NETIEVENT_NORMAL)

static void
isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0);
static void
isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0);
static void
isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0);
static void
isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0);
static void
isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0);

static void
isc__nm_threadpool_initialize(uint32_t workers);
static void
isc__nm_work_cb(uv_work_t *req);
static void
isc__nm_after_work_cb(uv_work_t *req, int status);

/*%<
 * Issue a 'handle closed' callback on the socket.
 */

static void
nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG);

int
isc_nm_tid(void) {
	return (isc__nm_tid_v);
}

bool
isc__nm_in_netthread(void) {
	return (isc__nm_tid_v >= 0);
}

void
isc__nm_force_tid(int tid) {
	isc__nm_tid_v = tid;
}

static void
isc__nm_threadpool_initialize(uint32_t workers) {
	char buf[11];
	int r = uv_os_getenv("UV_THREADPOOL_SIZE", buf,
			     &(size_t){ sizeof(buf) });
	if (r == UV_ENOENT) {
		snprintf(buf, sizeof(buf), "%" PRIu32, workers);
		uv_os_setenv("UV_THREADPOOL_SIZE", buf);
	}
}
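
/*
 * Note that libuv reads UV_THREADPOOL_SIZE only once, when its thread
 * pool is first used, so this must run before anything submits work
 * to the pool (e.g. via uv_queue_work()).
 */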

void
isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) {
	isc_nm_t *mgr = NULL;
	char name[32];

	REQUIRE(workers > 0);

	isc__nm_threadpool_initialize(workers);

	mgr = isc_mem_get(mctx, sizeof(*mgr));
	*mgr = (isc_nm_t){ .nworkers = workers };

	isc_mem_attach(mctx, &mgr->mctx);
	isc_mutex_init(&mgr->lock);
	isc_condition_init(&mgr->wkstatecond);
	isc_condition_init(&mgr->wkpausecond);
	isc_refcount_init(&mgr->references, 1);
	atomic_init(&mgr->maxudp, 0);
	atomic_init(&mgr->interlocked, ISC_NETMGR_NON_INTERLOCKED);
	atomic_init(&mgr->workers_paused, 0);
	atomic_init(&mgr->paused, false);
	atomic_init(&mgr->closing, false);
	atomic_init(&mgr->recv_tcp_buffer_size, 0);
	atomic_init(&mgr->send_tcp_buffer_size, 0);
	atomic_init(&mgr->recv_udp_buffer_size, 0);
	atomic_init(&mgr->send_udp_buffer_size, 0);

#ifdef NETMGR_TRACE
	ISC_LIST_INIT(mgr->active_sockets);
#endif

	/*
	 * Default TCP timeout values.
	 * May be updated by isc_nm_tcptimeouts().
	 */
	atomic_init(&mgr->init, 30000);
	atomic_init(&mgr->idle, 30000);
	atomic_init(&mgr->keepalive, 30000);
	atomic_init(&mgr->advertised, 30000);

	isc_barrier_init(&mgr->pausing, workers);
	isc_barrier_init(&mgr->resuming, workers);

	mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t));
	for (size_t i = 0; i < workers; i++) {
		int r;
		isc__networker_t *worker = &mgr->workers[i];
		*worker = (isc__networker_t){
			.mgr = mgr,
			.id = i,
		};

		r = uv_loop_init(&worker->loop);
		RUNTIME_CHECK(r == 0);

		worker->loop.data = &mgr->workers[i];

		r = uv_async_init(&worker->loop, &worker->async, async_cb);
		RUNTIME_CHECK(r == 0);

		isc_mutex_init(&worker->lock);
		isc_condition_init(&worker->cond_prio);

		for (size_t type = 0; type < NETIEVENT_MAX; type++) {
			worker->ievents[type] = isc_queue_new(mgr->mctx, 128);
			atomic_init(&worker->nievents[type], 0);
		}

		worker->recvbuf = isc_mem_get(mctx, ISC_NETMGR_RECVBUF_SIZE);
		worker->sendbuf = isc_mem_get(mctx, ISC_NETMGR_SENDBUF_SIZE);

		/*
		 * We need to do this here and not in nm_thread to avoid a
		 * race - we could exit isc_nm_start, launch nm_destroy,
		 * and nm_thread would still not be up.
		 */
		mgr->workers_running++;
		isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread);

		snprintf(name, sizeof(name), "isc-net-%04zu", i);
		isc_thread_setname(worker->thread, name);
	}

	mgr->magic = NM_MAGIC;
	*netmgrp = mgr;
}

/*
 * Free the resources of the network manager.
 */
static void
nm_destroy(isc_nm_t **mgr0) {
	REQUIRE(VALID_NM(*mgr0));
	REQUIRE(!isc__nm_in_netthread());

	isc_nm_t *mgr = *mgr0;
	*mgr0 = NULL;

	isc_refcount_destroy(&mgr->references);

	mgr->magic = 0;

	for (int i = 0; i < mgr->nworkers; i++) {
		isc__networker_t *worker = &mgr->workers[i];
		isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
		isc__nm_enqueue_ievent(worker, event);
	}

	LOCK(&mgr->lock);
	while (mgr->workers_running > 0) {
		WAIT(&mgr->wkstatecond, &mgr->lock);
	}
	UNLOCK(&mgr->lock);

	for (int i = 0; i < mgr->nworkers; i++) {
		isc__networker_t *worker = &mgr->workers[i];
		isc__netievent_t *ievent = NULL;
		int r;

		/* Empty the async event queues */
		while ((ievent = DEQUEUE_PRIORITY_NETIEVENT(worker)) != NULL) {
			isc_mem_put(mgr->mctx, ievent, sizeof(*ievent));
		}

		INSIST(DEQUEUE_PRIVILEGED_NETIEVENT(worker) == NULL);
		INSIST(DEQUEUE_TASK_NETIEVENT(worker) == NULL);

		while ((ievent = DEQUEUE_NORMAL_NETIEVENT(worker)) != NULL) {
			isc_mem_put(mgr->mctx, ievent, sizeof(*ievent));
		}
		isc_condition_destroy(&worker->cond_prio);

		r = uv_loop_close(&worker->loop);
		INSIST(r == 0);

		for (size_t type = 0; type < NETIEVENT_MAX; type++) {
			isc_queue_destroy(worker->ievents[type]);
		}

		isc_mem_put(mgr->mctx, worker->sendbuf,
			    ISC_NETMGR_SENDBUF_SIZE);
		isc_mem_put(mgr->mctx, worker->recvbuf,
			    ISC_NETMGR_RECVBUF_SIZE);
		isc_thread_join(worker->thread, NULL);
	}

	if (mgr->stats != NULL) {
		isc_stats_detach(&mgr->stats);
	}

	isc_barrier_destroy(&mgr->resuming);
	isc_barrier_destroy(&mgr->pausing);

	isc_condition_destroy(&mgr->wkstatecond);
	isc_condition_destroy(&mgr->wkpausecond);
	isc_mutex_destroy(&mgr->lock);

	isc_mem_put(mgr->mctx, mgr->workers,
		    mgr->nworkers * sizeof(isc__networker_t));
	isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
}

static void
enqueue_pause(isc__networker_t *worker) {
	isc__netievent_pause_t *event =
		isc__nm_get_netievent_pause(worker->mgr);
	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
}

static void
isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
	UNUSED(ev0);
	REQUIRE(worker->paused == false);

	worker->paused = true;
	uv_stop(&worker->loop);
}

void
isc_nm_pause(isc_nm_t *mgr) {
	REQUIRE(VALID_NM(mgr));
	REQUIRE(!atomic_load(&mgr->paused));

	isc__nm_acquire_interlocked_force(mgr);

	if (isc__nm_in_netthread()) {
		REQUIRE(isc_nm_tid() == 0);
	}

	for (int i = 0; i < mgr->nworkers; i++) {
		isc__networker_t *worker = &mgr->workers[i];
		if (i == isc_nm_tid()) {
			isc__nm_async_pause(worker, NULL);
		} else {
			enqueue_pause(worker);
		}
	}

	if (isc__nm_in_netthread()) {
		atomic_fetch_add(&mgr->workers_paused, 1);
		isc_barrier_wait(&mgr->pausing);
	}

	LOCK(&mgr->lock);
	while (atomic_load(&mgr->workers_paused) != mgr->workers_running) {
		WAIT(&mgr->wkstatecond, &mgr->lock);
	}
	UNLOCK(&mgr->lock);

	REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ false },
					       true));
}

static void
enqueue_resume(isc__networker_t *worker) {
	isc__netievent_resume_t *event =
		isc__nm_get_netievent_resume(worker->mgr);
	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
}

static void
isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
	UNUSED(ev0);
	REQUIRE(worker->paused == true);

	worker->paused = false;
}

void
isc_nm_resume(isc_nm_t *mgr) {
	REQUIRE(VALID_NM(mgr));
	REQUIRE(atomic_load(&mgr->paused));

	if (isc__nm_in_netthread()) {
		REQUIRE(isc_nm_tid() == 0);
		drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIORITY);
	}

	for (int i = 0; i < mgr->nworkers; i++) {
		isc__networker_t *worker = &mgr->workers[i];
		if (i == isc_nm_tid()) {
			isc__nm_async_resume(worker, NULL);
		} else {
			enqueue_resume(worker);
		}
	}

	if (isc__nm_in_netthread()) {
		drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIVILEGED);

		atomic_fetch_sub(&mgr->workers_paused, 1);
		isc_barrier_wait(&mgr->resuming);
	}

	LOCK(&mgr->lock);
	while (atomic_load(&mgr->workers_paused) != 0) {
		WAIT(&mgr->wkstatecond, &mgr->lock);
	}
	UNLOCK(&mgr->lock);

	REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ true },
					       false));

	isc__nm_drop_interlocked(mgr);
}

void
isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
	REQUIRE(VALID_NM(mgr));
	REQUIRE(dst != NULL && *dst == NULL);

	isc_refcount_increment(&mgr->references);

	*dst = mgr;
}

void
isc_nm_detach(isc_nm_t **mgr0) {
	isc_nm_t *mgr = NULL;

	REQUIRE(mgr0 != NULL);
	REQUIRE(VALID_NM(*mgr0));

	mgr = *mgr0;
	*mgr0 = NULL;

	if (isc_refcount_decrement(&mgr->references) == 1) {
		nm_destroy(&mgr);
	}
}

void
isc__netmgr_shutdown(isc_nm_t *mgr) {
	REQUIRE(VALID_NM(mgr));

	atomic_store(&mgr->closing, true);
	for (int i = 0; i < mgr->nworkers; i++) {
		isc__netievent_t *event = NULL;
		event = isc__nm_get_netievent_shutdown(mgr);
		isc__nm_enqueue_ievent(&mgr->workers[i], event);
	}
}

void
isc__netmgr_destroy(isc_nm_t **netmgrp) {
	isc_nm_t *mgr = NULL;
	int counter = 0;

	REQUIRE(VALID_NM(*netmgrp));

	mgr = *netmgrp;

	/*
	 * Close active connections.
	 */
	isc__netmgr_shutdown(mgr);

	/*
	 * Wait for the manager to be dereferenced elsewhere.
	 */
	while (isc_refcount_current(&mgr->references) > 1 && counter++ < 1000) {
		uv_sleep(10);
	}

#ifdef NETMGR_TRACE
	if (isc_refcount_current(&mgr->references) > 1) {
		isc__nm_dump_active(mgr);
		INSIST(0);
		ISC_UNREACHABLE();
	}
#endif

	/*
	 * Now just patiently wait
	 */
	while (isc_refcount_current(&mgr->references) > 1) {
		uv_sleep(10);
	}

	/*
	 * Detach final reference.
	 */
	isc_nm_detach(netmgrp);
}

void
isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {
	REQUIRE(VALID_NM(mgr));

	atomic_store(&mgr->maxudp, maxudp);
}

void
isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle,
		   uint32_t keepalive, uint32_t advertised) {
	REQUIRE(VALID_NM(mgr));

	atomic_store(&mgr->init, init);
	atomic_store(&mgr->idle, idle);
	atomic_store(&mgr->keepalive, keepalive);
	atomic_store(&mgr->advertised, advertised);
}

void
isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp,
		     int32_t recv_udp, int32_t send_udp) {
	REQUIRE(VALID_NM(mgr));

	atomic_store(&mgr->recv_tcp_buffer_size, recv_tcp);
	atomic_store(&mgr->send_tcp_buffer_size, send_tcp);
	atomic_store(&mgr->recv_udp_buffer_size, recv_udp);
	atomic_store(&mgr->send_udp_buffer_size, send_udp);
}

void
isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
		   uint32_t *keepalive, uint32_t *advertised) {
	REQUIRE(VALID_NM(mgr));

	if (initial != NULL) {
		*initial = atomic_load(&mgr->init);
	}

	if (idle != NULL) {
		*idle = atomic_load(&mgr->idle);
	}

	if (keepalive != NULL) {
		*keepalive = atomic_load(&mgr->keepalive);
	}

	if (advertised != NULL) {
		*advertised = atomic_load(&mgr->advertised);
	}
}

/*
 * nm_thread is a single worker thread that runs the uv_run() event
 * loop until asked to stop.
 *
 * There are four queues for asynchronous events:
 *
 * 1. priority queue - netievents on the priority queue are run even when
 *    the taskmgr enters exclusive mode and the netmgr is paused. This
 *    is needed to properly start listening on the interfaces, free
 *    resources on shutdown, or resume from a pause.
 *
 * 2. privileged task queue - only privileged tasks are queued here and
 *    this is the first queue that gets processed when the network manager
 *    is unpaused using isc_nm_resume(). All netmgr workers need to
 *    drain the privileged task queue before they all proceed to normal
 *    operation. Both task queues are processed when the workers are
 *    shutting down.
 *
 * 3. task queue - only (traditional) tasks are scheduled here, and this
 *    queue and the privileged task queue are both processed when the
 *    netmgr workers are finishing. This is needed to process the task
 *    shutdown events.
 *
 * 4. normal queue - this is the queue with netmgr events, e.g. reading,
 *    sending, callbacks, etc.
 */
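
/*
 * The resulting processing order over a worker's lifetime, in outline
 * (see nm_thread() and process_all_queues() below):
 *
 *	running:	all four queues, via async_cb()
 *	paused:		priority queue only (wait_for_priority_queue())
 *	resuming:	privileged queue drained before normal operation
 *	stopping:	privileged and task queues drained; normal
 *			netmgr events are not processed anymore
 */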

static isc_threadresult_t
nm_thread(isc_threadarg_t worker0) {
	isc__networker_t *worker = (isc__networker_t *)worker0;
	isc_nm_t *mgr = worker->mgr;

	isc__nm_tid_v = worker->id;

	while (true) {
		/*
		 * uv_run() runs async_cb() in a loop, which processes
		 * all four event queues until a "pause" or "stop" event
		 * is encountered. On pause, we process only priority and
		 * privileged events until resuming.
		 */
		int r = uv_run(&worker->loop, UV_RUN_DEFAULT);
		INSIST(r > 0 || worker->finished);

		if (worker->paused) {
			INSIST(atomic_load(&mgr->interlocked) != isc_nm_tid());

			atomic_fetch_add(&mgr->workers_paused, 1);
			if (isc_barrier_wait(&mgr->pausing) != 0) {
				LOCK(&mgr->lock);
				SIGNAL(&mgr->wkstatecond);
				UNLOCK(&mgr->lock);
			}

			while (worker->paused) {
				wait_for_priority_queue(worker);
			}

			/*
			 * All workers must drain the privileged event
			 * queue before we resume from pause.
			 */
			drain_queue(worker, NETIEVENT_PRIVILEGED);

			atomic_fetch_sub(&mgr->workers_paused, 1);
			if (isc_barrier_wait(&mgr->resuming) != 0) {
				LOCK(&mgr->lock);
				SIGNAL(&mgr->wkstatecond);
				UNLOCK(&mgr->lock);
			}
		}

		if (r == 0) {
			INSIST(worker->finished);
			break;
		}

		INSIST(!worker->finished);
	}

	/*
	 * We are shutting down. Process the task queues
	 * (they may include shutdown events) but do not process
	 * the netmgr event queue.
	 */
	drain_queue(worker, NETIEVENT_PRIVILEGED);
	drain_queue(worker, NETIEVENT_TASK);

	LOCK(&mgr->lock);
	mgr->workers_running--;
	SIGNAL(&mgr->wkstatecond);
	UNLOCK(&mgr->lock);

	return ((isc_threadresult_t)0);
}

static bool
process_all_queues(isc__networker_t *worker) {
	bool reschedule = false;
	/*
	 * The queue processing functions will return false when the
	 * system is pausing or stopping and we don't want to process
	 * the other queues in such case, but we need the async event
	 * to be rescheduled in the next uv_run().
	 */
	for (size_t type = 0; type < NETIEVENT_MAX; type++) {
		isc_result_t result = process_queue(worker, type);
		switch (result) {
		case ISC_R_SUSPEND:
			return (true);
		case ISC_R_EMPTY:
			/* empty queue */
			break;
		case ISC_R_SUCCESS:
			reschedule = true;
			break;
		default:
			INSIST(0);
			ISC_UNREACHABLE();
		}
	}

	return (reschedule);
}

/*
 * async_cb() is a universal callback for 'async' events sent to the event
 * loop. It's the only way to safely pass data to the libuv event loop. We
 * use a single async event and a set of lockless queues of
 * 'isc__netievent_t' structures passed from other threads.
 */
static void
async_cb(uv_async_t *handle) {
	isc__networker_t *worker = (isc__networker_t *)handle->loop->data;

	if (process_all_queues(worker)) {
		/*
		 * If we didn't process all the events, we need to enqueue
		 * async_cb to be run in the next iteration of the uv_loop.
		 */
		uv_async_send(handle);
	}
}

static void
isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
	UNUSED(ev0);
	worker->finished = true;
	/* Close the async handler */
	uv_close((uv_handle_t *)&worker->async, NULL);
}

void
isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) {
	isc__netievent_t *event = NULL;
	int tid;
	isc__networker_t *worker = NULL;

	if (threadid == -1) {
		tid = (int)isc_random_uniform(nm->nworkers);
	} else {
		tid = threadid % nm->nworkers;
	}

	worker = &nm->workers[tid];

	if (isc_task_privileged(task)) {
		event = (isc__netievent_t *)
			isc__nm_get_netievent_privilegedtask(nm, task);
	} else {
		event = (isc__netievent_t *)isc__nm_get_netievent_task(nm,
								       task);
	}

	isc__nm_enqueue_ievent(worker, event);
}

#define isc__nm_async_privilegedtask(worker, ev0) \
	isc__nm_async_task(worker, ev0)

static void
isc__nm_async_task(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_task_t *ievent = (isc__netievent_task_t *)ev0;
	isc_result_t result;

	UNUSED(worker);

	result = isc_task_run(ievent->task);

	switch (result) {
	case ISC_R_QUOTA:
		isc_task_ready(ievent->task);
		return;
	case ISC_R_SUCCESS:
		return;
	default:
		INSIST(0);
		ISC_UNREACHABLE();
	}
}

static void
wait_for_priority_queue(isc__networker_t *worker) {
	isc_condition_t *cond = &worker->cond_prio;
	bool wait_for_work = true;

	while (true) {
		isc__netievent_t *ievent;
		LOCK(&worker->lock);
		ievent = DEQUEUE_PRIORITY_NETIEVENT(worker);
		if (wait_for_work) {
			while (ievent == NULL) {
				WAIT(cond, &worker->lock);
				ievent = DEQUEUE_PRIORITY_NETIEVENT(worker);
			}
		}
		UNLOCK(&worker->lock);
		wait_for_work = false;

		if (ievent == NULL) {
			return;
		}
		DECREMENT_PRIORITY_NETIEVENT(worker);

		(void)process_netievent(worker, ievent);
	}
}

static void
drain_queue(isc__networker_t *worker, netievent_type_t type) {
	while (process_queue(worker, type) != ISC_R_EMPTY) {
		;
	}
}

/*
 * The two macros here generate the individual cases for the
 * process_netievent() function. The NETIEVENT_CASE(type) macro is the
 * common case, and NETIEVENT_CASE_NOMORE(type) is a macro that causes
 * the loop in process_queue() to stop, i.e. it is only used for the
 * netievents that stop or pause the processing of enqueued netievents.
 */
#define NETIEVENT_CASE(type)                                               \
	case netievent_##type: {                                           \
		isc__nm_async_##type(worker, ievent);                      \
		isc__nm_put_netievent_##type(                              \
			worker->mgr, (isc__netievent_##type##_t *)ievent); \
		return (true);                                             \
	}

#define NETIEVENT_CASE_NOMORE(type)                                \
	case netievent_##type: {                                   \
		isc__nm_async_##type(worker, ievent);              \
		isc__nm_put_netievent_##type(worker->mgr, ievent); \
		return (false);                                    \
	}

static bool
process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) {
	REQUIRE(worker->id == isc_nm_tid());

	switch (ievent->type) {
	/* Don't process more ievents when we are stopping */
	NETIEVENT_CASE_NOMORE(stop);

	NETIEVENT_CASE(privilegedtask);
	NETIEVENT_CASE(task);

	NETIEVENT_CASE(udpconnect);
	NETIEVENT_CASE(udplisten);
	NETIEVENT_CASE(udpstop);
	NETIEVENT_CASE(udpsend);
	NETIEVENT_CASE(udpread);
	NETIEVENT_CASE(udpcancel);
	NETIEVENT_CASE(udpclose);

	NETIEVENT_CASE(tcpaccept);
	NETIEVENT_CASE(tcpconnect);
	NETIEVENT_CASE(tcplisten);
	NETIEVENT_CASE(tcpstartread);
	NETIEVENT_CASE(tcppauseread);
	NETIEVENT_CASE(tcpsend);
	NETIEVENT_CASE(tcpstop);
	NETIEVENT_CASE(tcpcancel);
	NETIEVENT_CASE(tcpclose);

	NETIEVENT_CASE(tcpdnsaccept);
	NETIEVENT_CASE(tcpdnslisten);
	NETIEVENT_CASE(tcpdnsconnect);
	NETIEVENT_CASE(tcpdnssend);
	NETIEVENT_CASE(tcpdnscancel);
	NETIEVENT_CASE(tcpdnsclose);
	NETIEVENT_CASE(tcpdnsread);
	NETIEVENT_CASE(tcpdnsstop);

	NETIEVENT_CASE(tlsdnscycle);
	NETIEVENT_CASE(tlsdnsaccept);
	NETIEVENT_CASE(tlsdnslisten);
	NETIEVENT_CASE(tlsdnsconnect);
	NETIEVENT_CASE(tlsdnssend);
	NETIEVENT_CASE(tlsdnscancel);
	NETIEVENT_CASE(tlsdnsclose);
	NETIEVENT_CASE(tlsdnsread);
	NETIEVENT_CASE(tlsdnsstop);
	NETIEVENT_CASE(tlsdnsshutdown);

#if HAVE_LIBNGHTTP2
	NETIEVENT_CASE(tlsstartread);
	NETIEVENT_CASE(tlssend);
	NETIEVENT_CASE(tlsclose);
	NETIEVENT_CASE(tlsdobio);
	NETIEVENT_CASE(tlscancel);

	NETIEVENT_CASE(httpstop);
	NETIEVENT_CASE(httpsend);
	NETIEVENT_CASE(httpclose);
#endif

	NETIEVENT_CASE(connectcb);
	NETIEVENT_CASE(readcb);
	NETIEVENT_CASE(sendcb);

	NETIEVENT_CASE(close);
	NETIEVENT_CASE(detach);

	NETIEVENT_CASE(shutdown);
	NETIEVENT_CASE(resume);
	NETIEVENT_CASE_NOMORE(pause);
	default:
		INSIST(0);
		ISC_UNREACHABLE();
	}
	return (true);
}

static isc_result_t
process_queue(isc__networker_t *worker, netievent_type_t type) {
	/*
	 * The event counter is only loosely synchronized with the queue
	 * itself. It is guaranteed that an item on the queue is always
	 * accounted for, but the counter may be higher than the number
	 * of items actually stored on the queue.
	 */
	uint_fast32_t waiting = atomic_load_acquire(&worker->nievents[type]);
	isc__netievent_t *ievent = DEQUEUE_NETIEVENT(worker, type);

	if (ievent == NULL && waiting == 0) {
		/* There's nothing scheduled */
		return (ISC_R_EMPTY);
	} else if (ievent == NULL) {
		/*
		 * There's at least one item scheduled, but it's not on
		 * the queue yet.
		 */
		return (ISC_R_SUCCESS);
	}

	while (ievent != NULL) {
		DECREMENT_NETIEVENT(worker, type);
		bool stop = !process_netievent(worker, ievent);

		if (stop) {
			/* Netievent told us to stop */
			return (ISC_R_SUSPEND);
		}

		if (waiting-- == 0) {
			/* We reached this round's "quota" */
			break;
		}

		ievent = DEQUEUE_NETIEVENT(worker, type);
	}

	/* We processed at least one */
	return (ISC_R_SUCCESS);
}
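
/*
 * Note that each pass through process_queue() handles at most the
 * number of events that were already accounted for when it started
 * ('waiting'), so events that continually re-enqueue themselves cannot
 * monopolize a worker: the leftovers are picked up on the next
 * async_cb() round.
 */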

void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) {
	isc__netievent_storage_t *event = isc_mem_get(mgr->mctx,
						      sizeof(*event));

	*event = (isc__netievent_storage_t){ .ni.type = type };
	return (event);
}

void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) {
	isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t));
}

NETIEVENT_SOCKET_DEF(tcpclose);
NETIEVENT_SOCKET_DEF(tcplisten);
NETIEVENT_SOCKET_DEF(tcppauseread);
NETIEVENT_SOCKET_DEF(tcpstartread);
NETIEVENT_SOCKET_DEF(tcpstop);
NETIEVENT_SOCKET_DEF(tlsclose);
NETIEVENT_SOCKET_DEF(tlsconnect);
NETIEVENT_SOCKET_DEF(tlsdobio);
NETIEVENT_SOCKET_DEF(tlsstartread);
NETIEVENT_SOCKET_HANDLE_DEF(tlscancel);
NETIEVENT_SOCKET_DEF(udpclose);
NETIEVENT_SOCKET_DEF(udplisten);
NETIEVENT_SOCKET_DEF(udpread);
NETIEVENT_SOCKET_DEF(udpsend);
NETIEVENT_SOCKET_DEF(udpstop);

NETIEVENT_SOCKET_DEF(tcpdnsclose);
NETIEVENT_SOCKET_DEF(tcpdnsread);
NETIEVENT_SOCKET_DEF(tcpdnsstop);
NETIEVENT_SOCKET_DEF(tcpdnslisten);
NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect);
NETIEVENT_SOCKET_REQ_DEF(tcpdnssend);
NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel);
NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept);

NETIEVENT_SOCKET_DEF(tlsdnsclose);
NETIEVENT_SOCKET_DEF(tlsdnsread);
NETIEVENT_SOCKET_DEF(tlsdnsstop);
NETIEVENT_SOCKET_DEF(tlsdnslisten);
NETIEVENT_SOCKET_REQ_DEF(tlsdnsconnect);
NETIEVENT_SOCKET_REQ_DEF(tlsdnssend);
NETIEVENT_SOCKET_HANDLE_DEF(tlsdnscancel);
NETIEVENT_SOCKET_QUOTA_DEF(tlsdnsaccept);
NETIEVENT_SOCKET_DEF(tlsdnscycle);
NETIEVENT_SOCKET_DEF(tlsdnsshutdown);

NETIEVENT_SOCKET_DEF(httpstop);
NETIEVENT_SOCKET_REQ_DEF(httpsend);
NETIEVENT_SOCKET_DEF(httpclose);

NETIEVENT_SOCKET_REQ_DEF(tcpconnect);
NETIEVENT_SOCKET_REQ_DEF(tcpsend);
NETIEVENT_SOCKET_REQ_DEF(tlssend);
NETIEVENT_SOCKET_REQ_DEF(udpconnect);
NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb);
NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb);
NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb);

NETIEVENT_SOCKET_DEF(detach);
NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel);
NETIEVENT_SOCKET_HANDLE_DEF(udpcancel);

NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept);

NETIEVENT_SOCKET_DEF(close);
NETIEVENT_DEF(pause);
NETIEVENT_DEF(resume);
NETIEVENT_DEF(shutdown);
NETIEVENT_DEF(stop);

NETIEVENT_TASK_DEF(task);
NETIEVENT_TASK_DEF(privilegedtask);

void
isc__nm_maybe_enqueue_ievent(isc__networker_t *worker,
			     isc__netievent_t *event) {
	/*
	 * If we are already in the matching nmthread, process the ievent
	 * directly.
	 */
	if (worker->id == isc_nm_tid()) {
		process_netievent(worker, event);
		return;
	}

	isc__nm_enqueue_ievent(worker, event);
}

void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
	if (event->type > netievent_prio) {
		/*
		 * We need to make sure this signal will be delivered and
		 * the queue will be processed.
		 */
		LOCK(&worker->lock);
		INCREMENT_PRIORITY_NETIEVENT(worker);
		ENQUEUE_PRIORITY_NETIEVENT(worker, event);
		SIGNAL(&worker->cond_prio);
		UNLOCK(&worker->lock);
	} else if (event->type == netievent_privilegedtask) {
		INCREMENT_PRIVILEGED_NETIEVENT(worker);
		ENQUEUE_PRIVILEGED_NETIEVENT(worker, event);
	} else if (event->type == netievent_task) {
		INCREMENT_TASK_NETIEVENT(worker);
		ENQUEUE_TASK_NETIEVENT(worker, event);
	} else {
		INCREMENT_NORMAL_NETIEVENT(worker);
		ENQUEUE_NORMAL_NETIEVENT(worker, event);
	}
	uv_async_send(&worker->async);
}
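
/*
 * Typical usage, as in isc__netmgr_shutdown() above: allocate a
 * type-specific event, fill in any additional fields, and hand it to
 * the owning worker:
 *
 *	isc__netievent_t *event = isc__nm_get_netievent_shutdown(mgr);
 *	isc__nm_enqueue_ievent(&mgr->workers[i], event);
 */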

bool
isc__nmsocket_active(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	if (sock->parent != NULL) {
		return (atomic_load(&sock->parent->active));
	}

	return (atomic_load(&sock->active));
}

bool
isc__nmsocket_deactivate(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));

	if (sock->parent != NULL) {
		return (atomic_compare_exchange_strong(&sock->parent->active,
						       &(bool){ true }, false));
	}

	return (atomic_compare_exchange_strong(&sock->active, &(bool){ true },
					       false));
}

void
isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(target != NULL && *target == NULL);

	isc_nmsocket_t *rsock = NULL;

	if (sock->parent != NULL) {
		rsock = sock->parent;
		INSIST(rsock->parent == NULL); /* sanity check */
	} else {
		rsock = sock;
	}

	NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %" PRIuFAST32
			 "\n",
			 rsock, isc_refcount_current(&rsock->references) + 1);

	isc_refcount_increment0(&rsock->references);

	*target = sock;
}

/*
 * Free all resources inside a socket (including its children if any).
 */
static void
nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) {
	isc_nmhandle_t *handle = NULL;
	isc__nm_uvreq_t *uvreq = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(!isc__nmsocket_active(sock));

	NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %" PRIuFAST32
			 "\n",
			 sock, isc_refcount_current(&sock->references));

	atomic_store(&sock->destroying, true);

	if (sock->parent == NULL && sock->children != NULL) {
		/*
		 * We shouldn't be here unless there are no active handles,
		 * so we can clean up and free the children.
		 */
		for (size_t i = 0; i < sock->nchildren; i++) {
			if (!atomic_load(&sock->children[i].destroying)) {
				nmsocket_cleanup(&sock->children[i],
						 false FLARG_PASS);
			}
		}

		/*
		 * This was a parent socket: destroy the listening
		 * barriers that synchronized the children.
		 */
		isc_barrier_destroy(&sock->startlistening);
		isc_barrier_destroy(&sock->stoplistening);

		/*
		 * Now free them.
		 */
		isc_mem_put(sock->mgr->mctx, sock->children,
			    sock->nchildren * sizeof(*sock));
		sock->children = NULL;
		sock->nchildren = 0;
	}
	if (sock->statsindex != NULL) {
		isc__nm_decstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
	}

	sock->statichandle = NULL;

	if (sock->outerhandle != NULL) {
		isc__nmhandle_detach(&sock->outerhandle FLARG_PASS);
	}

	if (sock->outer != NULL) {
		isc___nmsocket_detach(&sock->outer FLARG_PASS);
	}

	while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) {
		nmhandle_free(sock, handle);
	}

	if (sock->buf != NULL) {
		isc_mem_put(sock->mgr->mctx, sock->buf, sock->buf_size);
	}

	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	sock->pquota = NULL;

	isc_astack_destroy(sock->inactivehandles);

	while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
		isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq));
	}

	isc_astack_destroy(sock->inactivereqs);
	sock->magic = 0;

	isc_mem_put(sock->mgr->mctx, sock->ah_frees,
		    sock->ah_size * sizeof(sock->ah_frees[0]));
	isc_mem_put(sock->mgr->mctx, sock->ah_handles,
		    sock->ah_size * sizeof(sock->ah_handles[0]));
	isc_mutex_destroy(&sock->lock);
	isc_condition_destroy(&sock->scond);
#if HAVE_LIBNGHTTP2
	isc__nm_tls_cleanup_data(sock);
	isc__nm_http_cleanup_data(sock);
#endif
#ifdef NETMGR_TRACE
	LOCK(&sock->mgr->lock);
	ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link);
	UNLOCK(&sock->mgr->lock);
#endif
	if (dofree) {
		isc_nm_t *mgr = sock->mgr;
		isc_mem_put(mgr->mctx, sock, sizeof(*sock));
		isc_nm_detach(&mgr);
	} else {
		isc_nm_detach(&sock->mgr);
	}
}

static void
nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) {
	int active_handles;
	bool destroy = false;

	NETMGR_TRACE_LOG("%s():%p->references = %" PRIuFAST32 "\n", __func__,
			 sock, isc_refcount_current(&sock->references));

	if (sock->parent != NULL) {
		/*
		 * This is a child socket and cannot be destroyed except
		 * as a side effect of destroying the parent, so let's go
		 * see if the parent is ready to be destroyed.
		 */
		nmsocket_maybe_destroy(sock->parent FLARG_PASS);
		return;
	}

	/*
	 * This is a parent socket (or a standalone). See whether the
	 * children have active handles before deciding whether to
	 * accept destruction.
	 */
	LOCK(&sock->lock);
	if (atomic_load(&sock->active) || atomic_load(&sock->destroying) ||
	    !atomic_load(&sock->closed) || atomic_load(&sock->references) != 0)
	{
		UNLOCK(&sock->lock);
		return;
	}

	active_handles = atomic_load(&sock->ah);
	if (sock->children != NULL) {
		for (size_t i = 0; i < sock->nchildren; i++) {
			LOCK(&sock->children[i].lock);
			active_handles += atomic_load(&sock->children[i].ah);
			UNLOCK(&sock->children[i].lock);
		}
	}

	if (active_handles == 0 || sock->statichandle != NULL) {
		destroy = true;
	}

	NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n",
			 __func__, sock, active_handles, sock->statichandle);

	if (destroy) {
		atomic_store(&sock->destroying, true);
		UNLOCK(&sock->lock);
		nmsocket_cleanup(sock, true FLARG_PASS);
	} else {
		UNLOCK(&sock->lock);
	}
}

void
isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) {
	REQUIRE(sock->parent == NULL);

	NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = "
			 "%" PRIuFAST32 "\n",
			 sock, isc_refcount_current(&sock->references));

	/*
	 * The final external reference to the socket is gone. We can try
	 * destroying the socket, but we have to wait for all the inflight
	 * handles to finish first.
	 */
	atomic_store(&sock->active, false);

	/*
	 * If the socket has children, they'll need to be marked inactive
	 * so they can be cleaned up too.
	 */
	if (sock->children != NULL) {
		for (size_t i = 0; i < sock->nchildren; i++) {
			atomic_store(&sock->children[i].active, false);
		}
	}

	/*
	 * If we're here then we already stopped listening; otherwise
	 * we'd have a hanging reference from the listening process.
	 *
	 * If it's a regular socket we may need to close it.
	 */
	if (!atomic_load(&sock->closed)) {
		switch (sock->type) {
		case isc_nm_udpsocket:
			isc__nm_udp_close(sock);
			return;
		case isc_nm_tcpsocket:
			isc__nm_tcp_close(sock);
			return;
		case isc_nm_tcpdnssocket:
			isc__nm_tcpdns_close(sock);
			return;
		case isc_nm_tlsdnssocket:
			isc__nm_tlsdns_close(sock);
			return;
#if HAVE_LIBNGHTTP2
		case isc_nm_tlssocket:
			isc__nm_tls_close(sock);
			break;
		case isc_nm_httpsocket:
			isc__nm_http_close(sock);
			return;
#endif
		default:
			break;
		}
	}

	nmsocket_maybe_destroy(sock FLARG_PASS);
}

void
isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) {
	REQUIRE(sockp != NULL && *sockp != NULL);
	REQUIRE(VALID_NMSOCK(*sockp));

	isc_nmsocket_t *sock = *sockp, *rsock = NULL;
	*sockp = NULL;

	/*
	 * If the socket is a part of a set (a child socket) we are
	 * counting references for the whole set at the parent.
	 */
	if (sock->parent != NULL) {
		rsock = sock->parent;
		INSIST(rsock->parent == NULL); /* Sanity check */
	} else {
		rsock = sock;
	}

	NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %" PRIuFAST32
			 "\n",
			 rsock, isc_refcount_current(&rsock->references) - 1);

	if (isc_refcount_decrement(&rsock->references) == 1) {
		isc___nmsocket_prep_destroy(rsock FLARG_PASS);
	}
}

void
isc_nmsocket_close(isc_nmsocket_t **sockp) {
	REQUIRE(sockp != NULL);
	REQUIRE(VALID_NMSOCK(*sockp));
	REQUIRE((*sockp)->type == isc_nm_udplistener ||
		(*sockp)->type == isc_nm_tcplistener ||
		(*sockp)->type == isc_nm_tcpdnslistener ||
		(*sockp)->type == isc_nm_tlsdnslistener ||
		(*sockp)->type == isc_nm_tlslistener ||
		(*sockp)->type == isc_nm_httplistener);

	isc__nmsocket_detach(sockp);
}

void
isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
		    isc_sockaddr_t *iface FLARG) {
	uint16_t family;

	REQUIRE(sock != NULL);
	REQUIRE(mgr != NULL);
	REQUIRE(iface != NULL);

	family = iface->type.sa.sa_family;

	*sock = (isc_nmsocket_t){ .type = type,
				  .iface = *iface,
				  .fd = -1,
				  .ah_size = 32,
				  .inactivehandles = isc_astack_new(
					  mgr->mctx, ISC_NM_HANDLES_STACK_SIZE),
				  .inactivereqs = isc_astack_new(
					  mgr->mctx, ISC_NM_REQS_STACK_SIZE) };

#if NETMGR_TRACE
	sock->backtrace_size = isc_backtrace(sock->backtrace, TRACE_SIZE);
	ISC_LINK_INIT(sock, active_link);
	ISC_LIST_INIT(sock->active_handles);
	LOCK(&mgr->lock);
	ISC_LIST_APPEND(mgr->active_sockets, sock, active_link);
	UNLOCK(&mgr->lock);
#endif

	isc_nm_attach(mgr, &sock->mgr);
	sock->uv_handle.handle.data = sock;

	sock->ah_frees = isc_mem_get(mgr->mctx,
				     sock->ah_size * sizeof(sock->ah_frees[0]));
	sock->ah_handles = isc_mem_get(
		mgr->mctx, sock->ah_size * sizeof(sock->ah_handles[0]));
	ISC_LINK_INIT(&sock->quotacb, link);
	for (size_t i = 0; i < 32; i++) {
		sock->ah_frees[i] = i;
		sock->ah_handles[i] = NULL;
	}

	switch (type) {
	case isc_nm_udpsocket:
	case isc_nm_udplistener:
		if (family == AF_INET) {
			sock->statsindex = udp4statsindex;
		} else {
			sock->statsindex = udp6statsindex;
		}
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
		break;
	case isc_nm_tcpsocket:
	case isc_nm_tcplistener:
	case isc_nm_tcpdnssocket:
	case isc_nm_tcpdnslistener:
	case isc_nm_tlsdnssocket:
	case isc_nm_tlsdnslistener:
	case isc_nm_httpsocket:
	case isc_nm_httplistener:
		if (family == AF_INET) {
			sock->statsindex = tcp4statsindex;
		} else {
			sock->statsindex = tcp6statsindex;
		}
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
		break;
	default:
		break;
	}

	isc_mutex_init(&sock->lock);
	isc_condition_init(&sock->cond);
	isc_condition_init(&sock->scond);
	isc_refcount_init(&sock->references, 1);

#if HAVE_LIBNGHTTP2
	memset(&sock->tlsstream, 0, sizeof(sock->tlsstream));
#endif /* HAVE_LIBNGHTTP2 */

	NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %" PRIuFAST32
			 "\n",
			 sock, isc_refcount_current(&sock->references));

	atomic_init(&sock->active, true);
	atomic_init(&sock->sequential, false);
	atomic_init(&sock->readpaused, false);
	atomic_init(&sock->closing, false);
	atomic_init(&sock->listening, 0);
	atomic_init(&sock->closed, 0);
	atomic_init(&sock->destroying, 0);
	atomic_init(&sock->ah, 0);
	atomic_init(&sock->client, 0);
	atomic_init(&sock->connecting, false);
	atomic_init(&sock->keepalive, false);
	atomic_init(&sock->connected, false);

	atomic_init(&sock->active_child_connections, 0);

#if HAVE_LIBNGHTTP2
	isc__nm_http_initsocket(sock);
#endif

	sock->magic = NMSOCK_MAGIC;
}

void
isc__nmsocket_clearcb(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(!isc__nm_in_netthread() || sock->tid == isc_nm_tid());

	sock->recv_cb = NULL;
	sock->recv_cbarg = NULL;
	sock->accept_cb = NULL;
	sock->accept_cbarg = NULL;
	sock->connect_cb = NULL;
	sock->connect_cbarg = NULL;
}

void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
	isc__networker_t *worker = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	if (buf->base == NULL) {
		/* Empty buffer: might happen in case of error. */
		return;
	}
	worker = &sock->mgr->workers[sock->tid];

	REQUIRE(worker->recvbuf_inuse);
	if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
	    buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
	{
		/* Can happen in case of out-of-order recvmmsg in libuv1.36 */
		return;
	}
	REQUIRE(buf->base == worker->recvbuf);
	worker->recvbuf_inuse = false;
}

static isc_nmhandle_t *
alloc_handle(isc_nmsocket_t *sock) {
	isc_nmhandle_t *handle =
		isc_mem_get(sock->mgr->mctx,
			    sizeof(isc_nmhandle_t) + sock->extrahandlesize);

	*handle = (isc_nmhandle_t){ .magic = NMHANDLE_MAGIC };
#ifdef NETMGR_TRACE
	ISC_LINK_INIT(handle, active_link);
#endif
	isc_refcount_init(&handle->references, 1);

	return (handle);
}

isc_nmhandle_t *
isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
		   isc_sockaddr_t *local FLARG) {
	isc_nmhandle_t *handle = NULL;
	size_t handlenum;
	int pos;

	REQUIRE(VALID_NMSOCK(sock));

	handle = isc_astack_pop(sock->inactivehandles);

	if (handle == NULL) {
		handle = alloc_handle(sock);
	} else {
		isc_refcount_init(&handle->references, 1);
		INSIST(VALID_NMHANDLE(handle));
	}

	NETMGR_TRACE_LOG(
		"isc__nmhandle_get():handle %p->references = %" PRIuFAST32 "\n",
		handle, isc_refcount_current(&handle->references));

	isc___nmsocket_attach(sock, &handle->sock FLARG_PASS);

#if NETMGR_TRACE
	handle->backtrace_size = isc_backtrace(handle->backtrace, TRACE_SIZE);
#endif

	if (peer != NULL) {
		handle->peer = *peer;
	} else {
		handle->peer = sock->peer;
	}

	if (local != NULL) {
		handle->local = *local;
	} else {
		handle->local = sock->iface;
	}

	LOCK(&sock->lock);
	/* We need to add this handle to the list of active handles */
	if ((size_t)atomic_load(&sock->ah) == sock->ah_size) {
		sock->ah_frees = isc_mem_reget(
			sock->mgr->mctx, sock->ah_frees,
			sock->ah_size * sizeof(sock->ah_frees[0]),
			sock->ah_size * 2 * sizeof(sock->ah_frees[0]));
		sock->ah_handles = isc_mem_reget(
			sock->mgr->mctx, sock->ah_handles,
			sock->ah_size * sizeof(sock->ah_handles[0]),
			sock->ah_size * 2 * sizeof(sock->ah_handles[0]));

		for (size_t i = sock->ah_size; i < sock->ah_size * 2; i++) {
			sock->ah_frees[i] = i;
			sock->ah_handles[i] = NULL;
		}

		sock->ah_size *= 2;
	}

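	/*
	 * ah_frees[] acts as a stack of free slot indices: the number
	 * of active handles doubles as the stack pointer, so the lookup
	 * below pops an unused slot in ah_handles[], and
	 * nmhandle_deactivate() pushes the slot back when the handle
	 * goes away.
	 */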
	handlenum = atomic_fetch_add(&sock->ah, 1);
	pos = sock->ah_frees[handlenum];

	INSIST(sock->ah_handles[pos] == NULL);
	sock->ah_handles[pos] = handle;
	handle->ah_pos = pos;
#ifdef NETMGR_TRACE
	ISC_LIST_APPEND(sock->active_handles, handle, active_link);
#endif
	UNLOCK(&sock->lock);

	switch (sock->type) {
	case isc_nm_udpsocket:
	case isc_nm_tcpdnssocket:
	case isc_nm_tlsdnssocket:
		if (!atomic_load(&sock->client)) {
			break;
		}
		/* fallthrough */
	case isc_nm_tcpsocket:
	case isc_nm_tlssocket:
		INSIST(sock->statichandle == NULL);

		/*
		 * statichandle must be assigned, not attached;
		 * otherwise, if a handle was detached elsewhere
		 * it could never reach 0 references, and the
		 * handle and socket would never be freed.
		 */
		sock->statichandle = handle;
		break;
	default:
		break;
	}

#if HAVE_LIBNGHTTP2
	if (sock->type == isc_nm_httpsocket && sock->h2.session) {
		isc__nm_httpsession_attach(sock->h2.session,
					   &handle->httpsession);
	}
#endif

	return (handle);
}

void
isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(handlep != NULL && *handlep == NULL);

	NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = "
			 "%" PRIuFAST32 "\n",
			 handle, isc_refcount_current(&handle->references) + 1);

	isc_refcount_increment(&handle->references);
	*handlep = handle;
}

bool
isc_nmhandle_is_stream(isc_nmhandle_t *handle) {
	REQUIRE(VALID_NMHANDLE(handle));

	return (handle->sock->type == isc_nm_tcpsocket ||
		handle->sock->type == isc_nm_tcpdnssocket ||
		handle->sock->type == isc_nm_tlssocket ||
		handle->sock->type == isc_nm_tlsdnssocket ||
		handle->sock->type == isc_nm_httpsocket);
}

static void
nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
	size_t extra = sock->extrahandlesize;

	isc_refcount_destroy(&handle->references);

	if (handle->dofree != NULL) {
		handle->dofree(handle->opaque);
	}

	*handle = (isc_nmhandle_t){ .magic = 0 };

	isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra);
}

static void
nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
	size_t handlenum;
	bool reuse = false;

	/*
	 * We do all of this under lock to avoid races with socket
	 * destruction. We have to do this now, because at this point the
	 * socket is either unused or still attached to event->sock.
	 */
	LOCK(&sock->lock);

	INSIST(sock->ah_handles[handle->ah_pos] == handle);
	INSIST(sock->ah_size > handle->ah_pos);
	INSIST(atomic_load(&sock->ah) > 0);

#ifdef NETMGR_TRACE
	ISC_LIST_UNLINK(sock->active_handles, handle, active_link);
#endif

	sock->ah_handles[handle->ah_pos] = NULL;
	handlenum = atomic_fetch_sub(&sock->ah, 1) - 1;
	sock->ah_frees[handlenum] = handle->ah_pos;
	handle->ah_pos = 0;
	if (atomic_load(&sock->active)) {
		reuse = isc_astack_trypush(sock->inactivehandles, handle);
	}
	if (!reuse) {
		nmhandle_free(sock, handle);
	}
	UNLOCK(&sock->lock);
}

void
isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) {
	isc_nmsocket_t *sock = NULL;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(handlep != NULL);
	REQUIRE(VALID_NMHANDLE(*handlep));

	handle = *handlep;
	*handlep = NULL;

	sock = handle->sock;
	if (sock->tid == isc_nm_tid()) {
		nmhandle_detach_cb(&handle FLARG_PASS);
	} else {
		isc__netievent_detach_t *event =
			isc__nm_get_netievent_detach(sock->mgr, sock);
		/*
		 * We use an implicit attach here: the final reference
		 * needs to be destroyed explicitly in the async
		 * callback.
		 */
1798 event->handle = handle;
1799 FLARG_IEVENT_PASS(event);
1800 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1801 (isc__netievent_t *)event);
1802 }
1803 }
1804
1805 void
1806 isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1807
1808 static void
nmhandle_detach_cb(isc_nmhandle_t ** handlep FLARG)1809 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) {
1810 isc_nmsocket_t *sock = NULL;
1811 isc_nmhandle_t *handle = NULL;
1812
1813 REQUIRE(handlep != NULL);
1814 REQUIRE(VALID_NMHANDLE(*handlep));
1815
1816 handle = *handlep;
1817 *handlep = NULL;
1818
1819 NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %" PRIuFAST32
1820 "\n",
1821 handle, isc_refcount_current(&handle->references) - 1);
1822
1823 if (isc_refcount_decrement(&handle->references) > 1) {
1824 return;
1825 }
1826
1827 /* We need an acquire memory barrier here */
1828 (void)isc_refcount_current(&handle->references);
1829
1830 sock = handle->sock;
1831 handle->sock = NULL;
1832
1833 if (handle->doreset != NULL) {
1834 handle->doreset(handle->opaque);
1835 }
1836
1837 #if HAVE_LIBNGHTTP2
1838 if (sock->type == isc_nm_httpsocket && handle->httpsession != NULL) {
1839 isc__nm_httpsession_detach(&handle->httpsession);
1840 }
1841 #endif
1842
1843 nmhandle_deactivate(sock, handle);
1844
1845 /*
1846 * The handle is gone now. If the socket has a callback configured
1847 * for that (e.g., to perform cleanup after request processing),
1848 * call it now, or schedule it to run asynchronously.
1849 */
1850 if (sock->closehandle_cb != NULL) {
1851 if (sock->tid == isc_nm_tid()) {
1852 sock->closehandle_cb(sock);
1853 } else {
1854 isc__netievent_close_t *event =
1855 isc__nm_get_netievent_close(sock->mgr, sock);
1856 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1857 (isc__netievent_t *)event);
1858 }
1859 }
1860
1861 if (handle == sock->statichandle) {
1862 /* statichandle is assigned, not attached. */
1863 sock->statichandle = NULL;
1864 }
1865
1866 isc___nmsocket_detach(&sock FLARG_PASS);
1867 }
1868
1869 void *
1870 isc_nmhandle_getdata(isc_nmhandle_t *handle) {
1871 REQUIRE(VALID_NMHANDLE(handle));
1872
1873 return (handle->opaque);
1874 }
1875
1876 void
1877 isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg,
1878 isc_nm_opaquecb_t doreset, isc_nm_opaquecb_t dofree) {
1879 REQUIRE(VALID_NMHANDLE(handle));
1880
1881 handle->opaque = arg;
1882 handle->doreset = doreset;
1883 handle->dofree = dofree;
1884 }
1885
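/*
 * Ensure that sock->buf can hold 'len' bytes: allocate a
 * regular-sized buffer on first use, or grow an existing (too small)
 * buffer to the big-buffer size.
 */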
1886 void
1887 isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
1888 REQUIRE(len <= NM_BIG_BUF);
1889
1890 if (sock->buf == NULL) {
1891 /* We don't have the buffer at all */
1892 size_t alloc_len = len < NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF;
1893 sock->buf = isc_mem_get(sock->mgr->mctx, alloc_len);
1894 sock->buf_size = alloc_len;
1895 } else {
1896 /* We have the buffer but it's too small */
1897 sock->buf = isc_mem_reget(sock->mgr->mctx, sock->buf,
1898 sock->buf_size, NM_BIG_BUF);
1899 sock->buf_size = NM_BIG_BUF;
1900 }
1901 }
1902
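/*
 * Complete a failed send: run the send callback with the error result
 * if one is set; otherwise just release the UV request.
 */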
1903 void
1904 isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1905 isc_result_t eresult) {
1906 REQUIRE(VALID_NMSOCK(sock));
1907 REQUIRE(VALID_UVREQ(req));
1908
1909 if (req->cb.send != NULL) {
1910 isc__nm_sendcb(sock, req, eresult, true);
1911 } else {
1912 isc__nm_uvreq_put(&req, sock);
1913 }
1914 }
1915
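/*
 * Complete a failed accept on a server socket: release the connection
 * quota early, detach from the listening socket, and log the error
 * unless the client simply disconnected before we could accept.
 */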
1916 void
1917 isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
1918 REQUIRE(atomic_load(&sock->accepting));
1919 REQUIRE(sock->server);
1920
1921 /*
1922 * Detach the quota early to make room for other connections;
1923 * otherwise it'd be detached later asynchronously, and clog
1924 * the quota unnecessarily.
1925 */
1926 if (sock->quota != NULL) {
1927 isc_quota_detach(&sock->quota);
1928 }
1929
1930 isc__nmsocket_detach(&sock->server);
1931
1932 atomic_store(&sock->accepting, false);
1933
1934 switch (eresult) {
1935 case ISC_R_NOTCONNECTED:
1936 /* IGNORE: The client disconnected before we could accept */
1937 break;
1938 default:
1939 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1940 ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
1941 "Accepting TCP connection failed: %s",
1942 isc_result_totext(eresult));
1943 }
1944 }
1945
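/*
 * Complete a failed connect: stop the connect timer, clear the
 * "connecting" flag, run the connect callback with the error result,
 * and schedule the socket for destruction.
 */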
1946 void
1947 isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1948 isc_result_t eresult, bool async) {
1949 REQUIRE(VALID_NMSOCK(sock));
1950 REQUIRE(VALID_UVREQ(req));
1951 REQUIRE(sock->tid == isc_nm_tid());
1952 REQUIRE(req->cb.connect != NULL);
1953
1954 isc__nmsocket_timer_stop(sock);
1955 uv_handle_set_data((uv_handle_t *)&sock->timer, sock);
1956
1957 INSIST(atomic_compare_exchange_strong(&sock->connecting,
1958 &(bool){ true }, false));
1959
1960 isc__nmsocket_clearcb(sock);
1961 isc__nm_connectcb(sock, req, eresult, async);
1962
1963 isc__nmsocket_prep_destroy(sock);
1964 }
1965
1966 void
1967 isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async) {
1968 REQUIRE(VALID_NMSOCK(sock));
1969 switch (sock->type) {
1970 case isc_nm_udpsocket:
1971 isc__nm_udp_failed_read_cb(sock, result);
1972 return;
1973 case isc_nm_tcpsocket:
1974 isc__nm_tcp_failed_read_cb(sock, result);
1975 return;
1976 case isc_nm_tcpdnssocket:
1977 isc__nm_tcpdns_failed_read_cb(sock, result);
1978 return;
1979 case isc_nm_tlsdnssocket:
1980 isc__nm_tlsdns_failed_read_cb(sock, result, async);
1981 return;
1982 default:
1983 INSIST(0);
1984 ISC_UNREACHABLE();
1985 }
1986 }
1987
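/*
 * Connect-timeout handler: reports ISC_R_TIMEDOUT to the connect
 * callback and, unless the callback restarted the timer, clears the
 * "connecting" state and shuts the socket down.
 */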
1988 void
1989 isc__nmsocket_connecttimeout_cb(uv_timer_t *timer) {
1990 uv_connect_t *uvreq = uv_handle_get_data((uv_handle_t *)timer);
1991 isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
1992 isc__nm_uvreq_t *req = uv_handle_get_data((uv_handle_t *)uvreq);
1993
1994 REQUIRE(VALID_NMSOCK(sock));
1995 REQUIRE(sock->tid == isc_nm_tid());
1996 REQUIRE(atomic_load(&sock->connecting));
1997 REQUIRE(VALID_UVREQ(req));
1998 REQUIRE(VALID_NMHANDLE(req->handle));
1999
2000 isc__nmsocket_timer_stop(sock);
2001
2002 if (sock->tls.pending_req != NULL) {
2003 REQUIRE(req == sock->tls.pending_req);
2004 sock->tls.pending_req = NULL;
2005 }
2006
2007 /* Call the connect callback directly */
2008
2009 req->cb.connect(req->handle, ISC_R_TIMEDOUT, req->cbarg);
2010
2011 /* The timer is not running; clean up and shut down everything */
2012 if (!isc__nmsocket_timer_running(sock)) {
2013 INSIST(atomic_compare_exchange_strong(&sock->connecting,
2014 &(bool){ true }, false));
2015 isc__nm_uvreq_put(&req, sock);
2016 isc__nmsocket_clearcb(sock);
2017 isc__nmsocket_shutdown(sock);
2018 }
2019 }
2020
2021 static void
2022 isc__nmsocket_readtimeout_cb(uv_timer_t *timer) {
2023 isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer);
2024
2025 REQUIRE(VALID_NMSOCK(sock));
2026 REQUIRE(sock->tid == isc_nm_tid());
2027 REQUIRE(atomic_load(&sock->reading));
2028
2029 if (atomic_load(&sock->client)) {
2030 uv_timer_stop(timer);
2031
2032 sock->recv_read = false;
2033
2034 if (sock->recv_cb != NULL) {
2035 isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
2036 isc__nm_readcb(sock, req, ISC_R_TIMEDOUT);
2037 }
2038
2039 if (!isc__nmsocket_timer_running(sock)) {
2040 isc__nmsocket_clearcb(sock);
2041 isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
2042 }
2043 } else {
2044 isc__nm_failed_read_cb(sock, ISC_R_TIMEDOUT, false);
2045 }
2046 }
2047
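/*
 * (Re)start the socket timer: use the connect timeout while a
 * connection is being established and the read timeout otherwise; a
 * timeout of zero leaves the timer stopped.
 */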
2048 void
2049 isc__nmsocket_timer_restart(isc_nmsocket_t *sock) {
2050 int r = 0;
2051
2052 REQUIRE(VALID_NMSOCK(sock));
2053
2054 if (atomic_load(&sock->connecting)) {
2055 if (sock->connect_timeout == 0) {
2056 return;
2057 }
2058
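/* Start the timer with 10 ms of slack over the configured timeout. */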
2059 r = uv_timer_start(&sock->timer,
2060 isc__nmsocket_connecttimeout_cb,
2061 sock->connect_timeout + 10, 0);
2062
2063 } else {
2064 if (sock->read_timeout == 0) {
2065 return;
2066 }
2067
2068 r = uv_timer_start(&sock->timer, isc__nmsocket_readtimeout_cb,
2069 sock->read_timeout, 0);
2070 }
2071
2072 RUNTIME_CHECK(r == 0);
2073 }
2074
2075 bool
2076 isc__nmsocket_timer_running(isc_nmsocket_t *sock) {
2077 REQUIRE(VALID_NMSOCK(sock));
2078
2079 return (uv_is_active((uv_handle_t *)&sock->timer));
2080 }
2081
2082 void
2083 isc__nmsocket_timer_start(isc_nmsocket_t *sock) {
2084 REQUIRE(VALID_NMSOCK(sock));
2085
2086 if (isc__nmsocket_timer_running(sock)) {
2087 return;
2088 }
2089
2090 isc__nmsocket_timer_restart(sock);
2091 }
2092
2093 void
2094 isc__nmsocket_timer_stop(isc_nmsocket_t *sock) {
2095 REQUIRE(VALID_NMSOCK(sock));
2096
2097 /* uv_timer_stop() is idempotent, no need to check if running */
2098
2099 int r = uv_timer_stop(&sock->timer);
2100 RUNTIME_CHECK(r == 0);
2101 }
2102
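/*
 * Prepare a UV request for a read callback: attach the socket's
 * static handle for stream and client sockets, or create a new
 * handle from 'sockaddr' for server-side UDP reads.
 */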
2103 isc__nm_uvreq_t *
2104 isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
2105 isc__nm_uvreq_t *req = NULL;
2106
2107 req = isc__nm_uvreq_get(sock->mgr, sock);
2108 req->cb.recv = sock->recv_cb;
2109 req->cbarg = sock->recv_cbarg;
2110
2111 switch (sock->type) {
2112 case isc_nm_tcpsocket:
2113 case isc_nm_tlssocket:
2114 isc_nmhandle_attach(sock->statichandle, &req->handle);
2115 break;
2116 default:
2117 if (atomic_load(&sock->client)) {
2118 isc_nmhandle_attach(sock->statichandle, &req->handle);
2119 } else {
2120 req->handle = isc__nmhandle_get(sock, sockaddr, NULL);
2121 }
2122 break;
2123 }
2124
2125 return (req);
2126 }
2127
2128 /*%<
2129 * Allocator for read operations. Limited to size 2^16.
2130 *
2131 * Note this doesn't actually allocate anything; it just assigns the
2132 * worker's receive buffer to a socket, and marks it as "in use".
2133 */
2134 void
2135 isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
2136 isc_nmsocket_t *sock = uv_handle_get_data(handle);
2137 isc__networker_t *worker = NULL;
2138
2139 REQUIRE(VALID_NMSOCK(sock));
2140 REQUIRE(isc__nm_in_netthread());
2141
2142 switch (sock->type) {
2143 case isc_nm_udpsocket:
2144 REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
2145 size = ISC_NETMGR_RECVBUF_SIZE;
2146 break;
2147 case isc_nm_tcpsocket:
2148 case isc_nm_tcpdnssocket:
2149 break;
2150 case isc_nm_tlsdnssocket:
2151 /*
2152 * We need to limit the size of the individual chunks to be read,
2153 * so that BIO_write() always succeeds and the data is consumed
2154 * before the next read callback is called.
2155 */
2156 if (size >= ISC_NETMGR_TLSBUF_SIZE) {
2157 size = ISC_NETMGR_TLSBUF_SIZE;
2158 }
2159 break;
2160 default:
2161 INSIST(0);
2162 ISC_UNREACHABLE();
2163 }
2164
2165 worker = &sock->mgr->workers[sock->tid];
2166 INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
2167
2168 buf->base = worker->recvbuf;
2169 buf->len = size;
2170 worker->recvbuf_inuse = true;
2171 }
2172
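/*
 * Start reading on the underlying libuv handle, using the read
 * callback that matches the socket type; sock->reading makes repeated
 * calls no-ops. isc__nm_stop_reading() below is its mirror image.
 */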
2173 void
2174 isc__nm_start_reading(isc_nmsocket_t *sock) {
2175 int r;
2176
2177 if (atomic_load(&sock->reading)) {
2178 return;
2179 }
2180
2181 switch (sock->type) {
2182 case isc_nm_udpsocket:
2183 r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
2184 isc__nm_udp_read_cb);
2185 break;
2186 case isc_nm_tcpsocket:
2187 r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2188 isc__nm_tcp_read_cb);
2189 break;
2190 case isc_nm_tcpdnssocket:
2191 r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2192 isc__nm_tcpdns_read_cb);
2193 break;
2194 case isc_nm_tlsdnssocket:
2195 r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2196 isc__nm_tlsdns_read_cb);
2197 break;
2198 default:
2199 INSIST(0);
2200 ISC_UNREACHABLE();
2201 }
2202 RUNTIME_CHECK(r == 0);
2203 atomic_store(&sock->reading, true);
2204 }
2205
2206 void
2207 isc__nm_stop_reading(isc_nmsocket_t *sock) {
2208 int r;
2209
2210 if (!atomic_load(&sock->reading)) {
2211 return;
2212 }
2213
2214 switch (sock->type) {
2215 case isc_nm_udpsocket:
2216 r = uv_udp_recv_stop(&sock->uv_handle.udp);
2217 break;
2218 case isc_nm_tcpsocket:
2219 case isc_nm_tcpdnssocket:
2220 case isc_nm_tlsdnssocket:
2221 r = uv_read_stop(&sock->uv_handle.stream);
2222 break;
2223 default:
2224 INSIST(0);
2225 ISC_UNREACHABLE();
2226 }
2227 RUNTIME_CHECK(r == 0);
2228 atomic_store(&sock->reading, false);
2229 }
2230
2231 bool
2232 isc__nm_closing(isc_nmsocket_t *sock) {
2233 return (atomic_load(&sock->mgr->closing));
2234 }
2235
2236 bool
2237 isc__nmsocket_closing(isc_nmsocket_t *sock) {
2238 return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) ||
2239 isc__nm_closing(sock) ||
2240 (sock->server != NULL && !isc__nmsocket_active(sock->server)));
2241 }
2242
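/*
 * Hand the buffered data to the per-transport DNS message parser;
 * returns ISC_R_SUCCESS for a complete message, ISC_R_NOMORE when
 * more data is needed, or ISC_R_CANCELED when reading was canceled.
 */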
2243 static isc_result_t
2244 processbuffer(isc_nmsocket_t *sock) {
2245 switch (sock->type) {
2246 case isc_nm_tcpdnssocket:
2247 return (isc__nm_tcpdns_processbuffer(sock));
2248 case isc_nm_tlsdnssocket:
2249 return (isc__nm_tlsdns_processbuffer(sock));
2250 default:
2251 INSIST(0);
2252 ISC_UNREACHABLE();
2253 }
2254 }
2255
2256 /*
2257 * Process a DNS message.
2258 *
2259 * If we only have an incomplete DNS message, we don't touch any
2260 * timers. If we do have a full message, reset the timer.
2261 *
2262 * Stop reading if this is a client socket, or if the server socket
2263 * has been set to sequential mode, or if the number of queries we are
2264 * processing simultaneously has reached the clients-per-connection
2265 * limit. In these cases we'll be called again later by
2266 * isc__nm_resume_processing().
2267 */
2268 void
2269 isc__nm_process_sock_buffer(isc_nmsocket_t *sock) {
2270 for (;;) {
2271 int_fast32_t ah = atomic_load(&sock->ah);
2272 isc_result_t result = processbuffer(sock);
2273 switch (result) {
2274 case ISC_R_NOMORE:
2275 /*
2276 * Don't reset the timer until we have a
2277 * full DNS message.
2278 */
2279 isc__nm_start_reading(sock);
2280 /*
2281 * Start the timer only if there are no externally used
2282 * active handles; there is always one active handle
2283 * attached internally to sock->recv_handle in
2284 * accept_connection().
2285 */
2286 if (ah == 1) {
2287 isc__nmsocket_timer_start(sock);
2288 }
2289 return;
2290 case ISC_R_CANCELED:
2291 isc__nmsocket_timer_stop(sock);
2292 isc__nm_stop_reading(sock);
2293 return;
2294 case ISC_R_SUCCESS:
2295 /*
2296 * Stop the timer on a successful message read; this
2297 * also allows the timer to be restarted when we have
2298 * no more data.
2299 */
2300 isc__nmsocket_timer_stop(sock);
2301
2302 if (atomic_load(&sock->client) ||
2303 atomic_load(&sock->sequential) ||
2304 ah >= STREAM_CLIENTS_PER_CONN)
2305 {
2306 isc__nm_stop_reading(sock);
2307 return;
2308 }
2309 break;
2310 default:
2311 INSIST(0);
2312 }
2313 }
2314 }
2315
2316 void
2317 isc__nm_resume_processing(void *arg) {
2318 isc_nmsocket_t *sock = (isc_nmsocket_t *)arg;
2319
2320 REQUIRE(VALID_NMSOCK(sock));
2321 REQUIRE(sock->tid == isc_nm_tid());
2322 REQUIRE(!atomic_load(&sock->client));
2323
2324 if (isc__nmsocket_closing(sock)) {
2325 return;
2326 }
2327
2328 isc__nm_process_sock_buffer(sock);
2329 }
2330
2331 void
2332 isc_nmhandle_cleartimeout(isc_nmhandle_t *handle) {
2333 REQUIRE(VALID_NMHANDLE(handle));
2334 REQUIRE(VALID_NMSOCK(handle->sock));
2335
2336 switch (handle->sock->type) {
2337 #if HAVE_LIBNGHTTP2
2338 case isc_nm_httpsocket:
2339 isc__nm_http_cleartimeout(handle);
2340 return;
2341 case isc_nm_tlssocket:
2342 isc__nm_tls_cleartimeout(handle);
2343 return;
2344 #endif
2345 default:
2346 handle->sock->read_timeout = 0;
2347
2348 if (uv_is_active((uv_handle_t *)&handle->sock->timer)) {
2349 isc__nmsocket_timer_stop(handle->sock);
2350 }
2351 }
2352 }
2353
2354 void
2355 isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
2356 REQUIRE(VALID_NMHANDLE(handle));
2357 REQUIRE(VALID_NMSOCK(handle->sock));
2358
2359 switch (handle->sock->type) {
2360 #if HAVE_LIBNGHTTP2
2361 case isc_nm_httpsocket:
2362 isc__nm_http_settimeout(handle, timeout);
2363 return;
2364 case isc_nm_tlssocket:
2365 isc__nm_tls_settimeout(handle, timeout);
2366 return;
2367 #endif
2368 default:
2369 handle->sock->read_timeout = timeout;
2370 isc__nmsocket_timer_restart(handle->sock);
2371 }
2372 }
2373
2374 void
2375 isc_nmhandle_keepalive(isc_nmhandle_t *handle, bool value) {
2376 isc_nmsocket_t *sock = NULL;
2377
2378 REQUIRE(VALID_NMHANDLE(handle));
2379 REQUIRE(VALID_NMSOCK(handle->sock));
2380
2381 sock = handle->sock;
2382
2383 switch (sock->type) {
2384 case isc_nm_tcpsocket:
2385 case isc_nm_tcpdnssocket:
2386 case isc_nm_tlsdnssocket:
2387 atomic_store(&sock->keepalive, value);
2388 sock->read_timeout = value ? atomic_load(&sock->mgr->keepalive)
2389 : atomic_load(&sock->mgr->idle);
2390 break;
2391 #if HAVE_LIBNGHTTP2
2392 case isc_nm_tlssocket:
2393 isc__nmhandle_tls_keepalive(handle, value);
2394 break;
2395 case isc_nm_httpsocket:
2396 isc__nmhandle_http_keepalive(handle, value);
2397 break;
2398 #endif /* HAVE_LIBNGHTTP2 */
2399 default:
2400 /*
2401 * For any other protocol, this is a no-op.
2402 */
2403 return;
2404 }
2405 }
2406
2407 bool
2408 isc_nmhandle_timer_running(isc_nmhandle_t *handle) {
2409 REQUIRE(VALID_NMHANDLE(handle));
2410 REQUIRE(VALID_NMSOCK(handle->sock));
2411
2412 return (isc__nmsocket_timer_running(handle->sock));
2413 }
2414
2415 void *
2416 isc_nmhandle_getextra(isc_nmhandle_t *handle) {
2417 REQUIRE(VALID_NMHANDLE(handle));
2418
2419 return (handle->extra);
2420 }
2421
2422 isc_sockaddr_t
2423 isc_nmhandle_peeraddr(isc_nmhandle_t *handle) {
2424 REQUIRE(VALID_NMHANDLE(handle));
2425
2426 return (handle->peer);
2427 }
2428
2429 isc_sockaddr_t
2430 isc_nmhandle_localaddr(isc_nmhandle_t *handle) {
2431 REQUIRE(VALID_NMHANDLE(handle));
2432
2433 return (handle->local);
2434 }
2435
2436 isc_nm_t *
2437 isc_nmhandle_netmgr(isc_nmhandle_t *handle) {
2438 REQUIRE(VALID_NMHANDLE(handle));
2439 REQUIRE(VALID_NMSOCK(handle->sock));
2440
2441 return (handle->sock->mgr);
2442 }
2443
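/*
 * Get a UV request, reusing one from the socket's inactive-request
 * stack when possible; the request holds a reference on the socket
 * until it is returned with isc___nm_uvreq_put().
 */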
2444 isc__nm_uvreq_t *
2445 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
2446 isc__nm_uvreq_t *req = NULL;
2447
2448 REQUIRE(VALID_NM(mgr));
2449 REQUIRE(VALID_NMSOCK(sock));
2450
2451 if (isc__nmsocket_active(sock)) {
2452 /* Try to reuse one */
2453 req = isc_astack_pop(sock->inactivereqs);
2454 }
2455
2456 if (req == NULL) {
2457 req = isc_mem_get(mgr->mctx, sizeof(*req));
2458 }
2459
2460 *req = (isc__nm_uvreq_t){ .magic = 0 };
2461 ISC_LINK_INIT(req, link);
2462 req->uv_req.req.data = req;
2463 isc___nmsocket_attach(sock, &req->sock FLARG_PASS);
2464 req->magic = UVREQ_MAGIC;
2465
2466 return (req);
2467 }
2468
2469 void
2470 isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) {
2471 isc__nm_uvreq_t *req = NULL;
2472 isc_nmhandle_t *handle = NULL;
2473
2474 REQUIRE(req0 != NULL);
2475 REQUIRE(VALID_UVREQ(*req0));
2476
2477 req = *req0;
2478 *req0 = NULL;
2479
2480 INSIST(sock == req->sock);
2481
2482 req->magic = 0;
2483
2484 /*
2485 * We need to save this first to make sure that handle,
2486 * sock, and the netmgr won't all disappear.
2487 */
2488 handle = req->handle;
2489 req->handle = NULL;
2490
2491 if (!isc__nmsocket_active(sock) ||
2492 !isc_astack_trypush(sock->inactivereqs, req)) {
2493 isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
2494 }
2495
2496 if (handle != NULL) {
2497 isc__nmhandle_detach(&handle FLARG_PASS);
2498 }
2499
2500 isc___nmsocket_detach(&sock FLARG_PASS);
2501 }
2502
2503 void
2504 isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb,
2505 void *cbarg) {
2506 REQUIRE(VALID_NMHANDLE(handle));
2507
2508 switch (handle->sock->type) {
2509 case isc_nm_udpsocket:
2510 case isc_nm_udplistener:
2511 isc__nm_udp_send(handle, region, cb, cbarg);
2512 break;
2513 case isc_nm_tcpsocket:
2514 isc__nm_tcp_send(handle, region, cb, cbarg);
2515 break;
2516 case isc_nm_tcpdnssocket:
2517 isc__nm_tcpdns_send(handle, region, cb, cbarg);
2518 break;
2519 case isc_nm_tlsdnssocket:
2520 isc__nm_tlsdns_send(handle, region, cb, cbarg);
2521 break;
2522 #if HAVE_LIBNGHTTP2
2523 case isc_nm_tlssocket:
2524 isc__nm_tls_send(handle, region, cb, cbarg);
2525 break;
2526 case isc_nm_httpsocket:
2527 isc__nm_http_send(handle, region, cb, cbarg);
2528 break;
2529 #endif
2530 default:
2531 INSIST(0);
2532 ISC_UNREACHABLE();
2533 }
2534 }
2535
2536 void
2537 isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
2538 REQUIRE(VALID_NMHANDLE(handle));
2539
2540 switch (handle->sock->type) {
2541 case isc_nm_udpsocket:
2542 isc__nm_udp_read(handle, cb, cbarg);
2543 break;
2544 case isc_nm_tcpsocket:
2545 isc__nm_tcp_read(handle, cb, cbarg);
2546 break;
2547 case isc_nm_tcpdnssocket:
2548 isc__nm_tcpdns_read(handle, cb, cbarg);
2549 break;
2550 case isc_nm_tlsdnssocket:
2551 isc__nm_tlsdns_read(handle, cb, cbarg);
2552 break;
2553 #if HAVE_LIBNGHTTP2
2554 case isc_nm_tlssocket:
2555 isc__nm_tls_read(handle, cb, cbarg);
2556 break;
2557 case isc_nm_httpsocket:
2558 isc__nm_http_read(handle, cb, cbarg);
2559 break;
2560 #endif
2561 default:
2562 INSIST(0);
2563 ISC_UNREACHABLE();
2564 }
2565 }
2566
2567 void
2568 isc_nm_cancelread(isc_nmhandle_t *handle) {
2569 REQUIRE(VALID_NMHANDLE(handle));
2570
2571 switch (handle->sock->type) {
2572 case isc_nm_udpsocket:
2573 isc__nm_udp_cancelread(handle);
2574 break;
2575 case isc_nm_tcpsocket:
2576 isc__nm_tcp_cancelread(handle);
2577 break;
2578 case isc_nm_tcpdnssocket:
2579 isc__nm_tcpdns_cancelread(handle);
2580 break;
2581 case isc_nm_tlsdnssocket:
2582 isc__nm_tlsdns_cancelread(handle);
2583 break;
2584 #if HAVE_LIBNGHTTP2
2585 case isc_nm_tlssocket:
2586 isc__nm_tls_cancelread(handle);
2587 break;
2588 #endif
2589 default:
2590 INSIST(0);
2591 ISC_UNREACHABLE();
2592 }
2593 }
2594
2595 void
2596 isc_nm_pauseread(isc_nmhandle_t *handle) {
2597 REQUIRE(VALID_NMHANDLE(handle));
2598
2599 isc_nmsocket_t *sock = handle->sock;
2600
2601 switch (sock->type) {
2602 case isc_nm_tcpsocket:
2603 isc__nm_tcp_pauseread(handle);
2604 break;
2605 #if HAVE_LIBNGHTTP2
2606 case isc_nm_tlssocket:
2607 isc__nm_tls_pauseread(handle);
2608 break;
2609 #endif
2610 default:
2611 INSIST(0);
2612 ISC_UNREACHABLE();
2613 }
2614 }
2615
2616 void
2617 isc_nm_resumeread(isc_nmhandle_t *handle) {
2618 REQUIRE(VALID_NMHANDLE(handle));
2619
2620 isc_nmsocket_t *sock = handle->sock;
2621
2622 switch (sock->type) {
2623 case isc_nm_tcpsocket:
2624 isc__nm_tcp_resumeread(handle);
2625 break;
2626 #if HAVE_LIBNGHTTP2
2627 case isc_nm_tlssocket:
2628 isc__nm_tls_resumeread(handle);
2629 break;
2630 #endif
2631 default:
2632 INSIST(0);
2633 ISC_UNREACHABLE();
2634 }
2635 }
2636
2637 void
2638 isc_nm_stoplistening(isc_nmsocket_t *sock) {
2639 REQUIRE(VALID_NMSOCK(sock));
2640
2641 switch (sock->type) {
2642 case isc_nm_udplistener:
2643 isc__nm_udp_stoplistening(sock);
2644 break;
2645 case isc_nm_tcpdnslistener:
2646 isc__nm_tcpdns_stoplistening(sock);
2647 break;
2648 case isc_nm_tcplistener:
2649 isc__nm_tcp_stoplistening(sock);
2650 break;
2651 case isc_nm_tlsdnslistener:
2652 isc__nm_tlsdns_stoplistening(sock);
2653 break;
2654 #if HAVE_LIBNGHTTP2
2655 case isc_nm_tlslistener:
2656 isc__nm_tls_stoplistening(sock);
2657 break;
2658 case isc_nm_httplistener:
2659 isc__nm_http_stoplistening(sock);
2660 break;
2661 #endif
2662 default:
2663 INSIST(0);
2664 ISC_UNREACHABLE();
2665 }
2666 }
2667
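/*
 * The three pairs of functions below run the connect, read, and send
 * callbacks: either synchronously on the current thread, or by
 * enqueueing a netievent so that the callback runs on the socket's
 * worker thread.
 */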
2668 void
2669 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2670 isc_result_t eresult, bool async) {
2671 REQUIRE(VALID_NMSOCK(sock));
2672 REQUIRE(VALID_UVREQ(uvreq));
2673 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2674
2675 if (!async) {
2676 isc__netievent_connectcb_t ievent = { .sock = sock,
2677 .req = uvreq,
2678 .result = eresult };
2679 isc__nm_async_connectcb(NULL, (isc__netievent_t *)&ievent);
2680 } else {
2681 isc__netievent_connectcb_t *ievent =
2682 isc__nm_get_netievent_connectcb(sock->mgr, sock, uvreq,
2683 eresult);
2684 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2685 (isc__netievent_t *)ievent);
2686 }
2687 }
2688
2689 void
2690 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2691 isc__netievent_connectcb_t *ievent = (isc__netievent_connectcb_t *)ev0;
2692 isc_nmsocket_t *sock = ievent->sock;
2693 isc__nm_uvreq_t *uvreq = ievent->req;
2694 isc_result_t eresult = ievent->result;
2695
2696 UNUSED(worker);
2697
2698 REQUIRE(VALID_NMSOCK(sock));
2699 REQUIRE(VALID_UVREQ(uvreq));
2700 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2701 REQUIRE(ievent->sock->tid == isc_nm_tid());
2702 REQUIRE(uvreq->cb.connect != NULL);
2703
2704 uvreq->cb.connect(uvreq->handle, eresult, uvreq->cbarg);
2705
2706 isc__nm_uvreq_put(&uvreq, sock);
2707 }
2708
2709 void
2710 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2711 isc_result_t eresult) {
2712 REQUIRE(VALID_NMSOCK(sock));
2713 REQUIRE(VALID_UVREQ(uvreq));
2714 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2715
2716 if (eresult == ISC_R_SUCCESS || eresult == ISC_R_TIMEDOUT) {
2717 isc__netievent_readcb_t ievent = { .sock = sock,
2718 .req = uvreq,
2719 .result = eresult };
2720
2721 isc__nm_async_readcb(NULL, (isc__netievent_t *)&ievent);
2722 } else {
2723 isc__netievent_readcb_t *ievent = isc__nm_get_netievent_readcb(
2724 sock->mgr, sock, uvreq, eresult);
2725 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2726 (isc__netievent_t *)ievent);
2727 }
2728 }
2729
2730 void
2731 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2732 isc__netievent_readcb_t *ievent = (isc__netievent_readcb_t *)ev0;
2733 isc_nmsocket_t *sock = ievent->sock;
2734 isc__nm_uvreq_t *uvreq = ievent->req;
2735 isc_result_t eresult = ievent->result;
2736 isc_region_t region;
2737
2738 UNUSED(worker);
2739
2740 REQUIRE(VALID_NMSOCK(sock));
2741 REQUIRE(VALID_UVREQ(uvreq));
2742 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2743 REQUIRE(sock->tid == isc_nm_tid());
2744
2745 region.base = (unsigned char *)uvreq->uvbuf.base;
2746 region.length = uvreq->uvbuf.len;
2747
2748 uvreq->cb.recv(uvreq->handle, eresult, &region, uvreq->cbarg);
2749
2750 isc__nm_uvreq_put(&uvreq, sock);
2751 }
2752
2753 void
2754 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2755 isc_result_t eresult, bool async) {
2756 REQUIRE(VALID_NMSOCK(sock));
2757 REQUIRE(VALID_UVREQ(uvreq));
2758 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2759
2760 if (!async) {
2761 isc__netievent_sendcb_t ievent = { .sock = sock,
2762 .req = uvreq,
2763 .result = eresult };
2764 isc__nm_async_sendcb(NULL, (isc__netievent_t *)&ievent);
2765 return;
2766 }
2767
2768 isc__netievent_sendcb_t *ievent =
2769 isc__nm_get_netievent_sendcb(sock->mgr, sock, uvreq, eresult);
2770 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2771 (isc__netievent_t *)ievent);
2772 }
2773
2774 void
2775 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2776 isc__netievent_sendcb_t *ievent = (isc__netievent_sendcb_t *)ev0;
2777 isc_nmsocket_t *sock = ievent->sock;
2778 isc__nm_uvreq_t *uvreq = ievent->req;
2779 isc_result_t eresult = ievent->result;
2780
2781 UNUSED(worker);
2782
2783 REQUIRE(VALID_NMSOCK(sock));
2784 REQUIRE(VALID_UVREQ(uvreq));
2785 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2786 REQUIRE(sock->tid == isc_nm_tid());
2787
2788 uvreq->cb.send(uvreq->handle, eresult, uvreq->cbarg);
2789
2790 isc__nm_uvreq_put(&uvreq, sock);
2791 }
2792
2793 static void
2794 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) {
2795 isc__netievent_close_t *ievent = (isc__netievent_close_t *)ev0;
2796 isc_nmsocket_t *sock = ievent->sock;
2797
2798 REQUIRE(VALID_NMSOCK(ievent->sock));
2799 REQUIRE(sock->tid == isc_nm_tid());
2800 REQUIRE(sock->closehandle_cb != NULL);
2801
2802 UNUSED(worker);
2803
2804 ievent->sock->closehandle_cb(sock);
2805 }
2806
2807 void
2808 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) {
2809 isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0;
2810 FLARG_IEVENT(ievent);
2811
2812 REQUIRE(VALID_NMSOCK(ievent->sock));
2813 REQUIRE(VALID_NMHANDLE(ievent->handle));
2814 REQUIRE(ievent->sock->tid == isc_nm_tid());
2815
2816 UNUSED(worker);
2817
2818 nmhandle_detach_cb(&ievent->handle FLARG_PASS);
2819 }
2820
2821 void
2822 isc__nmsocket_shutdown(isc_nmsocket_t *sock) {
2823 REQUIRE(VALID_NMSOCK(sock));
2824 switch (sock->type) {
2825 case isc_nm_udpsocket:
2826 isc__nm_udp_shutdown(sock);
2827 break;
2828 case isc_nm_tcpsocket:
2829 isc__nm_tcp_shutdown(sock);
2830 break;
2831 case isc_nm_tcpdnssocket:
2832 isc__nm_tcpdns_shutdown(sock);
2833 break;
2834 case isc_nm_tlsdnssocket:
2835 isc__nm_tlsdns_shutdown(sock);
2836 break;
2837 case isc_nm_udplistener:
2838 case isc_nm_tcplistener:
2839 case isc_nm_tcpdnslistener:
2840 case isc_nm_tlsdnslistener:
2841 return;
2842 default:
2843 INSIST(0);
2844 ISC_UNREACHABLE();
2845 }
2846 }
2847
2848 static void
2849 shutdown_walk_cb(uv_handle_t *handle, void *arg) {
2850 isc_nmsocket_t *sock = uv_handle_get_data(handle);
2851 UNUSED(arg);
2852
2853 if (uv_is_closing(handle)) {
2854 return;
2855 }
2856
2857 switch (handle->type) {
2858 case UV_UDP:
2859 case UV_TCP:
2860 break;
2861 default:
2862 return;
2863 }
2864
2865 isc__nmsocket_shutdown(sock);
2866 }
2867
2868 void
2869 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
2870 UNUSED(ev0);
2871 uv_walk(&worker->loop, shutdown_walk_cb, NULL);
2872 }
2873
2874 bool
2875 isc__nm_acquire_interlocked(isc_nm_t *mgr) {
2876 if (!isc__nm_in_netthread()) {
2877 return (false);
2878 }
2879
2880 LOCK(&mgr->lock);
2881 bool success = atomic_compare_exchange_strong(
2882 &mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2883 isc_nm_tid());
2884
2885 UNLOCK(&mgr->lock);
2886 return (success);
2887 }
2888
2889 void
2890 isc__nm_drop_interlocked(isc_nm_t *mgr) {
2891 if (!isc__nm_in_netthread()) {
2892 return;
2893 }
2894
2895 LOCK(&mgr->lock);
2896 int tid = atomic_exchange(&mgr->interlocked,
2897 ISC_NETMGR_NON_INTERLOCKED);
2898 INSIST(tid != ISC_NETMGR_NON_INTERLOCKED);
2899 BROADCAST(&mgr->wkstatecond);
2900 UNLOCK(&mgr->lock);
2901 }
2902
2903 void
2904 isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
2905 if (!isc__nm_in_netthread()) {
2906 return;
2907 }
2908
2909 LOCK(&mgr->lock);
2910 while (!atomic_compare_exchange_strong(
2911 &mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2912 isc_nm_tid()))
2913 {
2914 WAIT(&mgr->wkstatecond, &mgr->lock);
2915 }
2916 UNLOCK(&mgr->lock);
2917 }
2918
2919 void
2920 isc_nm_setstats(isc_nm_t *mgr, isc_stats_t *stats) {
2921 REQUIRE(VALID_NM(mgr));
2922 REQUIRE(mgr->stats == NULL);
2923 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2924
2925 isc_stats_attach(stats, &mgr->stats);
2926 }
2927
2928 void
2929 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2930 REQUIRE(VALID_NM(mgr));
2931 REQUIRE(counterid != -1);
2932
2933 if (mgr->stats != NULL) {
2934 isc_stats_increment(mgr->stats, counterid);
2935 }
2936 }
2937
2938 void
2939 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2940 REQUIRE(VALID_NM(mgr));
2941 REQUIRE(counterid != -1);
2942
2943 if (mgr->stats != NULL) {
2944 isc_stats_decrement(mgr->stats, counterid);
2945 }
2946 }
2947
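/*
 * Low-level socket helpers: create/close a raw socket and set various
 * platform-dependent options, returning ISC_R_NOTIMPLEMENTED where an
 * option is not available on this system.
 */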
2948 isc_result_t
2949 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
2950 int sock = socket(domain, type, protocol);
2951 if (sock < 0) {
2952 return (isc_errno_toresult(errno));
2953 }
2954
2955 *sockp = (uv_os_sock_t)sock;
2956 return (ISC_R_SUCCESS);
2957 }
2958
2959 void
2960 isc__nm_closesocket(uv_os_sock_t sock) {
2961 close(sock);
2962 }
2963
2964 #define setsockopt_on(socket, level, name) \
2965 setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
2966
2967 #define setsockopt_off(socket, level, name) \
2968 setsockopt(socket, level, name, &(int){ 0 }, sizeof(int))
2969
2970 isc_result_t
2971 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
2972 /*
2973 * Set IP_FREEBIND (or an equivalent option) on the socket.
2974 */
2975 #ifdef IP_FREEBIND
2976 UNUSED(sa_family);
2977 if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
2978 return (ISC_R_FAILURE);
2979 }
2980 return (ISC_R_SUCCESS);
2981 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
2982 if (sa_family == AF_INET) {
2983 #if defined(IP_BINDANY)
2984 if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
2985 return (ISC_R_FAILURE);
2986 }
2987 return (ISC_R_SUCCESS);
2988 #endif
2989 } else if (sa_family == AF_INET6) {
2990 #if defined(IPV6_BINDANY)
2991 if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
2992 return (ISC_R_FAILURE);
2993 }
2994 return (ISC_R_SUCCESS);
2995 #endif
2996 }
2997 return (ISC_R_NOTIMPLEMENTED);
2998 #elif defined(SO_BINDANY)
2999 UNUSED(sa_family);
3000 if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
3001 return (ISC_R_FAILURE);
3002 }
3003 return (ISC_R_SUCCESS);
3004 #else
3005 UNUSED(fd);
3006 UNUSED(sa_family);
3007 return (ISC_R_NOTIMPLEMENTED);
3008 #endif
3009 }
3010
3011 isc_result_t
3012 isc__nm_socket_reuse(uv_os_sock_t fd) {
3013 /*
3014 * Generally, the SO_REUSEADDR socket option allows reuse of
3015 * local addresses.
3016 *
3017 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
3018 * additional refinements for programs that use multicast.
3019 *
3020 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
3021 * rather than stealing it from the current listener, so we don't use
3022 * it here, but rather in isc__nm_socket_reuse_lb().
3023 *
3024 * On Windows, it also allows a socket to forcibly bind to a port in use
3025 * by another socket.
3026 */
3027
3028 #if defined(SO_REUSEPORT) && !defined(__linux__)
3029 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
3030 return (ISC_R_FAILURE);
3031 }
3032 return (ISC_R_SUCCESS);
3033 #elif defined(SO_REUSEADDR)
3034 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
3035 return (ISC_R_FAILURE);
3036 }
3037 return (ISC_R_SUCCESS);
3038 #else
3039 UNUSED(fd);
3040 return (ISC_R_NOTIMPLEMENTED);
3041 #endif
3042 }
3043
3044 isc_result_t
3045 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
3046 /*
3047 * On FreeBSD 12+, the SO_REUSEPORT_LB socket option allows sockets to be
3048 * bound to an identical socket address. For UDP sockets, the use of
3049 * this option can provide better distribution of incoming datagrams to
3050 * multiple processes (or threads) as compared to the traditional
3051 * technique of having multiple processes compete to receive datagrams
3052 * on the same socket.
3053 *
3054 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
3055 */
3056 #if defined(SO_REUSEPORT_LB)
3057 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
3058 return (ISC_R_FAILURE);
3059 } else {
3060 return (ISC_R_SUCCESS);
3061 }
3062 #elif defined(SO_REUSEPORT) && defined(__linux__)
3063 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
3064 return (ISC_R_FAILURE);
3065 } else {
3066 return (ISC_R_SUCCESS);
3067 }
3068 #else
3069 UNUSED(fd);
3070 return (ISC_R_NOTIMPLEMENTED);
3071 #endif
3072 }
3073
3074 isc_result_t
3075 isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
3076 #ifdef SO_INCOMING_CPU
3077 if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
3078 return (ISC_R_FAILURE);
3079 } else {
3080 return (ISC_R_SUCCESS);
3081 }
3082 #else
3083 UNUSED(fd);
3084 #endif
3085 return (ISC_R_NOTIMPLEMENTED);
3086 }
3087
3088 isc_result_t
3089 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
3090 /*
3091 * Disable Path MTU Discovery on IP packets.
3092 */
3093 if (sa_family == AF_INET6) {
3094 #if defined(IPV6_DONTFRAG)
3095 if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
3096 return (ISC_R_FAILURE);
3097 } else {
3098 return (ISC_R_SUCCESS);
3099 }
3100 #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3101 if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
3102 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3103 {
3104 return (ISC_R_FAILURE);
3105 } else {
3106 return (ISC_R_SUCCESS);
3107 }
3108 #else
3109 UNUSED(fd);
3110 #endif
3111 } else if (sa_family == AF_INET) {
3112 #if defined(IP_DONTFRAG)
3113 if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
3114 return (ISC_R_FAILURE);
3115 } else {
3116 return (ISC_R_SUCCESS);
3117 }
3118 #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3119 if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
3120 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3121 {
3122 return (ISC_R_FAILURE);
3123 } else {
3124 return (ISC_R_SUCCESS);
3125 }
3126 #else
3127 UNUSED(fd);
3128 #endif
3129 } else {
3130 return (ISC_R_FAMILYNOSUPPORT);
3131 }
3132
3133 return (ISC_R_NOTIMPLEMENTED);
3134 }
3135
3136 isc_result_t
3137 isc_nm_checkaddr(const isc_sockaddr_t *addr, isc_socktype_t type) {
3138 int socktype, pf, addrlen, fd, r;
3139
3140 REQUIRE(addr != NULL);
3141
3142 switch (type) {
3143 case isc_socktype_tcp:
3144 socktype = SOCK_STREAM;
3145 break;
3146 case isc_socktype_udp:
3147 socktype = SOCK_DGRAM;
3148 break;
3149 default:
3150 return (ISC_R_NOTIMPLEMENTED);
3151 }
3152
3153 pf = isc_sockaddr_pf(addr);
3154 if (pf == AF_INET) {
3155 addrlen = sizeof(struct sockaddr_in);
3156 } else {
3157 addrlen = sizeof(struct sockaddr_in6);
3158 }
3159
3160 fd = socket(pf, socktype, 0);
3161 if (fd < 0) {
3162 return (isc_errno_toresult(errno));
3163 }
3164
3165 r = bind(fd, (const struct sockaddr *)&addr->type.sa, addrlen);
3166 if (r < 0) {
3167 close(fd);
3168 return (isc_errno_toresult(errno));
3169 }
3170
3171 close(fd);
3172 return (ISC_R_SUCCESS);
3173 }
3174
3175 #if defined(TCP_CONNECTIONTIMEOUT)
3176 #define TIMEOUT_TYPE int
3177 #define TIMEOUT_DIV 1000
3178 #define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT
3179 #elif defined(TCP_RXT_CONNDROPTIME)
3180 #define TIMEOUT_TYPE int
3181 #define TIMEOUT_DIV 1000
3182 #define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME
3183 #elif defined(TCP_USER_TIMEOUT)
3184 #define TIMEOUT_TYPE unsigned int
3185 #define TIMEOUT_DIV 1
3186 #define TIMEOUT_OPTNAME TCP_USER_TIMEOUT
3187 #elif defined(TCP_KEEPINIT)
3188 #define TIMEOUT_TYPE int
3189 #define TIMEOUT_DIV 1000
3190 #define TIMEOUT_OPTNAME TCP_KEEPINIT
3191 #endif
3192
3193 isc_result_t
3194 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
3195 #if defined(TIMEOUT_OPTNAME)
3196 TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
3197
3198 if (timeout == 0) {
3199 timeout = 1;
3200 }
3201
3202 if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
3203 sizeof(timeout)) == -1)
3204 {
3205 return (ISC_R_FAILURE);
3206 }
3207
3208 return (ISC_R_SUCCESS);
3209 #else
3210 UNUSED(fd);
3211 UNUSED(timeout_ms);
3212
3213 return (ISC_R_SUCCESS);
3214 #endif
3215 }
3216
3217 isc_result_t
3218 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) {
3219 #ifdef TCP_NODELAY
3220 if (setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY) == -1) {
3221 return (ISC_R_FAILURE);
3222 } else {
3223 return (ISC_R_SUCCESS);
3224 }
3225 #else
3226 UNUSED(fd);
3227 return (ISC_R_SUCCESS);
3228 #endif
3229 }
3230
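/*
 * Apply the configured TCP/UDP send and receive buffer sizes to a
 * libuv handle; a value of zero leaves the kernel default in place.
 */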
3231 void
3232 isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle) {
3233 int32_t recv_buffer_size = 0;
3234 int32_t send_buffer_size = 0;
3235
3236 switch (handle->type) {
3237 case UV_TCP:
3238 recv_buffer_size =
3239 atomic_load_relaxed(&nm->recv_tcp_buffer_size);
3240 send_buffer_size =
3241 atomic_load_relaxed(&nm->send_tcp_buffer_size);
3242 break;
3243 case UV_UDP:
3244 recv_buffer_size =
3245 atomic_load_relaxed(&nm->recv_udp_buffer_size);
3246 send_buffer_size =
3247 atomic_load_relaxed(&nm->send_udp_buffer_size);
3248 break;
3249 default:
3250 INSIST(0);
3251 ISC_UNREACHABLE();
3252 }
3253
3254 if (recv_buffer_size > 0) {
3255 int r = uv_recv_buffer_size(handle, &recv_buffer_size);
3256 INSIST(r == 0);
3257 }
3258
3259 if (send_buffer_size > 0) {
3260 int r = uv_send_buffer_size(handle, &send_buffer_size);
3261 INSIST(r == 0);
3262 }
3263 }
3264
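/*
 * Offloaded-work plumbing: isc_nm_work_offload() queues 'work_cb' on
 * the libuv thread pool and runs 'after_work_cb' on the event loop
 * when it completes; the trampoline sets up netmgr thread-local state
 * on pool threads that don't have it yet.
 */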
3265 static isc_threadresult_t
3266 isc__nm_work_run(isc_threadarg_t arg) {
3267 isc__nm_work_t *work = (isc__nm_work_t *)arg;
3268
3269 work->cb(work->data);
3270
3271 return ((isc_threadresult_t)0);
3272 }
3273
3274 static void
3275 isc__nm_work_cb(uv_work_t *req) {
3276 isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3277
3278 if (isc_tid_v == SIZE_MAX) {
3279 isc__trampoline_t *trampoline_arg =
3280 isc__trampoline_get(isc__nm_work_run, work);
3281 (void)isc__trampoline_run(trampoline_arg);
3282 } else {
3283 (void)isc__nm_work_run((isc_threadarg_t)work);
3284 }
3285 }
3286
3287 static void
3288 isc__nm_after_work_cb(uv_work_t *req, int status) {
3289 isc_result_t result = ISC_R_SUCCESS;
3290 isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3291 isc_nm_t *netmgr = work->netmgr;
3292
3293 if (status != 0) {
3294 result = isc__nm_uverr2result(status);
3295 }
3296
3297 work->after_cb(work->data, result);
3298
3299 isc_mem_put(netmgr->mctx, work, sizeof(*work));
3300
3301 isc_nm_detach(&netmgr);
3302 }
3303
3304 void
3305 isc_nm_work_offload(isc_nm_t *netmgr, isc_nm_workcb_t work_cb,
3306 isc_nm_after_workcb_t after_work_cb, void *data) {
3307 isc__networker_t *worker = NULL;
3308 isc__nm_work_t *work = NULL;
3309 int r;
3310
3311 REQUIRE(isc__nm_in_netthread());
3312 REQUIRE(VALID_NM(netmgr));
3313
3314 worker = &netmgr->workers[isc_nm_tid()];
3315
3316 work = isc_mem_get(netmgr->mctx, sizeof(*work));
3317 *work = (isc__nm_work_t){
3318 .cb = work_cb,
3319 .after_cb = after_work_cb,
3320 .data = data,
3321 };
3322
3323 isc_nm_attach(netmgr, &work->netmgr);
3324
3325 uv_req_set_data((uv_req_t *)&work->req, work);
3326
3327 r = uv_queue_work(&worker->loop, &work->req, isc__nm_work_cb,
3328 isc__nm_after_work_cb);
3329 RUNTIME_CHECK(r == 0);
3330 }
3331
3332 void
3333 isc_nm_sequential(isc_nmhandle_t *handle) {
3334 isc_nmsocket_t *sock = NULL;
3335
3336 REQUIRE(VALID_NMHANDLE(handle));
3337 REQUIRE(VALID_NMSOCK(handle->sock));
3338
3339 sock = handle->sock;
3340
3341 switch (sock->type) {
3342 case isc_nm_tcpdnssocket:
3343 case isc_nm_tlsdnssocket:
3344 break;
3345 case isc_nm_httpsocket:
3346 return;
3347 default:
3348 INSIST(0);
3349 ISC_UNREACHABLE();
3350 }
3351
3352 /*
3353 * We don't want pipelining on this connection. That means
3354 * that we need to pause after reading each request, and
3355 * resume only after the request has been processed. This
3356 * is done in isc__nm_resume_processing(), which is the
3357 * socket's closehandle_cb callback, called whenever a handle
3358 * is released.
3359 */
3360 isc__nmsocket_timer_stop(sock);
3361 isc__nm_stop_reading(sock);
3362 atomic_store(&sock->sequential, true);
3363 }
3364
3365 void
3366 isc_nm_bad_request(isc_nmhandle_t *handle) {
3367 isc_nmsocket_t *sock;
3368
3369 REQUIRE(VALID_NMHANDLE(handle));
3370 REQUIRE(VALID_NMSOCK(handle->sock));
3371
3372 sock = handle->sock;
3373 switch (sock->type) {
3374 #if HAVE_LIBNGHTTP2
3375 case isc_nm_httpsocket:
3376 isc__nm_http_bad_request(handle);
3377 break;
3378 #endif /* HAVE_LIBNGHTTP2 */
3379
3380 case isc_nm_udpsocket:
3381 case isc_nm_tcpdnssocket:
3382 case isc_nm_tlsdnssocket:
3383 return;
3385
3386 case isc_nm_tcpsocket:
3387 #if HAVE_LIBNGHTTP2
3388 case isc_nm_tlssocket:
3389 #endif /* HAVE_LIBNGHTTP2 */
3390 default:
3391 INSIST(0);
3392 ISC_UNREACHABLE();
3393 break;
3394 }
3395 }
3396
3397 bool
3398 isc_nm_xfr_allowed(isc_nmhandle_t *handle) {
3399 isc_nmsocket_t *sock;
3400
3401 REQUIRE(VALID_NMHANDLE(handle));
3402 REQUIRE(VALID_NMSOCK(handle->sock));
3403
3404 sock = handle->sock;
3405
3406 switch (sock->type) {
3407 case isc_nm_tcpdnssocket:
3408 return (true);
3409 case isc_nm_tlsdnssocket:
3410 return (isc__nm_tlsdns_xfr_allowed(sock));
3411 default:
3412 return (false);
3413 }
3419 }
3420
3421 bool
3422 isc_nm_is_tlsdns_handle(isc_nmhandle_t *handle) {
3423 REQUIRE(VALID_NMHANDLE(handle));
3424 REQUIRE(VALID_NMSOCK(handle->sock));
3425
3426 return (handle->sock->type == isc_nm_tlsdnssocket);
3427 }
3428
3429 #ifdef NETMGR_TRACE
3430 /*
3431 * Dump all active sockets in netmgr. We output to stderr
3432 * as the logger might already be shut down.
3433 */
3434
3435 static const char *
3436 nmsocket_type_totext(isc_nmsocket_type type) {
3437 switch (type) {
3438 case isc_nm_udpsocket:
3439 return ("isc_nm_udpsocket");
3440 case isc_nm_udplistener:
3441 return ("isc_nm_udplistener");
3442 case isc_nm_tcpsocket:
3443 return ("isc_nm_tcpsocket");
3444 case isc_nm_tcplistener:
3445 return ("isc_nm_tcplistener");
3446 case isc_nm_tcpdnslistener:
3447 return ("isc_nm_tcpdnslistener");
3448 case isc_nm_tcpdnssocket:
3449 return ("isc_nm_tcpdnssocket");
3450 case isc_nm_tlssocket:
3451 return ("isc_nm_tlssocket");
3452 case isc_nm_tlslistener:
3453 return ("isc_nm_tlslistener");
3454 case isc_nm_tlsdnslistener:
3455 return ("isc_nm_tlsdnslistener");
3456 case isc_nm_tlsdnssocket:
3457 return ("isc_nm_tlsdnssocket");
3458 case isc_nm_httplistener:
3459 return ("isc_nm_httplistener");
3460 case isc_nm_httpsocket:
3461 return ("isc_nm_httpsocket");
3462 default:
3463 INSIST(0);
3464 ISC_UNREACHABLE();
3465 }
3466 }
3467
3468 static void
3469 nmhandle_dump(isc_nmhandle_t *handle) {
3470 fprintf(stderr, "Active handle %p, refs %" PRIuFAST32 "\n", handle,
3471 isc_refcount_current(&handle->references));
3472 fprintf(stderr, "Created by:\n");
3473 isc_backtrace_symbols_fd(handle->backtrace, handle->backtrace_size,
3474 STDERR_FILENO);
3475 fprintf(stderr, "\n\n");
3476 }
3477
3478 static void
3479 nmsocket_dump(isc_nmsocket_t *sock) {
3480 isc_nmhandle_t *handle = NULL;
3481
3482 LOCK(&sock->lock);
3483 fprintf(stderr, "\n=================\n");
3484 fprintf(stderr, "Active %s socket %p, type %s, refs %" PRIuFAST32 "\n",
3485 atomic_load(&sock->client) ? "client" : "server", sock,
3486 nmsocket_type_totext(sock->type),
3487 isc_refcount_current(&sock->references));
3488 fprintf(stderr,
3489 "Parent %p, listener %p, server %p, statichandle = "
3490 "%p\n",
3491 sock->parent, sock->listener, sock->server, sock->statichandle);
3492 fprintf(stderr, "Flags:%s%s%s%s%s\n",
3493 atomic_load(&sock->active) ? " active" : "",
3494 atomic_load(&sock->closing) ? " closing" : "",
3495 atomic_load(&sock->destroying) ? " destroying" : "",
3496 atomic_load(&sock->connecting) ? " connecting" : "",
3497 atomic_load(&sock->accepting) ? " accepting" : "");
3498 fprintf(stderr, "Created by:\n");
3499 isc_backtrace_symbols_fd(sock->backtrace, sock->backtrace_size,
3500 STDERR_FILENO);
3501 fprintf(stderr, "\n");
3502
3503 for (handle = ISC_LIST_HEAD(sock->active_handles); handle != NULL;
3504 handle = ISC_LIST_NEXT(handle, active_link))
3505 {
3506 static bool first = true;
3507 if (first) {
3508 fprintf(stderr, "Active handles:\n");
3509 first = false;
3510 }
3511 nmhandle_dump(handle);
3512 }
3513
3514 fprintf(stderr, "\n");
3515 UNLOCK(&sock->lock);
3516 }
3517
3518 void
3519 isc__nm_dump_active(isc_nm_t *nm) {
3520 isc_nmsocket_t *sock = NULL;
3521
3522 REQUIRE(VALID_NM(nm));
3523
3524 LOCK(&nm->lock);
3525 for (sock = ISC_LIST_HEAD(nm->active_sockets); sock != NULL;
3526 sock = ISC_LIST_NEXT(sock, active_link))
3527 {
3528 static bool first = true;
3529 if (first) {
3530 fprintf(stderr, "Outstanding sockets\n");
3531 first = false;
3532 }
3533 nmsocket_dump(sock);
3534 }
3535 UNLOCK(&nm->lock);
3536 }
3537 #endif
3538