1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7  *
8  * See the COPYRIGHT file distributed with this work for additional
9  * information regarding copyright ownership.
10  */
11 
12 #include <inttypes.h>
13 #include <unistd.h>
14 #include <uv.h>
15 
16 #include <isc/atomic.h>
17 #include <isc/backtrace.h>
18 #include <isc/barrier.h>
19 #include <isc/buffer.h>
20 #include <isc/condition.h>
21 #include <isc/errno.h>
22 #include <isc/log.h>
23 #include <isc/magic.h>
24 #include <isc/mem.h>
25 #include <isc/netmgr.h>
26 #include <isc/print.h>
27 #include <isc/quota.h>
28 #include <isc/random.h>
29 #include <isc/refcount.h>
30 #include <isc/region.h>
31 #include <isc/result.h>
32 #include <isc/sockaddr.h>
33 #include <isc/stats.h>
34 #include <isc/strerr.h>
35 #include <isc/task.h>
36 #include <isc/thread.h>
37 #include <isc/tls.h>
38 #include <isc/util.h>
39 
40 #include "netmgr-int.h"
41 #include "netmgr_p.h"
42 #include "openssl_shim.h"
43 #include "trampoline_p.h"
44 #include "uv-compat.h"
45 
46 /*%
47  * How many isc_nmhandles and isc_nm_uvreqs will we be
48  * caching for reuse in a socket.
49  */
50 #define ISC_NM_HANDLES_STACK_SIZE 600
51 #define ISC_NM_REQS_STACK_SIZE	  600
52 
53 /*%
54  * Shortcut index arrays to get access to statistics counters.
55  */
56 
57 static const isc_statscounter_t udp4statsindex[] = {
58 	isc_sockstatscounter_udp4open,
59 	isc_sockstatscounter_udp4openfail,
60 	isc_sockstatscounter_udp4close,
61 	isc_sockstatscounter_udp4bindfail,
62 	isc_sockstatscounter_udp4connectfail,
63 	isc_sockstatscounter_udp4connect,
64 	-1,
65 	-1,
66 	isc_sockstatscounter_udp4sendfail,
67 	isc_sockstatscounter_udp4recvfail,
68 	isc_sockstatscounter_udp4active
69 };
70 
71 static const isc_statscounter_t udp6statsindex[] = {
72 	isc_sockstatscounter_udp6open,
73 	isc_sockstatscounter_udp6openfail,
74 	isc_sockstatscounter_udp6close,
75 	isc_sockstatscounter_udp6bindfail,
76 	isc_sockstatscounter_udp6connectfail,
77 	isc_sockstatscounter_udp6connect,
78 	-1,
79 	-1,
80 	isc_sockstatscounter_udp6sendfail,
81 	isc_sockstatscounter_udp6recvfail,
82 	isc_sockstatscounter_udp6active
83 };
84 
85 static const isc_statscounter_t tcp4statsindex[] = {
86 	isc_sockstatscounter_tcp4open,	      isc_sockstatscounter_tcp4openfail,
87 	isc_sockstatscounter_tcp4close,	      isc_sockstatscounter_tcp4bindfail,
88 	isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect,
89 	isc_sockstatscounter_tcp4acceptfail,  isc_sockstatscounter_tcp4accept,
90 	isc_sockstatscounter_tcp4sendfail,    isc_sockstatscounter_tcp4recvfail,
91 	isc_sockstatscounter_tcp4active
92 };
93 
94 static const isc_statscounter_t tcp6statsindex[] = {
95 	isc_sockstatscounter_tcp6open,	      isc_sockstatscounter_tcp6openfail,
96 	isc_sockstatscounter_tcp6close,	      isc_sockstatscounter_tcp6bindfail,
97 	isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect,
98 	isc_sockstatscounter_tcp6acceptfail,  isc_sockstatscounter_tcp6accept,
99 	isc_sockstatscounter_tcp6sendfail,    isc_sockstatscounter_tcp6recvfail,
100 	isc_sockstatscounter_tcp6active
101 };
102 
103 #if 0
104 /* XXX: not currently used */
105 static const isc_statscounter_t unixstatsindex[] = {
106 	isc_sockstatscounter_unixopen,
107 	isc_sockstatscounter_unixopenfail,
108 	isc_sockstatscounter_unixclose,
109 	isc_sockstatscounter_unixbindfail,
110 	isc_sockstatscounter_unixconnectfail,
111 	isc_sockstatscounter_unixconnect,
112 	isc_sockstatscounter_unixacceptfail,
113 	isc_sockstatscounter_unixaccept,
114 	isc_sockstatscounter_unixsendfail,
115 	isc_sockstatscounter_unixrecvfail,
116 	isc_sockstatscounter_unixactive
117 };
118 #endif /* if 0 */
119 
120 /*
121  * libuv is not thread safe, but has mechanisms to pass messages
122  * between threads. Each socket is owned by a thread. For UDP
123  * sockets we have a set of sockets for each interface and we can
124  * choose a sibling and send the message directly. For TCP, or if
125  * we're calling from a non-networking thread, we need to pass the
126  * request using async_cb.
127  */
128 
129 static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
130 
131 static void
132 nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG);
133 static void
134 nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle);
135 static isc_threadresult_t
136 nm_thread(isc_threadarg_t worker0);
137 static void
138 async_cb(uv_async_t *handle);
139 static bool
140 process_netievent(isc__networker_t *worker, isc__netievent_t *ievent);
141 static isc_result_t
142 process_queue(isc__networker_t *worker, netievent_type_t type);
143 static void
144 wait_for_priority_queue(isc__networker_t *worker);
145 static void
146 drain_queue(isc__networker_t *worker, netievent_type_t type);
147 
148 #define ENQUEUE_NETIEVENT(worker, queue, event) \
149 	isc_queue_enqueue(worker->ievents[queue], (uintptr_t)event)
150 #define DEQUEUE_NETIEVENT(worker, queue) \
151 	(isc__netievent_t *)isc_queue_dequeue(worker->ievents[queue])
152 
153 #define ENQUEUE_PRIORITY_NETIEVENT(worker, event) \
154 	ENQUEUE_NETIEVENT(worker, NETIEVENT_PRIORITY, event)
155 #define ENQUEUE_PRIVILEGED_NETIEVENT(worker, event) \
156 	ENQUEUE_NETIEVENT(worker, NETIEVENT_PRIVILEGED, event)
157 #define ENQUEUE_TASK_NETIEVENT(worker, event) \
158 	ENQUEUE_NETIEVENT(worker, NETIEVENT_TASK, event)
159 #define ENQUEUE_NORMAL_NETIEVENT(worker, event) \
160 	ENQUEUE_NETIEVENT(worker, NETIEVENT_NORMAL, event)
161 
162 #define DEQUEUE_PRIORITY_NETIEVENT(worker) \
163 	DEQUEUE_NETIEVENT(worker, NETIEVENT_PRIORITY)
164 #define DEQUEUE_PRIVILEGED_NETIEVENT(worker) \
165 	DEQUEUE_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
166 #define DEQUEUE_TASK_NETIEVENT(worker) DEQUEUE_NETIEVENT(worker, NETIEVENT_TASK)
167 #define DEQUEUE_NORMAL_NETIEVENT(worker) \
168 	DEQUEUE_NETIEVENT(worker, NETIEVENT_NORMAL)
169 
170 #define INCREMENT_NETIEVENT(worker, queue) \
171 	atomic_fetch_add_release(&worker->nievents[queue], 1)
172 #define DECREMENT_NETIEVENT(worker, queue) \
173 	atomic_fetch_sub_release(&worker->nievents[queue], 1)
174 
175 #define INCREMENT_PRIORITY_NETIEVENT(worker) \
176 	INCREMENT_NETIEVENT(worker, NETIEVENT_PRIORITY)
177 #define INCREMENT_PRIVILEGED_NETIEVENT(worker) \
178 	INCREMENT_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
179 #define INCREMENT_TASK_NETIEVENT(worker) \
180 	INCREMENT_NETIEVENT(worker, NETIEVENT_TASK)
181 #define INCREMENT_NORMAL_NETIEVENT(worker) \
182 	INCREMENT_NETIEVENT(worker, NETIEVENT_NORMAL)
183 
184 #define DECREMENT_PRIORITY_NETIEVENT(worker) \
185 	DECREMENT_NETIEVENT(worker, NETIEVENT_PRIORITY)
186 #define DECREMENT_PRIVILEGED_NETIEVENT(worker) \
187 	DECREMENT_NETIEVENT(worker, NETIEVENT_PRIVILEGED)
188 #define DECREMENT_TASK_NETIEVENT(worker) \
189 	DECREMENT_NETIEVENT(worker, NETIEVENT_TASK)
190 #define DECREMENT_NORMAL_NETIEVENT(worker) \
191 	DECREMENT_NETIEVENT(worker, NETIEVENT_NORMAL)
192 
193 static void
194 isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0);
195 static void
196 isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0);
197 static void
198 isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0);
199 static void
200 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0);
201 static void
202 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0);
203 
204 static void
205 isc__nm_threadpool_initialize(uint32_t workers);
206 static void
207 isc__nm_work_cb(uv_work_t *req);
208 static void
209 isc__nm_after_work_cb(uv_work_t *req, int status);
210 
211 /*%<
212  * Issue a 'handle closed' callback on the socket.
213  */
214 
215 static void
216 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG);
217 
218 int
219 isc_nm_tid(void) {
220 	return (isc__nm_tid_v);
221 }
222 
223 bool
224 isc__nm_in_netthread(void) {
225 	return (isc__nm_tid_v >= 0);
226 }
227 
228 void
229 isc__nm_force_tid(int tid) {
230 	isc__nm_tid_v = tid;
231 }
232 
233 static void
234 isc__nm_threadpool_initialize(uint32_t workers) {
235 	char buf[11];
236 	int r = uv_os_getenv("UV_THREADPOOL_SIZE", buf,
237 			     &(size_t){ sizeof(buf) });
238 	if (r == UV_ENOENT) {
239 		snprintf(buf, sizeof(buf), "%" PRIu32, workers);
240 		uv_os_setenv("UV_THREADPOOL_SIZE", buf);
241 	}
242 }
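
/*
 * Illustration (a sketch, not from the original sources): with
 * workers == 8 and UV_THREADPOOL_SIZE unset in the environment, the
 * function above behaves as if the process had been started with
 * UV_THREADPOOL_SIZE=8; libuv reads this variable when it first
 * spawns its worker threadpool.  An explicitly set UV_THREADPOOL_SIZE
 * is left untouched.
 */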
243 
244 void
245 isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) {
246 	isc_nm_t *mgr = NULL;
247 	char name[32];
248 
249 	REQUIRE(workers > 0);
250 
251 	isc__nm_threadpool_initialize(workers);
252 
253 	mgr = isc_mem_get(mctx, sizeof(*mgr));
254 	*mgr = (isc_nm_t){ .nworkers = workers };
255 
256 	isc_mem_attach(mctx, &mgr->mctx);
257 	isc_mutex_init(&mgr->lock);
258 	isc_condition_init(&mgr->wkstatecond);
259 	isc_condition_init(&mgr->wkpausecond);
260 	isc_refcount_init(&mgr->references, 1);
261 	atomic_init(&mgr->maxudp, 0);
262 	atomic_init(&mgr->interlocked, ISC_NETMGR_NON_INTERLOCKED);
263 	atomic_init(&mgr->workers_paused, 0);
264 	atomic_init(&mgr->paused, false);
265 	atomic_init(&mgr->closing, false);
266 	atomic_init(&mgr->recv_tcp_buffer_size, 0);
267 	atomic_init(&mgr->send_tcp_buffer_size, 0);
268 	atomic_init(&mgr->recv_udp_buffer_size, 0);
269 	atomic_init(&mgr->send_udp_buffer_size, 0);
270 
271 #ifdef NETMGR_TRACE
272 	ISC_LIST_INIT(mgr->active_sockets);
273 #endif
274 
275 	/*
276 	 * Default TCP timeout values.
277 	 * May be updated by isc_nm_tcptimeouts().
278 	 */
279 	atomic_init(&mgr->init, 30000);
280 	atomic_init(&mgr->idle, 30000);
281 	atomic_init(&mgr->keepalive, 30000);
282 	atomic_init(&mgr->advertised, 30000);
283 
284 	isc_barrier_init(&mgr->pausing, workers);
285 	isc_barrier_init(&mgr->resuming, workers);
286 
287 	mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t));
288 	for (size_t i = 0; i < workers; i++) {
289 		int r;
290 		isc__networker_t *worker = &mgr->workers[i];
291 		*worker = (isc__networker_t){
292 			.mgr = mgr,
293 			.id = i,
294 		};
295 
296 		r = uv_loop_init(&worker->loop);
297 		RUNTIME_CHECK(r == 0);
298 
299 		worker->loop.data = &mgr->workers[i];
300 
301 		r = uv_async_init(&worker->loop, &worker->async, async_cb);
302 		RUNTIME_CHECK(r == 0);
303 
304 		isc_mutex_init(&worker->lock);
305 		isc_condition_init(&worker->cond_prio);
306 
307 		for (size_t type = 0; type < NETIEVENT_MAX; type++) {
308 			worker->ievents[type] = isc_queue_new(mgr->mctx, 128);
309 			atomic_init(&worker->nievents[type], 0);
310 		}
311 
312 		worker->recvbuf = isc_mem_get(mctx, ISC_NETMGR_RECVBUF_SIZE);
313 		worker->sendbuf = isc_mem_get(mctx, ISC_NETMGR_SENDBUF_SIZE);
314 
315 		/*
316 		 * We need to do this here and not in nm_thread to avoid a
317 		 * race - we could exit isc_nm_start, launch nm_destroy,
318 		 * and nm_thread would still not be up.
319 		 */
320 		mgr->workers_running++;
321 		isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread);
322 
323 		snprintf(name, sizeof(name), "isc-net-%04zu", i);
324 		isc_thread_setname(worker->thread, name);
325 	}
326 
327 	mgr->magic = NM_MAGIC;
328 	*netmgrp = mgr;
329 }
330 
331 /*
332  * Free the resources of the network manager.
333  */
334 static void
335 nm_destroy(isc_nm_t **mgr0) {
336 	REQUIRE(VALID_NM(*mgr0));
337 	REQUIRE(!isc__nm_in_netthread());
338 
339 	isc_nm_t *mgr = *mgr0;
340 	*mgr0 = NULL;
341 
342 	isc_refcount_destroy(&mgr->references);
343 
344 	mgr->magic = 0;
345 
346 	for (int i = 0; i < mgr->nworkers; i++) {
347 		isc__networker_t *worker = &mgr->workers[i];
348 		isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
349 		isc__nm_enqueue_ievent(worker, event);
350 	}
351 
352 	LOCK(&mgr->lock);
353 	while (mgr->workers_running > 0) {
354 		WAIT(&mgr->wkstatecond, &mgr->lock);
355 	}
356 	UNLOCK(&mgr->lock);
357 
358 	for (int i = 0; i < mgr->nworkers; i++) {
359 		isc__networker_t *worker = &mgr->workers[i];
360 		isc__netievent_t *ievent = NULL;
361 		int r;
362 
363 		/* Empty the async event queues */
364 		while ((ievent = DEQUEUE_PRIORITY_NETIEVENT(worker)) != NULL) {
365 			isc_mem_put(mgr->mctx, ievent, sizeof(*ievent));
366 		}
367 
368 		INSIST(DEQUEUE_PRIVILEGED_NETIEVENT(worker) == NULL);
369 		INSIST(DEQUEUE_TASK_NETIEVENT(worker) == NULL);
370 
371 		while ((ievent = DEQUEUE_NORMAL_NETIEVENT(worker)) != NULL) {
372 			isc_mem_put(mgr->mctx, ievent, sizeof(*ievent));
373 		}
374 		isc_condition_destroy(&worker->cond_prio);
375 
376 		r = uv_loop_close(&worker->loop);
377 		INSIST(r == 0);
378 
379 		for (size_t type = 0; type < NETIEVENT_MAX; type++) {
380 			isc_queue_destroy(worker->ievents[type]);
381 		}
382 
383 		isc_mem_put(mgr->mctx, worker->sendbuf,
384 			    ISC_NETMGR_SENDBUF_SIZE);
385 		isc_mem_put(mgr->mctx, worker->recvbuf,
386 			    ISC_NETMGR_RECVBUF_SIZE);
387 		isc_thread_join(worker->thread, NULL);
388 	}
389 
390 	if (mgr->stats != NULL) {
391 		isc_stats_detach(&mgr->stats);
392 	}
393 
394 	isc_barrier_destroy(&mgr->resuming);
395 	isc_barrier_destroy(&mgr->pausing);
396 
397 	isc_condition_destroy(&mgr->wkstatecond);
398 	isc_condition_destroy(&mgr->wkpausecond);
399 	isc_mutex_destroy(&mgr->lock);
400 
401 	isc_mem_put(mgr->mctx, mgr->workers,
402 		    mgr->nworkers * sizeof(isc__networker_t));
403 	isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
404 }
405 
406 static void
407 enqueue_pause(isc__networker_t *worker) {
408 	isc__netievent_pause_t *event =
409 		isc__nm_get_netievent_pause(worker->mgr);
410 	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
411 }
412 
413 static void
414 isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
415 	UNUSED(ev0);
416 	REQUIRE(worker->paused == false);
417 
418 	worker->paused = true;
419 	uv_stop(&worker->loop);
420 }
421 
422 void
423 isc_nm_pause(isc_nm_t *mgr) {
424 	REQUIRE(VALID_NM(mgr));
425 	REQUIRE(!atomic_load(&mgr->paused));
426 
427 	isc__nm_acquire_interlocked_force(mgr);
428 
429 	if (isc__nm_in_netthread()) {
430 		REQUIRE(isc_nm_tid() == 0);
431 	}
432 
433 	for (int i = 0; i < mgr->nworkers; i++) {
434 		isc__networker_t *worker = &mgr->workers[i];
435 		if (i == isc_nm_tid()) {
436 			isc__nm_async_pause(worker, NULL);
437 		} else {
438 			enqueue_pause(worker);
439 		}
440 	}
441 
442 	if (isc__nm_in_netthread()) {
443 		atomic_fetch_add(&mgr->workers_paused, 1);
444 		isc_barrier_wait(&mgr->pausing);
445 	}
446 
447 	LOCK(&mgr->lock);
448 	while (atomic_load(&mgr->workers_paused) != mgr->workers_running) {
449 		WAIT(&mgr->wkstatecond, &mgr->lock);
450 	}
451 	UNLOCK(&mgr->lock);
452 
453 	REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ false },
454 					       true));
455 }
456 
457 static void
458 enqueue_resume(isc__networker_t *worker) {
459 	isc__netievent_resume_t *event =
460 		isc__nm_get_netievent_resume(worker->mgr);
461 	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
462 }
463 
464 static void
465 isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
466 	UNUSED(ev0);
467 	REQUIRE(worker->paused == true);
468 
469 	worker->paused = false;
470 }
471 
472 void
473 isc_nm_resume(isc_nm_t *mgr) {
474 	REQUIRE(VALID_NM(mgr));
475 	REQUIRE(atomic_load(&mgr->paused));
476 
477 	if (isc__nm_in_netthread()) {
478 		REQUIRE(isc_nm_tid() == 0);
479 		drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIORITY);
480 	}
481 
482 	for (int i = 0; i < mgr->nworkers; i++) {
483 		isc__networker_t *worker = &mgr->workers[i];
484 		if (i == isc_nm_tid()) {
485 			isc__nm_async_resume(worker, NULL);
486 		} else {
487 			enqueue_resume(worker);
488 		}
489 	}
490 
491 	if (isc__nm_in_netthread()) {
492 		drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIVILEGED);
493 
494 		atomic_fetch_sub(&mgr->workers_paused, 1);
495 		isc_barrier_wait(&mgr->resuming);
496 	}
497 
498 	LOCK(&mgr->lock);
499 	while (atomic_load(&mgr->workers_paused) != 0) {
500 		WAIT(&mgr->wkstatecond, &mgr->lock);
501 	}
502 	UNLOCK(&mgr->lock);
503 
504 	REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ true },
505 					       false));
506 
507 	isc__nm_drop_interlocked(mgr);
508 }
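
/*
 * Minimal usage sketch (illustrative; not copied from a real caller):
 * a non-network thread that needs all loops quiescent, e.g. while
 * reconfiguring listeners, would bracket the work with:
 *
 *	isc_nm_pause(mgr);
 *	... reconfigure while all workers are parked ...
 *	isc_nm_resume(mgr);
 *
 * isc_nm_pause() returns only after every worker has entered the
 * paused state, and isc_nm_resume() returns only after every worker
 * has left it again.
 */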
509 
510 void
511 isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
512 	REQUIRE(VALID_NM(mgr));
513 	REQUIRE(dst != NULL && *dst == NULL);
514 
515 	isc_refcount_increment(&mgr->references);
516 
517 	*dst = mgr;
518 }
519 
520 void
521 isc_nm_detach(isc_nm_t **mgr0) {
522 	isc_nm_t *mgr = NULL;
523 
524 	REQUIRE(mgr0 != NULL);
525 	REQUIRE(VALID_NM(*mgr0));
526 
527 	mgr = *mgr0;
528 	*mgr0 = NULL;
529 
530 	if (isc_refcount_decrement(&mgr->references) == 1) {
531 		nm_destroy(&mgr);
532 	}
533 }
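
/*
 * Reference-counting sketch (hypothetical caller, for illustration):
 *
 *	isc_nm_t *ref = NULL;
 *	isc_nm_attach(mgr, &ref);	// references: n -> n + 1
 *	...
 *	isc_nm_detach(&ref);		// references: n + 1 -> n
 *
 * The manager is destroyed via nm_destroy() when the last reference
 * is dropped.
 */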
534 
535 void
536 isc__netmgr_shutdown(isc_nm_t *mgr) {
537 	REQUIRE(VALID_NM(mgr));
538 
539 	atomic_store(&mgr->closing, true);
540 	for (int i = 0; i < mgr->nworkers; i++) {
541 		isc__netievent_t *event = NULL;
542 		event = isc__nm_get_netievent_shutdown(mgr);
543 		isc__nm_enqueue_ievent(&mgr->workers[i], event);
544 	}
545 }
546 
547 void
548 isc__netmgr_destroy(isc_nm_t **netmgrp) {
549 	isc_nm_t *mgr = NULL;
550 	int counter = 0;
551 
552 	REQUIRE(VALID_NM(*netmgrp));
553 
554 	mgr = *netmgrp;
555 
556 	/*
557 	 * Close active connections.
558 	 */
559 	isc__netmgr_shutdown(mgr);
560 
561 	/*
562 	 * Wait for the manager to be dereferenced elsewhere.
563 	 */
564 	while (isc_refcount_current(&mgr->references) > 1 && counter++ < 1000) {
565 		uv_sleep(10);
566 	}
567 
568 #ifdef NETMGR_TRACE
569 	if (isc_refcount_current(&mgr->references) > 1) {
570 		isc__nm_dump_active(mgr);
571 		INSIST(0);
572 		ISC_UNREACHABLE();
573 	}
574 #endif
575 
576 	/*
577 	 * Now just patiently wait
578 	 */
579 	while (isc_refcount_current(&mgr->references) > 1) {
580 		uv_sleep(10);
581 	}
582 
583 	/*
584 	 * Detach final reference.
585 	 */
586 	isc_nm_detach(netmgrp);
587 }
588 
589 void
590 isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {
591 	REQUIRE(VALID_NM(mgr));
592 
593 	atomic_store(&mgr->maxudp, maxudp);
594 }
595 
596 void
597 isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle,
598 		   uint32_t keepalive, uint32_t advertised) {
599 	REQUIRE(VALID_NM(mgr));
600 
601 	atomic_store(&mgr->init, init);
602 	atomic_store(&mgr->idle, idle);
603 	atomic_store(&mgr->keepalive, keepalive);
604 	atomic_store(&mgr->advertised, advertised);
605 }
606 
607 void
608 isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp,
609 		     int32_t recv_udp, int32_t send_udp) {
610 	REQUIRE(VALID_NM(mgr));
611 
612 	atomic_store(&mgr->recv_tcp_buffer_size, recv_tcp);
613 	atomic_store(&mgr->send_tcp_buffer_size, send_tcp);
614 	atomic_store(&mgr->recv_udp_buffer_size, recv_udp);
615 	atomic_store(&mgr->send_udp_buffer_size, send_udp);
616 }
617 
618 void
619 isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
620 		   uint32_t *keepalive, uint32_t *advertised) {
621 	REQUIRE(VALID_NM(mgr));
622 
623 	if (initial != NULL) {
624 		*initial = atomic_load(&mgr->init);
625 	}
626 
627 	if (idle != NULL) {
628 		*idle = atomic_load(&mgr->idle);
629 	}
630 
631 	if (keepalive != NULL) {
632 		*keepalive = atomic_load(&mgr->keepalive);
633 	}
634 
635 	if (advertised != NULL) {
636 		*advertised = atomic_load(&mgr->advertised);
637 	}
638 }
639 
640 /*
641  * nm_thread is a single worker thread that runs the uv_run() event
642  * loop until asked to stop.
643  *
644  * There are four queues for asynchronous events:
645  *
646  * 1. priority queue - netievents on the priority queue are run even when
647  *    the taskmgr enters exclusive mode and the netmgr is paused.  This
648  *    is needed to properly start listening on the interfaces, free
649  *    resources on shutdown, or resume from a pause.
650  *
651  * 2. privileged task queue - only privileged tasks are queued here and
652  *    this is the first queue that gets processed when network manager
653  *    is unpaused using isc_nm_resume().  All netmgr workers need to
654  *    clean the privileged task queue before they all proceed to normal
655  *    operation.  Both task queues are processed when the workers are
656  *    shutting down.
657  *
658  * 3. task queue - only (traditional) tasks are scheduled here, and this
659  *    queue and the privileged task queue are both processed when the
660  *    netmgr workers are finishing.  This is needed to process the task
661  *    shutdown events.
662  *
663  * 4. normal queue - this is the queue with netmgr events, e.g. reading,
664  *    sending, callbacks, etc.
665  */
666 
667 static isc_threadresult_t
668 nm_thread(isc_threadarg_t worker0) {
669 	isc__networker_t *worker = (isc__networker_t *)worker0;
670 	isc_nm_t *mgr = worker->mgr;
671 
672 	isc__nm_tid_v = worker->id;
673 
674 	while (true) {
675 		/*
676 		 * uv_run() runs async_cb() in a loop, which processes
677 		 * all four event queues until a "pause" or "stop" event
678 		 * is encountered. On pause, we process only priority and
679 		 * privileged events until resuming.
680 		 */
681 		int r = uv_run(&worker->loop, UV_RUN_DEFAULT);
682 		INSIST(r > 0 || worker->finished);
683 
684 		if (worker->paused) {
685 			INSIST(atomic_load(&mgr->interlocked) != isc_nm_tid());
686 
687 			atomic_fetch_add(&mgr->workers_paused, 1);
688 			if (isc_barrier_wait(&mgr->pausing) != 0) {
689 				LOCK(&mgr->lock);
690 				SIGNAL(&mgr->wkstatecond);
691 				UNLOCK(&mgr->lock);
692 			}
693 
694 			while (worker->paused) {
695 				wait_for_priority_queue(worker);
696 			}
697 
698 			/*
699 			 * All workers must drain the privileged event
700 			 * queue before we resume from pause.
701 			 */
702 			drain_queue(worker, NETIEVENT_PRIVILEGED);
703 
704 			atomic_fetch_sub(&mgr->workers_paused, 1);
705 			if (isc_barrier_wait(&mgr->resuming) != 0) {
706 				LOCK(&mgr->lock);
707 				SIGNAL(&mgr->wkstatecond);
708 				UNLOCK(&mgr->lock);
709 			}
710 		}
711 
712 		if (r == 0) {
713 			INSIST(worker->finished);
714 			break;
715 		}
716 
717 		INSIST(!worker->finished);
718 	}
719 
720 	/*
721 	 * We are shutting down. Process the task queues
722 	 * (they may include shutdown events) but do not process
723 	 * the netmgr event queue.
724 	 */
725 	drain_queue(worker, NETIEVENT_PRIVILEGED);
726 	drain_queue(worker, NETIEVENT_TASK);
727 
728 	LOCK(&mgr->lock);
729 	mgr->workers_running--;
730 	SIGNAL(&mgr->wkstatecond);
731 	UNLOCK(&mgr->lock);
732 
733 	return ((isc_threadresult_t)0);
734 }
735 
736 static bool
737 process_all_queues(isc__networker_t *worker) {
738 	bool reschedule = false;
739 	/*
740 	 * The queue processing functions will return false when the
741 	 * system is pausing or stopping and we don't want to process
742 	 * the other queues in such a case, but we need the async event
743 	 * to be rescheduled in the next uv_run().
744 	 */
745 	for (size_t type = 0; type < NETIEVENT_MAX; type++) {
746 		isc_result_t result = process_queue(worker, type);
747 		switch (result) {
748 		case ISC_R_SUSPEND:
749 			return (true);
750 		case ISC_R_EMPTY:
751 			/* empty queue */
752 			break;
753 		case ISC_R_SUCCESS:
754 			reschedule = true;
755 			break;
756 		default:
757 			INSIST(0);
758 			ISC_UNREACHABLE();
759 		}
760 	}
761 
762 	return (reschedule);
763 }
764 
765 /*
766  * async_cb() is a universal callback for 'async' events sent to event loop.
767  * It's the only way to safely pass data to the libuv event loop. We use a
768  * single async event and a set of lockless queues of 'isc__netievent_t'
769  * structures passed from other threads.
770  */
771 static void
772 async_cb(uv_async_t *handle) {
773 	isc__networker_t *worker = (isc__networker_t *)handle->loop->data;
774 
775 	if (process_all_queues(worker)) {
776 		/*
777 		 * If we didn't process all the events, we need to enqueue
778 		 * async_cb to be run in the next iteration of the uv_loop
779 		 */
780 		uv_async_send(handle);
781 	}
782 }
783 
784 static void
785 isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
786 	UNUSED(ev0);
787 	worker->finished = true;
788 	/* Close the async handler */
789 	uv_close((uv_handle_t *)&worker->async, NULL);
790 }
791 
792 void
793 isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) {
794 	isc__netievent_t *event = NULL;
795 	int tid;
796 	isc__networker_t *worker = NULL;
797 
798 	if (threadid == -1) {
799 		tid = (int)isc_random_uniform(nm->nworkers);
800 	} else {
801 		tid = threadid % nm->nworkers;
802 	}
803 
804 	worker = &nm->workers[tid];
805 
806 	if (isc_task_privileged(task)) {
807 		event = (isc__netievent_t *)
808 			isc__nm_get_netievent_privilegedtask(nm, task);
809 	} else {
810 		event = (isc__netievent_t *)isc__nm_get_netievent_task(nm,
811 								       task);
812 	}
813 
814 	isc__nm_enqueue_ievent(worker, event);
815 }
816 
817 #define isc__nm_async_privilegedtask(worker, ev0) \
818 	isc__nm_async_task(worker, ev0)
819 
820 static void
821 isc__nm_async_task(isc__networker_t *worker, isc__netievent_t *ev0) {
822 	isc__netievent_task_t *ievent = (isc__netievent_task_t *)ev0;
823 	isc_result_t result;
824 
825 	UNUSED(worker);
826 
827 	result = isc_task_run(ievent->task);
828 
829 	switch (result) {
830 	case ISC_R_QUOTA:
831 		isc_task_ready(ievent->task);
832 		return;
833 	case ISC_R_SUCCESS:
834 		return;
835 	default:
836 		INSIST(0);
837 		ISC_UNREACHABLE();
838 	}
839 }
840 
841 static void
842 wait_for_priority_queue(isc__networker_t *worker) {
843 	isc_condition_t *cond = &worker->cond_prio;
844 	bool wait_for_work = true;
845 
846 	while (true) {
847 		isc__netievent_t *ievent;
848 		LOCK(&worker->lock);
849 		ievent = DEQUEUE_PRIORITY_NETIEVENT(worker);
850 		if (wait_for_work) {
851 			while (ievent == NULL) {
852 				WAIT(cond, &worker->lock);
853 				ievent = DEQUEUE_PRIORITY_NETIEVENT(worker);
854 			}
855 		}
856 		UNLOCK(&worker->lock);
857 		wait_for_work = false;
858 
859 		if (ievent == NULL) {
860 			return;
861 		}
862 		DECREMENT_PRIORITY_NETIEVENT(worker);
863 
864 		(void)process_netievent(worker, ievent);
865 	}
866 }
867 
868 static void
869 drain_queue(isc__networker_t *worker, netievent_type_t type) {
870 	while (process_queue(worker, type) != ISC_R_EMPTY) {
871 		;
872 	}
873 }
874 
875 /*
876  * The two macros below generate the individual cases for the
877  * process_netievent() function.  NETIEVENT_CASE(type) handles the
878  * common case, and NETIEVENT_CASE_NOMORE(type) causes the loop in
879  * process_queue() to stop, i.e. it is used only for the netievents
880  * that stop or pause processing of the enqueued netievents.
881  */
882 #define NETIEVENT_CASE(type)                                               \
883 	case netievent_##type: {                                           \
884 		isc__nm_async_##type(worker, ievent);                      \
885 		isc__nm_put_netievent_##type(                              \
886 			worker->mgr, (isc__netievent_##type##_t *)ievent); \
887 		return (true);                                             \
888 	}
889 
890 #define NETIEVENT_CASE_NOMORE(type)                                \
891 	case netievent_##type: {                                   \
892 		isc__nm_async_##type(worker, ievent);              \
893 		isc__nm_put_netievent_##type(worker->mgr, ievent); \
894 		return (false);                                    \
895 	}
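
/*
 * For reference, NETIEVENT_CASE(udpread) expands to roughly the
 * following case statement (derived from the macro above):
 *
 *	case netievent_udpread: {
 *		isc__nm_async_udpread(worker, ievent);
 *		isc__nm_put_netievent_udpread(
 *			worker->mgr, (isc__netievent_udpread_t *)ievent);
 *		return (true);
 *	}
 */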
896 
897 static bool
898 process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) {
899 	REQUIRE(worker->id == isc_nm_tid());
900 
901 	switch (ievent->type) {
902 		/* Don't process more ievents when we are stopping */
903 		NETIEVENT_CASE_NOMORE(stop);
904 
905 		NETIEVENT_CASE(privilegedtask);
906 		NETIEVENT_CASE(task);
907 
908 		NETIEVENT_CASE(udpconnect);
909 		NETIEVENT_CASE(udplisten);
910 		NETIEVENT_CASE(udpstop);
911 		NETIEVENT_CASE(udpsend);
912 		NETIEVENT_CASE(udpread);
913 		NETIEVENT_CASE(udpcancel);
914 		NETIEVENT_CASE(udpclose);
915 
916 		NETIEVENT_CASE(tcpaccept);
917 		NETIEVENT_CASE(tcpconnect);
918 		NETIEVENT_CASE(tcplisten);
919 		NETIEVENT_CASE(tcpstartread);
920 		NETIEVENT_CASE(tcppauseread);
921 		NETIEVENT_CASE(tcpsend);
922 		NETIEVENT_CASE(tcpstop);
923 		NETIEVENT_CASE(tcpcancel);
924 		NETIEVENT_CASE(tcpclose);
925 
926 		NETIEVENT_CASE(tcpdnsaccept);
927 		NETIEVENT_CASE(tcpdnslisten);
928 		NETIEVENT_CASE(tcpdnsconnect);
929 		NETIEVENT_CASE(tcpdnssend);
930 		NETIEVENT_CASE(tcpdnscancel);
931 		NETIEVENT_CASE(tcpdnsclose);
932 		NETIEVENT_CASE(tcpdnsread);
933 		NETIEVENT_CASE(tcpdnsstop);
934 
935 		NETIEVENT_CASE(tlsdnscycle);
936 		NETIEVENT_CASE(tlsdnsaccept);
937 		NETIEVENT_CASE(tlsdnslisten);
938 		NETIEVENT_CASE(tlsdnsconnect);
939 		NETIEVENT_CASE(tlsdnssend);
940 		NETIEVENT_CASE(tlsdnscancel);
941 		NETIEVENT_CASE(tlsdnsclose);
942 		NETIEVENT_CASE(tlsdnsread);
943 		NETIEVENT_CASE(tlsdnsstop);
944 		NETIEVENT_CASE(tlsdnsshutdown);
945 
946 #if HAVE_LIBNGHTTP2
947 		NETIEVENT_CASE(tlsstartread);
948 		NETIEVENT_CASE(tlssend);
949 		NETIEVENT_CASE(tlsclose);
950 		NETIEVENT_CASE(tlsdobio);
951 		NETIEVENT_CASE(tlscancel);
952 
953 		NETIEVENT_CASE(httpstop);
954 		NETIEVENT_CASE(httpsend);
955 		NETIEVENT_CASE(httpclose);
956 #endif
957 
958 		NETIEVENT_CASE(connectcb);
959 		NETIEVENT_CASE(readcb);
960 		NETIEVENT_CASE(sendcb);
961 
962 		NETIEVENT_CASE(close);
963 		NETIEVENT_CASE(detach);
964 
965 		NETIEVENT_CASE(shutdown);
966 		NETIEVENT_CASE(resume);
967 		NETIEVENT_CASE_NOMORE(pause);
968 	default:
969 		INSIST(0);
970 		ISC_UNREACHABLE();
971 	}
972 	return (true);
973 }
974 
975 static isc_result_t
976 process_queue(isc__networker_t *worker, netievent_type_t type) {
977 	/*
978 	 * The event counter is only loosely synchronized with the actual
979 	 * items on the queue.  It is guaranteed that any item placed on
980 	 * the queue will be accounted for, but the counter may
981 	 * temporarily read higher than the number of items currently
982 	 * stored on the queue.
983 	 */
984 	uint_fast32_t waiting = atomic_load_acquire(&worker->nievents[type]);
985 	isc__netievent_t *ievent = DEQUEUE_NETIEVENT(worker, type);
986 
987 	if (ievent == NULL && waiting == 0) {
988 		/* There's nothing scheduled */
989 		return (ISC_R_EMPTY);
990 	} else if (ievent == NULL) {
991 		/* There's at least one item scheduled, but not on the queue yet
992 		 */
993 		return (ISC_R_SUCCESS);
994 	}
995 
996 	while (ievent != NULL) {
997 		DECREMENT_NETIEVENT(worker, type);
998 		bool stop = !process_netievent(worker, ievent);
999 
1000 		if (stop) {
1001 			/* Netievent told us to stop */
1002 			return (ISC_R_SUSPEND);
1003 		}
1004 
1005 		if (waiting-- == 0) {
1006 			/* We reached this round "quota" */
1007 			break;
1008 		}
1009 
1010 		ievent = DEQUEUE_NETIEVENT(worker, type);
1011 	}
1012 
1013 	/* We processed at least one */
1014 	return (ISC_R_SUCCESS);
1015 }
1016 
1017 void *
1018 isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) {
1019 	isc__netievent_storage_t *event = isc_mem_get(mgr->mctx,
1020 						      sizeof(*event));
1021 
1022 	*event = (isc__netievent_storage_t){ .ni.type = type };
1023 	return (event);
1024 }
1025 
1026 void
1027 isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) {
1028 	isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t));
1029 }
1030 
1031 NETIEVENT_SOCKET_DEF(tcpclose);
1032 NETIEVENT_SOCKET_DEF(tcplisten);
1033 NETIEVENT_SOCKET_DEF(tcppauseread);
1034 NETIEVENT_SOCKET_DEF(tcpstartread);
1035 NETIEVENT_SOCKET_DEF(tcpstop);
1036 NETIEVENT_SOCKET_DEF(tlsclose);
1037 NETIEVENT_SOCKET_DEF(tlsconnect);
1038 NETIEVENT_SOCKET_DEF(tlsdobio);
1039 NETIEVENT_SOCKET_DEF(tlsstartread);
1040 NETIEVENT_SOCKET_HANDLE_DEF(tlscancel);
1041 NETIEVENT_SOCKET_DEF(udpclose);
1042 NETIEVENT_SOCKET_DEF(udplisten);
1043 NETIEVENT_SOCKET_DEF(udpread);
1044 NETIEVENT_SOCKET_DEF(udpsend);
1045 NETIEVENT_SOCKET_DEF(udpstop);
1046 
1047 NETIEVENT_SOCKET_DEF(tcpdnsclose);
1048 NETIEVENT_SOCKET_DEF(tcpdnsread);
1049 NETIEVENT_SOCKET_DEF(tcpdnsstop);
1050 NETIEVENT_SOCKET_DEF(tcpdnslisten);
1051 NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect);
1052 NETIEVENT_SOCKET_REQ_DEF(tcpdnssend);
1053 NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel);
1054 NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept);
1055 
1056 NETIEVENT_SOCKET_DEF(tlsdnsclose);
1057 NETIEVENT_SOCKET_DEF(tlsdnsread);
1058 NETIEVENT_SOCKET_DEF(tlsdnsstop);
1059 NETIEVENT_SOCKET_DEF(tlsdnslisten);
1060 NETIEVENT_SOCKET_REQ_DEF(tlsdnsconnect);
1061 NETIEVENT_SOCKET_REQ_DEF(tlsdnssend);
1062 NETIEVENT_SOCKET_HANDLE_DEF(tlsdnscancel);
1063 NETIEVENT_SOCKET_QUOTA_DEF(tlsdnsaccept);
1064 NETIEVENT_SOCKET_DEF(tlsdnscycle);
1065 NETIEVENT_SOCKET_DEF(tlsdnsshutdown);
1066 
1067 NETIEVENT_SOCKET_DEF(httpstop);
1068 NETIEVENT_SOCKET_REQ_DEF(httpsend);
1069 NETIEVENT_SOCKET_DEF(httpclose);
1070 
1071 NETIEVENT_SOCKET_REQ_DEF(tcpconnect);
1072 NETIEVENT_SOCKET_REQ_DEF(tcpsend);
1073 NETIEVENT_SOCKET_REQ_DEF(tlssend);
1074 NETIEVENT_SOCKET_REQ_DEF(udpconnect);
1075 NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb);
1076 NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb);
1077 NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb);
1078 
1079 NETIEVENT_SOCKET_DEF(detach);
1080 NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel);
1081 NETIEVENT_SOCKET_HANDLE_DEF(udpcancel);
1082 
1083 NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept);
1084 
1085 NETIEVENT_SOCKET_DEF(close);
1086 NETIEVENT_DEF(pause);
1087 NETIEVENT_DEF(resume);
1088 NETIEVENT_DEF(shutdown);
1089 NETIEVENT_DEF(stop);
1090 
1091 NETIEVENT_TASK_DEF(task);
1092 NETIEVENT_TASK_DEF(privilegedtask);
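
/*
 * Descriptive note: the NETIEVENT_*_DEF() macros above (presumably
 * defined in netmgr-int.h) expand into the per-type constructor and
 * destructor pairs used throughout this file,
 * isc__nm_get_netievent_<type>() and isc__nm_put_netievent_<type>(),
 * which wrap isc__nm_get_netievent() and isc__nm_put_netievent()
 * respectively.  This is inferred from how the generated functions
 * are used here, not from the macro definitions themselves.
 */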
1093 
1094 void
1095 isc__nm_maybe_enqueue_ievent(isc__networker_t *worker,
1096 			     isc__netievent_t *event) {
1097 	/*
1098 	 * If we are already in the matching nmthread, process the ievent
1099 	 * directly.
1100 	 */
1101 	if (worker->id == isc_nm_tid()) {
1102 		process_netievent(worker, event);
1103 		return;
1104 	}
1105 
1106 	isc__nm_enqueue_ievent(worker, event);
1107 }
1108 
1109 void
1110 isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
1111 	if (event->type > netievent_prio) {
1112 		/*
1113 		 * We need to make sure this signal will be delivered and
1114 		 * the queue will be processed.
1115 		 */
1116 		LOCK(&worker->lock);
1117 		INCREMENT_PRIORITY_NETIEVENT(worker);
1118 		ENQUEUE_PRIORITY_NETIEVENT(worker, event);
1119 		SIGNAL(&worker->cond_prio);
1120 		UNLOCK(&worker->lock);
1121 	} else if (event->type == netievent_privilegedtask) {
1122 		INCREMENT_PRIVILEGED_NETIEVENT(worker);
1123 		ENQUEUE_PRIVILEGED_NETIEVENT(worker, event);
1124 	} else if (event->type == netievent_task) {
1125 		INCREMENT_TASK_NETIEVENT(worker);
1126 		ENQUEUE_TASK_NETIEVENT(worker, event);
1127 	} else {
1128 		INCREMENT_NORMAL_NETIEVENT(worker);
1129 		ENQUEUE_NORMAL_NETIEVENT(worker, event);
1130 	}
1131 	uv_async_send(&worker->async);
1132 }
1133 
1134 bool
1135 isc__nmsocket_active(isc_nmsocket_t *sock) {
1136 	REQUIRE(VALID_NMSOCK(sock));
1137 	if (sock->parent != NULL) {
1138 		return (atomic_load(&sock->parent->active));
1139 	}
1140 
1141 	return (atomic_load(&sock->active));
1142 }
1143 
1144 bool
1145 isc__nmsocket_deactivate(isc_nmsocket_t *sock) {
1146 	REQUIRE(VALID_NMSOCK(sock));
1147 
1148 	if (sock->parent != NULL) {
1149 		return (atomic_compare_exchange_strong(&sock->parent->active,
1150 						       &(bool){ true }, false));
1151 	}
1152 
1153 	return (atomic_compare_exchange_strong(&sock->active, &(bool){ true },
1154 					       false));
1155 }
1156 
1157 void
1158 isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) {
1159 	REQUIRE(VALID_NMSOCK(sock));
1160 	REQUIRE(target != NULL && *target == NULL);
1161 
1162 	isc_nmsocket_t *rsock = NULL;
1163 
1164 	if (sock->parent != NULL) {
1165 		rsock = sock->parent;
1166 		INSIST(rsock->parent == NULL); /* sanity check */
1167 	} else {
1168 		rsock = sock;
1169 	}
1170 
1171 	NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %" PRIuFAST32
1172 			 "\n",
1173 			 rsock, isc_refcount_current(&rsock->references) + 1);
1174 
1175 	isc_refcount_increment0(&rsock->references);
1176 
1177 	*target = sock;
1178 }
1179 
1180 /*
1181  * Free all resources inside a socket (including its children if any).
1182  */
1183 static void
1184 nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) {
1185 	isc_nmhandle_t *handle = NULL;
1186 	isc__nm_uvreq_t *uvreq = NULL;
1187 
1188 	REQUIRE(VALID_NMSOCK(sock));
1189 	REQUIRE(!isc__nmsocket_active(sock));
1190 
1191 	NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %" PRIuFAST32
1192 			 "\n",
1193 			 sock, isc_refcount_current(&sock->references));
1194 
1195 	atomic_store(&sock->destroying, true);
1196 
1197 	if (sock->parent == NULL && sock->children != NULL) {
1198 		/*
1199 		 * We shouldn't be here unless there are no active handles,
1200 		 * so we can clean up and free the children.
1201 		 */
1202 		for (size_t i = 0; i < sock->nchildren; i++) {
1203 			if (!atomic_load(&sock->children[i].destroying)) {
1204 				nmsocket_cleanup(&sock->children[i],
1205 						 false FLARG_PASS);
1206 			}
1207 		}
1208 
1209 		/*
1210 		 * This was a parent socket: destroy the listening
1211 		 * barriers that synchronized the children.
1212 		 */
1213 		isc_barrier_destroy(&sock->startlistening);
1214 		isc_barrier_destroy(&sock->stoplistening);
1215 
1216 		/*
1217 		 * Now free them.
1218 		 */
1219 		isc_mem_put(sock->mgr->mctx, sock->children,
1220 			    sock->nchildren * sizeof(*sock));
1221 		sock->children = NULL;
1222 		sock->nchildren = 0;
1223 	}
1224 	if (sock->statsindex != NULL) {
1225 		isc__nm_decstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1226 	}
1227 
1228 	sock->statichandle = NULL;
1229 
1230 	if (sock->outerhandle != NULL) {
1231 		isc__nmhandle_detach(&sock->outerhandle FLARG_PASS);
1232 	}
1233 
1234 	if (sock->outer != NULL) {
1235 		isc___nmsocket_detach(&sock->outer FLARG_PASS);
1236 	}
1237 
1238 	while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) {
1239 		nmhandle_free(sock, handle);
1240 	}
1241 
1242 	if (sock->buf != NULL) {
1243 		isc_mem_put(sock->mgr->mctx, sock->buf, sock->buf_size);
1244 	}
1245 
1246 	if (sock->quota != NULL) {
1247 		isc_quota_detach(&sock->quota);
1248 	}
1249 
1250 	sock->pquota = NULL;
1251 
1252 	isc_astack_destroy(sock->inactivehandles);
1253 
1254 	while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
1255 		isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq));
1256 	}
1257 
1258 	isc_astack_destroy(sock->inactivereqs);
1259 	sock->magic = 0;
1260 
1261 	isc_mem_put(sock->mgr->mctx, sock->ah_frees,
1262 		    sock->ah_size * sizeof(sock->ah_frees[0]));
1263 	isc_mem_put(sock->mgr->mctx, sock->ah_handles,
1264 		    sock->ah_size * sizeof(sock->ah_handles[0]));
1265 	isc_mutex_destroy(&sock->lock);
1266 	isc_condition_destroy(&sock->scond);
1267 #if HAVE_LIBNGHTTP2
1268 	isc__nm_tls_cleanup_data(sock);
1269 	isc__nm_http_cleanup_data(sock);
1270 #endif
1271 #ifdef NETMGR_TRACE
1272 	LOCK(&sock->mgr->lock);
1273 	ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link);
1274 	UNLOCK(&sock->mgr->lock);
1275 #endif
1276 	if (dofree) {
1277 		isc_nm_t *mgr = sock->mgr;
1278 		isc_mem_put(mgr->mctx, sock, sizeof(*sock));
1279 		isc_nm_detach(&mgr);
1280 	} else {
1281 		isc_nm_detach(&sock->mgr);
1282 	}
1283 }
1284 
1285 static void
1286 nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) {
1287 	int active_handles;
1288 	bool destroy = false;
1289 
1290 	NETMGR_TRACE_LOG("%s():%p->references = %" PRIuFAST32 "\n", __func__,
1291 			 sock, isc_refcount_current(&sock->references));
1292 
1293 	if (sock->parent != NULL) {
1294 		/*
1295 		 * This is a child socket and cannot be destroyed except
1296 		 * as a side effect of destroying the parent, so let's go
1297 		 * see if the parent is ready to be destroyed.
1298 		 */
1299 		nmsocket_maybe_destroy(sock->parent FLARG_PASS);
1300 		return;
1301 	}
1302 
1303 	/*
1304 	 * This is a parent socket (or a standalone). See whether the
1305 	 * children have active handles before deciding whether to
1306 	 * accept destruction.
1307 	 */
1308 	LOCK(&sock->lock);
1309 	if (atomic_load(&sock->active) || atomic_load(&sock->destroying) ||
1310 	    !atomic_load(&sock->closed) || atomic_load(&sock->references) != 0)
1311 	{
1312 		UNLOCK(&sock->lock);
1313 		return;
1314 	}
1315 
1316 	active_handles = atomic_load(&sock->ah);
1317 	if (sock->children != NULL) {
1318 		for (size_t i = 0; i < sock->nchildren; i++) {
1319 			LOCK(&sock->children[i].lock);
1320 			active_handles += atomic_load(&sock->children[i].ah);
1321 			UNLOCK(&sock->children[i].lock);
1322 		}
1323 	}
1324 
1325 	if (active_handles == 0 || sock->statichandle != NULL) {
1326 		destroy = true;
1327 	}
1328 
1329 	NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n",
1330 			 __func__, sock, active_handles, sock->statichandle);
1331 
1332 	if (destroy) {
1333 		atomic_store(&sock->destroying, true);
1334 		UNLOCK(&sock->lock);
1335 		nmsocket_cleanup(sock, true FLARG_PASS);
1336 	} else {
1337 		UNLOCK(&sock->lock);
1338 	}
1339 }
1340 
1341 void
1342 isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) {
1343 	REQUIRE(sock->parent == NULL);
1344 
1345 	NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = "
1346 			 "%" PRIuFAST32 "\n",
1347 			 sock, isc_refcount_current(&sock->references));
1348 
1349 	/*
1350 	 * The final external reference to the socket is gone. We can try
1351 	 * destroying the socket, but we have to wait for all the inflight
1352 	 * handles to finish first.
1353 	 */
1354 	atomic_store(&sock->active, false);
1355 
1356 	/*
1357 	 * If the socket has children, they'll need to be marked inactive
1358 	 * so they can be cleaned up too.
1359 	 */
1360 	if (sock->children != NULL) {
1361 		for (size_t i = 0; i < sock->nchildren; i++) {
1362 			atomic_store(&sock->children[i].active, false);
1363 		}
1364 	}
1365 
1366 	/*
1367 	 * If we're here then we already stopped listening; otherwise
1368 	 * we'd have a hanging reference from the listening process.
1369 	 *
1370 	 * If it's a regular socket we may need to close it.
1371 	 */
1372 	if (!atomic_load(&sock->closed)) {
1373 		switch (sock->type) {
1374 		case isc_nm_udpsocket:
1375 			isc__nm_udp_close(sock);
1376 			return;
1377 		case isc_nm_tcpsocket:
1378 			isc__nm_tcp_close(sock);
1379 			return;
1380 		case isc_nm_tcpdnssocket:
1381 			isc__nm_tcpdns_close(sock);
1382 			return;
1383 		case isc_nm_tlsdnssocket:
1384 			isc__nm_tlsdns_close(sock);
1385 			return;
1386 #if HAVE_LIBNGHTTP2
1387 		case isc_nm_tlssocket:
1388 			isc__nm_tls_close(sock);
1389 			break;
1390 		case isc_nm_httpsocket:
1391 			isc__nm_http_close(sock);
1392 			return;
1393 #endif
1394 		default:
1395 			break;
1396 		}
1397 	}
1398 
1399 	nmsocket_maybe_destroy(sock FLARG_PASS);
1400 }
1401 
1402 void
1403 isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) {
1404 	REQUIRE(sockp != NULL && *sockp != NULL);
1405 	REQUIRE(VALID_NMSOCK(*sockp));
1406 
1407 	isc_nmsocket_t *sock = *sockp, *rsock = NULL;
1408 	*sockp = NULL;
1409 
1410 	/*
1411 	 * If the socket is a part of a set (a child socket) we are
1412 	 * counting references for the whole set at the parent.
1413 	 */
1414 	if (sock->parent != NULL) {
1415 		rsock = sock->parent;
1416 		INSIST(rsock->parent == NULL); /* Sanity check */
1417 	} else {
1418 		rsock = sock;
1419 	}
1420 
1421 	NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %" PRIuFAST32
1422 			 "\n",
1423 			 rsock, isc_refcount_current(&rsock->references) - 1);
1424 
1425 	if (isc_refcount_decrement(&rsock->references) == 1) {
1426 		isc___nmsocket_prep_destroy(rsock FLARG_PASS);
1427 	}
1428 }
1429 
1430 void
1431 isc_nmsocket_close(isc_nmsocket_t **sockp) {
1432 	REQUIRE(sockp != NULL);
1433 	REQUIRE(VALID_NMSOCK(*sockp));
1434 	REQUIRE((*sockp)->type == isc_nm_udplistener ||
1435 		(*sockp)->type == isc_nm_tcplistener ||
1436 		(*sockp)->type == isc_nm_tcpdnslistener ||
1437 		(*sockp)->type == isc_nm_tlsdnslistener ||
1438 		(*sockp)->type == isc_nm_tlslistener ||
1439 		(*sockp)->type == isc_nm_httplistener);
1440 
1441 	isc__nmsocket_detach(sockp);
1442 }
1443 
1444 void
1445 isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
1446 		    isc_sockaddr_t *iface FLARG) {
1447 	uint16_t family;
1448 
1449 	REQUIRE(sock != NULL);
1450 	REQUIRE(mgr != NULL);
1451 	REQUIRE(iface != NULL);
1452 
1453 	family = iface->type.sa.sa_family;
1454 
1455 	*sock = (isc_nmsocket_t){ .type = type,
1456 				  .iface = *iface,
1457 				  .fd = -1,
1458 				  .ah_size = 32,
1459 				  .inactivehandles = isc_astack_new(
1460 					  mgr->mctx, ISC_NM_HANDLES_STACK_SIZE),
1461 				  .inactivereqs = isc_astack_new(
1462 					  mgr->mctx, ISC_NM_REQS_STACK_SIZE) };
1463 
1464 #if NETMGR_TRACE
1465 	sock->backtrace_size = isc_backtrace(sock->backtrace, TRACE_SIZE);
1466 	ISC_LINK_INIT(sock, active_link);
1467 	ISC_LIST_INIT(sock->active_handles);
1468 	LOCK(&mgr->lock);
1469 	ISC_LIST_APPEND(mgr->active_sockets, sock, active_link);
1470 	UNLOCK(&mgr->lock);
1471 #endif
1472 
1473 	isc_nm_attach(mgr, &sock->mgr);
1474 	sock->uv_handle.handle.data = sock;
1475 
1476 	sock->ah_frees = isc_mem_get(mgr->mctx,
1477 				     sock->ah_size * sizeof(sock->ah_frees[0]));
1478 	sock->ah_handles = isc_mem_get(
1479 		mgr->mctx, sock->ah_size * sizeof(sock->ah_handles[0]));
1480 	ISC_LINK_INIT(&sock->quotacb, link);
1481 	for (size_t i = 0; i < 32; i++) {
1482 		sock->ah_frees[i] = i;
1483 		sock->ah_handles[i] = NULL;
1484 	}
1485 
1486 	switch (type) {
1487 	case isc_nm_udpsocket:
1488 	case isc_nm_udplistener:
1489 		if (family == AF_INET) {
1490 			sock->statsindex = udp4statsindex;
1491 		} else {
1492 			sock->statsindex = udp6statsindex;
1493 		}
1494 		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1495 		break;
1496 	case isc_nm_tcpsocket:
1497 	case isc_nm_tcplistener:
1498 	case isc_nm_tcpdnssocket:
1499 	case isc_nm_tcpdnslistener:
1500 	case isc_nm_tlsdnssocket:
1501 	case isc_nm_tlsdnslistener:
1502 	case isc_nm_httpsocket:
1503 	case isc_nm_httplistener:
1504 		if (family == AF_INET) {
1505 			sock->statsindex = tcp4statsindex;
1506 		} else {
1507 			sock->statsindex = tcp6statsindex;
1508 		}
1509 		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1510 		break;
1511 	default:
1512 		break;
1513 	}
1514 
1515 	isc_mutex_init(&sock->lock);
1516 	isc_condition_init(&sock->cond);
1517 	isc_condition_init(&sock->scond);
1518 	isc_refcount_init(&sock->references, 1);
1519 
1520 #if HAVE_LIBNGHTTP2
1521 	memset(&sock->tlsstream, 0, sizeof(sock->tlsstream));
1522 #endif /* HAVE_LIBNGHTTP2 */
1523 
1524 	NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %" PRIuFAST32
1525 			 "\n",
1526 			 sock, isc_refcount_current(&sock->references));
1527 
1528 	atomic_init(&sock->active, true);
1529 	atomic_init(&sock->sequential, false);
1530 	atomic_init(&sock->readpaused, false);
1531 	atomic_init(&sock->closing, false);
1532 	atomic_init(&sock->listening, 0);
1533 	atomic_init(&sock->closed, 0);
1534 	atomic_init(&sock->destroying, 0);
1535 	atomic_init(&sock->ah, 0);
1536 	atomic_init(&sock->client, 0);
1537 	atomic_init(&sock->connecting, false);
1538 	atomic_init(&sock->keepalive, false);
1539 	atomic_init(&sock->connected, false);
1540 
1541 	atomic_init(&sock->active_child_connections, 0);
1542 
1543 #if HAVE_LIBNGHTTP2
1544 	isc__nm_http_initsocket(sock);
1545 #endif
1546 
1547 	sock->magic = NMSOCK_MAGIC;
1548 }
1549 
1550 void
1551 isc__nmsocket_clearcb(isc_nmsocket_t *sock) {
1552 	REQUIRE(VALID_NMSOCK(sock));
1553 	REQUIRE(!isc__nm_in_netthread() || sock->tid == isc_nm_tid());
1554 
1555 	sock->recv_cb = NULL;
1556 	sock->recv_cbarg = NULL;
1557 	sock->accept_cb = NULL;
1558 	sock->accept_cbarg = NULL;
1559 	sock->connect_cb = NULL;
1560 	sock->connect_cbarg = NULL;
1561 }
1562 
1563 void
1564 isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
1565 	isc__networker_t *worker = NULL;
1566 
1567 	REQUIRE(VALID_NMSOCK(sock));
1568 	if (buf->base == NULL) {
1569 		/* Empty buffer: might happen in case of error. */
1570 		return;
1571 	}
1572 	worker = &sock->mgr->workers[sock->tid];
1573 
1574 	REQUIRE(worker->recvbuf_inuse);
1575 	if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
1576 	    buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
1577 	{
1578 		/* Can happen in case of out-of-order recvmmsg in libuv1.36 */
1579 		return;
1580 	}
1581 	REQUIRE(buf->base == worker->recvbuf);
1582 	worker->recvbuf_inuse = false;
1583 }
1584 
1585 static isc_nmhandle_t *
1586 alloc_handle(isc_nmsocket_t *sock) {
1587 	isc_nmhandle_t *handle =
1588 		isc_mem_get(sock->mgr->mctx,
1589 			    sizeof(isc_nmhandle_t) + sock->extrahandlesize);
1590 
1591 	*handle = (isc_nmhandle_t){ .magic = NMHANDLE_MAGIC };
1592 #ifdef NETMGR_TRACE
1593 	ISC_LINK_INIT(handle, active_link);
1594 #endif
1595 	isc_refcount_init(&handle->references, 1);
1596 
1597 	return (handle);
1598 }
1599 
1600 isc_nmhandle_t *
1601 isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
1602 		   isc_sockaddr_t *local FLARG) {
1603 	isc_nmhandle_t *handle = NULL;
1604 	size_t handlenum;
1605 	int pos;
1606 
1607 	REQUIRE(VALID_NMSOCK(sock));
1608 
1609 	handle = isc_astack_pop(sock->inactivehandles);
1610 
1611 	if (handle == NULL) {
1612 		handle = alloc_handle(sock);
1613 	} else {
1614 		isc_refcount_init(&handle->references, 1);
1615 		INSIST(VALID_NMHANDLE(handle));
1616 	}
1617 
1618 	NETMGR_TRACE_LOG(
1619 		"isc__nmhandle_get():handle %p->references = %" PRIuFAST32 "\n",
1620 		handle, isc_refcount_current(&handle->references));
1621 
1622 	isc___nmsocket_attach(sock, &handle->sock FLARG_PASS);
1623 
1624 #if NETMGR_TRACE
1625 	handle->backtrace_size = isc_backtrace(handle->backtrace, TRACE_SIZE);
1626 #endif
1627 
1628 	if (peer != NULL) {
1629 		handle->peer = *peer;
1630 	} else {
1631 		handle->peer = sock->peer;
1632 	}
1633 
1634 	if (local != NULL) {
1635 		handle->local = *local;
1636 	} else {
1637 		handle->local = sock->iface;
1638 	}
1639 
1640 	LOCK(&sock->lock);
1641 	/* We need to add this handle to the list of active handles */
1642 	if ((size_t)atomic_load(&sock->ah) == sock->ah_size) {
1643 		sock->ah_frees = isc_mem_reget(
1644 			sock->mgr->mctx, sock->ah_frees,
1645 			sock->ah_size * sizeof(sock->ah_frees[0]),
1646 			sock->ah_size * 2 * sizeof(sock->ah_frees[0]));
1647 		sock->ah_handles = isc_mem_reget(
1648 			sock->mgr->mctx, sock->ah_handles,
1649 			sock->ah_size * sizeof(sock->ah_handles[0]),
1650 			sock->ah_size * 2 * sizeof(sock->ah_handles[0]));
1651 
1652 		for (size_t i = sock->ah_size; i < sock->ah_size * 2; i++) {
1653 			sock->ah_frees[i] = i;
1654 			sock->ah_handles[i] = NULL;
1655 		}
1656 
1657 		sock->ah_size *= 2;
1658 	}
1659 
1660 	handlenum = atomic_fetch_add(&sock->ah, 1);
1661 	pos = sock->ah_frees[handlenum];
1662 
1663 	INSIST(sock->ah_handles[pos] == NULL);
1664 	sock->ah_handles[pos] = handle;
1665 	handle->ah_pos = pos;
1666 #ifdef NETMGR_TRACE
1667 	ISC_LIST_APPEND(sock->active_handles, handle, active_link);
1668 #endif
1669 	UNLOCK(&sock->lock);
1670 
1671 	switch (sock->type) {
1672 	case isc_nm_udpsocket:
1673 	case isc_nm_tcpdnssocket:
1674 	case isc_nm_tlsdnssocket:
1675 		if (!atomic_load(&sock->client)) {
1676 			break;
1677 		}
1678 		/* fallthrough */
1679 	case isc_nm_tcpsocket:
1680 	case isc_nm_tlssocket:
1681 		INSIST(sock->statichandle == NULL);
1682 
1683 		/*
1684 		 * statichandle must be assigned, not attached;
1685 		 * otherwise, if a handle was detached elsewhere
1686 		 * it could never reach 0 references, and the
1687 		 * handle and socket would never be freed.
1688 		 */
1689 		sock->statichandle = handle;
1690 		break;
1691 	default:
1692 		break;
1693 	}
1694 
1695 #if HAVE_LIBNGHTTP2
1696 	if (sock->type == isc_nm_httpsocket && sock->h2.session) {
1697 		isc__nm_httpsession_attach(sock->h2.session,
1698 					   &handle->httpsession);
1699 	}
1700 #endif
1701 
1702 	return (handle);
1703 }
1704 
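/*
 * Lifecycle sketch (illustrative, not a specific caller): a handle
 * obtained from isc__nmhandle_get() starts with one reference; code
 * that keeps it takes further references with isc__nmhandle_attach()
 * and drops them with isc__nmhandle_detach().  When the last
 * reference is dropped the handle is deactivated and either cached
 * on the socket's inactivehandles stack for reuse or freed.
 */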
1705 void
1706 isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) {
1707 	REQUIRE(VALID_NMHANDLE(handle));
1708 	REQUIRE(handlep != NULL && *handlep == NULL);
1709 
1710 	NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = "
1711 			 "%" PRIuFAST32 "\n",
1712 			 handle, isc_refcount_current(&handle->references) + 1);
1713 
1714 	isc_refcount_increment(&handle->references);
1715 	*handlep = handle;
1716 }
1717 
1718 bool
1719 isc_nmhandle_is_stream(isc_nmhandle_t *handle) {
1720 	REQUIRE(VALID_NMHANDLE(handle));
1721 
1722 	return (handle->sock->type == isc_nm_tcpsocket ||
1723 		handle->sock->type == isc_nm_tcpdnssocket ||
1724 		handle->sock->type == isc_nm_tlssocket ||
1725 		handle->sock->type == isc_nm_tlsdnssocket ||
1726 		handle->sock->type == isc_nm_httpsocket);
1727 }
1728 
1729 static void
1730 nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1731 	size_t extra = sock->extrahandlesize;
1732 
1733 	isc_refcount_destroy(&handle->references);
1734 
1735 	if (handle->dofree != NULL) {
1736 		handle->dofree(handle->opaque);
1737 	}
1738 
1739 	*handle = (isc_nmhandle_t){ .magic = 0 };
1740 
1741 	isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra);
1742 }
1743 
1744 static void
1745 nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1746 	size_t handlenum;
1747 	bool reuse = false;
1748 
1749 	/*
1750 	 * We do all of this under lock to avoid races with socket
1751 	 * destruction.  We have to do this now, because at this point the
1752 	 * socket is either unused or still attached to event->sock.
1753 	 */
1754 	LOCK(&sock->lock);
1755 
1756 	INSIST(sock->ah_handles[handle->ah_pos] == handle);
1757 	INSIST(sock->ah_size > handle->ah_pos);
1758 	INSIST(atomic_load(&sock->ah) > 0);
1759 
1760 #ifdef NETMGR_TRACE
1761 	ISC_LIST_UNLINK(sock->active_handles, handle, active_link);
1762 #endif
1763 
1764 	sock->ah_handles[handle->ah_pos] = NULL;
1765 	handlenum = atomic_fetch_sub(&sock->ah, 1) - 1;
1766 	sock->ah_frees[handlenum] = handle->ah_pos;
1767 	handle->ah_pos = 0;
1768 	if (atomic_load(&sock->active)) {
1769 		reuse = isc_astack_trypush(sock->inactivehandles, handle);
1770 	}
1771 	if (!reuse) {
1772 		nmhandle_free(sock, handle);
1773 	}
1774 	UNLOCK(&sock->lock);
1775 }
1776 
1777 void
1778 isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) {
1779 	isc_nmsocket_t *sock = NULL;
1780 	isc_nmhandle_t *handle = NULL;
1781 
1782 	REQUIRE(handlep != NULL);
1783 	REQUIRE(VALID_NMHANDLE(*handlep));
1784 
1785 	handle = *handlep;
1786 	*handlep = NULL;
1787 
1788 	sock = handle->sock;
1789 	if (sock->tid == isc_nm_tid()) {
1790 		nmhandle_detach_cb(&handle FLARG_PASS);
1791 	} else {
1792 		isc__netievent_detach_t *event =
1793 			isc__nm_get_netievent_detach(sock->mgr, sock);
1794 		/*
1795 		 * We use an implicit "attach" here; the last reference
1796 		 * needs to be destroyed explicitly in the async callback.
1797 		 */
1798 		event->handle = handle;
1799 		FLARG_IEVENT_PASS(event);
1800 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1801 				       (isc__netievent_t *)event);
1802 	}
1803 }
1804 
1805 void
1806 isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1807 
1808 static void
1809 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) {
1810 	isc_nmsocket_t *sock = NULL;
1811 	isc_nmhandle_t *handle = NULL;
1812 
1813 	REQUIRE(handlep != NULL);
1814 	REQUIRE(VALID_NMHANDLE(*handlep));
1815 
1816 	handle = *handlep;
1817 	*handlep = NULL;
1818 
1819 	NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %" PRIuFAST32
1820 			 "\n",
1821 			 handle, isc_refcount_current(&handle->references) - 1);
1822 
1823 	if (isc_refcount_decrement(&handle->references) > 1) {
1824 		return;
1825 	}
1826 
1827 	/* We need an acquire memory barrier here */
1828 	(void)isc_refcount_current(&handle->references);
1829 
1830 	sock = handle->sock;
1831 	handle->sock = NULL;
1832 
1833 	if (handle->doreset != NULL) {
1834 		handle->doreset(handle->opaque);
1835 	}
1836 
1837 #if HAVE_LIBNGHTTP2
1838 	if (sock->type == isc_nm_httpsocket && handle->httpsession != NULL) {
1839 		isc__nm_httpsession_detach(&handle->httpsession);
1840 	}
1841 #endif
1842 
1843 	nmhandle_deactivate(sock, handle);
1844 
1845 	/*
1846 	 * The handle is gone now. If the socket has a callback configured
1847 	 * for that (e.g., to perform cleanup after request processing),
1848 	 * call it now, or schedule it to run asynchronously.
1849 	 */
1850 	if (sock->closehandle_cb != NULL) {
1851 		if (sock->tid == isc_nm_tid()) {
1852 			sock->closehandle_cb(sock);
1853 		} else {
1854 			isc__netievent_close_t *event =
1855 				isc__nm_get_netievent_close(sock->mgr, sock);
1856 			isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1857 					       (isc__netievent_t *)event);
1858 		}
1859 	}
1860 
1861 	if (handle == sock->statichandle) {
1862 		/* statichandle is assigned, not attached. */
1863 		sock->statichandle = NULL;
1864 	}
1865 
1866 	isc___nmsocket_detach(&sock FLARG_PASS);
1867 }
1868 
1869 void *
1870 isc_nmhandle_getdata(isc_nmhandle_t *handle) {
1871 	REQUIRE(VALID_NMHANDLE(handle));
1872 
1873 	return (handle->opaque);
1874 }
1875 
1876 void
1877 isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg,
1878 		     isc_nm_opaquecb_t doreset, isc_nm_opaquecb_t dofree) {
1879 	REQUIRE(VALID_NMHANDLE(handle));
1880 
1881 	handle->opaque = arg;
1882 	handle->doreset = doreset;
1883 	handle->dofree = dofree;
1884 }
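
/*
 * Illustrative sketch (not part of this file): a server typically stashes
 * per-connection state in the handle and lets the netmgr reset/free it when
 * the handle is reused or destroyed.  "conn_t", conn_new(), conn_reset()
 * and conn_free() are hypothetical names.
 *
 *	conn_t *conn = conn_new();
 *	isc_nmhandle_setdata(handle, conn, conn_reset, conn_free);
 *	...
 *	conn_t *c = isc_nmhandle_getdata(handle);
 */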
1885 
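/*
 * Grow sock->buf so that it can hold at least 'len' bytes: allocate a
 * regular-sized buffer (NM_REG_BUF) on first use, or a big one (NM_BIG_BUF)
 * if 'len' requires it; if a (smaller) buffer already exists, reallocate
 * it to NM_BIG_BUF.
 */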
1886 void
1887 isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
1888 	REQUIRE(len <= NM_BIG_BUF);
1889 
1890 	if (sock->buf == NULL) {
1891 		/* We don't have the buffer at all */
1892 		size_t alloc_len = len < NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF;
1893 		sock->buf = isc_mem_get(sock->mgr->mctx, alloc_len);
1894 		sock->buf_size = alloc_len;
1895 	} else {
1896 		/* We have the buffer but it's too small */
1897 		sock->buf = isc_mem_reget(sock->mgr->mctx, sock->buf,
1898 					  sock->buf_size, NM_BIG_BUF);
1899 		sock->buf_size = NM_BIG_BUF;
1900 	}
1901 }
1902 
1903 void
1904 isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1905 		       isc_result_t eresult) {
1906 	REQUIRE(VALID_NMSOCK(sock));
1907 	REQUIRE(VALID_UVREQ(req));
1908 
1909 	if (req->cb.send != NULL) {
1910 		isc__nm_sendcb(sock, req, eresult, true);
1911 	} else {
1912 		isc__nm_uvreq_put(&req, sock);
1913 	}
1914 }
1915 
1916 void
1917 isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
1918 	REQUIRE(atomic_load(&sock->accepting));
1919 	REQUIRE(sock->server);
1920 
1921 	/*
1922 	 * Detach the quota early to make room for other connections;
1923 	 * otherwise it'd be detached later asynchronously, and clog
1924 	 * the quota unnecessarily.
1925 	 */
1926 	if (sock->quota != NULL) {
1927 		isc_quota_detach(&sock->quota);
1928 	}
1929 
1930 	isc__nmsocket_detach(&sock->server);
1931 
1932 	atomic_store(&sock->accepting, false);
1933 
1934 	switch (eresult) {
1935 	case ISC_R_NOTCONNECTED:
1936 		/* IGNORE: The client disconnected before we could accept */
1937 		break;
1938 	default:
1939 		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1940 			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
1941 			      "Accepting TCP connection failed: %s",
1942 			      isc_result_totext(eresult));
1943 	}
1944 }
1945 
1946 void
1947 isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1948 			  isc_result_t eresult, bool async) {
1949 	REQUIRE(VALID_NMSOCK(sock));
1950 	REQUIRE(VALID_UVREQ(req));
1951 	REQUIRE(sock->tid == isc_nm_tid());
1952 	REQUIRE(req->cb.connect != NULL);
1953 
1954 	isc__nmsocket_timer_stop(sock);
1955 	uv_handle_set_data((uv_handle_t *)&sock->timer, sock);
1956 
1957 	INSIST(atomic_compare_exchange_strong(&sock->connecting,
1958 					      &(bool){ true }, false));
1959 
1960 	isc__nmsocket_clearcb(sock);
1961 	isc__nm_connectcb(sock, req, eresult, async);
1962 
1963 	isc__nmsocket_prep_destroy(sock);
1964 }
1965 
1966 void
1967 isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async) {
1968 	REQUIRE(VALID_NMSOCK(sock));
1969 	switch (sock->type) {
1970 	case isc_nm_udpsocket:
1971 		isc__nm_udp_failed_read_cb(sock, result);
1972 		return;
1973 	case isc_nm_tcpsocket:
1974 		isc__nm_tcp_failed_read_cb(sock, result);
1975 		return;
1976 	case isc_nm_tcpdnssocket:
1977 		isc__nm_tcpdns_failed_read_cb(sock, result);
1978 		return;
1979 	case isc_nm_tlsdnssocket:
1980 		isc__nm_tlsdns_failed_read_cb(sock, result, async);
1981 		return;
1982 	default:
1983 		INSIST(0);
1984 		ISC_UNREACHABLE();
1985 	}
1986 }
1987 
1988 void
1989 isc__nmsocket_connecttimeout_cb(uv_timer_t *timer) {
1990 	uv_connect_t *uvreq = uv_handle_get_data((uv_handle_t *)timer);
1991 	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
1992 	isc__nm_uvreq_t *req = uv_handle_get_data((uv_handle_t *)uvreq);
1993 
1994 	REQUIRE(VALID_NMSOCK(sock));
1995 	REQUIRE(sock->tid == isc_nm_tid());
1996 	REQUIRE(atomic_load(&sock->connecting));
1997 	REQUIRE(VALID_UVREQ(req));
1998 	REQUIRE(VALID_NMHANDLE(req->handle));
1999 
2000 	isc__nmsocket_timer_stop(sock);
2001 
2002 	if (sock->tls.pending_req != NULL) {
2003 		REQUIRE(req == sock->tls.pending_req);
2004 		sock->tls.pending_req = NULL;
2005 	}
2006 
2007 	/* Call the connect callback directly */
2008 
2009 	req->cb.connect(req->handle, ISC_R_TIMEDOUT, req->cbarg);
2010 
2011 	/* The timer is not running; clean up and shut down everything */
2012 	if (!isc__nmsocket_timer_running(sock)) {
2013 		INSIST(atomic_compare_exchange_strong(&sock->connecting,
2014 						      &(bool){ true }, false));
2015 		isc__nm_uvreq_put(&req, sock);
2016 		isc__nmsocket_clearcb(sock);
2017 		isc__nmsocket_shutdown(sock);
2018 	}
2019 }
2020 
2021 static void
2022 isc__nmsocket_readtimeout_cb(uv_timer_t *timer) {
2023 	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer);
2024 
2025 	REQUIRE(VALID_NMSOCK(sock));
2026 	REQUIRE(sock->tid == isc_nm_tid());
2027 	REQUIRE(atomic_load(&sock->reading));
2028 
2029 	if (atomic_load(&sock->client)) {
2030 		uv_timer_stop(timer);
2031 
2032 		sock->recv_read = false;
2033 
2034 		if (sock->recv_cb != NULL) {
2035 			isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
2036 			isc__nm_readcb(sock, req, ISC_R_TIMEDOUT);
2037 		}
2038 
2039 		if (!isc__nmsocket_timer_running(sock)) {
2040 			isc__nmsocket_clearcb(sock);
2041 			isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
2042 		}
2043 	} else {
2044 		isc__nm_failed_read_cb(sock, ISC_R_TIMEDOUT, false);
2045 	}
2046 }
2047 
2048 void
2049 isc__nmsocket_timer_restart(isc_nmsocket_t *sock) {
2050 	int r = 0;
2051 
2052 	REQUIRE(VALID_NMSOCK(sock));
2053 
2054 	if (atomic_load(&sock->connecting)) {
2055 		if (sock->connect_timeout == 0) {
2056 			return;
2057 		}
2058 
2059 		r = uv_timer_start(&sock->timer,
2060 				   isc__nmsocket_connecttimeout_cb,
2061 				   sock->connect_timeout + 10, 0);
2062 
2063 	} else {
2064 		if (sock->read_timeout == 0) {
2065 			return;
2066 		}
2067 
2068 		r = uv_timer_start(&sock->timer, isc__nmsocket_readtimeout_cb,
2069 				   sock->read_timeout, 0);
2070 	}
2071 
2072 	RUNTIME_CHECK(r == 0);
2073 }
2074 
2075 bool
2076 isc__nmsocket_timer_running(isc_nmsocket_t *sock) {
2077 	REQUIRE(VALID_NMSOCK(sock));
2078 
2079 	return (uv_is_active((uv_handle_t *)&sock->timer));
2080 }
2081 
2082 void
2083 isc__nmsocket_timer_start(isc_nmsocket_t *sock) {
2084 	REQUIRE(VALID_NMSOCK(sock));
2085 
2086 	if (isc__nmsocket_timer_running(sock)) {
2087 		return;
2088 	}
2089 
2090 	isc__nmsocket_timer_restart(sock);
2091 }
2092 
2093 void
2094 isc__nmsocket_timer_stop(isc_nmsocket_t *sock) {
2095 	REQUIRE(VALID_NMSOCK(sock));
2096 
2097 	/* uv_timer_stop() is idempotent, no need to check if running */
2098 
2099 	int r = uv_timer_stop(&sock->timer);
2100 	RUNTIME_CHECK(r == 0);
2101 }
2102 
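/*
 * Allocate a uvreq for a pending read and copy the socket's receive
 * callback into it.  For stream sockets and for client sockets the request
 * references the socket's statichandle; otherwise a fresh handle is
 * created for the peer address in 'sockaddr'.
 */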
2103 isc__nm_uvreq_t *
2104 isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
2105 	isc__nm_uvreq_t *req = NULL;
2106 
2107 	req = isc__nm_uvreq_get(sock->mgr, sock);
2108 	req->cb.recv = sock->recv_cb;
2109 	req->cbarg = sock->recv_cbarg;
2110 
2111 	switch (sock->type) {
2112 	case isc_nm_tcpsocket:
2113 	case isc_nm_tlssocket:
2114 		isc_nmhandle_attach(sock->statichandle, &req->handle);
2115 		break;
2116 	default:
2117 		if (atomic_load(&sock->client)) {
2118 			isc_nmhandle_attach(sock->statichandle, &req->handle);
2119 		} else {
2120 			req->handle = isc__nmhandle_get(sock, sockaddr, NULL);
2121 		}
2122 		break;
2123 	}
2124 
2125 	return (req);
2126 }
2127 
2128 /*%<
2129  * Allocator for read operations. Limited to size 2^16.
2130  *
2131  * Note that this doesn't actually allocate anything; it just assigns
2132  * the worker's receive buffer to a socket and marks it as "in use".
2133  */
2134 void
2135 isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
2136 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
2137 	isc__networker_t *worker = NULL;
2138 
2139 	REQUIRE(VALID_NMSOCK(sock));
2140 	REQUIRE(isc__nm_in_netthread());
2141 
2142 	switch (sock->type) {
2143 	case isc_nm_udpsocket:
2144 		REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
2145 		size = ISC_NETMGR_RECVBUF_SIZE;
2146 		break;
2147 	case isc_nm_tcpsocket:
2148 	case isc_nm_tcpdnssocket:
2149 		break;
2150 	case isc_nm_tlsdnssocket:
2151 		/*
2152 		 * We need to limit the size of the individual chunks read,
2153 		 * so that BIO_write() always succeeds and the data is
2154 		 * consumed before the next read callback is called.
2155 		 */
2156 		if (size >= ISC_NETMGR_TLSBUF_SIZE) {
2157 			size = ISC_NETMGR_TLSBUF_SIZE;
2158 		}
2159 		break;
2160 	default:
2161 		INSIST(0);
2162 		ISC_UNREACHABLE();
2163 	}
2164 
2165 	worker = &sock->mgr->workers[sock->tid];
2166 	INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
2167 
2168 	buf->base = worker->recvbuf;
2169 	buf->len = size;
2170 	worker->recvbuf_inuse = true;
2171 }
2172 
2173 void
2174 isc__nm_start_reading(isc_nmsocket_t *sock) {
2175 	int r;
2176 
2177 	if (atomic_load(&sock->reading)) {
2178 		return;
2179 	}
2180 
2181 	switch (sock->type) {
2182 	case isc_nm_udpsocket:
2183 		r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
2184 				      isc__nm_udp_read_cb);
2185 		break;
2186 	case isc_nm_tcpsocket:
2187 		r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2188 				  isc__nm_tcp_read_cb);
2189 		break;
2190 	case isc_nm_tcpdnssocket:
2191 		r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2192 				  isc__nm_tcpdns_read_cb);
2193 		break;
2194 	case isc_nm_tlsdnssocket:
2195 		r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2196 				  isc__nm_tlsdns_read_cb);
2197 		break;
2198 	default:
2199 		INSIST(0);
2200 		ISC_UNREACHABLE();
2201 	}
2202 	RUNTIME_CHECK(r == 0);
2203 	atomic_store(&sock->reading, true);
2204 }
2205 
2206 void
2207 isc__nm_stop_reading(isc_nmsocket_t *sock) {
2208 	int r;
2209 
2210 	if (!atomic_load(&sock->reading)) {
2211 		return;
2212 	}
2213 
2214 	switch (sock->type) {
2215 	case isc_nm_udpsocket:
2216 		r = uv_udp_recv_stop(&sock->uv_handle.udp);
2217 		break;
2218 	case isc_nm_tcpsocket:
2219 	case isc_nm_tcpdnssocket:
2220 	case isc_nm_tlsdnssocket:
2221 		r = uv_read_stop(&sock->uv_handle.stream);
2222 		break;
2223 	default:
2224 		INSIST(0);
2225 		ISC_UNREACHABLE();
2226 	}
2227 	RUNTIME_CHECK(r == 0);
2228 	atomic_store(&sock->reading, false);
2229 }
2230 
2231 bool
2232 isc__nm_closing(isc_nmsocket_t *sock) {
2233 	return (atomic_load(&sock->mgr->closing));
2234 }
2235 
2236 bool
2237 isc__nmsocket_closing(isc_nmsocket_t *sock) {
2238 	return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) ||
2239 		isc__nm_closing(sock) ||
2240 		(sock->server != NULL && !isc__nmsocket_active(sock->server)));
2241 }
2242 
2243 static isc_result_t
2244 processbuffer(isc_nmsocket_t *sock) {
2245 	switch (sock->type) {
2246 	case isc_nm_tcpdnssocket:
2247 		return (isc__nm_tcpdns_processbuffer(sock));
2248 	case isc_nm_tlsdnssocket:
2249 		return (isc__nm_tlsdns_processbuffer(sock));
2250 	default:
2251 		INSIST(0);
2252 		ISC_UNREACHABLE();
2253 	}
2254 }
2255 
2256 /*
2257  * Process a DNS message.
2258  *
2259  * If we only have an incomplete DNS message, we don't touch any
2260  * timers. If we do have a full message, reset the timer.
2261  *
2262  * Stop reading if this is a client socket, or if the server socket
2263  * has been set to sequential mode, or the number of queries we are
2264  * processing simultaneously has reached the clients-per-connection
2265  * limit. In this case we'll be called again later by
2266  * isc__nm_resume_processing().
2267  */
2268 void
2269 isc__nm_process_sock_buffer(isc_nmsocket_t *sock) {
2270 	for (;;) {
2271 		int_fast32_t ah = atomic_load(&sock->ah);
2272 		isc_result_t result = processbuffer(sock);
2273 		switch (result) {
2274 		case ISC_R_NOMORE:
2275 			/*
2276 			 * Don't reset the timer until we have a
2277 			 * full DNS message.
2278 			 */
2279 			isc__nm_start_reading(sock);
2280 			/*
2281 			 * Start the timer only if there are no externally used
2282 			 * active handles; there's always one active handle
2283 			 * attached internally to sock->recv_handle in
2284 			 * accept_connection().
2285 			 */
2286 			if (ah == 1) {
2287 				isc__nmsocket_timer_start(sock);
2288 			}
2289 			return;
2290 		case ISC_R_CANCELED:
2291 			isc__nmsocket_timer_stop(sock);
2292 			isc__nm_stop_reading(sock);
2293 			return;
2294 		case ISC_R_SUCCESS:
2295 			/*
2296 			 * Stop the timer on a successful message read; this
2297 			 * also allows the timer to be restarted when we have
2298 			 * no more data.
2299 			 */
2300 			isc__nmsocket_timer_stop(sock);
2301 
2302 			if (atomic_load(&sock->client) ||
2303 			    atomic_load(&sock->sequential) ||
2304 			    ah >= STREAM_CLIENTS_PER_CONN)
2305 			{
2306 				isc__nm_stop_reading(sock);
2307 				return;
2308 			}
2309 			break;
2310 		default:
2311 			INSIST(0);
2312 		}
2313 	}
2314 }
2315 
2316 void
2317 isc__nm_resume_processing(void *arg) {
2318 	isc_nmsocket_t *sock = (isc_nmsocket_t *)arg;
2319 
2320 	REQUIRE(VALID_NMSOCK(sock));
2321 	REQUIRE(sock->tid == isc_nm_tid());
2322 	REQUIRE(!atomic_load(&sock->client));
2323 
2324 	if (isc__nmsocket_closing(sock)) {
2325 		return;
2326 	}
2327 
2328 	isc__nm_process_sock_buffer(sock);
2329 }
2330 
2331 void
2332 isc_nmhandle_cleartimeout(isc_nmhandle_t *handle) {
2333 	REQUIRE(VALID_NMHANDLE(handle));
2334 	REQUIRE(VALID_NMSOCK(handle->sock));
2335 
2336 	switch (handle->sock->type) {
2337 #if HAVE_LIBNGHTTP2
2338 	case isc_nm_httpsocket:
2339 		isc__nm_http_cleartimeout(handle);
2340 		return;
2341 	case isc_nm_tlssocket:
2342 		isc__nm_tls_cleartimeout(handle);
2343 		return;
2344 #endif
2345 	default:
2346 		handle->sock->read_timeout = 0;
2347 
2348 		if (uv_is_active((uv_handle_t *)&handle->sock->timer)) {
2349 			isc__nmsocket_timer_stop(handle->sock);
2350 		}
2351 	}
2352 }
2353 
2354 void
2355 isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
2356 	REQUIRE(VALID_NMHANDLE(handle));
2357 	REQUIRE(VALID_NMSOCK(handle->sock));
2358 
2359 	switch (handle->sock->type) {
2360 #if HAVE_LIBNGHTTP2
2361 	case isc_nm_httpsocket:
2362 		isc__nm_http_settimeout(handle, timeout);
2363 		return;
2364 	case isc_nm_tlssocket:
2365 		isc__nm_tls_settimeout(handle, timeout);
2366 		return;
2367 #endif
2368 	default:
2369 		handle->sock->read_timeout = timeout;
2370 		isc__nmsocket_timer_restart(handle->sock);
2371 	}
2372 }
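
/*
 * Illustrative sketch (not part of this file): the read timeout is given
 * in milliseconds (for plain sockets it is passed to uv_timer_start()
 * unchanged), and a value previously set can be cleared again:
 *
 *	isc_nmhandle_settimeout(handle, 30000);	(30-second read timeout)
 *	...
 *	isc_nmhandle_cleartimeout(handle);	(disable the timer)
 */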
2373 
2374 void
2375 isc_nmhandle_keepalive(isc_nmhandle_t *handle, bool value) {
2376 	isc_nmsocket_t *sock = NULL;
2377 
2378 	REQUIRE(VALID_NMHANDLE(handle));
2379 	REQUIRE(VALID_NMSOCK(handle->sock));
2380 
2381 	sock = handle->sock;
2382 
2383 	switch (sock->type) {
2384 	case isc_nm_tcpsocket:
2385 	case isc_nm_tcpdnssocket:
2386 	case isc_nm_tlsdnssocket:
2387 		atomic_store(&sock->keepalive, value);
2388 		sock->read_timeout = value ? atomic_load(&sock->mgr->keepalive)
2389 					   : atomic_load(&sock->mgr->idle);
2390 		break;
2391 #if HAVE_LIBNGHTTP2
2392 	case isc_nm_tlssocket:
2393 		isc__nmhandle_tls_keepalive(handle, value);
2394 		break;
2395 	case isc_nm_httpsocket:
2396 		isc__nmhandle_http_keepalive(handle, value);
2397 		break;
2398 #endif /* HAVE_LIBNGHTTP2 */
2399 	default:
2400 		/*
2401 		 * For any other protocol, this is a no-op.
2402 		 */
2403 		return;
2404 	}
2405 }
2406 
2407 bool
2408 isc_nmhandle_timer_running(isc_nmhandle_t *handle) {
2409 	REQUIRE(VALID_NMHANDLE(handle));
2410 	REQUIRE(VALID_NMSOCK(handle->sock));
2411 
2412 	return (isc__nmsocket_timer_running(handle->sock));
2413 }
2414 
2415 void *
2416 isc_nmhandle_getextra(isc_nmhandle_t *handle) {
2417 	REQUIRE(VALID_NMHANDLE(handle));
2418 
2419 	return (handle->extra);
2420 }
2421 
2422 isc_sockaddr_t
2423 isc_nmhandle_peeraddr(isc_nmhandle_t *handle) {
2424 	REQUIRE(VALID_NMHANDLE(handle));
2425 
2426 	return (handle->peer);
2427 }
2428 
2429 isc_sockaddr_t
2430 isc_nmhandle_localaddr(isc_nmhandle_t *handle) {
2431 	REQUIRE(VALID_NMHANDLE(handle));
2432 
2433 	return (handle->local);
2434 }
2435 
2436 isc_nm_t *
2437 isc_nmhandle_netmgr(isc_nmhandle_t *handle) {
2438 	REQUIRE(VALID_NMHANDLE(handle));
2439 	REQUIRE(VALID_NMSOCK(handle->sock));
2440 
2441 	return (handle->sock->mgr);
2442 }
2443 
2444 isc__nm_uvreq_t *
2445 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
2446 	isc__nm_uvreq_t *req = NULL;
2447 
2448 	REQUIRE(VALID_NM(mgr));
2449 	REQUIRE(VALID_NMSOCK(sock));
2450 
2451 	if (sock != NULL && isc__nmsocket_active(sock)) {
2452 		/* Try to reuse one */
2453 		req = isc_astack_pop(sock->inactivereqs);
2454 	}
2455 
2456 	if (req == NULL) {
2457 		req = isc_mem_get(mgr->mctx, sizeof(*req));
2458 	}
2459 
2460 	*req = (isc__nm_uvreq_t){ .magic = 0 };
2461 	ISC_LINK_INIT(req, link);
2462 	req->uv_req.req.data = req;
2463 	isc___nmsocket_attach(sock, &req->sock FLARG_PASS);
2464 	req->magic = UVREQ_MAGIC;
2465 
2466 	return (req);
2467 }
2468 
2469 void
2470 isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) {
2471 	isc__nm_uvreq_t *req = NULL;
2472 	isc_nmhandle_t *handle = NULL;
2473 
2474 	REQUIRE(req0 != NULL);
2475 	REQUIRE(VALID_UVREQ(*req0));
2476 
2477 	req = *req0;
2478 	*req0 = NULL;
2479 
2480 	INSIST(sock == req->sock);
2481 
2482 	req->magic = 0;
2483 
2484 	/*
2485 	 * We need to save this first to make sure that handle,
2486 	 * sock, and the netmgr won't all disappear.
2487 	 */
2488 	handle = req->handle;
2489 	req->handle = NULL;
2490 
2491 	if (!isc__nmsocket_active(sock) ||
2492 	    !isc_astack_trypush(sock->inactivereqs, req)) {
2493 		isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
2494 	}
2495 
2496 	if (handle != NULL) {
2497 		isc__nmhandle_detach(&handle FLARG_PASS);
2498 	}
2499 
2500 	isc___nmsocket_detach(&sock FLARG_PASS);
2501 }
2502 
2503 void
2504 isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb,
2505 	    void *cbarg) {
2506 	REQUIRE(VALID_NMHANDLE(handle));
2507 
2508 	switch (handle->sock->type) {
2509 	case isc_nm_udpsocket:
2510 	case isc_nm_udplistener:
2511 		isc__nm_udp_send(handle, region, cb, cbarg);
2512 		break;
2513 	case isc_nm_tcpsocket:
2514 		isc__nm_tcp_send(handle, region, cb, cbarg);
2515 		break;
2516 	case isc_nm_tcpdnssocket:
2517 		isc__nm_tcpdns_send(handle, region, cb, cbarg);
2518 		break;
2519 	case isc_nm_tlsdnssocket:
2520 		isc__nm_tlsdns_send(handle, region, cb, cbarg);
2521 		break;
2522 #if HAVE_LIBNGHTTP2
2523 	case isc_nm_tlssocket:
2524 		isc__nm_tls_send(handle, region, cb, cbarg);
2525 		break;
2526 	case isc_nm_httpsocket:
2527 		isc__nm_http_send(handle, region, cb, cbarg);
2528 		break;
2529 #endif
2530 	default:
2531 		INSIST(0);
2532 		ISC_UNREACHABLE();
2533 	}
2534 }
2535 
2536 void
2537 isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
2538 	REQUIRE(VALID_NMHANDLE(handle));
2539 
2540 	switch (handle->sock->type) {
2541 	case isc_nm_udpsocket:
2542 		isc__nm_udp_read(handle, cb, cbarg);
2543 		break;
2544 	case isc_nm_tcpsocket:
2545 		isc__nm_tcp_read(handle, cb, cbarg);
2546 		break;
2547 	case isc_nm_tcpdnssocket:
2548 		isc__nm_tcpdns_read(handle, cb, cbarg);
2549 		break;
2550 	case isc_nm_tlsdnssocket:
2551 		isc__nm_tlsdns_read(handle, cb, cbarg);
2552 		break;
2553 #if HAVE_LIBNGHTTP2
2554 	case isc_nm_tlssocket:
2555 		isc__nm_tls_read(handle, cb, cbarg);
2556 		break;
2557 	case isc_nm_httpsocket:
2558 		isc__nm_http_read(handle, cb, cbarg);
2559 		break;
2560 #endif
2561 	default:
2562 		INSIST(0);
2563 		ISC_UNREACHABLE();
2564 	}
2565 }
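
/*
 * Illustrative sketch (not part of this file): the callback signatures
 * match the way they are invoked from isc__nm_async_readcb() and
 * isc__nm_async_sendcb() below; read_done() and send_done() are
 * hypothetical names.
 *
 *	static void
 *	read_done(isc_nmhandle_t *handle, isc_result_t result,
 *		  isc_region_t *region, void *cbarg) {
 *		(region describes the data received for this read)
 *	}
 *
 *	static void
 *	send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
 *		(called when the send has completed or failed)
 *	}
 *
 *	isc_nm_read(handle, read_done, NULL);
 *	isc_nm_send(handle, &region, send_done, NULL);
 */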
2566 
2567 void
2568 isc_nm_cancelread(isc_nmhandle_t *handle) {
2569 	REQUIRE(VALID_NMHANDLE(handle));
2570 
2571 	switch (handle->sock->type) {
2572 	case isc_nm_udpsocket:
2573 		isc__nm_udp_cancelread(handle);
2574 		break;
2575 	case isc_nm_tcpsocket:
2576 		isc__nm_tcp_cancelread(handle);
2577 		break;
2578 	case isc_nm_tcpdnssocket:
2579 		isc__nm_tcpdns_cancelread(handle);
2580 		break;
2581 	case isc_nm_tlsdnssocket:
2582 		isc__nm_tlsdns_cancelread(handle);
2583 		break;
2584 #if HAVE_LIBNGHTTP2
2585 	case isc_nm_tlssocket:
2586 		isc__nm_tls_cancelread(handle);
2587 		break;
2588 #endif
2589 	default:
2590 		INSIST(0);
2591 		ISC_UNREACHABLE();
2592 	}
2593 }
2594 
2595 void
2596 isc_nm_pauseread(isc_nmhandle_t *handle) {
2597 	REQUIRE(VALID_NMHANDLE(handle));
2598 
2599 	isc_nmsocket_t *sock = handle->sock;
2600 
2601 	switch (sock->type) {
2602 	case isc_nm_tcpsocket:
2603 		isc__nm_tcp_pauseread(handle);
2604 		break;
2605 #if HAVE_LIBNGHTTP2
2606 	case isc_nm_tlssocket:
2607 		isc__nm_tls_pauseread(handle);
2608 		break;
2609 #endif
2610 	default:
2611 		INSIST(0);
2612 		ISC_UNREACHABLE();
2613 	}
2614 }
2615 
2616 void
2617 isc_nm_resumeread(isc_nmhandle_t *handle) {
2618 	REQUIRE(VALID_NMHANDLE(handle));
2619 
2620 	isc_nmsocket_t *sock = handle->sock;
2621 
2622 	switch (sock->type) {
2623 	case isc_nm_tcpsocket:
2624 		isc__nm_tcp_resumeread(handle);
2625 		break;
2626 #if HAVE_LIBNGHTTP2
2627 	case isc_nm_tlssocket:
2628 		isc__nm_tls_resumeread(handle);
2629 		break;
2630 #endif
2631 	default:
2632 		INSIST(0);
2633 		ISC_UNREACHABLE();
2634 	}
2635 }
2636 
2637 void
2638 isc_nm_stoplistening(isc_nmsocket_t *sock) {
2639 	REQUIRE(VALID_NMSOCK(sock));
2640 
2641 	switch (sock->type) {
2642 	case isc_nm_udplistener:
2643 		isc__nm_udp_stoplistening(sock);
2644 		break;
2645 	case isc_nm_tcpdnslistener:
2646 		isc__nm_tcpdns_stoplistening(sock);
2647 		break;
2648 	case isc_nm_tcplistener:
2649 		isc__nm_tcp_stoplistening(sock);
2650 		break;
2651 	case isc_nm_tlsdnslistener:
2652 		isc__nm_tlsdns_stoplistening(sock);
2653 		break;
2654 #if HAVE_LIBNGHTTP2
2655 	case isc_nm_tlslistener:
2656 		isc__nm_tls_stoplistening(sock);
2657 		break;
2658 	case isc_nm_httplistener:
2659 		isc__nm_http_stoplistening(sock);
2660 		break;
2661 #endif
2662 	default:
2663 		INSIST(0);
2664 		ISC_UNREACHABLE();
2665 	}
2666 }
2667 
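/*
 * Run the connect callback for 'uvreq'.  If 'async' is false the callback
 * is invoked synchronously via isc__nm_async_connectcb(); otherwise a
 * connectcb netievent is queued to the socket's worker thread.  A similar
 * pattern is used by isc__nm_readcb() and isc__nm_sendcb() below.
 */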
2668 void
2669 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2670 		  isc_result_t eresult, bool async) {
2671 	REQUIRE(VALID_NMSOCK(sock));
2672 	REQUIRE(VALID_UVREQ(uvreq));
2673 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2674 
2675 	if (!async) {
2676 		isc__netievent_connectcb_t ievent = { .sock = sock,
2677 						      .req = uvreq,
2678 						      .result = eresult };
2679 		isc__nm_async_connectcb(NULL, (isc__netievent_t *)&ievent);
2680 	} else {
2681 		isc__netievent_connectcb_t *ievent =
2682 			isc__nm_get_netievent_connectcb(sock->mgr, sock, uvreq,
2683 							eresult);
2684 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2685 				       (isc__netievent_t *)ievent);
2686 	}
2687 }
2688 
2689 void
2690 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2691 	isc__netievent_connectcb_t *ievent = (isc__netievent_connectcb_t *)ev0;
2692 	isc_nmsocket_t *sock = ievent->sock;
2693 	isc__nm_uvreq_t *uvreq = ievent->req;
2694 	isc_result_t eresult = ievent->result;
2695 
2696 	UNUSED(worker);
2697 
2698 	REQUIRE(VALID_NMSOCK(sock));
2699 	REQUIRE(VALID_UVREQ(uvreq));
2700 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2701 	REQUIRE(ievent->sock->tid == isc_nm_tid());
2702 	REQUIRE(uvreq->cb.connect != NULL);
2703 
2704 	uvreq->cb.connect(uvreq->handle, eresult, uvreq->cbarg);
2705 
2706 	isc__nm_uvreq_put(&uvreq, sock);
2707 }
2708 
2709 void
2710 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2711 	       isc_result_t eresult) {
2712 	REQUIRE(VALID_NMSOCK(sock));
2713 	REQUIRE(VALID_UVREQ(uvreq));
2714 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2715 
2716 	if (eresult == ISC_R_SUCCESS || eresult == ISC_R_TIMEDOUT) {
2717 		isc__netievent_readcb_t ievent = { .sock = sock,
2718 						   .req = uvreq,
2719 						   .result = eresult };
2720 
2721 		isc__nm_async_readcb(NULL, (isc__netievent_t *)&ievent);
2722 	} else {
2723 		isc__netievent_readcb_t *ievent = isc__nm_get_netievent_readcb(
2724 			sock->mgr, sock, uvreq, eresult);
2725 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2726 				       (isc__netievent_t *)ievent);
2727 	}
2728 }
2729 
2730 void
2731 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2732 	isc__netievent_readcb_t *ievent = (isc__netievent_readcb_t *)ev0;
2733 	isc_nmsocket_t *sock = ievent->sock;
2734 	isc__nm_uvreq_t *uvreq = ievent->req;
2735 	isc_result_t eresult = ievent->result;
2736 	isc_region_t region;
2737 
2738 	UNUSED(worker);
2739 
2740 	REQUIRE(VALID_NMSOCK(sock));
2741 	REQUIRE(VALID_UVREQ(uvreq));
2742 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2743 	REQUIRE(sock->tid == isc_nm_tid());
2744 
2745 	region.base = (unsigned char *)uvreq->uvbuf.base;
2746 	region.length = uvreq->uvbuf.len;
2747 
2748 	uvreq->cb.recv(uvreq->handle, eresult, &region, uvreq->cbarg);
2749 
2750 	isc__nm_uvreq_put(&uvreq, sock);
2751 }
2752 
2753 void
2754 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2755 	       isc_result_t eresult, bool async) {
2756 	REQUIRE(VALID_NMSOCK(sock));
2757 	REQUIRE(VALID_UVREQ(uvreq));
2758 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2759 
2760 	if (!async) {
2761 		isc__netievent_sendcb_t ievent = { .sock = sock,
2762 						   .req = uvreq,
2763 						   .result = eresult };
2764 		isc__nm_async_sendcb(NULL, (isc__netievent_t *)&ievent);
2765 		return;
2766 	}
2767 
2768 	isc__netievent_sendcb_t *ievent =
2769 		isc__nm_get_netievent_sendcb(sock->mgr, sock, uvreq, eresult);
2770 	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2771 			       (isc__netievent_t *)ievent);
2772 }
2773 
2774 void
2775 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2776 	isc__netievent_sendcb_t *ievent = (isc__netievent_sendcb_t *)ev0;
2777 	isc_nmsocket_t *sock = ievent->sock;
2778 	isc__nm_uvreq_t *uvreq = ievent->req;
2779 	isc_result_t eresult = ievent->result;
2780 
2781 	UNUSED(worker);
2782 
2783 	REQUIRE(VALID_NMSOCK(sock));
2784 	REQUIRE(VALID_UVREQ(uvreq));
2785 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
2786 	REQUIRE(sock->tid == isc_nm_tid());
2787 
2788 	uvreq->cb.send(uvreq->handle, eresult, uvreq->cbarg);
2789 
2790 	isc__nm_uvreq_put(&uvreq, sock);
2791 }
2792 
2793 static void
2794 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) {
2795 	isc__netievent_close_t *ievent = (isc__netievent_close_t *)ev0;
2796 	isc_nmsocket_t *sock = ievent->sock;
2797 
2798 	REQUIRE(VALID_NMSOCK(ievent->sock));
2799 	REQUIRE(sock->tid == isc_nm_tid());
2800 	REQUIRE(sock->closehandle_cb != NULL);
2801 
2802 	UNUSED(worker);
2803 
2804 	ievent->sock->closehandle_cb(sock);
2805 }
2806 
2807 void
2808 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) {
2809 	isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0;
2810 	FLARG_IEVENT(ievent);
2811 
2812 	REQUIRE(VALID_NMSOCK(ievent->sock));
2813 	REQUIRE(VALID_NMHANDLE(ievent->handle));
2814 	REQUIRE(ievent->sock->tid == isc_nm_tid());
2815 
2816 	UNUSED(worker);
2817 
2818 	nmhandle_detach_cb(&ievent->handle FLARG_PASS);
2819 }
2820 
2821 void
2822 isc__nmsocket_shutdown(isc_nmsocket_t *sock) {
2823 	REQUIRE(VALID_NMSOCK(sock));
2824 	switch (sock->type) {
2825 	case isc_nm_udpsocket:
2826 		isc__nm_udp_shutdown(sock);
2827 		break;
2828 	case isc_nm_tcpsocket:
2829 		isc__nm_tcp_shutdown(sock);
2830 		break;
2831 	case isc_nm_tcpdnssocket:
2832 		isc__nm_tcpdns_shutdown(sock);
2833 		break;
2834 	case isc_nm_tlsdnssocket:
2835 		isc__nm_tlsdns_shutdown(sock);
2836 		break;
2837 	case isc_nm_udplistener:
2838 	case isc_nm_tcplistener:
2839 	case isc_nm_tcpdnslistener:
2840 	case isc_nm_tlsdnslistener:
2841 		return;
2842 	default:
2843 		INSIST(0);
2844 		ISC_UNREACHABLE();
2845 	}
2846 }
2847 
2848 static void
2849 shutdown_walk_cb(uv_handle_t *handle, void *arg) {
2850 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
2851 	UNUSED(arg);
2852 
2853 	if (uv_is_closing(handle)) {
2854 		return;
2855 	}
2856 
2857 	switch (handle->type) {
2858 	case UV_UDP:
2859 	case UV_TCP:
2860 		break;
2861 	default:
2862 		return;
2863 	}
2864 
2865 	isc__nmsocket_shutdown(sock);
2866 }
2867 
2868 void
2869 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
2870 	UNUSED(ev0);
2871 	uv_walk(&worker->loop, shutdown_walk_cb, NULL);
2872 }
2873 
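/*
 * The "interlocked" state gives one network thread exclusive access to the
 * manager: isc__nm_acquire_interlocked() makes a single attempt to claim it
 * (recording the thread id in mgr->interlocked),
 * isc__nm_acquire_interlocked_force() waits on mgr->wkstatecond until it
 * succeeds, and isc__nm_drop_interlocked() releases it and wakes any
 * waiters.  All three are no-ops when called outside a network thread.
 */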
2874 bool
2875 isc__nm_acquire_interlocked(isc_nm_t *mgr) {
2876 	if (!isc__nm_in_netthread()) {
2877 		return (false);
2878 	}
2879 
2880 	LOCK(&mgr->lock);
2881 	bool success = atomic_compare_exchange_strong(
2882 		&mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2883 		isc_nm_tid());
2884 
2885 	UNLOCK(&mgr->lock);
2886 	return (success);
2887 }
2888 
2889 void
2890 isc__nm_drop_interlocked(isc_nm_t *mgr) {
2891 	if (!isc__nm_in_netthread()) {
2892 		return;
2893 	}
2894 
2895 	LOCK(&mgr->lock);
2896 	int tid = atomic_exchange(&mgr->interlocked,
2897 				  ISC_NETMGR_NON_INTERLOCKED);
2898 	INSIST(tid != ISC_NETMGR_NON_INTERLOCKED);
2899 	BROADCAST(&mgr->wkstatecond);
2900 	UNLOCK(&mgr->lock);
2901 }
2902 
2903 void
2904 isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
2905 	if (!isc__nm_in_netthread()) {
2906 		return;
2907 	}
2908 
2909 	LOCK(&mgr->lock);
2910 	while (!atomic_compare_exchange_strong(
2911 		&mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2912 		isc_nm_tid()))
2913 	{
2914 		WAIT(&mgr->wkstatecond, &mgr->lock);
2915 	}
2916 	UNLOCK(&mgr->lock);
2917 }
2918 
2919 void
2920 isc_nm_setstats(isc_nm_t *mgr, isc_stats_t *stats) {
2921 	REQUIRE(VALID_NM(mgr));
2922 	REQUIRE(mgr->stats == NULL);
2923 	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2924 
2925 	isc_stats_attach(stats, &mgr->stats);
2926 }
2927 
2928 void
2929 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2930 	REQUIRE(VALID_NM(mgr));
2931 	REQUIRE(counterid != -1);
2932 
2933 	if (mgr->stats != NULL) {
2934 		isc_stats_increment(mgr->stats, counterid);
2935 	}
2936 }
2937 
2938 void
2939 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2940 	REQUIRE(VALID_NM(mgr));
2941 	REQUIRE(counterid != -1);
2942 
2943 	if (mgr->stats != NULL) {
2944 		isc_stats_decrement(mgr->stats, counterid);
2945 	}
2946 }
2947 
2948 isc_result_t
2949 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
2950 	int sock = socket(domain, type, protocol);
2951 	if (sock < 0) {
2952 		return (isc_errno_toresult(errno));
2953 	}
2954 
2955 	*sockp = (uv_os_sock_t)sock;
2956 	return (ISC_R_SUCCESS);
2957 }
2958 
2959 void
2960 isc__nm_closesocket(uv_os_sock_t sock) {
2961 	close(sock);
2962 }
2963 
2964 #define setsockopt_on(socket, level, name) \
2965 	setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
2966 
2967 #define setsockopt_off(socket, level, name) \
2968 	setsockopt(socket, level, name, &(int){ 0 }, sizeof(int))
2969 
2970 isc_result_t
2971 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
2972 	/*
2973 	 * Set the IP_FREEBIND (or equivalent) option on the socket.
2974 	 */
2975 #ifdef IP_FREEBIND
2976 	UNUSED(sa_family);
2977 	if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
2978 		return (ISC_R_FAILURE);
2979 	}
2980 	return (ISC_R_SUCCESS);
2981 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
2982 	if (sa_family == AF_INET) {
2983 #if defined(IP_BINDANY)
2984 		if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
2985 			return (ISC_R_FAILURE);
2986 		}
2987 		return (ISC_R_SUCCESS);
2988 #endif
2989 	} else if (sa_family == AF_INET6) {
2990 #if defined(IPV6_BINDANY)
2991 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
2992 			return (ISC_R_FAILURE);
2993 		}
2994 		return (ISC_R_SUCCESS);
2995 #endif
2996 	}
2997 	return (ISC_R_NOTIMPLEMENTED);
2998 #elif defined(SO_BINDANY)
2999 	UNUSED(sa_family);
3000 	if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
3001 		return (ISC_R_FAILURE);
3002 	}
3003 	return (ISC_R_SUCCESS);
3004 #else
3005 	UNUSED(fd);
3006 	UNUSED(sa_family);
3007 	return (ISC_R_NOTIMPLEMENTED);
3008 #endif
3009 }
3010 
3011 isc_result_t
3012 isc__nm_socket_reuse(uv_os_sock_t fd) {
3013 	/*
3014 	 * Generally, the SO_REUSEADDR socket option allows reuse of
3015 	 * local addresses.
3016 	 *
3017 	 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
3018 	 * additional refinements for programs that use multicast.
3019 	 *
3020 	 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
3021 	 * rather than steal it from the current listener, so we don't use it
3022 	 * here, but rather in isc__nm_socket_reuse_lb().
3023 	 *
3024 	 * On Windows, it also allows a socket to forcibly bind to a port in use
3025 	 * by another socket.
3026 	 */
3027 
3028 #if defined(SO_REUSEPORT) && !defined(__linux__)
3029 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
3030 		return (ISC_R_FAILURE);
3031 	}
3032 	return (ISC_R_SUCCESS);
3033 #elif defined(SO_REUSEADDR)
3034 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
3035 		return (ISC_R_FAILURE);
3036 	}
3037 	return (ISC_R_SUCCESS);
3038 #else
3039 	UNUSED(fd);
3040 	return (ISC_R_NOTIMPLEMENTED);
3041 #endif
3042 }
3043 
3044 isc_result_t
3045 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
3046 	/*
3047  * On FreeBSD 12+, the SO_REUSEPORT_LB socket option allows sockets to be
3048 	 * bound to an identical socket address. For UDP sockets, the use of
3049 	 * this option can provide better distribution of incoming datagrams to
3050 	 * multiple processes (or threads) as compared to the traditional
3051 	 * technique of having multiple processes compete to receive datagrams
3052 	 * on the same socket.
3053 	 *
3054 	 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
3055 	 */
3056 #if defined(SO_REUSEPORT_LB)
3057 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
3058 		return (ISC_R_FAILURE);
3059 	} else {
3060 		return (ISC_R_SUCCESS);
3061 	}
3062 #elif defined(SO_REUSEPORT) && defined(__linux__)
3063 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
3064 		return (ISC_R_FAILURE);
3065 	} else {
3066 		return (ISC_R_SUCCESS);
3067 	}
3068 #else
3069 	UNUSED(fd);
3070 	return (ISC_R_NOTIMPLEMENTED);
3071 #endif
3072 }
3073 
3074 isc_result_t
3075 isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
3076 #ifdef SO_INCOMING_CPU
3077 	if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
3078 		return (ISC_R_FAILURE);
3079 	} else {
3080 		return (ISC_R_SUCCESS);
3081 	}
3082 #else
3083 	UNUSED(fd);
3084 #endif
3085 	return (ISC_R_NOTIMPLEMENTED);
3086 }
3087 
3088 isc_result_t
3089 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
3090 	/*
3091 	 * Disable Path MTU Discovery on IP packets.
3092 	 */
3093 	if (sa_family == AF_INET6) {
3094 #if defined(IPV6_DONTFRAG)
3095 		if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
3096 			return (ISC_R_FAILURE);
3097 		} else {
3098 			return (ISC_R_SUCCESS);
3099 		}
3100 #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3101 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
3102 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3103 		{
3104 			return (ISC_R_FAILURE);
3105 		} else {
3106 			return (ISC_R_SUCCESS);
3107 		}
3108 #else
3109 		UNUSED(fd);
3110 #endif
3111 	} else if (sa_family == AF_INET) {
3112 #if defined(IP_DONTFRAG)
3113 		if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
3114 			return (ISC_R_FAILURE);
3115 		} else {
3116 			return (ISC_R_SUCCESS);
3117 		}
3118 #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3119 		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
3120 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3121 		{
3122 			return (ISC_R_FAILURE);
3123 		} else {
3124 			return (ISC_R_SUCCESS);
3125 		}
3126 #else
3127 		UNUSED(fd);
3128 #endif
3129 	} else {
3130 		return (ISC_R_FAMILYNOSUPPORT);
3131 	}
3132 
3133 	return (ISC_R_NOTIMPLEMENTED);
3134 }
3135 
3136 isc_result_t
3137 isc_nm_checkaddr(const isc_sockaddr_t *addr, isc_socktype_t type) {
3138 	int proto, pf, addrlen, fd, r;
3139 
3140 	REQUIRE(addr != NULL);
3141 
3142 	switch (type) {
3143 	case isc_socktype_tcp:
3144 		proto = SOCK_STREAM;
3145 		break;
3146 	case isc_socktype_udp:
3147 		proto = SOCK_DGRAM;
3148 		break;
3149 	default:
3150 		return (ISC_R_NOTIMPLEMENTED);
3151 	}
3152 
3153 	pf = isc_sockaddr_pf(addr);
3154 	if (pf == AF_INET) {
3155 		addrlen = sizeof(struct sockaddr_in);
3156 	} else {
3157 		addrlen = sizeof(struct sockaddr_in6);
3158 	}
3159 
3160 	fd = socket(pf, proto, 0);
3161 	if (fd < 0) {
3162 		return (isc_errno_toresult(errno));
3163 	}
3164 
3165 	r = bind(fd, (const struct sockaddr *)&addr->type.sa, addrlen);
3166 	if (r < 0) {
3167 		close(fd);
3168 		return (isc_errno_toresult(errno));
3169 	}
3170 
3171 	close(fd);
3172 	return (ISC_R_SUCCESS);
3173 }
3174 
3175 #if defined(TCP_CONNECTIONTIMEOUT)
3176 #define TIMEOUT_TYPE	int
3177 #define TIMEOUT_DIV	1000
3178 #define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT
3179 #elif defined(TCP_RXT_CONNDROPTIME)
3180 #define TIMEOUT_TYPE	int
3181 #define TIMEOUT_DIV	1000
3182 #define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME
3183 #elif defined(TCP_USER_TIMEOUT)
3184 #define TIMEOUT_TYPE	unsigned int
3185 #define TIMEOUT_DIV	1
3186 #define TIMEOUT_OPTNAME TCP_USER_TIMEOUT
3187 #elif defined(TCP_KEEPINIT)
3188 #define TIMEOUT_TYPE	int
3189 #define TIMEOUT_DIV	1000
3190 #define TIMEOUT_OPTNAME TCP_KEEPINIT
3191 #endif
3192 
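/*
 * isc__nm_socket_connectiontimeout() takes milliseconds; TIMEOUT_DIV
 * converts that into the units the selected platform option expects (the
 * options with TIMEOUT_DIV 1000 take seconds, while TCP_USER_TIMEOUT takes
 * milliseconds).
 */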
3193 isc_result_t
3194 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
3195 #if defined(TIMEOUT_OPTNAME)
3196 	TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
3197 
3198 	if (timeout == 0) {
3199 		timeout = 1;
3200 	}
3201 
3202 	if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
3203 		       sizeof(timeout)) == -1)
3204 	{
3205 		return (ISC_R_FAILURE);
3206 	}
3207 
3208 	return (ISC_R_SUCCESS);
3209 #else
3210 	UNUSED(fd);
3211 	UNUSED(timeout_ms);
3212 
3213 	return (ISC_R_SUCCESS);
3214 #endif
3215 }
3216 
3217 isc_result_t
3218 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) {
3219 #ifdef TCP_NODELAY
3220 	if (setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY) == -1) {
3221 		return (ISC_R_FAILURE);
3222 	} else {
3223 		return (ISC_R_SUCCESS);
3224 	}
3225 #else
3226 	UNUSED(fd);
3227 	return (ISC_R_SUCCESS);
3228 #endif
3229 }
3230 
3231 void
3232 isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle) {
3233 	int32_t recv_buffer_size = 0;
3234 	int32_t send_buffer_size = 0;
3235 
3236 	switch (handle->type) {
3237 	case UV_TCP:
3238 		recv_buffer_size =
3239 			atomic_load_relaxed(&nm->recv_tcp_buffer_size);
3240 		send_buffer_size =
3241 			atomic_load_relaxed(&nm->send_tcp_buffer_size);
3242 		break;
3243 	case UV_UDP:
3244 		recv_buffer_size =
3245 			atomic_load_relaxed(&nm->recv_udp_buffer_size);
3246 		send_buffer_size =
3247 			atomic_load_relaxed(&nm->send_udp_buffer_size);
3248 		break;
3249 	default:
3250 		INSIST(0);
3251 		ISC_UNREACHABLE();
3252 	}
3253 
3254 	if (recv_buffer_size > 0) {
3255 		int r = uv_recv_buffer_size(handle, &recv_buffer_size);
3256 		INSIST(r == 0);
3257 	}
3258 
3259 	if (send_buffer_size > 0) {
3260 		int r = uv_send_buffer_size(handle, &send_buffer_size);
3261 		INSIST(r == 0);
3262 	}
3263 }
3264 
3265 static isc_threadresult_t
3266 isc__nm_work_run(isc_threadarg_t arg) {
3267 	isc__nm_work_t *work = (isc__nm_work_t *)arg;
3268 
3269 	work->cb(work->data);
3270 
3271 	return ((isc_threadresult_t)0);
3272 }
3273 
3274 static void
3275 isc__nm_work_cb(uv_work_t *req) {
3276 	isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3277 
3278 	if (isc_tid_v == SIZE_MAX) {
3279 		isc__trampoline_t *trampoline_arg =
3280 			isc__trampoline_get(isc__nm_work_run, work);
3281 		(void)isc__trampoline_run(trampoline_arg);
3282 	} else {
3283 		(void)isc__nm_work_run((isc_threadarg_t)work);
3284 	}
3285 }
3286 
3287 static void
3288 isc__nm_after_work_cb(uv_work_t *req, int status) {
3289 	isc_result_t result = ISC_R_SUCCESS;
3290 	isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3291 	isc_nm_t *netmgr = work->netmgr;
3292 
3293 	if (status != 0) {
3294 		result = isc__nm_uverr2result(status);
3295 	}
3296 
3297 	work->after_cb(work->data, result);
3298 
3299 	isc_mem_put(netmgr->mctx, work, sizeof(*work));
3300 
3301 	isc_nm_detach(&netmgr);
3302 }
3303 
3304 void
3305 isc_nm_work_offload(isc_nm_t *netmgr, isc_nm_workcb_t work_cb,
3306 		    isc_nm_after_workcb_t after_work_cb, void *data) {
3307 	isc__networker_t *worker = NULL;
3308 	isc__nm_work_t *work = NULL;
3309 	int r;
3310 
3311 	REQUIRE(isc__nm_in_netthread());
3312 	REQUIRE(VALID_NM(netmgr));
3313 
3314 	worker = &netmgr->workers[isc_nm_tid()];
3315 
3316 	work = isc_mem_get(netmgr->mctx, sizeof(*work));
3317 	*work = (isc__nm_work_t){
3318 		.cb = work_cb,
3319 		.after_cb = after_work_cb,
3320 		.data = data,
3321 	};
3322 
3323 	isc_nm_attach(netmgr, &work->netmgr);
3324 
3325 	uv_req_set_data((uv_req_t *)&work->req, work);
3326 
3327 	r = uv_queue_work(&worker->loop, &work->req, isc__nm_work_cb,
3328 			  isc__nm_after_work_cb);
3329 	RUNTIME_CHECK(r == 0);
3330 }
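
/*
 * Illustrative sketch (not part of this file): offload a blocking job from
 * a network thread to the libuv thread pool; do_lookup() and lookup_done()
 * are hypothetical names.  Must be called from a network thread.
 *
 *	static void
 *	do_lookup(void *data) {
 *		(runs on a libuv worker thread; may block)
 *	}
 *
 *	static void
 *	lookup_done(void *data, isc_result_t result) {
 *		(runs on the queuing network thread's loop when done)
 *	}
 *
 *	isc_nm_work_offload(netmgr, do_lookup, lookup_done, data);
 */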
3331 
3332 void
3333 isc_nm_sequential(isc_nmhandle_t *handle) {
3334 	isc_nmsocket_t *sock = NULL;
3335 
3336 	REQUIRE(VALID_NMHANDLE(handle));
3337 	REQUIRE(VALID_NMSOCK(handle->sock));
3338 
3339 	sock = handle->sock;
3340 
3341 	switch (sock->type) {
3342 	case isc_nm_tcpdnssocket:
3343 	case isc_nm_tlsdnssocket:
3344 		break;
3345 	case isc_nm_httpsocket:
3346 		return;
3347 	default:
3348 		INSIST(0);
3349 		ISC_UNREACHABLE();
3350 	}
3351 
3352 	/*
3353 	 * We don't want pipelining on this connection. That means
3354 	 * that we need to pause after reading each request, and
3355 	 * resume only after the request has been processed. This
3356 	 * is done in isc__nm_resume_processing(), which is the
3357 	 * socket's closehandle_cb callback, called whenever a handle
3358 	 * is released.
3359 	 */
3360 	isc__nmsocket_timer_stop(sock);
3361 	isc__nm_stop_reading(sock);
3362 	atomic_store(&sock->sequential, true);
3363 }
3364 
3365 void
3366 isc_nm_bad_request(isc_nmhandle_t *handle) {
3367 	isc_nmsocket_t *sock;
3368 
3369 	REQUIRE(VALID_NMHANDLE(handle));
3370 	REQUIRE(VALID_NMSOCK(handle->sock));
3371 
3372 	sock = handle->sock;
3373 	switch (sock->type) {
3374 #if HAVE_LIBNGHTTP2
3375 	case isc_nm_httpsocket:
3376 		isc__nm_http_bad_request(handle);
3377 		break;
3378 #endif /* HAVE_LIBNGHTTP2 */
3379 
3380 	case isc_nm_udpsocket:
3381 	case isc_nm_tcpdnssocket:
3382 	case isc_nm_tlsdnssocket:
3383 		return;
3384 		break;
3385 
3386 	case isc_nm_tcpsocket:
3387 #if HAVE_LIBNGHTTP2
3388 	case isc_nm_tlssocket:
3389 #endif /* HAVE_LIBNGHTTP2 */
3390 	default:
3391 		INSIST(0);
3392 		ISC_UNREACHABLE();
3393 		break;
3394 	}
3395 }
3396 
3397 bool
3398 isc_nm_xfr_allowed(isc_nmhandle_t *handle) {
3399 	isc_nmsocket_t *sock;
3400 
3401 	REQUIRE(VALID_NMHANDLE(handle));
3402 	REQUIRE(VALID_NMSOCK(handle->sock));
3403 
3404 	sock = handle->sock;
3405 
3406 	switch (sock->type) {
3407 	case isc_nm_tcpdnssocket:
3408 		return (true);
3409 	case isc_nm_tlsdnssocket:
3410 		return (isc__nm_tlsdns_xfr_allowed(sock));
3411 	default:
3412 		return (false);
3413 	}
3414 
3415 	INSIST(0);
3416 	ISC_UNREACHABLE();
3417 
3418 	return (false);
3419 }
3420 
3421 bool
3422 isc_nm_is_tlsdns_handle(isc_nmhandle_t *handle) {
3423 	REQUIRE(VALID_NMHANDLE(handle));
3424 	REQUIRE(VALID_NMSOCK(handle->sock));
3425 
3426 	return (handle->sock->type == isc_nm_tlsdnssocket);
3427 }
3428 
3429 #ifdef NETMGR_TRACE
3430 /*
3431  * Dump all active sockets in the netmgr. We output to stderr,
3432  * as the logger might already be shut down.
3433  */
3434 
3435 static const char *
3436 nmsocket_type_totext(isc_nmsocket_type type) {
3437 	switch (type) {
3438 	case isc_nm_udpsocket:
3439 		return ("isc_nm_udpsocket");
3440 	case isc_nm_udplistener:
3441 		return ("isc_nm_udplistener");
3442 	case isc_nm_tcpsocket:
3443 		return ("isc_nm_tcpsocket");
3444 	case isc_nm_tcplistener:
3445 		return ("isc_nm_tcplistener");
3446 	case isc_nm_tcpdnslistener:
3447 		return ("isc_nm_tcpdnslistener");
3448 	case isc_nm_tcpdnssocket:
3449 		return ("isc_nm_tcpdnssocket");
3450 	case isc_nm_tlssocket:
3451 		return ("isc_nm_tlssocket");
3452 	case isc_nm_tlslistener:
3453 		return ("isc_nm_tlslistener");
3454 	case isc_nm_tlsdnslistener:
3455 		return ("isc_nm_tlsdnslistener");
3456 	case isc_nm_tlsdnssocket:
3457 		return ("isc_nm_tlsdnssocket");
3458 	case isc_nm_httplistener:
3459 		return ("isc_nm_httplistener");
3460 	case isc_nm_httpsocket:
3461 		return ("isc_nm_httpsocket");
3462 	default:
3463 		INSIST(0);
3464 		ISC_UNREACHABLE();
3465 	}
3466 }
3467 
3468 static void
3469 nmhandle_dump(isc_nmhandle_t *handle) {
3470 	fprintf(stderr, "Active handle %p, refs %" PRIuFAST32 "\n", handle,
3471 		isc_refcount_current(&handle->references));
3472 	fprintf(stderr, "Created by:\n");
3473 	isc_backtrace_symbols_fd(handle->backtrace, handle->backtrace_size,
3474 				 STDERR_FILENO);
3475 	fprintf(stderr, "\n\n");
3476 }
3477 
3478 static void
3479 nmsocket_dump(isc_nmsocket_t *sock) {
3480 	isc_nmhandle_t *handle = NULL;
3481 
3482 	LOCK(&sock->lock);
3483 	fprintf(stderr, "\n=================\n");
3484 	fprintf(stderr, "Active %s socket %p, type %s, refs %" PRIuFAST32 "\n",
3485 		atomic_load(&sock->client) ? "client" : "server", sock,
3486 		nmsocket_type_totext(sock->type),
3487 		isc_refcount_current(&sock->references));
3488 	fprintf(stderr,
3489 		"Parent %p, listener %p, server %p, statichandle = "
3490 		"%p\n",
3491 		sock->parent, sock->listener, sock->server, sock->statichandle);
3492 	fprintf(stderr, "Flags:%s%s%s%s%s\n",
3493 		atomic_load(&sock->active) ? " active" : "",
3494 		atomic_load(&sock->closing) ? " closing" : "",
3495 		atomic_load(&sock->destroying) ? " destroying" : "",
3496 		atomic_load(&sock->connecting) ? " connecting" : "",
3497 		atomic_load(&sock->accepting) ? " accepting" : "");
3498 	fprintf(stderr, "Created by:\n");
3499 	isc_backtrace_symbols_fd(sock->backtrace, sock->backtrace_size,
3500 				 STDERR_FILENO);
3501 	fprintf(stderr, "\n");
3502 
3503 	for (handle = ISC_LIST_HEAD(sock->active_handles); handle != NULL;
3504 	     handle = ISC_LIST_NEXT(handle, active_link))
3505 	{
3506 		static bool first = true;
3507 		if (first) {
3508 			fprintf(stderr, "Active handles:\n");
3509 			first = false;
3510 		}
3511 		nmhandle_dump(handle);
3512 	}
3513 
3514 	fprintf(stderr, "\n");
3515 	UNLOCK(&sock->lock);
3516 }
3517 
3518 void
3519 isc__nm_dump_active(isc_nm_t *nm) {
3520 	isc_nmsocket_t *sock = NULL;
3521 
3522 	REQUIRE(VALID_NM(nm));
3523 
3524 	LOCK(&nm->lock);
3525 	for (sock = ISC_LIST_HEAD(nm->active_sockets); sock != NULL;
3526 	     sock = ISC_LIST_NEXT(sock, active_link))
3527 	{
3528 		static bool first = true;
3529 		if (first) {
3530 			fprintf(stderr, "Outstanding sockets\n");
3531 			first = false;
3532 		}
3533 		nmsocket_dump(sock);
3534 	}
3535 	UNLOCK(&nm->lock);
3536 }
3537 #endif
3538