1 /*	$NetBSD: netmgr-int.h,v 1.8 2022/09/23 12:15:34 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 #pragma once
17 
18 #include <unistd.h>
19 #include <uv.h>
20 
21 #include <openssl/err.h>
22 #include <openssl/ssl.h>
23 
24 #include <isc/astack.h>
25 #include <isc/atomic.h>
26 #include <isc/barrier.h>
27 #include <isc/buffer.h>
28 #include <isc/condition.h>
29 #include <isc/magic.h>
30 #include <isc/mem.h>
31 #include <isc/netmgr.h>
32 #include <isc/quota.h>
33 #include <isc/random.h>
34 #include <isc/refcount.h>
35 #include <isc/region.h>
36 #include <isc/result.h>
37 #include <isc/rwlock.h>
38 #include <isc/sockaddr.h>
39 #include <isc/stats.h>
40 #include <isc/thread.h>
41 #include <isc/util.h>
42 
43 #include "uv-compat.h"
44 
/* Sentinel: thread id has not been assigned yet. */
#define ISC_NETMGR_TID_UNKNOWN -1

/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
49 
50 /*
51  * Receive buffers
52  */
53 #if HAVE_DECL_UV_UDP_MMSG_CHUNK
54 /*
55  * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
56  * libuv will not receive more that 20 datagrams in a single recvmmsg call.
57  */
58 #define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
59 #else
60 /*
61  * A single DNS message size
62  */
63 #define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
64 #endif
65 
66 /*
67  * The TCP receive buffer can fit one maximum sized DNS message plus its size,
68  * the receive buffer here affects TCP, DoT and DoH.
69  */
70 #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
71 
72 /* Pick the larger buffer */
73 #define ISC_NETMGR_RECVBUF_SIZE                                     \
74 	(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
75 		 ? ISC_NETMGR_UDP_RECVBUF_SIZE                      \
76 		 : ISC_NETMGR_TCP_RECVBUF_SIZE)
77 
78 /*
79  * Send buffer
80  */
81 #define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
82 
83 /*%
84  * Regular TCP buffer size.
85  */
86 #define NM_REG_BUF 4096
87 
88 /*%
89  * Larger buffer for when the regular one isn't enough; this will
90  * hold two full DNS packets with lengths.  netmgr receives 64k at
91  * most in TCPDNS connections, so there's no risk of overrun
92  * when using a buffer this size.
93  */
94 #define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
95 
96 /*
97  * Define NETMGR_TRACE to activate tracing of handles and sockets.
98  * This will impair performance but enables us to quickly determine,
99  * if netmgr resources haven't been cleaned up on shutdown, which ones
100  * are still in use.
101  */
102 #ifdef NETMGR_TRACE
103 #define TRACE_SIZE 8
104 
105 void
106 isc__nm_dump_active(isc_nm_t *nm);
107 
108 #if defined(__linux__)
109 #include <syscall.h>
110 #define gettid() (uint32_t) syscall(SYS_gettid)
111 #elif defined(_WIN32)
112 #define gettid() (uint32_t) GetCurrentThreadId()
113 #else
114 #define gettid() (uint32_t) pthread_self()
115 #endif
116 
117 #ifdef NETMGR_TRACE_VERBOSE
118 #define NETMGR_TRACE_LOG(format, ...)                                \
119 	fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
120 		isc_nm_tid(), file, line, func, __VA_ARGS__)
121 #else
122 #define NETMGR_TRACE_LOG(format, ...) \
123 	(void)file;                   \
124 	(void)line;                   \
125 	(void)func;
126 #endif
127 
128 #define FLARG_PASS , file, line, func
129 #define FLARG                                              \
130 	, const char *file __attribute__((unused)),        \
131 		unsigned int line __attribute__((unused)), \
132 		const char *func __attribute__((unused))
133 #define FLARG_IEVENT(ievent)              \
134 	const char *file = ievent->file;  \
135 	unsigned int line = ievent->line; \
136 	const char *func = ievent->func;
137 #define FLARG_IEVENT_PASS(ievent) \
138 	ievent->file = file;      \
139 	ievent->line = line;      \
140 	ievent->func = func;
141 #define isc__nm_uvreq_get(req, sock) \
142 	isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
143 #define isc__nm_uvreq_put(req, sock) \
144 	isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
145 #define isc__nmsocket_init(sock, mgr, type, iface)                      \
146 	isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
147 			    __func__)
148 #define isc__nmsocket_put(sockp) \
149 	isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
150 #define isc__nmsocket_attach(sock, target) \
151 	isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
152 #define isc__nmsocket_detach(socketp) \
153 	isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
154 #define isc__nmsocket_close(socketp) \
155 	isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
156 #define isc__nmhandle_get(sock, peer, local) \
157 	isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
158 #define isc__nmsocket_prep_destroy(sock) \
159 	isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
160 #else
161 #define NETMGR_TRACE_LOG(format, ...)
162 
163 #define FLARG_PASS
164 #define FLARG
165 #define FLARG_IEVENT(ievent)
166 #define FLARG_IEVENT_PASS(ievent)
167 #define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
168 #define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
169 #define isc__nmsocket_init(sock, mgr, type, iface) \
170 	isc___nmsocket_init(sock, mgr, type, iface)
171 #define isc__nmsocket_put(sockp)	   isc___nmsocket_put(sockp)
172 #define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
173 #define isc__nmsocket_detach(socketp)	   isc___nmsocket_detach(socketp)
174 #define isc__nmsocket_close(socketp)	   isc___nmsocket_close(socketp)
175 #define isc__nmhandle_get(sock, peer, local) \
176 	isc___nmhandle_get(sock, peer, local)
177 #define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
178 #endif
179 
180 /*
181  * Queue types in the order of processing priority.
182  */
183 typedef enum {
184 	NETIEVENT_PRIORITY = 0,
185 	NETIEVENT_PRIVILEGED = 1,
186 	NETIEVENT_TASK = 2,
187 	NETIEVENT_NORMAL = 3,
188 	NETIEVENT_MAX = 4,
189 } netievent_type_t;
190 
191 typedef struct isc__nm_uvreq isc__nm_uvreq_t;
192 typedef struct isc__netievent isc__netievent_t;
193 
194 typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;
195 
196 typedef struct ievent {
197 	isc_mutex_t lock;
198 	isc_condition_t cond;
199 	isc__netievent_list_t list;
200 } ievent_t;
201 
202 /*
203  * Single network event loop worker.
204  */
205 typedef struct isc__networker {
206 	isc_nm_t *mgr;
207 	int id;		  /* thread id */
208 	uv_loop_t loop;	  /* libuv loop structure */
209 	uv_async_t async; /* async channel to send
210 			   * data to this networker */
211 	bool paused;
212 	bool finished;
213 	isc_thread_t thread;
214 	ievent_t ievents[NETIEVENT_MAX];
215 
216 	isc_refcount_t references;
217 	atomic_int_fast64_t pktcount;
218 	char *recvbuf;
219 	char *sendbuf;
220 	bool recvbuf_inuse;
221 } isc__networker_t;
222 
223 /*
224  * A general handle for a connection bound to a networker.  For UDP
225  * connections we have peer address here, so both TCP and UDP can be
226  * handled with a simple send-like function
227  */
228 #define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
229 #define VALID_NMHANDLE(t)                      \
230 	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
231 	 atomic_load(&(t)->references) > 0)
232 
233 typedef void (*isc__nm_closecb)(isc_nmhandle_t *);
234 
235 struct isc_nmhandle {
236 	int magic;
237 	isc_refcount_t references;
238 
239 	/*
240 	 * The socket is not 'attached' in the traditional
241 	 * reference-counting sense. Instead, we keep all handles in an
242 	 * array in the socket object.  This way, we don't have circular
243 	 * dependencies and we can close all handles when we're destroying
244 	 * the socket.
245 	 */
246 	isc_nmsocket_t *sock;
247 
248 	isc_sockaddr_t peer;
249 	isc_sockaddr_t local;
250 	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
251 	isc_nm_opaquecb_t dofree;  /* free extra callback, external */
252 #ifdef NETMGR_TRACE
253 	void *backtrace[TRACE_SIZE];
254 	int backtrace_size;
255 	LINK(isc_nmhandle_t) active_link;
256 #endif
257 	void *opaque;
258 	char extra[];
259 };
260 
/*
 * Event type identifiers; values greater than netievent_prio are
 * treated as high priority (see the comment inside the enum).
 */
typedef enum isc__netievent_type {
	/* UDP events */
	netievent_udpconnect,
	netievent_udpclose,
	netievent_udpsend,
	netievent_udpread,
	netievent_udpcancel,

	/* TCP events */
	netievent_tcpconnect,
	netievent_tcpclose,
	netievent_tcpsend,
	netievent_tcpstartread,
	netievent_tcppauseread,
	netievent_tcpaccept,
	netievent_tcpcancel,

	/* TCPDNS (DNS-over-TCP framing) events */
	netievent_tcpdnsaccept,
	netievent_tcpdnsconnect,
	netievent_tcpdnsclose,
	netievent_tcpdnssend,
	netievent_tcpdnsread,
	netievent_tcpdnscancel,

	/* Manager lifecycle events */
	netievent_shutdown,
	netievent_stop,
	netievent_pause,

	/* Callback-dispatch events */
	netievent_connectcb,
	netievent_readcb,
	netievent_sendcb,

	/* Task events */
	netievent_task,
	netievent_privilegedtask,

	/*
	 * event type values higher than this will be treated
	 * as high-priority events, which can be processed
	 * while the netmgr is pausing or paused.
	 */
	netievent_prio = 0xff,

	netievent_udplisten,
	netievent_udpstop,
	netievent_tcplisten,
	netievent_tcpstop,
	netievent_tcpdnslisten,
	netievent_tcpdnsstop,

	netievent_resume,
	netievent_detach,
	netievent_close,
} isc__netievent_type;

/*
 * Union of the callback flavors a uvreq can carry; which member is
 * valid depends on the operation the request was created for.
 */
typedef union {
	isc_nm_recv_cb_t recv;
	isc_nm_cb_t send;
	isc_nm_cb_t connect;
	isc_nm_accept_cb_t accept;
} isc__nm_cb_t;
319 
320 /*
321  * Wrapper around uv_req_t with 'our' fields in it.  req->data should
322  * always point to its parent.  Note that we always allocate more than
323  * sizeof(struct) because we make room for different req types;
324  */
325 #define UVREQ_MAGIC    ISC_MAGIC('N', 'M', 'U', 'R')
326 #define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)
327 
328 struct isc__nm_uvreq {
329 	int magic;
330 	isc_nmsocket_t *sock;
331 	isc_nmhandle_t *handle;
332 	char tcplen[2];	       /* The TCP DNS message length */
333 	uv_buf_t uvbuf;	       /* translated isc_region_t, to be
334 				* sent or received */
335 	isc_sockaddr_t local;  /* local address */
336 	isc_sockaddr_t peer;   /* peer address */
337 	isc__nm_cb_t cb;       /* callback */
338 	void *cbarg;	       /* callback argument */
339 	isc_nm_timer_t *timer; /* TCP write timer */
340 
341 	union {
342 		uv_handle_t handle;
343 		uv_req_t req;
344 		uv_getaddrinfo_t getaddrinfo;
345 		uv_getnameinfo_t getnameinfo;
346 		uv_shutdown_t shutdown;
347 		uv_write_t write;
348 		uv_connect_t connect;
349 		uv_udp_send_t udp_send;
350 		uv_fs_t fs;
351 		uv_work_t work;
352 	} uv_req;
353 	ISC_LINK(isc__nm_uvreq_t) link;
354 };
355 
356 struct isc_nm_timer {
357 	isc_refcount_t references;
358 	uv_timer_t timer;
359 	isc_nmhandle_t *handle;
360 	isc_nm_timer_cb cb;
361 	void *cbarg;
362 };
363 
void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
 */
void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);
/*%<
 * Release an ievent previously obtained with isc__nm_get_netievent().
 */
371 
372 /*
373  * The macros here are used to simulate the "inheritance" in C, there's the base
374  * netievent structure that contains just its own type and socket, and there are
375  * extended netievent types that also have handles or requests or other data.
376  *
377  * The macros here ensure that:
378  *
379  *   1. every netievent type has matching definition, declaration and
380  *      implementation
381  *
382  *   2. we handle all the netievent types of same subclass the same, e.g. if the
383  *      extended netievent contains handle, we always attach to the handle in
384  *      the ctor and detach from the handle in dtor.
385  *
386  * There are three macros here for each netievent subclass:
387  *
388  *   1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
389  *   this header
390  *
391  *   2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
392  *      functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
393  *      below in this header
394  *
395  *   3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
396  *   either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
397  */
398 
/*
 * Base fields shared by every socket-bearing netievent; file/line/func
 * record the enqueueing call site for NETMGR_TRACE diagnostics.
 */
#define NETIEVENT__SOCKET                \
	isc__netievent_type type;        \
	ISC_LINK(isc__netievent_t) link; \
	isc_nmsocket_t *sock;            \
	const char *file;                \
	unsigned int line;               \
	const char *func

typedef struct isc__netievent__socket {
	NETIEVENT__SOCKET;
} isc__netievent__socket_t;

/* Typedef for a socket-only netievent subtype. */
#define NETIEVENT_SOCKET_TYPE(type) \
	typedef isc__netievent__socket_t isc__netievent_##type##_t

/* Declarations of the matching get/put functions. */
#define NETIEVENT_SOCKET_DECL(type)                              \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_nmsocket_t *sock);             \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent)

/*
 * Definitions of the get/put functions: the ctor attaches to the
 * socket, the dtor detaches from it before freeing the event.
 */
#define NETIEVENT_SOCKET_DEF(type)                                             \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock) {                          \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
435 
/* Socket netievent extended with a uvreq pointer (not attached). */
typedef struct isc__netievent__socket_req {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
} isc__netievent__socket_req_t;

#define NETIEVENT_SOCKET_REQ_TYPE(type) \
	typedef isc__netievent__socket_req_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_DECL(type)                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(           \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                    \
					  isc__netievent_##type##_t *ievent)

/*
 * The ctor attaches to the socket and stores the raw req pointer;
 * the dtor only detaches from the socket (req ownership stays with
 * the caller).
 */
#define NETIEVENT_SOCKET_REQ_DEF(type)                                         \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {    \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
466 
/* Socket netievent with a uvreq pointer plus an operation result. */
typedef struct isc__netievent__socket_req_result {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
	isc_result_t result;
} isc__netievent__socket_req_result_t;

#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
	typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type)                            \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(          \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
		isc_result_t result);                                     \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                   \
					  isc__netievent_##type##_t *ievent)

/*
 * As NETIEVENT_SOCKET_REQ_DEF, but the ctor also stores the result
 * to be delivered to the callback.
 */
#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type)                                  \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req,      \
		isc_result_t result) {                                         \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
		ievent->result = result;                                       \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
501 
/* Socket netievent extended with an attached nmhandle. */
typedef struct isc__netievent__socket_handle {
	NETIEVENT__SOCKET;
	isc_nmhandle_t *handle;
} isc__netievent__socket_handle_t;

#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
	typedef isc__netievent__socket_handle_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_HANDLE_DECL(type)                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(             \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                      \
					  isc__netievent_##type##_t *ievent)

/*
 * The ctor attaches to both the socket and the handle; the dtor
 * detaches from both, keeping attach/detach strictly paired.
 */
#define NETIEVENT_SOCKET_HANDLE_DEF(type)                                      \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) {  \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		isc_nmhandle_attach(handle, &ievent->handle);                  \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc_nmhandle_detach(&ievent->handle);                          \
		isc__nm_put_netievent(nm, ievent);                             \
	}
533 
/* Socket netievent extended with a quota pointer (not attached). */
typedef struct isc__netievent__socket_quota {
	NETIEVENT__SOCKET;
	isc_quota_t *quota;
} isc__netievent__socket_quota_t;

#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
	typedef isc__netievent__socket_quota_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_QUOTA_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(         \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                  \
					  isc__netievent_##type##_t *ievent)

/*
 * The ctor attaches to the socket and stores the raw quota pointer;
 * quota ownership is not transferred to the event.
 */
#define NETIEVENT_SOCKET_QUOTA_DEF(type)                                       \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) {      \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->quota = quota;                                         \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
564 
/* Netievent carrying a task instead of a socket. */
typedef struct isc__netievent__task {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
	isc_task_t *task;
} isc__netievent__task_t;

/*
 * NOTE: unlike the other *_TYPE/*_DECL macros in this file, these two
 * include the trailing semicolon in the expansion, so call sites do
 * not add one.
 */
#define NETIEVENT_TASK_TYPE(type) \
	typedef isc__netievent__task_t isc__netievent_##type##_t;

#define NETIEVENT_TASK_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_task_t *task);                 \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent);

/*
 * The task pointer is stored raw in the ctor and cleared in the dtor;
 * no attach/detach is performed on it here.
 */
#define NETIEVENT_TASK_DEF(type)                                               \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_task_t *task) {                              \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		ievent->task = task;                                           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		ievent->task = NULL;                                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* UDP send event: socket base plus peer address and request. */
typedef struct isc__netievent_udpsend {
	NETIEVENT__SOCKET;
	isc_sockaddr_t peer;
	isc__nm_uvreq_t *req;
} isc__netievent_udpsend_t;

/* Base netievent: type tag and queue linkage only. */
struct isc__netievent {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
};
606 
/* Plain netievent (no payload beyond the base fields). */
#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t

#define NETIEVENT_DECL(type)                                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_DEF(type)                                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm) {                                                \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/*
 * Union sized to hold any netievent subtype; used when allocating
 * event storage generically.
 */
typedef union {
	isc__netievent_t ni;
	isc__netievent__socket_t nis;
	isc__netievent__socket_req_t nisr;
	isc__netievent_udpsend_t nius;
	isc__netievent__socket_quota_t nisq;
} isc__netievent_storage_t;

/*
 * Work item for a uv_work threadpool.
 */
typedef struct isc__nm_work {
	isc_nm_t *netmgr;
	uv_work_t req;
	isc_nm_workcb_t cb;	       /* runs on the threadpool */
	isc_nm_after_workcb_t after_cb; /* runs after cb completes */
	void *data;
} isc__nm_work_t;
646 
647 /*
648  * Network manager
649  */
650 #define NM_MAGIC    ISC_MAGIC('N', 'E', 'T', 'M')
651 #define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)
652 
653 struct isc_nm {
654 	int magic;
655 	isc_refcount_t references;
656 	isc_mem_t *mctx;
657 	int nworkers;
658 	isc_mutex_t lock;
659 	isc_condition_t wkstatecond;
660 	isc_condition_t wkpausecond;
661 	isc__networker_t *workers;
662 
663 	isc_stats_t *stats;
664 
665 	uint_fast32_t workers_running;
666 	atomic_uint_fast32_t workers_paused;
667 	atomic_uint_fast32_t maxudp;
668 
669 	bool load_balance_sockets;
670 
671 	atomic_bool paused;
672 
673 	/*
674 	 * Active connections are being closed and new connections are
675 	 * no longer allowed.
676 	 */
677 	atomic_bool closing;
678 
679 	/*
680 	 * A worker is actively waiting for other workers, for example to
681 	 * stop listening; that means no other thread can do the same thing
682 	 * or pause, or we'll deadlock. We have to either re-enqueue our
683 	 * event or wait for the other one to finish if we want to pause.
684 	 */
685 	atomic_int interlocked;
686 
687 	/*
688 	 * Timeout values for TCP connections, corresponding to
689 	 * tcp-intiial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
690 	 * and tcp-advertised-timeout. Note that these are stored in
691 	 * milliseconds so they can be used directly with the libuv timer,
692 	 * but they are configured in tenths of seconds.
693 	 */
694 	atomic_uint_fast32_t init;
695 	atomic_uint_fast32_t idle;
696 	atomic_uint_fast32_t keepalive;
697 	atomic_uint_fast32_t advertised;
698 
699 	isc_barrier_t pausing;
700 	isc_barrier_t resuming;
701 
702 #ifdef NETMGR_TRACE
703 	ISC_LIST(isc_nmsocket_t) active_sockets;
704 #endif
705 };
706 
/* Socket and listener flavors handled by the netmgr. */
typedef enum isc_nmsocket_type {
	isc_nm_udpsocket,
	isc_nm_udplistener, /* Aggregate of nm_udpsocks */
	isc_nm_tcpsocket,
	isc_nm_tcplistener,
	isc_nm_tcpdnslistener,
	isc_nm_tcpdnssocket,
} isc_nmsocket_type;

/*%
 * A universal structure for either a single socket or a group of
 * dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
 */
#define NMSOCK_MAGIC	ISC_MAGIC('N', 'M', 'S', 'K')
#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)

/*%
 * Index into socket stat counter arrays.
 */
enum {
	STATID_OPEN = 0,
	STATID_OPENFAIL = 1,
	STATID_CLOSE = 2,
	STATID_BINDFAIL = 3,
	STATID_CONNECTFAIL = 4,
	STATID_CONNECT = 5,
	STATID_ACCEPTFAIL = 6,
	STATID_ACCEPT = 7,
	STATID_SENDFAIL = 8,
	STATID_RECVFAIL = 9,
	STATID_ACTIVE = 10
};

typedef void (*isc_nm_closehandlecb_t)(void *arg);
/*%<
 * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
 * callbacks.
 */
745 
746 struct isc_nmsocket {
747 	/*% Unlocked, RO */
748 	int magic;
749 	int tid;
750 	isc_nmsocket_type type;
751 	isc_nm_t *mgr;
752 
753 	/*% Parent socket for multithreaded listeners */
754 	isc_nmsocket_t *parent;
755 	/*% Listener socket this connection was accepted on */
756 	isc_nmsocket_t *listener;
757 	/*% Self socket */
758 	isc_nmsocket_t *self;
759 
760 	isc_barrier_t startlistening;
761 	isc_barrier_t stoplistening;
762 
763 	/*%
764 	 * quota is the TCP client, attached when a TCP connection
765 	 * is established. pquota is a non-attached pointer to the
766 	 * TCP client quota, stored in listening sockets but only
767 	 * attached in connected sockets.
768 	 */
769 	isc_quota_t *quota;
770 	isc_quota_t *pquota;
771 	isc_quota_cb_t quotacb;
772 
773 	/*%
774 	 * Socket statistics
775 	 */
776 	const isc_statscounter_t *statsindex;
777 
778 	/*%
779 	 * TCP read/connect timeout timers.
780 	 */
781 	uv_timer_t read_timer;
782 	uint64_t read_timeout;
783 	uint64_t connect_timeout;
784 
785 	/*%
786 	 * TCP write timeout timer.
787 	 */
788 	uint64_t write_timeout;
789 
790 	/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
791 	isc_nmsocket_t *outer;
792 
793 	/*% server socket for connections */
794 	isc_nmsocket_t *server;
795 
796 	/*% Child sockets for multi-socket setups */
797 	isc_nmsocket_t *children;
798 	uint_fast32_t nchildren;
799 	isc_sockaddr_t iface;
800 	isc_nmhandle_t *statichandle;
801 	isc_nmhandle_t *outerhandle;
802 
803 	/*% Extra data allocated at the end of each isc_nmhandle_t */
804 	size_t extrahandlesize;
805 
806 	/*% TCP backlog */
807 	int backlog;
808 
809 	/*% libuv data */
810 	uv_os_sock_t fd;
811 	union uv_any_handle uv_handle;
812 
813 	/*% Peer address */
814 	isc_sockaddr_t peer;
815 
816 	/* Atomic */
817 	/*% Number of running (e.g. listening) child sockets */
818 	atomic_uint_fast32_t rchildren;
819 
820 	/*%
821 	 * Socket is active if it's listening, working, etc. If it's
822 	 * closing, then it doesn't make a sense, for example, to
823 	 * push handles or reqs for reuse.
824 	 */
825 	atomic_bool active;
826 	atomic_bool destroying;
827 
828 	/*%
829 	 * Socket is closed if it's not active and all the possible
830 	 * callbacks were fired, there are no active handles, etc.
831 	 * If active==false but closed==false, that means the socket
832 	 * is closing.
833 	 */
834 	atomic_bool closing;
835 	atomic_bool closed;
836 	atomic_bool listening;
837 	atomic_bool connecting;
838 	atomic_bool connected;
839 	bool accepting;
840 	bool reading;
841 	atomic_bool timedout;
842 	isc_refcount_t references;
843 
844 	/*%
845 	 * Established an outgoing connection, as client not server.
846 	 */
847 	atomic_bool client;
848 
849 	/*%
850 	 * TCPDNS socket has been set not to pipeline.
851 	 */
852 	atomic_bool sequential;
853 
854 	/*%
855 	 * The socket is processing read callback, this is guard to not read
856 	 * data before the readcb is back.
857 	 */
858 	bool processing;
859 
860 	/*%
861 	 * A TCP socket has had isc_nm_pauseread() called.
862 	 */
863 	atomic_bool readpaused;
864 
865 	/*%
866 	 * A TCP or TCPDNS socket has been set to use the keepalive
867 	 * timeout instead of the default idle timeout.
868 	 */
869 	atomic_bool keepalive;
870 
871 	/*%
872 	 * 'spare' handles for that can be reused to avoid allocations,
873 	 * for UDP.
874 	 */
875 	isc_astack_t *inactivehandles;
876 	isc_astack_t *inactivereqs;
877 
878 	/*%
879 	 * Used to wait for TCP listening events to complete, and
880 	 * for the number of running children to reach zero during
881 	 * shutdown.
882 	 *
883 	 * We use two condition variables to prevent the race where the netmgr
884 	 * threads would be able to finish and destroy the socket before it's
885 	 * unlocked by the isc_nm_listen<proto>() function.  So, the flow is as
886 	 * follows:
887 	 *
	 *   1. parent thread creates all children sockets and passes them to
	 *      netthreads, looks at the signaling variable and WAIT(cond) until
	 *      the children are done initializing
891 	 *
892 	 *   2. the events get picked by netthreads, calls the libuv API (and
893 	 *      either succeeds or fails) and WAIT(scond) until all other
894 	 *      children sockets in netthreads are initialized and the listening
895 	 *      socket lock is unlocked
896 	 *
	 *   3. the control is given back to the parent thread which now either
	 *      returns success or shuts down the listener if an error has
	 *      occurred in the children netthread
900 	 *
901 	 * NOTE: The other approach would be doing an extra attach to the parent
902 	 * listening socket, and then detach it in the parent thread, but that
903 	 * breaks the promise that once the libuv socket is initialized on the
904 	 * nmsocket, the nmsocket needs to be handled only by matching
905 	 * netthread, so in fact that would add a complexity in a way that
906 	 * isc__nmsocket_detach would have to be converted to use an
	 * asynchronous netievent.
908 	 */
909 	isc_mutex_t lock;
910 	isc_condition_t cond;
911 	isc_condition_t scond;
912 
913 	/*%
914 	 * Used to pass a result back from listen or connect events.
915 	 */
916 	isc_result_t result;
917 
918 	/*%
919 	 * Current number of active handles.
920 	 */
921 	atomic_int_fast32_t ah;
922 
923 	/*% Buffer for TCPDNS processing */
924 	size_t buf_size;
925 	size_t buf_len;
926 	unsigned char *buf;
927 
928 	/*%
929 	 * This function will be called with handle->sock
930 	 * as the argument whenever a handle's references drop
931 	 * to zero, after its reset callback has been called.
932 	 */
933 	isc_nm_closehandlecb_t closehandle_cb;
934 
935 	isc_nmhandle_t *recv_handle;
936 	isc_nm_recv_cb_t recv_cb;
937 	void *recv_cbarg;
938 	bool recv_read;
939 
940 	isc_nm_cb_t connect_cb;
941 	void *connect_cbarg;
942 
943 	isc_nm_accept_cb_t accept_cb;
944 	void *accept_cbarg;
945 
946 	atomic_int_fast32_t active_child_connections;
947 
948 #ifdef NETMGR_TRACE
949 	void *backtrace[TRACE_SIZE];
950 	int backtrace_size;
951 	LINK(isc_nmsocket_t) active_link;
952 	ISC_LIST(isc_nmhandle_t) active_handles;
953 #endif
954 };
955 
956 bool
957 isc__nm_in_netthread(void);
958 /*%
959  * Returns 'true' if we're in the network thread.
960  */
961 
962 void
963 isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
964 /*%<
965  * If the caller is already in the matching nmthread, process the netievent
966  * directly, if not enqueue using isc__nm_enqueue_ievent().
967  */
968 
969 void
970 isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
971 /*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only safe
973  * way to use an isc__networker_t from another thread.)
974  */
975 
976 void
977 isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
978 /*%<
979  * Free a buffer allocated for a receive operation.
980  *
981  * Note that as currently implemented, this doesn't actually
982  * free anything, marks the isc__networker's UDP receive buffer
983  * as "not in use".
984  */
985 
986 isc_nmhandle_t *
987 isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
988 		   isc_sockaddr_t *local FLARG);
989 /*%<
990  * Get a handle for the socket 'sock', allocating a new one
991  * if there isn't one available in 'sock->inactivehandles'.
992  *
993  * If 'peer' is not NULL, set the handle's peer address to 'peer',
994  * otherwise set it to 'sock->peer'.
995  *
996  * If 'local' is not NULL, set the handle's local address to 'local',
997  * otherwise set it to 'sock->iface->addr'.
998  *
999  * 'sock' will be attached to 'handle->sock'. The caller may need
1000  * to detach the socket afterward.
1001  */
1002 
1003 isc__nm_uvreq_t *
1004 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
1005 /*%<
1006  * Get a UV request structure for the socket 'sock', allocating a
1007  * new one if there isn't one available in 'sock->inactivereqs'.
1008  */
1009 
1010 void
1011 isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
1012 /*%<
1013  * Completes the use of a UV request structure, setting '*req' to NULL.
1014  *
1015  * The UV request is pushed onto the 'sock->inactivereqs' stack or,
1016  * if that doesn't work, freed.
1017  */
1018 
1019 void
1020 isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
1021 		    isc_sockaddr_t *iface FLARG);
1022 /*%<
1023  * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
1024  * and its interface to 'iface'.
1025  */
1026 
1027 void
1028 isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
1029 /*%<
1030  * Attach to a socket, increasing refcount
1031  */
1032 
1033 void
1034 isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
1035 /*%<
1036  * Detach from socket, decreasing refcount and possibly destroying the
1037  * socket if it's no longer referenced.
1038  */
1039 
1040 void
1041 isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
1042 /*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
1044  * if there are no remaining references or active handles.
1045  */
1046 
1047 void
1048 isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1049 /*%<
1050  * Initiate the socket shutdown which actively calls the active
1051  * callbacks.
1052  */
1053 
1054 bool
1055 isc__nmsocket_active(isc_nmsocket_t *sock);
1056 /*%<
1057  * Determine whether 'sock' is active by checking 'sock->active'
1058  * or, for child sockets, 'sock->parent->active'.
1059  */
1060 
1061 bool
1062 isc__nmsocket_deactivate(isc_nmsocket_t *sock);
1063 /*%<
1064  * @brief Deactivate active socket
1065  *
 * Atomically deactivate the socket by setting @p sock->active or, for child
1067  * sockets, @p sock->parent->active to @c false
1068  *
1069  * @param[in] sock - valid nmsocket
1070  * @return @c false if the socket was already inactive, @c true otherwise
1071  */
1072 
1073 void
1074 isc__nmsocket_clearcb(isc_nmsocket_t *sock);
1075 /*%<
1076  * Clear the recv and accept callbacks in 'sock'.
1077  */
1078 
1079 void
1080 isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
1081 void
1082 isc__nmsocket_timer_start(isc_nmsocket_t *sock);
1083 void
1084 isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
1085 bool
1086 isc__nmsocket_timer_running(isc_nmsocket_t *sock);
1087 /*%<
1088  * Start/stop/restart/check the timeout on the socket
1089  */
1090 
1091 void
1092 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1093 		  isc_result_t eresult, bool async);
1094 
1095 void
1096 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
1097 /*%<
1098  * Issue a connect callback on the socket, used to call the callback
1099  */
1100 
1101 void
1102 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1103 	       isc_result_t eresult);
1104 void
1105 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
1106 
1107 /*%<
1108  * Issue a read callback on the socket, used to call the callback
1109  * on failed conditions when the event can't be scheduled on the uv loop.
1110  *
1111  */
1112 
1113 void
1114 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1115 	       isc_result_t eresult, bool async);
1116 void
1117 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
1118 /*%<
1119  * Issue a write callback on the socket, used to call the callback
1120  * on failed conditions when the event can't be scheduled on the uv loop.
1121  */
1122 
1123 void
1124 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
1125 /*%<
1126  * Walk through all uv handles, get the underlying sockets and issue
1127  * close on them.
1128  */
1129 
1130 void
1131 isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1132 		 isc_nm_cb_t cb, void *cbarg);
1133 /*%<
1134  * Back-end implementation of isc_nm_send() for UDP handles.
1135  */
1136 
1137 void
1138 isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1139 /*
1140  * Back-end implementation of isc_nm_read() for UDP handles.
1141  */
1142 
1143 void
1144 isc__nm_udp_close(isc_nmsocket_t *sock);
1145 /*%<
1146  * Close a UDP socket.
1147  */
1148 
1149 void
1150 isc__nm_udp_cancelread(isc_nmhandle_t *handle);
1151 /*%<
1152  * Stop reading on a connected UDP handle.
1153  */
1154 
1155 void
1156 isc__nm_udp_shutdown(isc_nmsocket_t *sock);
1157 /*%<
1158  * Called during the shutdown process to close and clean up connected
1159  * sockets.
1160  */
1161 
1162 void
1163 isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
1164 /*%<
1165  * Stop listening on 'sock'.
1166  */
1167 
1168 void
1169 isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1170 /*%<
1171  * Set or clear the recv timeout for the UDP socket associated with 'handle'.
1172  */
1173 
1174 void
1175 isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1176 void
1177 isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1178 void
1179 isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1180 void
1181 isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1182 void
1183 isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
1184 void
1185 isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1186 void
1187 isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1188 /*%<
1189  * Callback handlers for asynchronous UDP events (listen, stoplisten, send).
1190  */
1191 
1192 void
1193 isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1194 		 isc_nm_cb_t cb, void *cbarg);
1195 /*%<
1196  * Back-end implementation of isc_nm_send() for TCP handles.
1197  */
1198 
1199 void
1200 isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1201 /*
1202  * Back-end implementation of isc_nm_read() for TCP handles.
1203  */
1204 
1205 void
1206 isc__nm_tcp_close(isc_nmsocket_t *sock);
1207 /*%<
1208  * Close a TCP socket.
1209  */
1210 void
1211 isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
1212 /*%<
1213  * Pause reading on this handle, while still remembering the callback.
1214  */
1215 
1216 void
1217 isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
1218 /*%<
1219  * Resume reading from socket.
1220  *
1221  */
1222 
1223 void
1224 isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
1225 /*%<
1226  * Called during the shutdown process to close and clean up connected
1227  * sockets.
1228  */
1229 
1230 void
1231 isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
1232 /*%<
1233  * Stop reading on a connected TCP handle.
1234  */
1235 
1236 void
1237 isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
1238 /*%<
1239  * Stop listening on 'sock'.
1240  */
1241 
1242 int_fast32_t
1243 isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
1244 /*%<
1245  * Returns the number of active connections for the TCP listener socket.
1246  */
1247 
1248 void
1249 isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1250 /*%<
1251  * Set the read timeout for the TCP socket associated with 'handle'.
1252  */
1253 
1254 void
1255 isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1256 void
1257 isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1258 void
1259 isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1260 void
1261 isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1262 void
1263 isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1264 void
1265 isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
1266 void
1267 isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1268 void
1269 isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
1270 void
1271 isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1272 void
1273 isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1274 void
1275 isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1276 /*%<
1277  * Callback handlers for asynchronous TCP events (connect, listen,
1278  * stoplisten, send, read, pause, close).
1279  */
1280 
1281 void
1282 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1283 void
1284 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1285 void
1286 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1287 
1288 void
1289 isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
1290 		    isc_nm_cb_t cb, void *cbarg);
1291 /*%<
1292  * Back-end implementation of isc_nm_send() for TCPDNS handles.
1293  */
1294 
1295 void
1296 isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);
1297 
1298 void
1299 isc__nm_tcpdns_close(isc_nmsocket_t *sock);
1300 /*%<
1301  * Close a TCPDNS socket.
1302  */
1303 
1304 void
1305 isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
1306 /*%<
1307  * Stop listening on 'sock'.
1308  */
1309 
1310 void
1311 isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1312 /*%<
1313  * Set the read timeout and reset the timer for the TCPDNS socket
1314  * associated with 'handle', and the TCP socket it wraps around.
1315  */
1316 
1317 void
1318 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1319 void
1320 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1321 void
1322 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1323 void
1324 isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
1325 void
1326 isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
1327 void
1328 isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
1329 void
1330 isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
1331 void
1332 isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
1333 /*%<
1334  * Callback handlers for asynchronous TCPDNS events.
1335  */
1336 
1337 void
1338 isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1339 /*
1340  * Back-end implementation of isc_nm_read() for TCPDNS handles.
1341  */
1342 
1343 void
1344 isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
1345 /*%<
1346  * Stop reading on a connected TCPDNS handle.
1347  */
1348 
1349 #define isc__nm_uverr2result(x) \
1350 	isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
1351 isc_result_t
1352 isc___nm_uverr2result(int uverr, bool dolog, const char *file,
1353 		      unsigned int line, const char *func);
1354 /*%<
1355  * Convert a libuv error value into an isc_result_t.  The
1356  * list of supported error values is not complete; new users
1357  * of this function should add any expected errors that are
1358  * not already there.
1359  */
1360 
1361 bool
1362 isc__nm_acquire_interlocked(isc_nm_t *mgr);
1363 /*%<
1364  * Try to acquire interlocked state; return true if successful.
1365  */
1366 
1367 void
1368 isc__nm_drop_interlocked(isc_nm_t *mgr);
1369 /*%<
1370  * Drop interlocked state; signal waiters.
1371  */
1372 
1373 void
1374 isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
1375 /*%<
1376  * Actively wait for interlocked state.
1377  */
1378 
1379 void
1380 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1381 /*%<
1382  * Increment socket-related statistics counters.
1383  */
1384 
1385 void
1386 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1387 /*%<
1388  * Decrement socket-related statistics counters.
1389  */
1390 
1391 isc_result_t
1392 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
1393 /*%<
1394  * Platform independent socket() version
1395  */
1396 
1397 void
1398 isc__nm_closesocket(uv_os_sock_t sock);
1399 /*%<
1400  * Platform independent closesocket() version
1401  */
1402 
1403 isc_result_t
1404 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
1405 /*%<
1406  * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
1407  */
1408 
1409 isc_result_t
1410 isc__nm_socket_reuse(uv_os_sock_t fd);
1411 /*%<
1412  * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
1413  */
1414 
1415 isc_result_t
1416 isc__nm_socket_reuse_lb(uv_os_sock_t fd);
1417 /*%<
1418  * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
1419  */
1420 
1421 isc_result_t
1422 isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
1423 /*%<
1424  * Set the SO_INCOMING_CPU socket option on the fd if available
1425  */
1426 
1427 isc_result_t
1428 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
1429 /*%<
1430  * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket
1431  * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT
1432  */
1433 
1434 isc_result_t
1435 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
1436 /*%<
1437  * Set the connection timeout in milliseconds, on non-Linux platforms,
1438  * the minimum value must be at least 1000 (1 second).
1439  */
1440 
1441 isc_result_t
1442 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
1443 /*%<
1444  * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
1445  */
1446 
1447 /*
1448  * typedef all the netievent types
1449  */
1450 
1451 NETIEVENT_SOCKET_TYPE(close);
1452 NETIEVENT_SOCKET_TYPE(tcpclose);
1453 NETIEVENT_SOCKET_TYPE(tcplisten);
1454 NETIEVENT_SOCKET_TYPE(tcppauseread);
1455 NETIEVENT_SOCKET_TYPE(tcpstop);
1456 NETIEVENT_SOCKET_TYPE(udpclose);
1457 NETIEVENT_SOCKET_TYPE(udplisten);
1458 NETIEVENT_SOCKET_TYPE(udpread);
1459 /* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
1460 NETIEVENT_SOCKET_TYPE(udpstop);
1461 
1462 NETIEVENT_SOCKET_TYPE(tcpdnsclose);
1463 NETIEVENT_SOCKET_TYPE(tcpdnsread);
1464 NETIEVENT_SOCKET_TYPE(tcpdnsstop);
1465 NETIEVENT_SOCKET_TYPE(tcpdnslisten);
1466 NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
1467 NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
1468 NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
1469 NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);
1470 
1471 NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
1472 NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
1473 NETIEVENT_SOCKET_TYPE(tcpstartread);
1474 NETIEVENT_SOCKET_REQ_TYPE(udpconnect);
1475 
1476 NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
1477 NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
1478 NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);
1479 
1480 NETIEVENT_SOCKET_HANDLE_TYPE(detach);
1481 NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
1482 NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);
1483 
1484 NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);
1485 
1486 NETIEVENT_TYPE(pause);
1487 NETIEVENT_TYPE(resume);
1488 NETIEVENT_TYPE(shutdown);
1489 NETIEVENT_TYPE(stop);
1490 
1491 NETIEVENT_TASK_TYPE(task);
1492 NETIEVENT_TASK_TYPE(privilegedtask);
1493 
/* Now declare the helper functions */
1495 
1496 NETIEVENT_SOCKET_DECL(close);
1497 NETIEVENT_SOCKET_DECL(tcpclose);
1498 NETIEVENT_SOCKET_DECL(tcplisten);
1499 NETIEVENT_SOCKET_DECL(tcppauseread);
1500 NETIEVENT_SOCKET_DECL(tcpstartread);
1501 NETIEVENT_SOCKET_DECL(tcpstop);
1502 NETIEVENT_SOCKET_DECL(udpclose);
1503 NETIEVENT_SOCKET_DECL(udplisten);
1504 NETIEVENT_SOCKET_DECL(udpread);
1505 NETIEVENT_SOCKET_DECL(udpsend);
1506 NETIEVENT_SOCKET_DECL(udpstop);
1507 
1508 NETIEVENT_SOCKET_DECL(tcpdnsclose);
1509 NETIEVENT_SOCKET_DECL(tcpdnsread);
1510 NETIEVENT_SOCKET_DECL(tcpdnsstop);
1511 NETIEVENT_SOCKET_DECL(tcpdnslisten);
1512 NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
1513 NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
1514 NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
1515 NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);
1516 
1517 NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
1518 NETIEVENT_SOCKET_REQ_DECL(tcpsend);
1519 NETIEVENT_SOCKET_REQ_DECL(udpconnect);
1520 
1521 NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
1522 NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
1523 NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);
1524 
1525 NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
1526 NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
1527 NETIEVENT_SOCKET_DECL(detach);
1528 
1529 NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);
1530 
1531 NETIEVENT_DECL(pause);
1532 NETIEVENT_DECL(resume);
1533 NETIEVENT_DECL(shutdown);
1534 NETIEVENT_DECL(stop);
1535 
1536 NETIEVENT_TASK_DECL(task);
1537 NETIEVENT_TASK_DECL(privilegedtask);
1538 
void
isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
/*%<
 * Protocol-specific handlers for a failed read on 'sock'; 'result'
 * carries the reason for the failure.
 */

isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);
/*%<
 * Process data accumulated in the TCPDNS socket buffer.
 * (NOTE(review): exact framing semantics live in the TCPDNS
 * implementation — confirm there.)
 */

isc__nm_uvreq_t *
isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);
/*%<
 * Obtain a UV request to be used for a read on 'sock'.
 */

void
isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
/*%<
 * Buffer-allocation callback handed to libuv for receive operations.
 */

void
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
		    const struct sockaddr *addr, unsigned flags);
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
/*%<
 * Per-protocol libuv read callbacks.
 */

isc_result_t
isc__nm_start_reading(isc_nmsocket_t *sock);
void
isc__nm_stop_reading(isc_nmsocket_t *sock);
isc_result_t
isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
void
isc__nm_resume_processing(void *arg);
bool
isc__nmsocket_closing(isc_nmsocket_t *sock);
bool
isc__nm_closing(isc_nmsocket_t *sock);
/*%<
 * Internal read-processing and shutdown/state-query helpers shared by
 * the protocol back-ends.
 */

void
isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);
/*%<
 * Make the TCPDNS processing buffer hold at least 'len' bytes.
 * (NOTE(review): growth/allocation policy is defined in the
 * implementation — confirm there.)
 */

void
isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
		       isc_result_t eresult);
void
isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
void
isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
			  isc_result_t eresult, bool async);
void
isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);
/*%<
 * Failure handlers for send/accept/connect/read, reporting
 * 'eresult'/'result' to the relevant callback.
 */

void
isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota);
/*%<
 * Log the result of accepting a connection; 'can_log_quota'
 * presumably gates quota-related messages — confirm in the caller.
 */

/*
 * Timeout callbacks
 */
void
isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_readtimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult);
1602 
1603 /*%<
1604  *
1605  * Maximum number of simultaneous handles in flight supported for a single
1606  * connected TCPDNS socket. This value was chosen arbitrarily, and may be
1607  * changed in the future.
1608  */
1609 #define STREAM_CLIENTS_PER_CONN 23
1610 
/*
 * Abort with a fatal error when a libuv call fails.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement (safe in
 * unbraced if/else), and 'ret' is evaluated exactly once.
 */
#define UV_RUNTIME_CHECK(func, ret)                                            \
	do {                                                                   \
		const int _uvret = (ret);                                      \
		if (_uvret != 0) {                                             \
			isc_error_fatal(__FILE__, __LINE__, "%s failed: %s\n", \
					#func, uv_strerror(_uvret));           \
		}                                                              \
	} while (0)
1616