1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * SPDX-License-Identifier: MPL-2.0 5 * 6 * This Source Code Form is subject to the terms of the Mozilla Public 7 * License, v. 2.0. If a copy of the MPL was not distributed with this 8 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 9 * 10 * See the COPYRIGHT file distributed with this work for additional 11 * information regarding copyright ownership. 12 */ 13 14 #pragma once 15 16 #include <unistd.h> 17 #include <uv.h> 18 19 #include <openssl/err.h> 20 #include <openssl/ssl.h> 21 22 #include <isc/astack.h> 23 #include <isc/atomic.h> 24 #include <isc/barrier.h> 25 #include <isc/buffer.h> 26 #include <isc/condition.h> 27 #include <isc/magic.h> 28 #include <isc/mem.h> 29 #include <isc/netmgr.h> 30 #include <isc/queue.h> 31 #include <isc/quota.h> 32 #include <isc/random.h> 33 #include <isc/refcount.h> 34 #include <isc/region.h> 35 #include <isc/result.h> 36 #include <isc/rwlock.h> 37 #include <isc/sockaddr.h> 38 #include <isc/stats.h> 39 #include <isc/thread.h> 40 #include <isc/util.h> 41 42 #include "uv-compat.h" 43 44 #define ISC_NETMGR_TID_UNKNOWN -1 45 46 /* Must be different from ISC_NETMGR_TID_UNKNOWN */ 47 #define ISC_NETMGR_NON_INTERLOCKED -2 48 49 /* 50 * Receive buffers 51 */ 52 #if HAVE_DECL_UV_UDP_MMSG_CHUNK 53 /* 54 * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, 55 * libuv will not receive more that 20 datagrams in a single recvmmsg call. 56 */ 57 #define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX) 58 #else 59 /* 60 * A single DNS message size 61 */ 62 #define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX 63 #endif 64 65 /* 66 * The TCP receive buffer can fit one maximum sized DNS message plus its size, 67 * the receive buffer here affects TCP, DoT and DoH. 
68 */ 69 #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) 70 71 /* Pick the larger buffer */ 72 #define ISC_NETMGR_RECVBUF_SIZE \ 73 (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \ 74 ? ISC_NETMGR_UDP_RECVBUF_SIZE \ 75 : ISC_NETMGR_TCP_RECVBUF_SIZE) 76 77 /* 78 * Send buffer 79 */ 80 #define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) 81 82 /*% 83 * Regular TCP buffer size. 84 */ 85 #define NM_REG_BUF 4096 86 87 /*% 88 * Larger buffer for when the regular one isn't enough; this will 89 * hold two full DNS packets with lengths. netmgr receives 64k at 90 * most in TCPDNS connections, so there's no risk of overrun 91 * when using a buffer this size. 92 */ 93 #define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2 94 95 #if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__)) 96 #define HAVE_SO_REUSEPORT_LB 1 97 #endif 98 99 /* 100 * Define NETMGR_TRACE to activate tracing of handles and sockets. 101 * This will impair performance but enables us to quickly determine, 102 * if netmgr resources haven't been cleaned up on shutdown, which ones 103 * are still in use. 104 */ 105 #ifdef NETMGR_TRACE 106 #define TRACE_SIZE 8 107 108 void 109 isc__nm_dump_active(isc_nm_t *nm); 110 111 #if defined(__linux__) 112 #include <syscall.h> 113 #define gettid() (uint32_t) syscall(SYS_gettid) 114 #elif defined(_WIN32) 115 #define gettid() (uint32_t) GetCurrentThreadId() 116 #else 117 #define gettid() (uint32_t) pthread_self() 118 #endif 119 120 #ifdef NETMGR_TRACE_VERBOSE 121 #define NETMGR_TRACE_LOG(format, ...) \ 122 fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \ 123 isc_nm_tid(), file, line, func, __VA_ARGS__) 124 #else 125 #define NETMGR_TRACE_LOG(format, ...) 
\ 126 (void)file; \ 127 (void)line; \ 128 (void)func; 129 #endif 130 131 #define FLARG_PASS , file, line, func 132 #define FLARG \ 133 , const char *file __attribute__((unused)), \ 134 unsigned int line __attribute__((unused)), \ 135 const char *func __attribute__((unused)) 136 #define FLARG_IEVENT(ievent) \ 137 const char *file = ievent->file; \ 138 unsigned int line = ievent->line; \ 139 const char *func = ievent->func; 140 #define FLARG_IEVENT_PASS(ievent) \ 141 ievent->file = file; \ 142 ievent->line = line; \ 143 ievent->func = func; 144 #define isc__nm_uvreq_get(req, sock) \ 145 isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__) 146 #define isc__nm_uvreq_put(req, sock) \ 147 isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__) 148 #define isc__nmsocket_init(sock, mgr, type, iface) \ 149 isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \ 150 __func__) 151 #define isc__nmsocket_put(sockp) \ 152 isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__) 153 #define isc__nmsocket_attach(sock, target) \ 154 isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__) 155 #define isc__nmsocket_detach(socketp) \ 156 isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__) 157 #define isc__nmsocket_close(socketp) \ 158 isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__) 159 #define isc__nmhandle_get(sock, peer, local) \ 160 isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__) 161 #define isc__nmsocket_prep_destroy(sock) \ 162 isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__) 163 #else 164 #define NETMGR_TRACE_LOG(format, ...) 
#define FLARG_PASS
#define FLARG
#define FLARG_IEVENT(ievent)
#define FLARG_IEVENT_PASS(ievent)
#define isc__nm_uvreq_get(mgr, sock) isc___nm_uvreq_get(mgr, sock)
#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
#define isc__nmsocket_init(sock, mgr, type, iface) \
	isc___nmsocket_init(sock, mgr, type, iface)
#define isc__nmsocket_put(sockp) isc___nmsocket_put(sockp)
#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
#define isc__nmsocket_detach(socketp) isc___nmsocket_detach(socketp)
#define isc__nmsocket_close(socketp) isc___nmsocket_close(socketp)
#define isc__nmhandle_get(sock, peer, local) \
	isc___nmhandle_get(sock, peer, local)
#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
#endif

/*
 * Queue types in the order of processing priority.
 * NETIEVENT_MAX is the number of queues and is used to size the
 * per-worker 'ievents' and 'nievents' arrays.
 */
typedef enum {
	NETIEVENT_PRIORITY = 0,
	NETIEVENT_PRIVILEGED = 1,
	NETIEVENT_TASK = 2,
	NETIEVENT_NORMAL = 3,
	NETIEVENT_MAX = 4,
} netievent_type_t;

/*
 * Single network event loop worker.
 */
typedef struct isc__networker {
	isc_nm_t *mgr;
	int id; /* thread id */
	uv_loop_t loop; /* libuv loop structure */
	uv_async_t async; /* async channel to send
			   * data to this networker */
	isc_mutex_t lock;
	bool paused;
	bool finished;
	isc_thread_t thread;
	/* One event queue (and its event counter) per netievent_type_t. */
	isc_queue_t *ievents[NETIEVENT_MAX];
	atomic_uint_fast32_t nievents[NETIEVENT_MAX];
	isc_condition_t cond_prio;

	isc_refcount_t references;
	atomic_int_fast64_t pktcount;
	char *recvbuf;
	char *sendbuf;
	bool recvbuf_inuse;
} isc__networker_t;

/*
 * A general handle for a connection bound to a networker.  For UDP
 * connections we have peer address here, so both TCP and UDP can be
 * handled with a simple send-like function.
 */
#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
#define VALID_NMHANDLE(t) \
	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
	 atomic_load(&(t)->references) > 0)

typedef void (*isc__nm_closecb)(isc_nmhandle_t *);

struct isc_nmhandle {
	int magic;
	isc_refcount_t references;

	/*
	 * The socket is not 'attached' in the traditional
	 * reference-counting sense. Instead, we keep all handles in an
	 * array in the socket object. This way, we don't have circular
	 * dependencies and we can close all handles when we're destroying
	 * the socket.
	 */
	isc_nmsocket_t *sock;

	isc_sockaddr_t peer;
	isc_sockaddr_t local;
	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
	isc_nm_opaquecb_t dofree; /* free extra callback, external */
#ifdef NETMGR_TRACE
	void *backtrace[TRACE_SIZE];
	int backtrace_size;
	LINK(isc_nmhandle_t) active_link;
#endif
	void *opaque;
	char extra[];
};

typedef enum isc__netievent_type {
	netievent_udpconnect,
	netievent_udpclose,
	netievent_udpsend,
	netievent_udpread,
	netievent_udpcancel,

	netievent_tcpconnect,
	netievent_tcpclose,
	netievent_tcpsend,
	netievent_tcpstartread,
	netievent_tcppauseread,
	netievent_tcpaccept,
	netievent_tcpcancel,

	netievent_tcpdnsaccept,
	netievent_tcpdnsconnect,
	netievent_tcpdnsclose,
	netievent_tcpdnssend,
	netievent_tcpdnsread,
	netievent_tcpdnscancel,

	netievent_shutdown,
	netievent_stop,
	netievent_pause,

	netievent_connectcb,
	netievent_readcb,
	netievent_sendcb,

	netievent_task,
	netievent_privilegedtask,

	/*
	 * event type values higher than this will be treated
	 * as high-priority events, which can be processed
	 * while the netmgr is pausing or paused.
	 */
	netievent_prio = 0xff,

	netievent_udplisten,
	netievent_udpstop,
	netievent_tcplisten,
	netievent_tcpstop,
	netievent_tcpdnslisten,
	netievent_tcpdnsstop,

	netievent_resume,
	netievent_detach,
	netievent_close,
} isc__netievent_type;

/* The callback stored in a uvreq; which member is valid depends on the use. */
typedef union {
	isc_nm_recv_cb_t recv;
	isc_nm_cb_t send;
	isc_nm_cb_t connect;
	isc_nm_accept_cb_t accept;
} isc__nm_cb_t;

/*
 * Wrapper around uv_req_t with 'our' fields in it.  req->data should
 * always point to its parent.  Note that we always allocate more than
 * sizeof(struct) because we make room for different req types.
 */
#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)

typedef struct isc__nm_uvreq isc__nm_uvreq_t;
struct isc__nm_uvreq {
	int magic;
	isc_nmsocket_t *sock;
	isc_nmhandle_t *handle;
	char tcplen[2]; /* The TCP DNS message length */
	uv_buf_t uvbuf; /* translated isc_region_t, to be
			 * sent or received */
	isc_sockaddr_t local; /* local address */
	isc_sockaddr_t peer; /* peer address */
	isc__nm_cb_t cb; /* callback */
	void *cbarg; /* callback argument */
	uv_pipe_t ipc; /* used for sending socket
			* uv_handles to other threads */
	union {
		uv_handle_t handle;
		uv_req_t req;
		uv_getaddrinfo_t getaddrinfo;
		uv_getnameinfo_t getnameinfo;
		uv_shutdown_t shutdown;
		uv_write_t write;
		uv_connect_t connect;
		uv_udp_send_t udp_send;
		uv_fs_t fs;
		uv_work_t work;
	} uv_req;
	ISC_LINK(isc__nm_uvreq_t) link;
};

struct isc_nm_timer {
	isc_refcount_t references;
	uv_timer_t timer;
	isc_nmhandle_t *handle;
	isc_nm_timer_cb cb;
	void *cbarg;
};

void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
364 */ 365 void 366 isc__nm_put_netievent(isc_nm_t *mgr, void *ievent); 367 368 /* 369 * The macros here are used to simulate the "inheritance" in C, there's the base 370 * netievent structure that contains just its own type and socket, and there are 371 * extended netievent types that also have handles or requests or other data. 372 * 373 * The macros here ensure that: 374 * 375 * 1. every netievent type has matching definition, declaration and 376 * implementation 377 * 378 * 2. we handle all the netievent types of same subclass the same, e.g. if the 379 * extended netievent contains handle, we always attach to the handle in 380 * the ctor and detach from the handle in dtor. 381 * 382 * There are three macros here for each netievent subclass: 383 * 384 * 1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in 385 * this header 386 * 387 * 2. NETIEVENT_*_DECL(type) generates the declaration of the get and put 388 * functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used 389 * below in this header 390 * 391 * 3. NETIEVENT_*_DEF(type) generates the definition of the functions; used 392 * either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.) 
393 */ 394 395 #define NETIEVENT__SOCKET \ 396 isc__netievent_type type; \ 397 isc_nmsocket_t *sock; \ 398 const char *file; \ 399 unsigned int line; \ 400 const char *func 401 402 typedef struct isc__netievent__socket { 403 NETIEVENT__SOCKET; 404 } isc__netievent__socket_t; 405 406 #define NETIEVENT_SOCKET_TYPE(type) \ 407 typedef isc__netievent__socket_t isc__netievent_##type##_t; 408 409 #define NETIEVENT_SOCKET_DECL(type) \ 410 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 411 isc_nm_t *nm, isc_nmsocket_t *sock); \ 412 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 413 isc__netievent_##type##_t *ievent); 414 415 #define NETIEVENT_SOCKET_DEF(type) \ 416 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 417 isc_nm_t *nm, isc_nmsocket_t *sock) { \ 418 isc__netievent_##type##_t *ievent = \ 419 isc__nm_get_netievent(nm, netievent_##type); \ 420 isc__nmsocket_attach(sock, &ievent->sock); \ 421 \ 422 return (ievent); \ 423 } \ 424 \ 425 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 426 isc__netievent_##type##_t *ievent) { \ 427 isc__nmsocket_detach(&ievent->sock); \ 428 isc__nm_put_netievent(nm, ievent); \ 429 } 430 431 typedef struct isc__netievent__socket_req { 432 NETIEVENT__SOCKET; 433 isc__nm_uvreq_t *req; 434 } isc__netievent__socket_req_t; 435 436 #define NETIEVENT_SOCKET_REQ_TYPE(type) \ 437 typedef isc__netievent__socket_req_t isc__netievent_##type##_t; 438 439 #define NETIEVENT_SOCKET_REQ_DECL(type) \ 440 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 441 isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \ 442 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 443 isc__netievent_##type##_t *ievent); 444 445 #define NETIEVENT_SOCKET_REQ_DEF(type) \ 446 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 447 isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { \ 448 isc__netievent_##type##_t *ievent = \ 449 isc__nm_get_netievent(nm, netievent_##type); \ 450 isc__nmsocket_attach(sock, 
&ievent->sock); \ 451 ievent->req = req; \ 452 \ 453 return (ievent); \ 454 } \ 455 \ 456 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 457 isc__netievent_##type##_t *ievent) { \ 458 isc__nmsocket_detach(&ievent->sock); \ 459 isc__nm_put_netievent(nm, ievent); \ 460 } 461 462 typedef struct isc__netievent__socket_req_result { 463 isc__netievent_type type; 464 isc_nmsocket_t *sock; 465 isc__nm_uvreq_t *req; 466 isc_result_t result; 467 } isc__netievent__socket_req_result_t; 468 469 #define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \ 470 typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t; 471 472 #define NETIEVENT_SOCKET_REQ_RESULT_DECL(type) \ 473 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 474 isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \ 475 isc_result_t result); \ 476 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 477 isc__netievent_##type##_t *ievent); 478 479 #define NETIEVENT_SOCKET_REQ_RESULT_DEF(type) \ 480 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 481 isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \ 482 isc_result_t result) { \ 483 isc__netievent_##type##_t *ievent = \ 484 isc__nm_get_netievent(nm, netievent_##type); \ 485 isc__nmsocket_attach(sock, &ievent->sock); \ 486 ievent->req = req; \ 487 ievent->result = result; \ 488 \ 489 return (ievent); \ 490 } \ 491 \ 492 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 493 isc__netievent_##type##_t *ievent) { \ 494 isc__nmsocket_detach(&ievent->sock); \ 495 isc__nm_put_netievent(nm, ievent); \ 496 } 497 498 typedef struct isc__netievent__socket_handle { 499 NETIEVENT__SOCKET; 500 isc_nmhandle_t *handle; 501 } isc__netievent__socket_handle_t; 502 503 #define NETIEVENT_SOCKET_HANDLE_TYPE(type) \ 504 typedef isc__netievent__socket_handle_t isc__netievent_##type##_t; 505 506 #define NETIEVENT_SOCKET_HANDLE_DECL(type) \ 507 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 508 isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t 
*handle); \ 509 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 510 isc__netievent_##type##_t *ievent); 511 512 #define NETIEVENT_SOCKET_HANDLE_DEF(type) \ 513 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 514 isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) { \ 515 isc__netievent_##type##_t *ievent = \ 516 isc__nm_get_netievent(nm, netievent_##type); \ 517 isc__nmsocket_attach(sock, &ievent->sock); \ 518 isc_nmhandle_attach(handle, &ievent->handle); \ 519 \ 520 return (ievent); \ 521 } \ 522 \ 523 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 524 isc__netievent_##type##_t *ievent) { \ 525 isc__nmsocket_detach(&ievent->sock); \ 526 isc_nmhandle_detach(&ievent->handle); \ 527 isc__nm_put_netievent(nm, ievent); \ 528 } 529 530 typedef struct isc__netievent__socket_quota { 531 NETIEVENT__SOCKET; 532 isc_quota_t *quota; 533 } isc__netievent__socket_quota_t; 534 535 #define NETIEVENT_SOCKET_QUOTA_TYPE(type) \ 536 typedef isc__netievent__socket_quota_t isc__netievent_##type##_t; 537 538 #define NETIEVENT_SOCKET_QUOTA_DECL(type) \ 539 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 540 isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \ 541 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 542 isc__netievent_##type##_t *ievent); 543 544 #define NETIEVENT_SOCKET_QUOTA_DEF(type) \ 545 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 546 isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) { \ 547 isc__netievent_##type##_t *ievent = \ 548 isc__nm_get_netievent(nm, netievent_##type); \ 549 isc__nmsocket_attach(sock, &ievent->sock); \ 550 ievent->quota = quota; \ 551 \ 552 return (ievent); \ 553 } \ 554 \ 555 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 556 isc__netievent_##type##_t *ievent) { \ 557 isc__nmsocket_detach(&ievent->sock); \ 558 isc__nm_put_netievent(nm, ievent); \ 559 } 560 561 typedef struct isc__netievent__task { 562 isc__netievent_type type; 563 isc_task_t *task; 564 } isc__netievent__task_t; 
565 566 #define NETIEVENT_TASK_TYPE(type) \ 567 typedef isc__netievent__task_t isc__netievent_##type##_t; 568 569 #define NETIEVENT_TASK_DECL(type) \ 570 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 571 isc_nm_t *nm, isc_task_t *task); \ 572 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 573 isc__netievent_##type##_t *ievent); 574 575 #define NETIEVENT_TASK_DEF(type) \ 576 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 577 isc_nm_t *nm, isc_task_t *task) { \ 578 isc__netievent_##type##_t *ievent = \ 579 isc__nm_get_netievent(nm, netievent_##type); \ 580 ievent->task = task; \ 581 \ 582 return (ievent); \ 583 } \ 584 \ 585 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 586 isc__netievent_##type##_t *ievent) { \ 587 ievent->task = NULL; \ 588 isc__nm_put_netievent(nm, ievent); \ 589 } 590 591 typedef struct isc__netievent_udpsend { 592 NETIEVENT__SOCKET; 593 isc_sockaddr_t peer; 594 isc__nm_uvreq_t *req; 595 } isc__netievent_udpsend_t; 596 597 typedef struct isc__netievent { 598 isc__netievent_type type; 599 } isc__netievent_t; 600 601 #define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t; 602 603 #define NETIEVENT_DECL(type) \ 604 isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \ 605 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 606 isc__netievent_##type##_t *ievent); 607 608 #define NETIEVENT_DEF(type) \ 609 isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ 610 isc_nm_t *nm) { \ 611 isc__netievent_##type##_t *ievent = \ 612 isc__nm_get_netievent(nm, netievent_##type); \ 613 \ 614 return (ievent); \ 615 } \ 616 \ 617 void isc__nm_put_netievent_##type(isc_nm_t *nm, \ 618 isc__netievent_##type##_t *ievent) { \ 619 isc__nm_put_netievent(nm, ievent); \ 620 } 621 622 typedef union { 623 isc__netievent_t ni; 624 isc__netievent__socket_t nis; 625 isc__netievent__socket_req_t nisr; 626 isc__netievent_udpsend_t nius; 627 isc__netievent__socket_quota_t nisq; 628 } 
isc__netievent_storage_t; 629 630 /* 631 * Work item for a uv_work threadpool. 632 */ 633 typedef struct isc__nm_work { 634 isc_nm_t *netmgr; 635 uv_work_t req; 636 isc_nm_workcb_t cb; 637 isc_nm_after_workcb_t after_cb; 638 void *data; 639 } isc__nm_work_t; 640 641 /* 642 * Network manager 643 */ 644 #define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M') 645 #define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC) 646 647 struct isc_nm { 648 int magic; 649 isc_refcount_t references; 650 isc_mem_t *mctx; 651 int nworkers; 652 isc_mutex_t lock; 653 isc_condition_t wkstatecond; 654 isc_condition_t wkpausecond; 655 isc__networker_t *workers; 656 657 isc_stats_t *stats; 658 659 uint_fast32_t workers_running; 660 atomic_uint_fast32_t workers_paused; 661 atomic_uint_fast32_t maxudp; 662 663 atomic_bool paused; 664 665 /* 666 * Active connections are being closed and new connections are 667 * no longer allowed. 668 */ 669 atomic_bool closing; 670 671 /* 672 * A worker is actively waiting for other workers, for example to 673 * stop listening; that means no other thread can do the same thing 674 * or pause, or we'll deadlock. We have to either re-enqueue our 675 * event or wait for the other one to finish if we want to pause. 676 */ 677 atomic_int interlocked; 678 679 /* 680 * Timeout values for TCP connections, corresponding to 681 * tcp-intiial-timeout, tcp-idle-timeout, tcp-keepalive-timeout, 682 * and tcp-advertised-timeout. Note that these are stored in 683 * milliseconds so they can be used directly with the libuv timer, 684 * but they are configured in tenths of seconds. 
685 */ 686 atomic_uint_fast32_t init; 687 atomic_uint_fast32_t idle; 688 atomic_uint_fast32_t keepalive; 689 atomic_uint_fast32_t advertised; 690 691 isc_barrier_t pausing; 692 isc_barrier_t resuming; 693 694 #ifdef NETMGR_TRACE 695 ISC_LIST(isc_nmsocket_t) active_sockets; 696 #endif 697 }; 698 699 typedef enum isc_nmsocket_type { 700 isc_nm_udpsocket, 701 isc_nm_udplistener, /* Aggregate of nm_udpsocks */ 702 isc_nm_tcpsocket, 703 isc_nm_tcplistener, 704 isc_nm_tcpdnslistener, 705 isc_nm_tcpdnssocket, 706 } isc_nmsocket_type; 707 708 /*% 709 * A universal structure for either a single socket or a group of 710 * dup'd/SO_REUSE_PORT-using sockets listening on the same interface. 711 */ 712 #define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K') 713 #define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC) 714 715 /*% 716 * Index into socket stat counter arrays. 717 */ 718 enum { 719 STATID_OPEN = 0, 720 STATID_OPENFAIL = 1, 721 STATID_CLOSE = 2, 722 STATID_BINDFAIL = 3, 723 STATID_CONNECTFAIL = 4, 724 STATID_CONNECT = 5, 725 STATID_ACCEPTFAIL = 6, 726 STATID_ACCEPT = 7, 727 STATID_SENDFAIL = 8, 728 STATID_RECVFAIL = 9, 729 STATID_ACTIVE = 10 730 }; 731 732 typedef void (*isc_nm_closehandlecb_t)(void *arg); 733 /*%< 734 * Opaque callback function, used for isc_nmhandle 'reset' and 'free' 735 * callbacks. 736 */ 737 738 struct isc_nmsocket { 739 /*% Unlocked, RO */ 740 int magic; 741 int tid; 742 isc_nmsocket_type type; 743 isc_nm_t *mgr; 744 745 /*% Parent socket for multithreaded listeners */ 746 isc_nmsocket_t *parent; 747 /*% Listener socket this connection was accepted on */ 748 isc_nmsocket_t *listener; 749 /*% Self socket */ 750 isc_nmsocket_t *self; 751 752 isc_barrier_t startlistening; 753 isc_barrier_t stoplistening; 754 755 /*% 756 * quota is the TCP client, attached when a TCP connection 757 * is established. pquota is a non-attached pointer to the 758 * TCP client quota, stored in listening sockets but only 759 * attached in connected sockets. 
760 */ 761 isc_quota_t *quota; 762 isc_quota_t *pquota; 763 isc_quota_cb_t quotacb; 764 765 /*% 766 * Socket statistics 767 */ 768 const isc_statscounter_t *statsindex; 769 770 /*% 771 * TCP read/connect timeout timers. 772 */ 773 uv_timer_t read_timer; 774 uint64_t read_timeout; 775 uint64_t connect_timeout; 776 777 /*% 778 * TCP write timeout timer. 779 */ 780 uv_timer_t write_timer; 781 uint64_t write_timeout; 782 int64_t writes; 783 784 /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */ 785 isc_nmsocket_t *outer; 786 787 /*% server socket for connections */ 788 isc_nmsocket_t *server; 789 790 /*% Child sockets for multi-socket setups */ 791 isc_nmsocket_t *children; 792 uint_fast32_t nchildren; 793 isc_sockaddr_t iface; 794 isc_nmhandle_t *statichandle; 795 isc_nmhandle_t *outerhandle; 796 797 /*% Extra data allocated at the end of each isc_nmhandle_t */ 798 size_t extrahandlesize; 799 800 /*% TCP backlog */ 801 int backlog; 802 803 /*% libuv data */ 804 uv_os_sock_t fd; 805 union uv_any_handle uv_handle; 806 807 /*% Peer address */ 808 isc_sockaddr_t peer; 809 810 /* Atomic */ 811 /*% Number of running (e.g. listening) child sockets */ 812 atomic_uint_fast32_t rchildren; 813 814 /*% 815 * Socket is active if it's listening, working, etc. If it's 816 * closing, then it doesn't make a sense, for example, to 817 * push handles or reqs for reuse. 818 */ 819 atomic_bool active; 820 atomic_bool destroying; 821 822 /*% 823 * Socket is closed if it's not active and all the possible 824 * callbacks were fired, there are no active handles, etc. 825 * If active==false but closed==false, that means the socket 826 * is closing. 827 */ 828 atomic_bool closing; 829 atomic_bool closed; 830 atomic_bool listening; 831 atomic_bool connecting; 832 atomic_bool connected; 833 bool accepting; 834 bool reading; 835 atomic_bool timedout; 836 isc_refcount_t references; 837 838 /*% 839 * Established an outgoing connection, as client not server. 
840 */ 841 atomic_bool client; 842 843 /*% 844 * TCPDNS socket has been set not to pipeline. 845 */ 846 atomic_bool sequential; 847 848 /*% 849 * The socket is processing read callback, this is guard to not read 850 * data before the readcb is back. 851 */ 852 bool processing; 853 854 /*% 855 * A TCP socket has had isc_nm_pauseread() called. 856 */ 857 atomic_bool readpaused; 858 859 /*% 860 * A TCP or TCPDNS socket has been set to use the keepalive 861 * timeout instead of the default idle timeout. 862 */ 863 atomic_bool keepalive; 864 865 /*% 866 * 'spare' handles for that can be reused to avoid allocations, 867 * for UDP. 868 */ 869 isc_astack_t *inactivehandles; 870 isc_astack_t *inactivereqs; 871 872 /*% 873 * Used to wait for TCP listening events to complete, and 874 * for the number of running children to reach zero during 875 * shutdown. 876 * 877 * We use two condition variables to prevent the race where the netmgr 878 * threads would be able to finish and destroy the socket before it's 879 * unlocked by the isc_nm_listen<proto>() function. So, the flow is as 880 * follows: 881 * 882 * 1. parent thread creates all children sockets and passes then to 883 * netthreads, looks at the signaling variable and WAIT(cond) until 884 * the childrens are done initializing 885 * 886 * 2. the events get picked by netthreads, calls the libuv API (and 887 * either succeeds or fails) and WAIT(scond) until all other 888 * children sockets in netthreads are initialized and the listening 889 * socket lock is unlocked 890 * 891 * 3. 
the control is given back to the parent thread which now either 892 * returns success or shutdowns the listener if an error has 893 * occured in the children netthread 894 * 895 * NOTE: The other approach would be doing an extra attach to the parent 896 * listening socket, and then detach it in the parent thread, but that 897 * breaks the promise that once the libuv socket is initialized on the 898 * nmsocket, the nmsocket needs to be handled only by matching 899 * netthread, so in fact that would add a complexity in a way that 900 * isc__nmsocket_detach would have to be converted to use an 901 * asynchrounous netievent. 902 */ 903 isc_mutex_t lock; 904 isc_condition_t cond; 905 isc_condition_t scond; 906 907 /*% 908 * Used to pass a result back from listen or connect events. 909 */ 910 isc_result_t result; 911 912 /*% 913 * Current number of active handles. 914 */ 915 atomic_int_fast32_t ah; 916 917 /*% Buffer for TCPDNS processing */ 918 size_t buf_size; 919 size_t buf_len; 920 unsigned char *buf; 921 922 /*% 923 * This function will be called with handle->sock 924 * as the argument whenever a handle's references drop 925 * to zero, after its reset callback has been called. 926 */ 927 isc_nm_closehandlecb_t closehandle_cb; 928 929 isc_nmhandle_t *recv_handle; 930 isc_nm_recv_cb_t recv_cb; 931 void *recv_cbarg; 932 bool recv_read; 933 934 isc_nm_cb_t connect_cb; 935 void *connect_cbarg; 936 937 isc_nm_accept_cb_t accept_cb; 938 void *accept_cbarg; 939 940 atomic_int_fast32_t active_child_connections; 941 942 #ifdef NETMGR_TRACE 943 void *backtrace[TRACE_SIZE]; 944 int backtrace_size; 945 LINK(isc_nmsocket_t) active_link; 946 ISC_LIST(isc_nmhandle_t) active_handles; 947 #endif 948 }; 949 950 bool 951 isc__nm_in_netthread(void); 952 /*% 953 * Returns 'true' if we're in the network thread. 
954 */ 955 956 void 957 isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); 958 /*%< 959 * If the caller is already in the matching nmthread, process the netievent 960 * directly, if not enqueue using isc__nm_enqueue_ievent(). 961 */ 962 963 void 964 isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); 965 /*%< 966 * Enqueue an ievent onto a specific worker queue. (This the only safe 967 * way to use an isc__networker_t from another thread.) 968 */ 969 970 void 971 isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf); 972 /*%< 973 * Free a buffer allocated for a receive operation. 974 * 975 * Note that as currently implemented, this doesn't actually 976 * free anything, marks the isc__networker's UDP receive buffer 977 * as "not in use". 978 */ 979 980 isc_nmhandle_t * 981 isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, 982 isc_sockaddr_t *local FLARG); 983 /*%< 984 * Get a handle for the socket 'sock', allocating a new one 985 * if there isn't one available in 'sock->inactivehandles'. 986 * 987 * If 'peer' is not NULL, set the handle's peer address to 'peer', 988 * otherwise set it to 'sock->peer'. 989 * 990 * If 'local' is not NULL, set the handle's local address to 'local', 991 * otherwise set it to 'sock->iface->addr'. 992 * 993 * 'sock' will be attached to 'handle->sock'. The caller may need 994 * to detach the socket afterward. 995 */ 996 997 isc__nm_uvreq_t * 998 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG); 999 /*%< 1000 * Get a UV request structure for the socket 'sock', allocating a 1001 * new one if there isn't one available in 'sock->inactivereqs'. 1002 */ 1003 1004 void 1005 isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG); 1006 /*%< 1007 * Completes the use of a UV request structure, setting '*req' to NULL. 1008 * 1009 * The UV request is pushed onto the 'sock->inactivereqs' stack or, 1010 * if that doesn't work, freed. 
1011 */ 1012 1013 void 1014 isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, 1015 isc_sockaddr_t *iface FLARG); 1016 /*%< 1017 * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type' 1018 * and its interface to 'iface'. 1019 */ 1020 1021 void 1022 isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG); 1023 /*%< 1024 * Attach to a socket, increasing refcount 1025 */ 1026 1027 void 1028 isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG); 1029 /*%< 1030 * Detach from socket, decreasing refcount and possibly destroying the 1031 * socket if it's no longer referenced. 1032 */ 1033 1034 void 1035 isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG); 1036 /*%< 1037 * Market 'sock' as inactive, close it if necessary, and destroy it 1038 * if there are no remaining references or active handles. 1039 */ 1040 1041 void 1042 isc__nmsocket_shutdown(isc_nmsocket_t *sock); 1043 /*%< 1044 * Initiate the socket shutdown which actively calls the active 1045 * callbacks. 1046 */ 1047 1048 bool 1049 isc__nmsocket_active(isc_nmsocket_t *sock); 1050 /*%< 1051 * Determine whether 'sock' is active by checking 'sock->active' 1052 * or, for child sockets, 'sock->parent->active'. 1053 */ 1054 1055 bool 1056 isc__nmsocket_deactivate(isc_nmsocket_t *sock); 1057 /*%< 1058 * @brief Deactivate active socket 1059 * 1060 * Atomically deactive the socket by setting @p sock->active or, for child 1061 * sockets, @p sock->parent->active to @c false 1062 * 1063 * @param[in] sock - valid nmsocket 1064 * @return @c false if the socket was already inactive, @c true otherwise 1065 */ 1066 1067 void 1068 isc__nmsocket_clearcb(isc_nmsocket_t *sock); 1069 /*%< 1070 * Clear the recv and accept callbacks in 'sock'. 
1071 */ 1072 1073 void 1074 isc__nmsocket_timer_stop(isc_nmsocket_t *sock); 1075 void 1076 isc__nmsocket_timer_start(isc_nmsocket_t *sock); 1077 void 1078 isc__nmsocket_timer_restart(isc_nmsocket_t *sock); 1079 bool 1080 isc__nmsocket_timer_running(isc_nmsocket_t *sock); 1081 /*%< 1082 * Start/stop/restart/check the timeout on the socket 1083 */ 1084 1085 void 1086 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, 1087 isc_result_t eresult, bool async); 1088 1089 void 1090 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0); 1091 /*%< 1092 * Issue a connect callback on the socket, used to call the callback 1093 */ 1094 1095 void 1096 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, 1097 isc_result_t eresult); 1098 void 1099 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0); 1100 1101 /*%< 1102 * Issue a read callback on the socket, used to call the callback 1103 * on failed conditions when the event can't be scheduled on the uv loop. 1104 * 1105 */ 1106 1107 void 1108 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq, 1109 isc_result_t eresult, bool async); 1110 void 1111 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0); 1112 /*%< 1113 * Issue a write callback on the socket, used to call the callback 1114 * on failed conditions when the event can't be scheduled on the uv loop. 1115 */ 1116 1117 void 1118 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0); 1119 /*%< 1120 * Walk through all uv handles, get the underlying sockets and issue 1121 * close on them. 1122 */ 1123 1124 void 1125 isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region, 1126 isc_nm_cb_t cb, void *cbarg); 1127 /*%< 1128 * Back-end implementation of isc_nm_send() for UDP handles. 1129 */ 1130 1131 void 1132 isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); 1133 /* 1134 * Back-end implementation of isc_nm_read() for UDP handles. 
1135 */ 1136 1137 void 1138 isc__nm_udp_close(isc_nmsocket_t *sock); 1139 /*%< 1140 * Close a UDP socket. 1141 */ 1142 1143 void 1144 isc__nm_udp_cancelread(isc_nmhandle_t *handle); 1145 /*%< 1146 * Stop reading on a connected UDP handle. 1147 */ 1148 1149 void 1150 isc__nm_udp_shutdown(isc_nmsocket_t *sock); 1151 /*%< 1152 * Called during the shutdown process to close and clean up connected 1153 * sockets. 1154 */ 1155 1156 void 1157 isc__nm_udp_stoplistening(isc_nmsocket_t *sock); 1158 /*%< 1159 * Stop listening on 'sock'. 1160 */ 1161 1162 void 1163 isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); 1164 /*%< 1165 * Set or clear the recv timeout for the UDP socket associated with 'handle'. 1166 */ 1167 1168 void 1169 isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0); 1170 void 1171 isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1172 void 1173 isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0); 1174 void 1175 isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0); 1176 void 1177 isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0); 1178 void 1179 isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0); 1180 void 1181 isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0); 1182 /*%< 1183 * Callback handlers for asynchronous UDP events (listen, stoplisten, send). 1184 */ 1185 1186 void 1187 isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, 1188 isc_nm_cb_t cb, void *cbarg); 1189 /*%< 1190 * Back-end implementation of isc_nm_send() for TCP handles. 1191 */ 1192 1193 void 1194 isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); 1195 /* 1196 * Back-end implementation of isc_nm_read() for TCP handles. 1197 */ 1198 1199 void 1200 isc__nm_tcp_close(isc_nmsocket_t *sock); 1201 /*%< 1202 * Close a TCP socket. 
1203 */ 1204 void 1205 isc__nm_tcp_pauseread(isc_nmhandle_t *handle); 1206 /*%< 1207 * Pause reading on this handle, while still remembering the callback. 1208 */ 1209 1210 void 1211 isc__nm_tcp_resumeread(isc_nmhandle_t *handle); 1212 /*%< 1213 * Resume reading from socket. 1214 * 1215 */ 1216 1217 void 1218 isc__nm_tcp_shutdown(isc_nmsocket_t *sock); 1219 /*%< 1220 * Called during the shutdown process to close and clean up connected 1221 * sockets. 1222 */ 1223 1224 void 1225 isc__nm_tcp_cancelread(isc_nmhandle_t *handle); 1226 /*%< 1227 * Stop reading on a connected TCP handle. 1228 */ 1229 1230 void 1231 isc__nm_tcp_stoplistening(isc_nmsocket_t *sock); 1232 /*%< 1233 * Stop listening on 'sock'. 1234 */ 1235 1236 int_fast32_t 1237 isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock); 1238 /*%< 1239 * Returns the number of active connections for the TCP listener socket. 1240 */ 1241 1242 void 1243 isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); 1244 /*%< 1245 * Set the read timeout for the TCP socket associated with 'handle'. 
1246 */ 1247 1248 void 1249 isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1250 void 1251 isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0); 1252 void 1253 isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1254 void 1255 isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0); 1256 void 1257 isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0); 1258 void 1259 isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0); 1260 void 1261 isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0); 1262 void 1263 isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0); 1264 void 1265 isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0); 1266 void 1267 isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0); 1268 void 1269 isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0); 1270 /*%< 1271 * Callback handlers for asynchronous TCP events (connect, listen, 1272 * stoplisten, send, read, pause, close). 1273 */ 1274 1275 void 1276 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1277 void 1278 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1279 void 1280 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); 1281 1282 void 1283 isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, 1284 isc_nm_cb_t cb, void *cbarg); 1285 /*%< 1286 * Back-end implementation of isc_nm_send() for TCPDNS handles. 1287 */ 1288 1289 void 1290 isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock); 1291 1292 void 1293 isc__nm_tcpdns_close(isc_nmsocket_t *sock); 1294 /*%< 1295 * Close a TCPDNS socket. 1296 */ 1297 1298 void 1299 isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock); 1300 /*%< 1301 * Stop listening on 'sock'. 
1302 */ 1303 1304 void 1305 isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout); 1306 /*%< 1307 * Set the read timeout and reset the timer for the TCPDNS socket 1308 * associated with 'handle', and the TCP socket it wraps around. 1309 */ 1310 1311 void 1312 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1313 void 1314 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1315 void 1316 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); 1317 void 1318 isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0); 1319 void 1320 isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0); 1321 void 1322 isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0); 1323 void 1324 isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0); 1325 void 1326 isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0); 1327 /*%< 1328 * Callback handlers for asynchronous TCPDNS events. 1329 */ 1330 1331 void 1332 isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); 1333 /* 1334 * Back-end implementation of isc_nm_read() for TCPDNS handles. 1335 */ 1336 1337 void 1338 isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle); 1339 /*%< 1340 * Stop reading on a connected TCPDNS handle. 1341 */ 1342 1343 #define isc__nm_uverr2result(x) \ 1344 isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__) 1345 isc_result_t 1346 isc___nm_uverr2result(int uverr, bool dolog, const char *file, 1347 unsigned int line, const char *func); 1348 /*%< 1349 * Convert a libuv error value into an isc_result_t. The 1350 * list of supported error values is not complete; new users 1351 * of this function should add any expected errors that are 1352 * not already there. 
1353 */ 1354 1355 bool 1356 isc__nm_acquire_interlocked(isc_nm_t *mgr); 1357 /*%< 1358 * Try to acquire interlocked state; return true if successful. 1359 */ 1360 1361 void 1362 isc__nm_drop_interlocked(isc_nm_t *mgr); 1363 /*%< 1364 * Drop interlocked state; signal waiters. 1365 */ 1366 1367 void 1368 isc__nm_acquire_interlocked_force(isc_nm_t *mgr); 1369 /*%< 1370 * Actively wait for interlocked state. 1371 */ 1372 1373 void 1374 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid); 1375 /*%< 1376 * Increment socket-related statistics counters. 1377 */ 1378 1379 void 1380 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid); 1381 /*%< 1382 * Decrement socket-related statistics counters. 1383 */ 1384 1385 isc_result_t 1386 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp); 1387 /*%< 1388 * Platform independent socket() version 1389 */ 1390 1391 void 1392 isc__nm_closesocket(uv_os_sock_t sock); 1393 /*%< 1394 * Platform independent closesocket() version 1395 */ 1396 1397 isc_result_t 1398 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family); 1399 /*%< 1400 * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle 1401 */ 1402 1403 isc_result_t 1404 isc__nm_socket_reuse(uv_os_sock_t fd); 1405 /*%< 1406 * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd 1407 */ 1408 1409 isc_result_t 1410 isc__nm_socket_reuse_lb(uv_os_sock_t fd); 1411 /*%< 1412 * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd 1413 */ 1414 1415 isc_result_t 1416 isc__nm_socket_incoming_cpu(uv_os_sock_t fd); 1417 /*%< 1418 * Set the SO_INCOMING_CPU socket option on the fd if available 1419 */ 1420 1421 isc_result_t 1422 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family); 1423 /*%< 1424 * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket 1425 * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT 1426 */ 1427 1428 isc_result_t 1429 
isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms); 1430 /*%< 1431 * Set the connection timeout in milliseconds, on non-Linux platforms, 1432 * the minimum value must be at least 1000 (1 second). 1433 */ 1434 1435 isc_result_t 1436 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd); 1437 /*%< 1438 * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY). 1439 */ 1440 1441 /* 1442 * typedef all the netievent types 1443 */ 1444 1445 NETIEVENT_SOCKET_TYPE(close); 1446 NETIEVENT_SOCKET_TYPE(tcpclose); 1447 NETIEVENT_SOCKET_TYPE(tcplisten); 1448 NETIEVENT_SOCKET_TYPE(tcppauseread); 1449 NETIEVENT_SOCKET_TYPE(tcpstop); 1450 NETIEVENT_SOCKET_TYPE(udpclose); 1451 NETIEVENT_SOCKET_TYPE(udplisten); 1452 NETIEVENT_SOCKET_TYPE(udpread); 1453 /* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */ 1454 NETIEVENT_SOCKET_TYPE(udpstop); 1455 1456 NETIEVENT_SOCKET_TYPE(tcpdnsclose); 1457 NETIEVENT_SOCKET_TYPE(tcpdnsread); 1458 NETIEVENT_SOCKET_TYPE(tcpdnsstop); 1459 NETIEVENT_SOCKET_TYPE(tcpdnslisten); 1460 NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect); 1461 NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend); 1462 NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel); 1463 NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept); 1464 1465 NETIEVENT_SOCKET_REQ_TYPE(tcpconnect); 1466 NETIEVENT_SOCKET_REQ_TYPE(tcpsend); 1467 NETIEVENT_SOCKET_TYPE(tcpstartread); 1468 NETIEVENT_SOCKET_REQ_TYPE(udpconnect); 1469 1470 NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb); 1471 NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb); 1472 NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb); 1473 1474 NETIEVENT_SOCKET_HANDLE_TYPE(detach); 1475 NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel); 1476 NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel); 1477 1478 NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept); 1479 1480 NETIEVENT_TYPE(pause); 1481 NETIEVENT_TYPE(resume); 1482 NETIEVENT_TYPE(shutdown); 1483 NETIEVENT_TYPE(stop); 1484 1485 NETIEVENT_TASK_TYPE(task); 1486 NETIEVENT_TASK_TYPE(privilegedtask); 1487 1488 /* Now declared the helper functions */ 1489 
1490 NETIEVENT_SOCKET_DECL(close); 1491 NETIEVENT_SOCKET_DECL(tcpclose); 1492 NETIEVENT_SOCKET_DECL(tcplisten); 1493 NETIEVENT_SOCKET_DECL(tcppauseread); 1494 NETIEVENT_SOCKET_DECL(tcpstartread); 1495 NETIEVENT_SOCKET_DECL(tcpstop); 1496 NETIEVENT_SOCKET_DECL(udpclose); 1497 NETIEVENT_SOCKET_DECL(udplisten); 1498 NETIEVENT_SOCKET_DECL(udpread); 1499 NETIEVENT_SOCKET_DECL(udpsend); 1500 NETIEVENT_SOCKET_DECL(udpstop); 1501 1502 NETIEVENT_SOCKET_DECL(tcpdnsclose); 1503 NETIEVENT_SOCKET_DECL(tcpdnsread); 1504 NETIEVENT_SOCKET_DECL(tcpdnsstop); 1505 NETIEVENT_SOCKET_DECL(tcpdnslisten); 1506 NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect); 1507 NETIEVENT_SOCKET_REQ_DECL(tcpdnssend); 1508 NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel); 1509 NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept); 1510 1511 NETIEVENT_SOCKET_REQ_DECL(tcpconnect); 1512 NETIEVENT_SOCKET_REQ_DECL(tcpsend); 1513 NETIEVENT_SOCKET_REQ_DECL(udpconnect); 1514 1515 NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb); 1516 NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb); 1517 NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb); 1518 1519 NETIEVENT_SOCKET_HANDLE_DECL(udpcancel); 1520 NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel); 1521 NETIEVENT_SOCKET_DECL(detach); 1522 1523 NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept); 1524 1525 NETIEVENT_DECL(pause); 1526 NETIEVENT_DECL(resume); 1527 NETIEVENT_DECL(shutdown); 1528 NETIEVENT_DECL(stop); 1529 1530 NETIEVENT_TASK_DECL(task); 1531 NETIEVENT_TASK_DECL(privilegedtask); 1532 1533 void 1534 isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1535 void 1536 isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1537 void 1538 isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1539 1540 isc_result_t 1541 isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock); 1542 1543 isc__nm_uvreq_t * 1544 isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr); 1545 1546 void 1547 isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf); 1548 1549 void 1550 
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, 1551 const struct sockaddr *addr, unsigned flags); 1552 void 1553 isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); 1554 void 1555 isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); 1556 1557 void 1558 isc__nm_start_reading(isc_nmsocket_t *sock); 1559 void 1560 isc__nm_stop_reading(isc_nmsocket_t *sock); 1561 void 1562 isc__nm_process_sock_buffer(isc_nmsocket_t *sock); 1563 void 1564 isc__nm_resume_processing(void *arg); 1565 bool 1566 isc__nmsocket_closing(isc_nmsocket_t *sock); 1567 bool 1568 isc__nm_closing(isc_nmsocket_t *sock); 1569 1570 void 1571 isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len); 1572 1573 void 1574 isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, 1575 isc_result_t eresult); 1576 void 1577 isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult); 1578 void 1579 isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, 1580 isc_result_t eresult, bool async); 1581 void 1582 isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async); 1583 1584 void 1585 isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota); 1586 1587 /* 1588 * Timeout callbacks 1589 */ 1590 void 1591 isc__nmsocket_connecttimeout_cb(uv_timer_t *timer); 1592 void 1593 isc__nmsocket_readtimeout_cb(uv_timer_t *timer); 1594 void 1595 isc__nmsocket_writetimeout_cb(uv_timer_t *timer); 1596 1597 /*%< 1598 * 1599 * Maximum number of simultaneous handles in flight supported for a single 1600 * connected TCPDNS socket. This value was chosen arbitrarily, and may be 1601 * changed in the future. 1602 */ 1603 #define STREAM_CLIENTS_PER_CONN 23 1604 1605 #define UV_RUNTIME_CHECK(func, ret) \ 1606 if (ret != 0) { \ 1607 isc_error_fatal(__FILE__, __LINE__, "%s failed: %s\n", #func, \ 1608 uv_strerror(ret)); \ 1609 } 1610