/* $NetBSD: netmgr-int.h,v 1.8 2022/09/23 12:15:34 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#pragma once

#include <unistd.h>
#include <uv.h>

#include <openssl/err.h>
#include <openssl/ssl.h>

#include <isc/astack.h>
#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/rwlock.h>
#include <isc/sockaddr.h>
#include <isc/stats.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "uv-compat.h"

/* Sentinel thread id: the current thread is not a netmgr worker thread. */
#define ISC_NETMGR_TID_UNKNOWN -1

/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2

/*
 * Receive buffers
 */
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
 * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
 * libuv will not receive more that 20 datagrams in a single recvmmsg call.
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
#else
/*
 * A single DNS message size
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
#endif

/*
 * The TCP receive buffer can fit one maximum sized DNS message plus its size,
 * the receive buffer here affects TCP, DoT and DoH.
69 */ 70 #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) 71 72 /* Pick the larger buffer */ 73 #define ISC_NETMGR_RECVBUF_SIZE \ 74 (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \ 75 ? ISC_NETMGR_UDP_RECVBUF_SIZE \ 76 : ISC_NETMGR_TCP_RECVBUF_SIZE) 77 78 /* 79 * Send buffer 80 */ 81 #define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) 82 83 /*% 84 * Regular TCP buffer size. 85 */ 86 #define NM_REG_BUF 4096 87 88 /*% 89 * Larger buffer for when the regular one isn't enough; this will 90 * hold two full DNS packets with lengths. netmgr receives 64k at 91 * most in TCPDNS connections, so there's no risk of overrun 92 * when using a buffer this size. 93 */ 94 #define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2 95 96 /* 97 * Define NETMGR_TRACE to activate tracing of handles and sockets. 98 * This will impair performance but enables us to quickly determine, 99 * if netmgr resources haven't been cleaned up on shutdown, which ones 100 * are still in use. 101 */ 102 #ifdef NETMGR_TRACE 103 #define TRACE_SIZE 8 104 105 void 106 isc__nm_dump_active(isc_nm_t *nm); 107 108 #if defined(__linux__) 109 #include <syscall.h> 110 #define gettid() (uint32_t) syscall(SYS_gettid) 111 #elif defined(_WIN32) 112 #define gettid() (uint32_t) GetCurrentThreadId() 113 #else 114 #define gettid() (uint32_t) pthread_self() 115 #endif 116 117 #ifdef NETMGR_TRACE_VERBOSE 118 #define NETMGR_TRACE_LOG(format, ...) \ 119 fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \ 120 isc_nm_tid(), file, line, func, __VA_ARGS__) 121 #else 122 #define NETMGR_TRACE_LOG(format, ...) 
\ 123 (void)file; \ 124 (void)line; \ 125 (void)func; 126 #endif 127 128 #define FLARG_PASS , file, line, func 129 #define FLARG \ 130 , const char *file __attribute__((unused)), \ 131 unsigned int line __attribute__((unused)), \ 132 const char *func __attribute__((unused)) 133 #define FLARG_IEVENT(ievent) \ 134 const char *file = ievent->file; \ 135 unsigned int line = ievent->line; \ 136 const char *func = ievent->func; 137 #define FLARG_IEVENT_PASS(ievent) \ 138 ievent->file = file; \ 139 ievent->line = line; \ 140 ievent->func = func; 141 #define isc__nm_uvreq_get(req, sock) \ 142 isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__) 143 #define isc__nm_uvreq_put(req, sock) \ 144 isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__) 145 #define isc__nmsocket_init(sock, mgr, type, iface) \ 146 isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \ 147 __func__) 148 #define isc__nmsocket_put(sockp) \ 149 isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__) 150 #define isc__nmsocket_attach(sock, target) \ 151 isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__) 152 #define isc__nmsocket_detach(socketp) \ 153 isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__) 154 #define isc__nmsocket_close(socketp) \ 155 isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__) 156 #define isc__nmhandle_get(sock, peer, local) \ 157 isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__) 158 #define isc__nmsocket_prep_destroy(sock) \ 159 isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__) 160 #else 161 #define NETMGR_TRACE_LOG(format, ...) 

/* Non-tracing build: all location-tracking macros collapse to nothing
 * and the wrappers call the triple-underscore implementations directly. */
#define FLARG_PASS
#define FLARG
#define FLARG_IEVENT(ievent)
#define FLARG_IEVENT_PASS(ievent)
#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
#define isc__nmsocket_init(sock, mgr, type, iface) \
	isc___nmsocket_init(sock, mgr, type, iface)
#define isc__nmsocket_put(sockp) isc___nmsocket_put(sockp)
#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
#define isc__nmsocket_detach(socketp) isc___nmsocket_detach(socketp)
#define isc__nmsocket_close(socketp) isc___nmsocket_close(socketp)
#define isc__nmhandle_get(sock, peer, local) \
	isc___nmhandle_get(sock, peer, local)
#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
#endif

/*
 * Queue types in the order of processing priority.
 */
typedef enum {
	NETIEVENT_PRIORITY = 0,
	NETIEVENT_PRIVILEGED = 1,
	NETIEVENT_TASK = 2,
	NETIEVENT_NORMAL = 3,
	NETIEVENT_MAX = 4,
} netievent_type_t;

typedef struct isc__nm_uvreq isc__nm_uvreq_t;
typedef struct isc__netievent isc__netievent_t;

typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;

/* One event queue: a mutex-protected list plus its wakeup condition. */
typedef struct ievent {
	isc_mutex_t lock;
	isc_condition_t cond;
	isc__netievent_list_t list;
} ievent_t;

/*
 * Single network event loop worker.
 */
typedef struct isc__networker {
	isc_nm_t *mgr;
	int id;		  /* thread id */
	uv_loop_t loop;	  /* libuv loop structure */
	uv_async_t async; /* async channel to send
			   * data to this networker */
	bool paused;
	bool finished;
	isc_thread_t thread;
	/* One queue per netievent_type_t priority level. */
	ievent_t ievents[NETIEVENT_MAX];

	isc_refcount_t references;
	atomic_int_fast64_t pktcount;
	char *recvbuf;
	char *sendbuf;
	bool recvbuf_inuse;
} isc__networker_t;

/*
 * A general handle for a connection bound to a networker.
For UDP
 * connections we have peer address here, so both TCP and UDP can be
 * handled with a simple send-like function
 */
#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
#define VALID_NMHANDLE(t)                      \
	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
	 atomic_load(&(t)->references) > 0)

typedef void (*isc__nm_closecb)(isc_nmhandle_t *);

struct isc_nmhandle {
	int magic;
	isc_refcount_t references;

	/*
	 * The socket is not 'attached' in the traditional
	 * reference-counting sense. Instead, we keep all handles in an
	 * array in the socket object. This way, we don't have circular
	 * dependencies and we can close all handles when we're destroying
	 * the socket.
	 */
	isc_nmsocket_t *sock;

	isc_sockaddr_t peer;
	isc_sockaddr_t local;
	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
	isc_nm_opaquecb_t dofree;  /* free extra callback, external */
#ifdef NETMGR_TRACE
	void *backtrace[TRACE_SIZE];
	int backtrace_size;
	LINK(isc_nmhandle_t) active_link;
#endif
	void *opaque;
	/* Flexible array member: per-handle user data of extrahandlesize. */
	char extra[];
};

typedef enum isc__netievent_type {
	netievent_udpconnect,
	netievent_udpclose,
	netievent_udpsend,
	netievent_udpread,
	netievent_udpcancel,

	netievent_tcpconnect,
	netievent_tcpclose,
	netievent_tcpsend,
	netievent_tcpstartread,
	netievent_tcppauseread,
	netievent_tcpaccept,
	netievent_tcpcancel,

	netievent_tcpdnsaccept,
	netievent_tcpdnsconnect,
	netievent_tcpdnsclose,
	netievent_tcpdnssend,
	netievent_tcpdnsread,
	netievent_tcpdnscancel,

	netievent_shutdown,
	netievent_stop,
	netievent_pause,

	netievent_connectcb,
	netievent_readcb,
	netievent_sendcb,

	netievent_task,
	netievent_privilegedtask,

	/*
	 * event type values higher than this will be treated
	 * as high-priority events, which can be processed
	 * while the netmgr is pausing or paused.
	 */
	netievent_prio = 0xff,

	netievent_udplisten,
	netievent_udpstop,
	netievent_tcplisten,
	netievent_tcpstop,
	netievent_tcpdnslisten,
	netievent_tcpdnsstop,

	netievent_resume,
	netievent_detach,
	netievent_close,
} isc__netievent_type;

/* One slot that can hold whichever callback flavor a request carries. */
typedef union {
	isc_nm_recv_cb_t recv;
	isc_nm_cb_t send;
	isc_nm_cb_t connect;
	isc_nm_accept_cb_t accept;
} isc__nm_cb_t;

/*
 * Wrapper around uv_req_t with 'our' fields in it. req->data should
 * always point to its parent. Note that we always allocate more than
 * sizeof(struct) because we make room for different req types;
 */
#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)

struct isc__nm_uvreq {
	int magic;
	isc_nmsocket_t *sock;
	isc_nmhandle_t *handle;
	char tcplen[2];	       /* The TCP DNS message length */
	uv_buf_t uvbuf;	       /* translated isc_region_t, to be
				* sent or received */
	isc_sockaddr_t local;  /* local address */
	isc_sockaddr_t peer;   /* peer address */
	isc__nm_cb_t cb;       /* callback */
	void *cbarg;	       /* callback argument */
	isc_nm_timer_t *timer; /* TCP write timer */

	union {
		uv_handle_t handle;
		uv_req_t req;
		uv_getaddrinfo_t getaddrinfo;
		uv_getnameinfo_t getnameinfo;
		uv_shutdown_t shutdown;
		uv_write_t write;
		uv_connect_t connect;
		uv_udp_send_t udp_send;
		uv_fs_t fs;
		uv_work_t work;
	} uv_req;
	ISC_LINK(isc__nm_uvreq_t) link;
};

/* Reference-counted wrapper around a uv_timer_t bound to a handle. */
struct isc_nm_timer {
	isc_refcount_t references;
	uv_timer_t timer;
	isc_nmhandle_t *handle;
	isc_nm_timer_cb cb;
	void *cbarg;
};

void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
 */
void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);

/*
 * The macros here are used to simulate the "inheritance" in C, there's the base
 * netievent structure that contains just its own type and socket, and there are
 * extended netievent types that also have handles or requests or other data.
 *
 * The macros here ensure that:
 *
 * 1. every netievent type has matching definition, declaration and
 *    implementation
 *
 * 2. we handle all the netievent types of same subclass the same, e.g. if the
 *    extended netievent contains handle, we always attach to the handle in
 *    the ctor and detach from the handle in dtor.
 *
 * There are three macros here for each netievent subclass:
 *
 * 1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
 *    this header
 *
 * 2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
 *    functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
 *    below in this header
 *
 * 3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
 *    either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
 */

/* Common "base class" fields shared by every socket-carrying netievent. */
#define NETIEVENT__SOCKET                \
	isc__netievent_type type;        \
	ISC_LINK(isc__netievent_t) link; \
	isc_nmsocket_t *sock;            \
	const char *file;                \
	unsigned int line;               \
	const char *func

typedef struct isc__netievent__socket {
	NETIEVENT__SOCKET;
} isc__netievent__socket_t;

#define NETIEVENT_SOCKET_TYPE(type) \
	typedef isc__netievent__socket_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_DECL(type)                              \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_nmsocket_t *sock);             \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_DEF(type)                                             \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock) {                          \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

typedef struct isc__netievent__socket_req {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
} isc__netievent__socket_req_t;

#define NETIEVENT_SOCKET_REQ_TYPE(type) \
	typedef isc__netievent__socket_req_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_DECL(type)                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(           \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                    \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_REQ_DEF(type)                                         \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {    \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

typedef struct isc__netievent__socket_req_result {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
	isc_result_t result;
} isc__netievent__socket_req_result_t;

#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
	typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type)                            \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(          \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
		isc_result_t result);                                     \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                   \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type)                                  \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req,      \
		isc_result_t result) {                                         \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
		ievent->result = result;                                       \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

typedef struct isc__netievent__socket_handle {
	NETIEVENT__SOCKET;
	isc_nmhandle_t *handle;
} isc__netievent__socket_handle_t;

#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
	typedef isc__netievent__socket_handle_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_HANDLE_DECL(type)                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(             \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                      \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_HANDLE_DEF(type)                                      \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) {  \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		isc_nmhandle_attach(handle, &ievent->handle);                  \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc_nmhandle_detach(&ievent->handle);                          \
		isc__nm_put_netievent(nm, ievent);                             \
	}

typedef struct isc__netievent__socket_quota {
	NETIEVENT__SOCKET;
	isc_quota_t *quota;
} isc__netievent__socket_quota_t;

#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
	typedef isc__netievent__socket_quota_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_QUOTA_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(         \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                  \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_QUOTA_DEF(type)                                       \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) {      \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->quota = quota;                                         \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* Netievent carrying a task instead of a socket (no attach/detach on put;
 * the task pointer is merely cleared). */
typedef struct isc__netievent__task {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
	isc_task_t *task;
} isc__netievent__task_t;

#define NETIEVENT_TASK_TYPE(type) \
	typedef isc__netievent__task_t isc__netievent_##type##_t;

#define NETIEVENT_TASK_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_task_t *task);                 \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent);

#define NETIEVENT_TASK_DEF(type)                                               \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_task_t *task) {                              \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		ievent->task = task;                                           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		ievent->task = NULL;                                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

typedef struct isc__netievent_udpsend {
	NETIEVENT__SOCKET;
	isc_sockaddr_t peer;
	isc__nm_uvreq_t *req;
} isc__netievent_udpsend_t;

/* Base netievent: type tag plus list linkage only. */
struct isc__netievent {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
};

#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t

#define NETIEVENT_DECL(type)                                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_DEF(type)                                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm) {                                                \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* Union sized to hold any netievent "subclass"; used by the allocator. */
typedef union {
	isc__netievent_t ni;
	isc__netievent__socket_t nis;
	isc__netievent__socket_req_t nisr;
	isc__netievent_udpsend_t nius;
	isc__netievent__socket_quota_t nisq;
} isc__netievent_storage_t;

/*
 * Work item for a uv_work threadpool.
 */
typedef struct isc__nm_work {
	isc_nm_t *netmgr;
	uv_work_t req;
	isc_nm_workcb_t cb;
	isc_nm_after_workcb_t after_cb;
	void *data;
} isc__nm_work_t;

/*
 * Network manager
 */
#define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M')
#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)

struct isc_nm {
	int magic;
	isc_refcount_t references;
	isc_mem_t *mctx;
	int nworkers;
	isc_mutex_t lock;
	isc_condition_t wkstatecond;
	isc_condition_t wkpausecond;
	isc__networker_t *workers;

	isc_stats_t *stats;

	uint_fast32_t workers_running;
	atomic_uint_fast32_t workers_paused;
	atomic_uint_fast32_t maxudp;

	bool load_balance_sockets;

	atomic_bool paused;

	/*
	 * Active connections are being closed and new connections are
	 * no longer allowed.
	 */
	atomic_bool closing;

	/*
	 * A worker is actively waiting for other workers, for example to
	 * stop listening; that means no other thread can do the same thing
	 * or pause, or we'll deadlock. We have to either re-enqueue our
	 * event or wait for the other one to finish if we want to pause.
	 */
	atomic_int interlocked;

	/*
	 * Timeout values for TCP connections, corresponding to
	 * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
	 * and tcp-advertised-timeout. Note that these are stored in
	 * milliseconds so they can be used directly with the libuv timer,
	 * but they are configured in tenths of seconds.
	 */
	atomic_uint_fast32_t init;
	atomic_uint_fast32_t idle;
	atomic_uint_fast32_t keepalive;
	atomic_uint_fast32_t advertised;

	isc_barrier_t pausing;
	isc_barrier_t resuming;

#ifdef NETMGR_TRACE
	ISC_LIST(isc_nmsocket_t) active_sockets;
#endif
};

typedef enum isc_nmsocket_type {
	isc_nm_udpsocket,
	isc_nm_udplistener, /* Aggregate of nm_udpsocks */
	isc_nm_tcpsocket,
	isc_nm_tcplistener,
	isc_nm_tcpdnslistener,
	isc_nm_tcpdnssocket,
} isc_nmsocket_type;

/*%
 * A universal structure for either a single socket or a group of
 * dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
 */
#define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K')
#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)

/*%
 * Index into socket stat counter arrays.
 */
enum {
	STATID_OPEN = 0,
	STATID_OPENFAIL = 1,
	STATID_CLOSE = 2,
	STATID_BINDFAIL = 3,
	STATID_CONNECTFAIL = 4,
	STATID_CONNECT = 5,
	STATID_ACCEPTFAIL = 6,
	STATID_ACCEPT = 7,
	STATID_SENDFAIL = 8,
	STATID_RECVFAIL = 9,
	STATID_ACTIVE = 10
};

typedef void (*isc_nm_closehandlecb_t)(void *arg);
/*%<
 * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
 * callbacks.
 */

struct isc_nmsocket {
	/*% Unlocked, RO */
	int magic;
	int tid;
	isc_nmsocket_type type;
	isc_nm_t *mgr;

	/*% Parent socket for multithreaded listeners */
	isc_nmsocket_t *parent;
	/*% Listener socket this connection was accepted on */
	isc_nmsocket_t *listener;
	/*% Self socket */
	isc_nmsocket_t *self;

	isc_barrier_t startlistening;
	isc_barrier_t stoplistening;

	/*%
	 * quota is the TCP client, attached when a TCP connection
	 * is established. pquota is a non-attached pointer to the
	 * TCP client quota, stored in listening sockets but only
	 * attached in connected sockets.
	 */
	isc_quota_t *quota;
	isc_quota_t *pquota;
	isc_quota_cb_t quotacb;

	/*%
	 * Socket statistics
	 */
	const isc_statscounter_t *statsindex;

	/*%
	 * TCP read/connect timeout timers.
	 */
	uv_timer_t read_timer;
	uint64_t read_timeout;
	uint64_t connect_timeout;

	/*%
	 * TCP write timeout timer.
	 */
	uint64_t write_timeout;

	/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
	isc_nmsocket_t *outer;

	/*% server socket for connections */
	isc_nmsocket_t *server;

	/*% Child sockets for multi-socket setups */
	isc_nmsocket_t *children;
	uint_fast32_t nchildren;
	isc_sockaddr_t iface;
	isc_nmhandle_t *statichandle;
	isc_nmhandle_t *outerhandle;

	/*% Extra data allocated at the end of each isc_nmhandle_t */
	size_t extrahandlesize;

	/*% TCP backlog */
	int backlog;

	/*% libuv data */
	uv_os_sock_t fd;
	union uv_any_handle uv_handle;

	/*% Peer address */
	isc_sockaddr_t peer;

	/* Atomic */
	/*% Number of running (e.g. listening) child sockets */
	atomic_uint_fast32_t rchildren;

	/*%
	 * Socket is active if it's listening, working, etc. If it's
	 * closing, then it doesn't make a sense, for example, to
	 * push handles or reqs for reuse.
	 */
	atomic_bool active;
	atomic_bool destroying;

	/*%
	 * Socket is closed if it's not active and all the possible
	 * callbacks were fired, there are no active handles, etc.
	 * If active==false but closed==false, that means the socket
	 * is closing.
	 */
	atomic_bool closing;
	atomic_bool closed;
	atomic_bool listening;
	atomic_bool connecting;
	atomic_bool connected;
	bool accepting;
	bool reading;
	atomic_bool timedout;
	isc_refcount_t references;

	/*%
	 * Established an outgoing connection, as client not server.
	 */
	atomic_bool client;

	/*%
	 * TCPDNS socket has been set not to pipeline.
	 */
	atomic_bool sequential;

	/*%
	 * The socket is processing read callback, this is guard to not read
	 * data before the readcb is back.
	 */
	bool processing;

	/*%
	 * A TCP socket has had isc_nm_pauseread() called.
	 */
	atomic_bool readpaused;

	/*%
	 * A TCP or TCPDNS socket has been set to use the keepalive
	 * timeout instead of the default idle timeout.
	 */
	atomic_bool keepalive;

	/*%
	 * 'spare' handles for that can be reused to avoid allocations,
	 * for UDP.
	 */
	isc_astack_t *inactivehandles;
	isc_astack_t *inactivereqs;

	/*%
	 * Used to wait for TCP listening events to complete, and
	 * for the number of running children to reach zero during
	 * shutdown.
	 *
	 * We use two condition variables to prevent the race where the netmgr
	 * threads would be able to finish and destroy the socket before it's
	 * unlocked by the isc_nm_listen<proto>() function. So, the flow is as
	 * follows:
	 *
	 *   1. parent thread creates all children sockets and passes then to
	 *      netthreads, looks at the signaling variable and WAIT(cond) until
	 *      the children are done initializing
	 *
	 *   2. the events get picked by netthreads, calls the libuv API (and
	 *      either succeeds or fails) and WAIT(scond) until all other
	 *      children sockets in netthreads are initialized and the listening
	 *      socket lock is unlocked
	 *
	 *   3. the control is given back to the parent thread which now either
	 *      returns success or shutdowns the listener if an error has
	 *      occurred in the children netthread
	 *
	 * NOTE: The other approach would be doing an extra attach to the parent
	 * listening socket, and then detach it in the parent thread, but that
	 * breaks the promise that once the libuv socket is initialized on the
	 * nmsocket, the nmsocket needs to be handled only by matching
	 * netthread, so in fact that would add a complexity in a way that
	 * isc__nmsocket_detach would have to be converted to use an
	 * asynchronous netievent.
	 */
	isc_mutex_t lock;
	isc_condition_t cond;
	isc_condition_t scond;

	/*%
	 * Used to pass a result back from listen or connect events.
	 */
	isc_result_t result;

	/*%
	 * Current number of active handles.
	 */
	atomic_int_fast32_t ah;

	/*% Buffer for TCPDNS processing */
	size_t buf_size;
	size_t buf_len;
	unsigned char *buf;

	/*%
	 * This function will be called with handle->sock
	 * as the argument whenever a handle's references drop
	 * to zero, after its reset callback has been called.
	 */
	isc_nm_closehandlecb_t closehandle_cb;

	isc_nmhandle_t *recv_handle;
	isc_nm_recv_cb_t recv_cb;
	void *recv_cbarg;
	bool recv_read;

	isc_nm_cb_t connect_cb;
	void *connect_cbarg;

	isc_nm_accept_cb_t accept_cb;
	void *accept_cbarg;

	atomic_int_fast32_t active_child_connections;

#ifdef NETMGR_TRACE
	void *backtrace[TRACE_SIZE];
	int backtrace_size;
	LINK(isc_nmsocket_t) active_link;
	ISC_LIST(isc_nmhandle_t) active_handles;
#endif
};

bool
isc__nm_in_netthread(void);
/*%
 * Returns 'true' if we're in the network thread.
 */

void
isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * If the caller is already in the matching nmthread, process the netievent
 * directly, if not enqueue using isc__nm_enqueue_ievent().
 */

void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only safe
 * way to use an isc__networker_t from another thread.)
 */

void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
/*%<
 * Free a buffer allocated for a receive operation.
 *
 * Note that as currently implemented, this doesn't actually
 * free anything, marks the isc__networker's UDP receive buffer
 * as "not in use".
 */

isc_nmhandle_t *
isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
		   isc_sockaddr_t *local FLARG);
/*%<
 * Get a handle for the socket 'sock', allocating a new one
 * if there isn't one available in 'sock->inactivehandles'.
 *
 * If 'peer' is not NULL, set the handle's peer address to 'peer',
 * otherwise set it to 'sock->peer'.
 *
 * If 'local' is not NULL, set the handle's local address to 'local',
 * otherwise set it to 'sock->iface->addr'.
 *
 * 'sock' will be attached to 'handle->sock'. The caller may need
 * to detach the socket afterward.
 */

isc__nm_uvreq_t *
isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
/*%<
 * Get a UV request structure for the socket 'sock', allocating a
 * new one if there isn't one available in 'sock->inactivereqs'.
 */

void
isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
/*%<
 * Completes the use of a UV request structure, setting '*req' to NULL.
 *
 * The UV request is pushed onto the 'sock->inactivereqs' stack or,
 * if that doesn't work, freed.
 */

void
isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
		    isc_sockaddr_t *iface FLARG);
/*%<
 * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
 * and its interface to 'iface'.
 */

void
isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
/*%<
 * Attach to a socket, increasing refcount
 */

void
isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
/*%<
 * Detach from socket, decreasing refcount and possibly destroying the
 * socket if it's no longer referenced.
 */

void
isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
/*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
 * if there are no remaining references or active handles.
 */

void
isc__nmsocket_shutdown(isc_nmsocket_t *sock);
/*%<
 * Initiate the socket shutdown which actively calls the active
 * callbacks.
 */

bool
isc__nmsocket_active(isc_nmsocket_t *sock);
/*%<
 * Determine whether 'sock' is active by checking 'sock->active'
 * or, for child sockets, 'sock->parent->active'.
 */

bool
isc__nmsocket_deactivate(isc_nmsocket_t *sock);
/*%<
 * @brief Deactivate active socket
 *
 * Atomically deactivate the socket by setting @p sock->active or, for child
 * sockets, @p sock->parent->active to @c false
 *
 * @param[in] sock - valid nmsocket
 * @return @c false if the socket was already inactive, @c true otherwise
 */

void
isc__nmsocket_clearcb(isc_nmsocket_t *sock);
/*%<
 * Clear the recv and accept callbacks in 'sock'.
 */

void
isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_start(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
bool
isc__nmsocket_timer_running(isc_nmsocket_t *sock);
/*%<
 * Start/stop/restart/check the timeout on the socket
 */

void
isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
		  isc_result_t eresult, bool async);

void
isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a connect callback on the socket, used to call the callback
 */

void
isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
	       isc_result_t eresult);
void
isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);

/*%<
 * Issue a read callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 *
 */

void
isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
	       isc_result_t eresult, bool async);
void
isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a write callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Walk through all uv handles, get the underlying sockets and issue
 * close on them.
 */

void
isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for UDP handles.
 */

void
isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*
 * Back-end implementation of isc_nm_read() for UDP handles.
 */

void
isc__nm_udp_close(isc_nmsocket_t *sock);
/*%<
 * Close a UDP socket.
 */

void
isc__nm_udp_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected UDP handle.
 */

void
isc__nm_udp_shutdown(isc_nmsocket_t *sock);
/*%<
 * Called during the shutdown process to close and clean up connected
 * sockets.
 */

void
isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */

void
isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set or clear the recv timeout for the UDP socket associated with 'handle'.
 */

void
isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous UDP events (listen, stoplisten, send).
 */

void
isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for TCP handles.
 */

void
isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*
 * Back-end implementation of isc_nm_read() for TCP handles.
 */

void
isc__nm_tcp_close(isc_nmsocket_t *sock);
/*%<
 * Close a TCP socket.
1209 */ 1210 void 1211 isc__nm_tcp_pauseread(isc_nmhandle_t *handle); 1212 /*%< 1213 * Pause reading on this handle, while still remembering the callback. 1214 */ 1215 1216 void 1217 isc__nm_tcp_resumeread(isc_nmhandle_t *handle); 1218 /*%< 1219 * Resume reading from socket. 1220 * 1221 */ 1222 1223 void 1224 isc__nm_tcp_shutdown(isc_nmsocket_t *sock); 1225 /*%< 1226 * Called during the shutdown process to close and clean up connected 1227 * sockets. 1228 */ 1229 1230 void 1231 isc__nm_tcp_cancelread(isc_nmhandle_t *handle); 1232 /*%< 1233 * Stop reading on a connected TCP handle. 1234 */ 1235 1236 void 1237 isc__nm_tcp_stoplistening(isc_nmsocket_t *sock); 1238 /*%< 1239 * Stop listening on 'sock'. 1240 */ 1241 1242 int_fast32_t 1243 isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock); 1244 /*%< 1245 * Returns the number of active connections for the TCP listener socket. 1246 */ 1247 1248 void 1249 isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); 1250 /*%< 1251 * Set the read timeout for the TCP socket associated with 'handle'. 
1252 */ 1253 1254 void 1255 isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1256 void 1257 isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0); 1258 void 1259 isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1260 void 1261 isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0); 1262 void 1263 isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0); 1264 void 1265 isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0); 1266 void 1267 isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0); 1268 void 1269 isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0); 1270 void 1271 isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0); 1272 void 1273 isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0); 1274 void 1275 isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0); 1276 /*%< 1277 * Callback handlers for asynchronous TCP events (connect, listen, 1278 * stoplisten, send, read, pause, close). 1279 */ 1280 1281 void 1282 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1283 void 1284 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1285 void 1286 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); 1287 1288 void 1289 isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, 1290 isc_nm_cb_t cb, void *cbarg); 1291 /*%< 1292 * Back-end implementation of isc_nm_send() for TCPDNS handles. 1293 */ 1294 1295 void 1296 isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock); 1297 1298 void 1299 isc__nm_tcpdns_close(isc_nmsocket_t *sock); 1300 /*%< 1301 * Close a TCPDNS socket. 1302 */ 1303 1304 void 1305 isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock); 1306 /*%< 1307 * Stop listening on 'sock'. 
1308 */ 1309 1310 void 1311 isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout); 1312 /*%< 1313 * Set the read timeout and reset the timer for the TCPDNS socket 1314 * associated with 'handle', and the TCP socket it wraps around. 1315 */ 1316 1317 void 1318 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0); 1319 void 1320 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0); 1321 void 1322 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0); 1323 void 1324 isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0); 1325 void 1326 isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0); 1327 void 1328 isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0); 1329 void 1330 isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0); 1331 void 1332 isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0); 1333 /*%< 1334 * Callback handlers for asynchronous TCPDNS events. 1335 */ 1336 1337 void 1338 isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg); 1339 /* 1340 * Back-end implementation of isc_nm_read() for TCPDNS handles. 1341 */ 1342 1343 void 1344 isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle); 1345 /*%< 1346 * Stop reading on a connected TCPDNS handle. 1347 */ 1348 1349 #define isc__nm_uverr2result(x) \ 1350 isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__) 1351 isc_result_t 1352 isc___nm_uverr2result(int uverr, bool dolog, const char *file, 1353 unsigned int line, const char *func); 1354 /*%< 1355 * Convert a libuv error value into an isc_result_t. The 1356 * list of supported error values is not complete; new users 1357 * of this function should add any expected errors that are 1358 * not already there. 
1359 */ 1360 1361 bool 1362 isc__nm_acquire_interlocked(isc_nm_t *mgr); 1363 /*%< 1364 * Try to acquire interlocked state; return true if successful. 1365 */ 1366 1367 void 1368 isc__nm_drop_interlocked(isc_nm_t *mgr); 1369 /*%< 1370 * Drop interlocked state; signal waiters. 1371 */ 1372 1373 void 1374 isc__nm_acquire_interlocked_force(isc_nm_t *mgr); 1375 /*%< 1376 * Actively wait for interlocked state. 1377 */ 1378 1379 void 1380 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid); 1381 /*%< 1382 * Increment socket-related statistics counters. 1383 */ 1384 1385 void 1386 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid); 1387 /*%< 1388 * Decrement socket-related statistics counters. 1389 */ 1390 1391 isc_result_t 1392 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp); 1393 /*%< 1394 * Platform independent socket() version 1395 */ 1396 1397 void 1398 isc__nm_closesocket(uv_os_sock_t sock); 1399 /*%< 1400 * Platform independent closesocket() version 1401 */ 1402 1403 isc_result_t 1404 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family); 1405 /*%< 1406 * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle 1407 */ 1408 1409 isc_result_t 1410 isc__nm_socket_reuse(uv_os_sock_t fd); 1411 /*%< 1412 * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd 1413 */ 1414 1415 isc_result_t 1416 isc__nm_socket_reuse_lb(uv_os_sock_t fd); 1417 /*%< 1418 * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd 1419 */ 1420 1421 isc_result_t 1422 isc__nm_socket_incoming_cpu(uv_os_sock_t fd); 1423 /*%< 1424 * Set the SO_INCOMING_CPU socket option on the fd if available 1425 */ 1426 1427 isc_result_t 1428 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family); 1429 /*%< 1430 * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket 1431 * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT 1432 */ 1433 1434 isc_result_t 1435 
isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms); 1436 /*%< 1437 * Set the connection timeout in milliseconds, on non-Linux platforms, 1438 * the minimum value must be at least 1000 (1 second). 1439 */ 1440 1441 isc_result_t 1442 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd); 1443 /*%< 1444 * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY). 1445 */ 1446 1447 /* 1448 * typedef all the netievent types 1449 */ 1450 1451 NETIEVENT_SOCKET_TYPE(close); 1452 NETIEVENT_SOCKET_TYPE(tcpclose); 1453 NETIEVENT_SOCKET_TYPE(tcplisten); 1454 NETIEVENT_SOCKET_TYPE(tcppauseread); 1455 NETIEVENT_SOCKET_TYPE(tcpstop); 1456 NETIEVENT_SOCKET_TYPE(udpclose); 1457 NETIEVENT_SOCKET_TYPE(udplisten); 1458 NETIEVENT_SOCKET_TYPE(udpread); 1459 /* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */ 1460 NETIEVENT_SOCKET_TYPE(udpstop); 1461 1462 NETIEVENT_SOCKET_TYPE(tcpdnsclose); 1463 NETIEVENT_SOCKET_TYPE(tcpdnsread); 1464 NETIEVENT_SOCKET_TYPE(tcpdnsstop); 1465 NETIEVENT_SOCKET_TYPE(tcpdnslisten); 1466 NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect); 1467 NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend); 1468 NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel); 1469 NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept); 1470 1471 NETIEVENT_SOCKET_REQ_TYPE(tcpconnect); 1472 NETIEVENT_SOCKET_REQ_TYPE(tcpsend); 1473 NETIEVENT_SOCKET_TYPE(tcpstartread); 1474 NETIEVENT_SOCKET_REQ_TYPE(udpconnect); 1475 1476 NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb); 1477 NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb); 1478 NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb); 1479 1480 NETIEVENT_SOCKET_HANDLE_TYPE(detach); 1481 NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel); 1482 NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel); 1483 1484 NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept); 1485 1486 NETIEVENT_TYPE(pause); 1487 NETIEVENT_TYPE(resume); 1488 NETIEVENT_TYPE(shutdown); 1489 NETIEVENT_TYPE(stop); 1490 1491 NETIEVENT_TASK_TYPE(task); 1492 NETIEVENT_TASK_TYPE(privilegedtask); 1493 1494 /* Now declared the helper functions */ 1495 
1496 NETIEVENT_SOCKET_DECL(close); 1497 NETIEVENT_SOCKET_DECL(tcpclose); 1498 NETIEVENT_SOCKET_DECL(tcplisten); 1499 NETIEVENT_SOCKET_DECL(tcppauseread); 1500 NETIEVENT_SOCKET_DECL(tcpstartread); 1501 NETIEVENT_SOCKET_DECL(tcpstop); 1502 NETIEVENT_SOCKET_DECL(udpclose); 1503 NETIEVENT_SOCKET_DECL(udplisten); 1504 NETIEVENT_SOCKET_DECL(udpread); 1505 NETIEVENT_SOCKET_DECL(udpsend); 1506 NETIEVENT_SOCKET_DECL(udpstop); 1507 1508 NETIEVENT_SOCKET_DECL(tcpdnsclose); 1509 NETIEVENT_SOCKET_DECL(tcpdnsread); 1510 NETIEVENT_SOCKET_DECL(tcpdnsstop); 1511 NETIEVENT_SOCKET_DECL(tcpdnslisten); 1512 NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect); 1513 NETIEVENT_SOCKET_REQ_DECL(tcpdnssend); 1514 NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel); 1515 NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept); 1516 1517 NETIEVENT_SOCKET_REQ_DECL(tcpconnect); 1518 NETIEVENT_SOCKET_REQ_DECL(tcpsend); 1519 NETIEVENT_SOCKET_REQ_DECL(udpconnect); 1520 1521 NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb); 1522 NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb); 1523 NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb); 1524 1525 NETIEVENT_SOCKET_HANDLE_DECL(udpcancel); 1526 NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel); 1527 NETIEVENT_SOCKET_DECL(detach); 1528 1529 NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept); 1530 1531 NETIEVENT_DECL(pause); 1532 NETIEVENT_DECL(resume); 1533 NETIEVENT_DECL(shutdown); 1534 NETIEVENT_DECL(stop); 1535 1536 NETIEVENT_TASK_DECL(task); 1537 NETIEVENT_TASK_DECL(privilegedtask); 1538 1539 void 1540 isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1541 void 1542 isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1543 void 1544 isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result); 1545 1546 isc_result_t 1547 isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock); 1548 1549 isc__nm_uvreq_t * 1550 isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr); 1551 1552 void 1553 isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf); 1554 1555 void 1556 
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, 1557 const struct sockaddr *addr, unsigned flags); 1558 void 1559 isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); 1560 void 1561 isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); 1562 1563 isc_result_t 1564 isc__nm_start_reading(isc_nmsocket_t *sock); 1565 void 1566 isc__nm_stop_reading(isc_nmsocket_t *sock); 1567 isc_result_t 1568 isc__nm_process_sock_buffer(isc_nmsocket_t *sock); 1569 void 1570 isc__nm_resume_processing(void *arg); 1571 bool 1572 isc__nmsocket_closing(isc_nmsocket_t *sock); 1573 bool 1574 isc__nm_closing(isc_nmsocket_t *sock); 1575 1576 void 1577 isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len); 1578 1579 void 1580 isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, 1581 isc_result_t eresult); 1582 void 1583 isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult); 1584 void 1585 isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, 1586 isc_result_t eresult, bool async); 1587 void 1588 isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async); 1589 1590 void 1591 isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota); 1592 1593 /* 1594 * Timeout callbacks 1595 */ 1596 void 1597 isc__nmsocket_connecttimeout_cb(uv_timer_t *timer); 1598 void 1599 isc__nmsocket_readtimeout_cb(uv_timer_t *timer); 1600 void 1601 isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult); 1602 1603 /*%< 1604 * 1605 * Maximum number of simultaneous handles in flight supported for a single 1606 * connected TCPDNS socket. This value was chosen arbitrarily, and may be 1607 * changed in the future. 1608 */ 1609 #define STREAM_CLIENTS_PER_CONN 23 1610 1611 #define UV_RUNTIME_CHECK(func, ret) \ 1612 if (ret != 0) { \ 1613 isc_error_fatal(__FILE__, __LINE__, "%s failed: %s\n", #func, \ 1614 uv_strerror(ret)); \ 1615 } 1616