1 /* $NetBSD: dispatch.c,v 1.9 2023/01/25 21:43:30 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16 /*! \file */
17
18 #include <inttypes.h>
19 #include <stdbool.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23
24 #include <isc/mem.h>
25 #include <isc/mutex.h>
26 #include <isc/portset.h>
27 #include <isc/print.h>
28 #include <isc/random.h>
29 #include <isc/socket.h>
30 #include <isc/stats.h>
31 #include <isc/string.h>
32 #include <isc/task.h>
33 #include <isc/time.h>
34 #include <isc/util.h>
35
36 #include <dns/acl.h>
37 #include <dns/dispatch.h>
38 #include <dns/events.h>
39 #include <dns/log.h>
40 #include <dns/message.h>
41 #include <dns/portlist.h>
42 #include <dns/stats.h>
43 #include <dns/tcpmsg.h>
44 #include <dns/types.h>
45
/* List of response entries; the element type of the qid hash buckets. */
typedef ISC_LIST(dns_dispentry_t) dns_displist_t;

/* Dedicated dispatch socket (defined below) and a list of them. */
typedef struct dispsocket dispsocket_t;
typedef ISC_LIST(dispsocket_t) dispsocketlist_t;

/* Per-dispatch port table entry (defined below) and a list of them. */
typedef struct dispportentry dispportentry_t;
typedef ISC_LIST(dispportentry_t) dispportlist_t;
53
/*
 * Hash tables of outstanding response entries ('qid_table') and dedicated
 * dispatch sockets ('sock_table').  Both are indexed by a bucket number in
 * the range [0, qid_nbuckets), as produced by dns_hash().
 */
typedef struct dns_qid {
	unsigned int magic;	      /*%< QID_MAGIC when valid */
	unsigned int qid_nbuckets;    /*%< hash table size */
	unsigned int qid_increment;   /*%< id increment on collision */
	isc_mutex_t lock;	      /*%< taken around table access */
	dns_displist_t *qid_table;    /*%< the table itself */
	dispsocketlist_t *sock_table; /*%< socket table */
} dns_qid_t;
62
/*
 * Dispatch manager: state shared by the dispatches linked on 'list',
 * including the UDP buffer accounting and the arrays of local ports that
 * dedicated sockets may be bound to.
 */
struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int magic;
	isc_mem_t *mctx;
	dns_acl_t *blackhole;
	dns_portlist_t *portlist;
	isc_stats_t *stats; /*%< may be NULL; see inc_stats()/dec_stats() */

	/* Locked by "lock". */
	isc_mutex_t lock;
	unsigned int state; /*%< MGR_SHUTTINGDOWN flag */
	ISC_LIST(dns_dispatch_t) list;

	/* locked by buffer_lock */
	dns_qid_t *qid;
	isc_mutex_t buffer_lock;
	unsigned int buffers;	 /*%< allocated buffers */
	unsigned int buffersize; /*%< size of each buffer */
	unsigned int maxbuffers; /*%< max buffers */

	isc_refcount_t irefs;

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it in a
	 * more sophisticated way, such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is that the
	 * saved memory isn't worth the implementation complexity, considering
	 * the fact that the whole BIND9 process (which is mainly named)
	 * already requires a pretty large memory footprint.  We may, however,
	 * have to revisit the decision when we want to use it as a separate
	 * module for an environment where memory requirements are stricter.
	 */
	in_port_t *v4ports;    /*%< available ports for IPv4 */
	unsigned int nv4ports; /*%< # of available ports for IPv4 */
	in_port_t *v6ports;    /*%< available ports for IPv6 */
	unsigned int nv6ports; /*%< # of available ports for IPv6 */
};
105
/* Bit in dns_dispatchmgr 'state', and a predicate that tests it. */
#define MGR_SHUTTINGDOWN       0x00000001U
#define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when dispatch 'd' has DNS_DISPATCHATTR_PRIVATE set in 'attributes'. */
#define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
110
/*
 * One outstanding response entry.  Entries are kept in the qid_table
 * buckets of a dns_qid_t and are matched by entry_search() on the
 * (id, host, port) triple.
 */
struct dns_dispentry {
	unsigned int magic; /*%< RESPONSE_MAGIC when valid */
	dns_dispatch_t *disp;
	dns_messageid_t id;  /*%< compared by entry_search() */
	in_port_t port;	     /*%< compared by entry_search() */
	unsigned int bucket; /*%< qid_table bucket; see linear_next() */
	isc_sockaddr_t host; /*%< compared by entry_search() */
	isc_task_t *task;
	isc_taskaction_t action;
	void *arg;
	bool item_out;
	dispsocket_t *dispsocket; /*%< cleared by deactivate_dispsocket() */
	ISC_LIST(dns_dispatchevent_t) items;
	ISC_LINK(dns_dispentry_t) link; /*%< bucket list linkage */
};
126
/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) will be set to twice
 * this value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS 2048
#endif /* ifndef DNS_DISPATCH_POOLSOCKS */

/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * sockets than the quota, new queries will purge the oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take a long time and end in a timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA 3072
#endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */
147
/*
 * A dedicated dispatch socket.  get_dispsocket() creates or recycles these,
 * deactivate_dispsocket() parks them on the owning dispatch's inactive list
 * and destroy_dispsocket() frees them.
 */
struct dispsocket {
	unsigned int magic;   /*%< DISPSOCK_MAGIC when valid */
	isc_socket_t *socket; /*%< underlying socket, or NULL */
	dns_dispatch_t *disp; /*%< owning dispatch */
	isc_sockaddr_t host;  /*%< destination; see socket_search() */
	in_port_t localport;  /* XXX: should be removed later */
	dispportentry_t *portentry; /*%< port table entry, or NULL */
	dns_dispentry_t *resp;
	isc_task_t *task;	    /*%< attached from one of disp->task[] */
	ISC_LINK(dispsocket_t) link; /*%< active/inactive list linkage */
	unsigned int bucket;	     /*%< index into qid->sock_table */
	ISC_LINK(dispsocket_t) blink; /*%< sock_table bucket linkage */
};
161
/*%
 * A port table entry.  We remember every port we first open in a table with
 * a reference counter so that we can 'reuse' the same port (with different
 * destination addresses) using the SO_REUSEADDR socket option.
 *
 * Entries are created by new_portentry() with one reference and are
 * unlinked and freed by deref_portentry() when the last reference goes.
 */
struct dispportentry {
	in_port_t port;	     /*%< local port number (table key) */
	isc_refcount_t refs; /*%< reference counter */
	ISC_LINK(struct dispportentry) link; /*%< bucket list linkage */
};
172
/*
 * Number of buckets in a dispatch's port table; the bucket for a port is
 * 'port % DNS_DISPATCH_PORTTABLESIZE' (see port_search()).
 */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE 1024
#endif /* ifndef DNS_DISPATCH_PORTTABLESIZE */

/* NOTE(review): presumably a sentinel bucket value; no use is visible here. */
#define INVALID_BUCKET (0xffffdead)
178
/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2, as it will divide 32-bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS 64
185
/*
 * A single dispatch.  It is linked on its manager's list and owns the
 * active and inactive lists of dedicated sockets plus the per-dispatch port
 * table.
 */
struct dns_dispatch {
	/* Unlocked. */
	unsigned int magic;	/*%< magic */
	dns_dispatchmgr_t *mgr; /*%< dispatch manager */
	int ntasks;		/*%< number of entries used in task[] */
	/*%
	 * internal task buckets.  We use multiple tasks to distribute various
	 * socket events well when using separate dispatch sockets.  We use the
	 * 1st task (task[0]) for internal control events.
	 */
	isc_task_t *task[MAX_INTERNAL_TASKS];
	isc_socket_t *socket;	  /*%< isc socket attached to */
	isc_sockaddr_t local;	  /*%< local address */
	in_port_t localport;	  /*%< local UDP port */
	isc_sockaddr_t peer;	  /*%< peer address (TCP) */
	isc_dscp_t dscp;	  /*%< "listen-on" DSCP value */
	unsigned int maxrequests; /*%< max requests */
	isc_event_t *ctlevent;	  /*%< control event sent to task[0] */

	isc_mem_t *sepool; /*%< pool for socket events */

	/*% Locked by mgr->lock. */
	ISC_LINK(dns_dispatch_t) link;

	/* Locked by "lock". */
	isc_mutex_t lock; /*%< locks all below */
	isc_sockettype_t socktype;
	unsigned int attributes;
	unsigned int refcount;		  /*%< number of users */
	dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
	/*% State flags; recv_pending records whether a recv() is pending. */
	unsigned int shutting_down : 1, shutdown_out : 1, connected : 1,
		tcpmsg_valid : 1, recv_pending : 1;
	isc_result_t shutdown_why;
	ISC_LIST(dispsocket_t) activesockets;
	ISC_LIST(dispsocket_t) inactivesockets;
	unsigned int nsockets;	 /*%< dispsockets allocated and not freed */
	unsigned int requests;	 /*%< how many requests we have */
	unsigned int tcpbuffers; /*%< allocated buffers */
	dns_tcpmsg_t tcpmsg;	 /*%< for tcp streams */
	dns_qid_t *qid;
	dispportlist_t *port_table; /*%< hold ports 'owned' by us */
};
231
/*
 * Magic numbers and the matching validity predicates for the structures
 * in this file: dns_qid_t, dns_dispentry_t, dispsocket_t, dns_dispatch_t
 * and dns_dispatchmgr_t, in that order.
 */
#define QID_MAGIC    ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC	  ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC	  ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC	  ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)  ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
246
/*
 * Select the qid table set for 'disp': a TCP dispatch uses its own
 * (disp)->qid, any other dispatch uses the manager's shared one.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single expression when it appears next to an operator that binds more
 * tightly than ?: (comparison, member access, arithmetic, ...).
 */
#define DNS_QID(disp)                                            \
	(((disp)->socktype == isc_sockettype_tcp) ? (disp)->qid \
						  : (disp)->mgr->qid)
250
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of a
 * race between the creation of qid and access to the port buffer; in
 * practice, however, this should be safe because qid isn't created until the
 * first dispatch is created and there should be no contention until then.
 *
 * Both macros are statement macros and must be followed by a semicolon.
 * They are wrapped in do { } while (0) so that the inner 'if' cannot capture
 * an 'else' that follows the macro invocation.
 */
#define PORTBUFLOCK(mgr)                           \
	do {                                       \
		if ((mgr)->qid != NULL) {          \
			LOCK(&((mgr)->qid->lock)); \
		}                                  \
	} while (0)
#define PORTBUFUNLOCK(mgr)                           \
	do {                                         \
		if ((mgr)->qid != NULL) {            \
			UNLOCK((&(mgr)->qid->lock)); \
		}                                    \
	} while (0)
264
265 /*
266 * Statics.
267 */
268 static dns_dispentry_t *
269 entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t,
270 unsigned int);
271 static bool
272 destroy_disp_ok(dns_dispatch_t *);
273 static void
274 destroy_disp(isc_task_t *task, isc_event_t *event);
275 static void
276 destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
277 static void
278 deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
279 static void
280 udp_exrecv(isc_task_t *, isc_event_t *);
281 static void
282 udp_shrecv(isc_task_t *, isc_event_t *);
283 static void
284 udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
285 static void
286 tcp_recv(isc_task_t *, isc_event_t *);
287 static isc_result_t
288 startrecv(dns_dispatch_t *, dispsocket_t *);
289 static uint32_t
290 dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t);
291 static void
292 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
293 static void *
294 allocate_udp_buffer(dns_dispatch_t *disp);
295 static void
296 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
297 static dns_dispatchevent_t *
298 allocate_devent(dns_dispatch_t *disp);
299 static void
300 do_cancel(dns_dispatch_t *disp);
301 static dns_dispentry_t *
302 linear_first(dns_qid_t *disp);
303 static dns_dispentry_t *
304 linear_next(dns_qid_t *disp, dns_dispentry_t *resp);
305 static void
306 dispatch_free(dns_dispatch_t **dispp);
307 static isc_result_t
308 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
309 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr,
310 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly);
311 static isc_result_t
312 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
313 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr,
314 unsigned int maxrequests, unsigned int attributes,
315 dns_dispatch_t **dispp, isc_socket_t *dup_socket);
316 static bool
317 destroy_mgr_ok(dns_dispatchmgr_t *mgr);
318 static void
319 destroy_mgr(dns_dispatchmgr_t **mgrp);
320 static isc_result_t
321 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
322 unsigned int increment, dns_qid_t **qidp, bool needaddrtable);
323 static void
324 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
325 static isc_result_t
326 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local,
327 unsigned int options, isc_socket_t **sockp,
328 isc_socket_t *dup_socket, bool duponly);
329 static bool
330 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
331 isc_sockaddr_t *sockaddrp);
332
/* Shorthand used by the logging calls below: LVL(x) is ISC_LOG_DEBUG(x). */
#define LVL(x) ISC_LOG_DEBUG(x)
334
335 static void
336 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
337 ISC_FORMAT_PRINTF(3, 4);
338
339 static void
mgr_log(dns_dispatchmgr_t * mgr,int level,const char * fmt,...)340 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
341 char msgbuf[2048];
342 va_list ap;
343
344 if (!isc_log_wouldlog(dns_lctx, level)) {
345 return;
346 }
347
348 va_start(ap, fmt);
349 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
350 va_end(ap);
351
352 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
353 DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr,
354 msgbuf);
355 }
356
357 static void
inc_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)358 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359 if (mgr->stats != NULL) {
360 isc_stats_increment(mgr->stats, counter);
361 }
362 }
363
364 static void
dec_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)365 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
366 if (mgr->stats != NULL) {
367 isc_stats_decrement(mgr->stats, counter);
368 }
369 }
370
371 static void
372 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
373 ISC_FORMAT_PRINTF(3, 4);
374
375 static void
dispatch_log(dns_dispatch_t * disp,int level,const char * fmt,...)376 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
377 char msgbuf[2048];
378 va_list ap;
379
380 if (!isc_log_wouldlog(dns_lctx, level)) {
381 return;
382 }
383
384 va_start(ap, fmt);
385 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
386 va_end(ap);
387
388 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
389 DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp,
390 msgbuf);
391 }
392
393 static void
394 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level,
395 const char *fmt, ...) ISC_FORMAT_PRINTF(4, 5);
396
397 static void
request_log(dns_dispatch_t * disp,dns_dispentry_t * resp,int level,const char * fmt,...)398 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level,
399 const char *fmt, ...) {
400 char msgbuf[2048];
401 char peerbuf[256];
402 va_list ap;
403
404 if (!isc_log_wouldlog(dns_lctx, level)) {
405 return;
406 }
407
408 va_start(ap, fmt);
409 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
410 va_end(ap);
411
412 if (VALID_RESPONSE(resp)) {
413 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
414 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
415 DNS_LOGMODULE_DISPATCH, level,
416 "dispatch %p response %p %s: %s", disp, resp,
417 peerbuf, msgbuf);
418 } else {
419 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
420 DNS_LOGMODULE_DISPATCH, level,
421 "dispatch %p req/resp %p: %s", disp, resp,
422 msgbuf);
423 }
424 }
425
426 /*
427 * Return a hash of the destination and message id.
428 */
429 static uint32_t
dns_hash(dns_qid_t * qid,const isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port)430 dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
431 in_port_t port) {
432 uint32_t ret;
433
434 ret = isc_sockaddr_hash(dest, true);
435 ret ^= ((uint32_t)id << 16) | port;
436 ret %= qid->qid_nbuckets;
437
438 INSIST(ret < qid->qid_nbuckets);
439
440 return (ret);
441 }
442
443 /*
444 * Find the first entry in 'qid'. Returns NULL if there are no entries.
445 */
446 static dns_dispentry_t *
linear_first(dns_qid_t * qid)447 linear_first(dns_qid_t *qid) {
448 dns_dispentry_t *ret;
449 unsigned int bucket;
450
451 bucket = 0;
452
453 while (bucket < qid->qid_nbuckets) {
454 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
455 if (ret != NULL) {
456 return (ret);
457 }
458 bucket++;
459 }
460
461 return (NULL);
462 }
463
464 /*
465 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
466 * no more entries.
467 */
468 static dns_dispentry_t *
linear_next(dns_qid_t * qid,dns_dispentry_t * resp)469 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
470 dns_dispentry_t *ret;
471 unsigned int bucket;
472
473 ret = ISC_LIST_NEXT(resp, link);
474 if (ret != NULL) {
475 return (ret);
476 }
477
478 bucket = resp->bucket;
479 bucket++;
480 while (bucket < qid->qid_nbuckets) {
481 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
482 if (ret != NULL) {
483 return (ret);
484 }
485 bucket++;
486 }
487
488 return (NULL);
489 }
490
491 /*
492 * The dispatch must be locked.
493 */
494 static bool
destroy_disp_ok(dns_dispatch_t * disp)495 destroy_disp_ok(dns_dispatch_t *disp) {
496 if (disp->refcount != 0) {
497 return (false);
498 }
499
500 if (disp->recv_pending != 0) {
501 return (false);
502 }
503
504 if (!ISC_LIST_EMPTY(disp->activesockets)) {
505 return (false);
506 }
507
508 if (disp->shutting_down == 0) {
509 return (false);
510 }
511
512 return (true);
513 }
514
515 /*
516 * Called when refcount reaches 0 (and safe to destroy).
517 *
518 * The dispatcher must be locked.
519 * The manager must not be locked.
520 */
521 static void
destroy_disp(isc_task_t * task,isc_event_t * event)522 destroy_disp(isc_task_t *task, isc_event_t *event) {
523 dns_dispatch_t *disp;
524 dns_dispatchmgr_t *mgr;
525 bool killmgr;
526 dispsocket_t *dispsocket;
527 int i;
528
529 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
530
531 UNUSED(task);
532
533 disp = event->ev_arg;
534 mgr = disp->mgr;
535
536 LOCK(&mgr->lock);
537 ISC_LIST_UNLINK(mgr->list, disp, link);
538
539 dispatch_log(disp, LVL(90),
540 "shutting down; detaching from sock %p, task %p",
541 disp->socket, disp->task[0]); /* XXXX */
542
543 if (disp->sepool != NULL) {
544 isc_mem_destroy(&disp->sepool);
545 }
546
547 if (disp->socket != NULL) {
548 isc_socket_detach(&disp->socket);
549 }
550 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
551 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
552 destroy_dispsocket(disp, &dispsocket);
553 }
554 for (i = 0; i < disp->ntasks; i++) {
555 isc_task_detach(&disp->task[i]);
556 }
557 isc_event_free(&event);
558
559 dispatch_free(&disp);
560
561 killmgr = destroy_mgr_ok(mgr);
562 UNLOCK(&mgr->lock);
563 if (killmgr) {
564 destroy_mgr(&mgr);
565 }
566 }
567
568 /*%
569 * Manipulate port table per dispatch: find an entry for a given port number,
570 * create a new entry, and decrement a given entry with possible clean-up.
571 */
572 static dispportentry_t *
port_search(dns_dispatch_t * disp,in_port_t port)573 port_search(dns_dispatch_t *disp, in_port_t port) {
574 dispportentry_t *portentry;
575
576 REQUIRE(disp->port_table != NULL);
577
578 portentry = ISC_LIST_HEAD(
579 disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE]);
580 while (portentry != NULL) {
581 if (portentry->port == port) {
582 return (portentry);
583 }
584 portentry = ISC_LIST_NEXT(portentry, link);
585 }
586
587 return (NULL);
588 }
589
590 static dispportentry_t *
new_portentry(dns_dispatch_t * disp,in_port_t port)591 new_portentry(dns_dispatch_t *disp, in_port_t port) {
592 dispportentry_t *portentry;
593 dns_qid_t *qid;
594
595 REQUIRE(disp->port_table != NULL);
596
597 portentry = isc_mem_get(disp->mgr->mctx, sizeof(*portentry));
598
599 portentry->port = port;
600 isc_refcount_init(&portentry->refs, 1);
601 ISC_LINK_INIT(portentry, link);
602 qid = DNS_QID(disp);
603 LOCK(&qid->lock);
604 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
605 portentry, link);
606 UNLOCK(&qid->lock);
607
608 return (portentry);
609 }
610
611 /*%
612 * The caller must hold the qid->lock.
613 */
614 static void
deref_portentry(dns_dispatch_t * disp,dispportentry_t ** portentryp)615 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
616 dispportentry_t *portentry = *portentryp;
617 *portentryp = NULL;
618
619 REQUIRE(disp->port_table != NULL);
620 REQUIRE(portentry != NULL);
621
622 if (isc_refcount_decrement(&portentry->refs) == 1) {
623 ISC_LIST_UNLINK(disp->port_table[portentry->port %
624 DNS_DISPATCH_PORTTABLESIZE],
625 portentry, link);
626 isc_mem_put(disp->mgr->mctx, portentry, sizeof(*portentry));
627 }
628 }
629
630 /*%
631 * Find a dispsocket for socket address 'dest', and port number 'port'.
632 * Return NULL if no such entry exists. Requires qid->lock to be held.
633 */
634 static dispsocket_t *
socket_search(dns_qid_t * qid,const isc_sockaddr_t * dest,in_port_t port,unsigned int bucket)635 socket_search(dns_qid_t *qid, const isc_sockaddr_t *dest, in_port_t port,
636 unsigned int bucket) {
637 dispsocket_t *dispsock;
638
639 REQUIRE(VALID_QID(qid));
640 REQUIRE(bucket < qid->qid_nbuckets);
641
642 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
643
644 while (dispsock != NULL) {
645 if (dispsock->portentry != NULL &&
646 dispsock->portentry->port == port &&
647 isc_sockaddr_equal(dest, &dispsock->host))
648 {
649 return (dispsock);
650 }
651 dispsock = ISC_LIST_NEXT(dispsock, blink);
652 }
653
654 return (NULL);
655 }
656
657 /*%
658 * Make a new socket for a single dispatch with a random port number.
659 * The caller must hold the disp->lock
660 */
661 static isc_result_t
get_dispsocket(dns_dispatch_t * disp,const isc_sockaddr_t * dest,isc_socketmgr_t * sockmgr,dispsocket_t ** dispsockp,in_port_t * portp)662 get_dispsocket(dns_dispatch_t *disp, const isc_sockaddr_t *dest,
663 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
664 in_port_t *portp) {
665 int i;
666 dns_dispatchmgr_t *mgr = disp->mgr;
667 isc_socket_t *sock = NULL;
668 isc_result_t result = ISC_R_FAILURE;
669 in_port_t port;
670 isc_sockaddr_t localaddr;
671 unsigned int bucket = 0;
672 dispsocket_t *dispsock;
673 unsigned int nports;
674 in_port_t *ports;
675 isc_socket_options_t bindoptions;
676 dispportentry_t *portentry = NULL;
677 dns_qid_t *qid;
678
679 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
680 nports = disp->mgr->nv4ports;
681 ports = disp->mgr->v4ports;
682 } else {
683 nports = disp->mgr->nv6ports;
684 ports = disp->mgr->v6ports;
685 }
686 if (nports == 0) {
687 return (ISC_R_ADDRNOTAVAIL);
688 }
689
690 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
691 if (dispsock != NULL) {
692 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
693 sock = dispsock->socket;
694 dispsock->socket = NULL;
695 } else {
696 dispsock = isc_mem_get(mgr->mctx, sizeof(*dispsock));
697
698 disp->nsockets++;
699 dispsock->socket = NULL;
700 dispsock->disp = disp;
701 dispsock->resp = NULL;
702 dispsock->portentry = NULL;
703 dispsock->task = NULL;
704 isc_task_attach(disp->task[isc_random_uniform(disp->ntasks)],
705 &dispsock->task);
706 ISC_LINK_INIT(dispsock, link);
707 ISC_LINK_INIT(dispsock, blink);
708 dispsock->magic = DISPSOCK_MAGIC;
709 }
710
711 /*
712 * Pick up a random UDP port and open a new socket with it. Avoid
713 * choosing ports that share the same destination because it will be
714 * very likely to fail in bind(2) or connect(2).
715 */
716 localaddr = disp->local;
717 qid = DNS_QID(disp);
718
719 for (i = 0; i < 64; i++) {
720 port = ports[isc_random_uniform(nports)];
721 isc_sockaddr_setport(&localaddr, port);
722
723 LOCK(&qid->lock);
724 bucket = dns_hash(qid, dest, 0, port);
725 if (socket_search(qid, dest, port, bucket) != NULL) {
726 UNLOCK(&qid->lock);
727 continue;
728 }
729 UNLOCK(&qid->lock);
730 bindoptions = 0;
731 portentry = port_search(disp, port);
732
733 if (portentry != NULL) {
734 bindoptions |= ISC_SOCKET_REUSEADDRESS;
735 }
736 result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
737 NULL, false);
738 if (result == ISC_R_SUCCESS) {
739 if (portentry == NULL) {
740 portentry = new_portentry(disp, port);
741 if (portentry == NULL) {
742 result = ISC_R_NOMEMORY;
743 break;
744 }
745 } else {
746 isc_refcount_increment(&portentry->refs);
747 }
748 break;
749 } else if (result == ISC_R_NOPERM) {
750 char buf[ISC_SOCKADDR_FORMATSIZE];
751 isc_sockaddr_format(&localaddr, buf, sizeof(buf));
752 dispatch_log(disp, ISC_LOG_WARNING,
753 "open_socket(%s) -> %s: continuing", buf,
754 isc_result_totext(result));
755 } else if (result != ISC_R_ADDRINUSE) {
756 break;
757 }
758 }
759
760 if (result == ISC_R_SUCCESS) {
761 dispsock->socket = sock;
762 dispsock->host = *dest;
763 dispsock->bucket = bucket;
764 LOCK(&qid->lock);
765 dispsock->portentry = portentry;
766 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
767 UNLOCK(&qid->lock);
768 *dispsockp = dispsock;
769 *portp = port;
770 } else {
771 /*
772 * We could keep it in the inactive list, but since this should
773 * be an exceptional case and might be resource shortage, we'd
774 * rather destroy it.
775 */
776 if (sock != NULL) {
777 isc_socket_detach(&sock);
778 }
779 destroy_dispsocket(disp, &dispsock);
780 }
781
782 return (result);
783 }
784
785 /*%
786 * Destroy a dedicated dispatch socket.
787 */
788 static void
destroy_dispsocket(dns_dispatch_t * disp,dispsocket_t ** dispsockp)789 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
790 dispsocket_t *dispsock;
791 dns_qid_t *qid = DNS_QID(disp);
792
793 /*
794 * The dispatch must be locked.
795 */
796
797 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
798 dispsock = *dispsockp;
799 *dispsockp = NULL;
800 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
801
802 disp->nsockets--;
803 dispsock->magic = 0;
804 if (dispsock->portentry != NULL) {
805 /* socket_search() tests and dereferences portentry. */
806 LOCK(&qid->lock);
807 deref_portentry(disp, &dispsock->portentry);
808 UNLOCK(&qid->lock);
809 }
810 if (dispsock->socket != NULL) {
811 isc_socket_detach(&dispsock->socket);
812 }
813 if (ISC_LINK_LINKED(dispsock, blink)) {
814 LOCK(&qid->lock);
815 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
816 blink);
817 UNLOCK(&qid->lock);
818 }
819 if (dispsock->task != NULL) {
820 isc_task_detach(&dispsock->task);
821 }
822 isc_mem_put(disp->mgr->mctx, dispsock, sizeof(*dispsock));
823 }
824
825 /*%
826 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
827 * future reuse unless the total number of sockets are exceeding the maximum.
828 */
829 static void
deactivate_dispsocket(dns_dispatch_t * disp,dispsocket_t * dispsock)830 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
831 isc_result_t result;
832 dns_qid_t *qid = DNS_QID(disp);
833
834 /*
835 * The dispatch must be locked.
836 */
837 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
838 if (dispsock->resp != NULL) {
839 INSIST(dispsock->resp->dispsocket == dispsock);
840 dispsock->resp->dispsocket = NULL;
841 }
842
843 INSIST(dispsock->portentry != NULL);
844 /* socket_search() tests and dereferences portentry. */
845 LOCK(&qid->lock);
846 deref_portentry(disp, &dispsock->portentry);
847 UNLOCK(&qid->lock);
848
849 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) {
850 destroy_dispsocket(disp, &dispsock);
851 } else {
852 result = isc_socket_close(dispsock->socket);
853
854 LOCK(&qid->lock);
855 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
856 blink);
857 UNLOCK(&qid->lock);
858
859 if (result == ISC_R_SUCCESS) {
860 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
861 } else {
862 /*
863 * If the underlying system does not allow this
864 * optimization, destroy this temporary structure (and
865 * create a new one for a new transaction).
866 */
867 INSIST(result == ISC_R_NOTIMPLEMENTED);
868 destroy_dispsocket(disp, &dispsock);
869 }
870 }
871 }
872
873 /*
874 * Find an entry for query ID 'id', socket address 'dest', and port number
875 * 'port'.
876 * Return NULL if no such entry exists.
877 */
878 static dns_dispentry_t *
entry_search(dns_qid_t * qid,const isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port,unsigned int bucket)879 entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
880 in_port_t port, unsigned int bucket) {
881 dns_dispentry_t *res;
882
883 REQUIRE(VALID_QID(qid));
884 REQUIRE(bucket < qid->qid_nbuckets);
885
886 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
887
888 while (res != NULL) {
889 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
890 res->port == port)
891 {
892 return (res);
893 }
894 res = ISC_LIST_NEXT(res, link);
895 }
896
897 return (NULL);
898 }
899
900 static void
free_buffer(dns_dispatch_t * disp,void * buf,unsigned int len)901 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
902 unsigned int buffersize;
903 INSIST(buf != NULL && len != 0);
904
905 switch (disp->socktype) {
906 case isc_sockettype_tcp:
907 INSIST(disp->tcpbuffers > 0);
908 disp->tcpbuffers--;
909 isc_mem_put(disp->mgr->mctx, buf, len);
910 break;
911 case isc_sockettype_udp:
912 LOCK(&disp->mgr->buffer_lock);
913 INSIST(disp->mgr->buffers > 0);
914 INSIST(len == disp->mgr->buffersize);
915 disp->mgr->buffers--;
916 buffersize = disp->mgr->buffersize;
917 UNLOCK(&disp->mgr->buffer_lock);
918 isc_mem_put(disp->mgr->mctx, buf, buffersize);
919 break;
920 default:
921 UNREACHABLE();
922 }
923 }
924
925 static void *
allocate_udp_buffer(dns_dispatch_t * disp)926 allocate_udp_buffer(dns_dispatch_t *disp) {
927 unsigned int buffersize;
928
929 LOCK(&disp->mgr->buffer_lock);
930 if (disp->mgr->buffers >= disp->mgr->maxbuffers) {
931 UNLOCK(&disp->mgr->buffer_lock);
932 return (NULL);
933 }
934 buffersize = disp->mgr->buffersize;
935 disp->mgr->buffers++;
936 UNLOCK(&disp->mgr->buffer_lock);
937
938 return (isc_mem_get(disp->mgr->mctx, buffersize));
939 }
940
941 static void
free_sevent(isc_event_t * ev)942 free_sevent(isc_event_t *ev) {
943 isc_mem_t *pool = ev->ev_destroy_arg;
944 isc_socketevent_t *sev = (isc_socketevent_t *)ev;
945 isc_mem_put(pool, sev, sizeof(*sev));
946 }
947
948 static isc_socketevent_t *
allocate_sevent(dns_dispatch_t * disp,isc_socket_t * sock,isc_eventtype_t type,isc_taskaction_t action,const void * arg)949 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock, isc_eventtype_t type,
950 isc_taskaction_t action, const void *arg) {
951 isc_socketevent_t *ev;
952 void *deconst_arg;
953
954 ev = isc_mem_get(disp->sepool, sizeof(*ev));
955 DE_CONST(arg, deconst_arg);
956 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, action, deconst_arg,
957 sock, free_sevent, disp->sepool);
958 ev->result = ISC_R_UNSET;
959 ISC_LINK_INIT(ev, ev_link);
960 ev->region.base = NULL;
961 ev->n = 0;
962 ev->offset = 0;
963 ev->attributes = 0;
964
965 return (ev);
966 }
967
968 static void
free_devent(dns_dispatch_t * disp,dns_dispatchevent_t * ev)969 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
970 if (disp->failsafe_ev == ev) {
971 INSIST(disp->shutdown_out == 1);
972 disp->shutdown_out = 0;
973
974 return;
975 }
976
977 isc_refcount_decrement(&disp->mgr->irefs);
978 isc_mem_put(disp->mgr->mctx, ev, sizeof(*ev));
979 }
980
981 static dns_dispatchevent_t *
allocate_devent(dns_dispatch_t * disp)982 allocate_devent(dns_dispatch_t *disp) {
983 dns_dispatchevent_t *ev;
984
985 ev = isc_mem_get(disp->mgr->mctx, sizeof(*ev));
986 isc_refcount_increment0(&disp->mgr->irefs);
987 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, NULL, NULL, NULL, NULL,
988 NULL);
989
990 return (ev);
991 }
992
993 static void
udp_exrecv(isc_task_t * task,isc_event_t * ev)994 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
995 dispsocket_t *dispsock = ev->ev_arg;
996
997 UNUSED(task);
998
999 REQUIRE(VALID_DISPSOCK(dispsock));
1000 udp_recv(ev, dispsock->disp, dispsock);
1001 }
1002
1003 static void
udp_shrecv(isc_task_t * task,isc_event_t * ev)1004 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1005 dns_dispatch_t *disp = ev->ev_arg;
1006
1007 UNUSED(task);
1008
1009 REQUIRE(VALID_DISPATCH(disp));
1010 udp_recv(ev, disp, NULL);
1011 }
1012
1013 /*
1014 * General flow:
1015 *
1016 * If I/O result == CANCELED or error, free the buffer.
1017 *
1018 * If query, free the buffer, restart.
1019 *
1020 * If response:
1021 * Allocate event, fill in details.
1022 * If cannot allocate, free buffer, restart.
1023 * find target. If not found, free buffer, restart.
1024 * if event queue is not empty, queue. else, send.
1025 * restart.
1026 */
1027 static void
udp_recv(isc_event_t * ev_in,dns_dispatch_t * disp,dispsocket_t * dispsock)1028 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1029 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1030 dns_messageid_t id;
1031 isc_result_t dres;
1032 isc_buffer_t source;
1033 unsigned int flags;
1034 dns_dispentry_t *resp = NULL;
1035 dns_dispatchevent_t *rev;
1036 unsigned int bucket;
1037 bool killit;
1038 bool queue_response;
1039 dns_dispatchmgr_t *mgr;
1040 dns_qid_t *qid;
1041 isc_netaddr_t netaddr;
1042 int match;
1043 int result;
1044 bool qidlocked = false;
1045
1046 LOCK(&disp->lock);
1047
1048 mgr = disp->mgr;
1049 qid = mgr->qid;
1050
1051 LOCK(&disp->mgr->buffer_lock);
1052 dispatch_log(disp, LVL(90),
1053 "got packet: requests %d, buffers %d, recvs %d",
1054 disp->requests, disp->mgr->buffers, disp->recv_pending);
1055 UNLOCK(&disp->mgr->buffer_lock);
1056
1057 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1058 /*
1059 * Unless the receive event was imported from a listening
1060 * interface, in which case the event type is
1061 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1062 */
1063 INSIST(disp->recv_pending != 0);
1064 disp->recv_pending = 0;
1065 }
1066
1067 if (dispsock != NULL &&
1068 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL))
1069 {
1070 /*
1071 * dispsock->resp can be NULL if this transaction was canceled
1072 * just after receiving a response. Since this socket is
1073 * exclusively used and there should be at most one receive
1074 * event the canceled event should have been no effect. So
1075 * we can (and should) deactivate the socket right now.
1076 */
1077 deactivate_dispsocket(disp, dispsock);
1078 dispsock = NULL;
1079 }
1080
1081 if (disp->shutting_down) {
1082 /*
1083 * This dispatcher is shutting down.
1084 */
1085 free_buffer(disp, ev->region.base, ev->region.length);
1086
1087 isc_event_free(&ev_in);
1088 ev = NULL;
1089
1090 killit = destroy_disp_ok(disp);
1091 UNLOCK(&disp->lock);
1092 if (killit) {
1093 isc_task_send(disp->task[0], &disp->ctlevent);
1094 }
1095
1096 return;
1097 }
1098
1099 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1100 if (dispsock != NULL) {
1101 resp = dispsock->resp;
1102 id = resp->id;
1103 if (ev->result != ISC_R_SUCCESS) {
1104 /*
1105 * This is most likely a network error on a
1106 * connected socket. It makes no sense to
1107 * check the address or parse the packet, but it
1108 * will help to return the error to the caller.
1109 */
1110 goto sendresponse;
1111 }
1112 } else {
1113 free_buffer(disp, ev->region.base, ev->region.length);
1114
1115 isc_event_free(&ev_in);
1116 UNLOCK(&disp->lock);
1117 return;
1118 }
1119 } else if (ev->result != ISC_R_SUCCESS) {
1120 free_buffer(disp, ev->region.base, ev->region.length);
1121
1122 if (ev->result != ISC_R_CANCELED) {
1123 dispatch_log(disp, ISC_LOG_ERROR,
1124 "odd socket result in udp_recv(): %s",
1125 isc_result_totext(ev->result));
1126 }
1127
1128 isc_event_free(&ev_in);
1129 UNLOCK(&disp->lock);
1130 return;
1131 }
1132
1133 /*
1134 * If this is from a blackholed address, drop it.
1135 */
1136 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1137 if (disp->mgr->blackhole != NULL &&
1138 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match,
1139 NULL) == ISC_R_SUCCESS &&
1140 match > 0)
1141 {
1142 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1143 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1144 isc_netaddr_format(&netaddr, netaddrstr,
1145 sizeof(netaddrstr));
1146 dispatch_log(disp, LVL(10), "blackholed packet from %s",
1147 netaddrstr);
1148 }
1149 free_buffer(disp, ev->region.base, ev->region.length);
1150 goto restart;
1151 }
1152
1153 /*
1154 * Peek into the buffer to see what we can see.
1155 */
1156 isc_buffer_init(&source, ev->region.base, ev->region.length);
1157 isc_buffer_add(&source, ev->n);
1158 dres = dns_message_peekheader(&source, &id, &flags);
1159 if (dres != ISC_R_SUCCESS) {
1160 free_buffer(disp, ev->region.base, ev->region.length);
1161 dispatch_log(disp, LVL(10), "got garbage packet");
1162 goto restart;
1163 }
1164
1165 dispatch_log(disp, LVL(92),
1166 "got valid DNS message header, /QR %c, id %u",
1167 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
1168
1169 /*
1170 * Look at flags. If query, drop it. If response,
1171 * look to see where it goes.
1172 */
1173 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1174 /* query */
1175 free_buffer(disp, ev->region.base, ev->region.length);
1176 goto restart;
1177 }
1178
1179 /*
1180 * Search for the corresponding response. If we are using an exclusive
1181 * socket, we've already identified it and we can skip the search; but
1182 * the ID and the address must match the expected ones.
1183 */
1184 if (resp == NULL) {
1185 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1186 LOCK(&qid->lock);
1187 qidlocked = true;
1188 resp = entry_search(qid, &ev->address, id, disp->localport,
1189 bucket);
1190 dispatch_log(disp, LVL(90),
1191 "search for response in bucket %d: %s", bucket,
1192 (resp == NULL ? "not found" : "found"));
1193
1194 } else if (resp->id != id ||
1195 !isc_sockaddr_equal(&ev->address, &resp->host))
1196 {
1197 dispatch_log(disp, LVL(90),
1198 "response to an exclusive socket doesn't match");
1199 inc_stats(mgr, dns_resstatscounter_mismatch);
1200 free_buffer(disp, ev->region.base, ev->region.length);
1201 goto unlock;
1202 }
1203
1204 if (resp == NULL) {
1205 inc_stats(mgr, dns_resstatscounter_mismatch);
1206 free_buffer(disp, ev->region.base, ev->region.length);
1207 goto unlock;
1208 }
1209
1210 /*
1211 * Now that we have the original dispatch the query was sent
1212 * from check that the address and port the response was
1213 * sent to make sense.
1214 */
1215 if (disp != resp->disp) {
1216 isc_sockaddr_t a1;
1217 isc_sockaddr_t a2;
1218
1219 /*
1220 * Check that the socket types and ports match.
1221 */
1222 if (disp->socktype != resp->disp->socktype ||
1223 isc_sockaddr_getport(&disp->local) !=
1224 isc_sockaddr_getport(&resp->disp->local))
1225 {
1226 free_buffer(disp, ev->region.base, ev->region.length);
1227 goto unlock;
1228 }
1229
1230 /*
1231 * If each dispatch is bound to a different address
1232 * then fail.
1233 *
1234 * Note under Linux a packet can be sent out via IPv4 socket
1235 * and the response be received via a IPv6 socket.
1236 *
1237 * Requests sent out via IPv6 should always come back in
1238 * via IPv6.
1239 */
1240 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1241 isc_sockaddr_pf(&disp->local) != PF_INET6)
1242 {
1243 free_buffer(disp, ev->region.base, ev->region.length);
1244 goto unlock;
1245 }
1246 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1247 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1248 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1249 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1250 !isc_sockaddr_eqaddr(&a2, &disp->local))
1251 {
1252 free_buffer(disp, ev->region.base, ev->region.length);
1253 goto unlock;
1254 }
1255 }
1256
1257 sendresponse:
1258 queue_response = resp->item_out;
1259 rev = allocate_devent(resp->disp);
1260 if (rev == NULL) {
1261 free_buffer(disp, ev->region.base, ev->region.length);
1262 goto unlock;
1263 }
1264
1265 /*
1266 * At this point, rev contains the event we want to fill in, and
1267 * resp contains the information on the place to send it to.
1268 * Send the event off.
1269 */
1270 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1271 isc_buffer_add(&rev->buffer, ev->n);
1272 rev->result = ev->result;
1273 rev->id = id;
1274 rev->addr = ev->address;
1275 rev->pktinfo = ev->pktinfo;
1276 rev->attributes = ev->attributes;
1277 if (queue_response) {
1278 ISC_LIST_APPEND(resp->items, rev, ev_link);
1279 } else {
1280 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1281 resp->action, resp->arg, resp, NULL, NULL);
1282 request_log(disp, resp, LVL(90),
1283 "[a] Sent event %p buffer %p len %d to task %p",
1284 rev, rev->buffer.base, rev->buffer.length,
1285 resp->task);
1286 resp->item_out = true;
1287 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1288 }
1289 unlock:
1290 if (qidlocked) {
1291 UNLOCK(&qid->lock);
1292 }
1293
1294 /*
1295 * Restart recv() to get the next packet.
1296 */
1297 restart:
1298 result = startrecv(disp, dispsock);
1299 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1300 /*
1301 * XXX: wired. There seems to be no recovery process other than
1302 * deactivate this socket anyway (since we cannot start
1303 * receiving, we won't be able to receive a cancel event
1304 * from the user).
1305 */
1306 deactivate_dispsocket(disp, dispsock);
1307 }
1308 isc_event_free(&ev_in);
1309 UNLOCK(&disp->lock);
1310 }
1311
1312 /*
1313 * General flow:
1314 *
1315 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1316 * various queues drain.
1317 *
1318 * If query, restart.
1319 *
1320 * If response:
1321 * Allocate event, fill in details.
1322 * If cannot allocate, restart.
1323 * find target. If not found, restart.
1324 * if event queue is not empty, queue. else, send.
1325 * restart.
1326 */
1327 static void
tcp_recv(isc_task_t * task,isc_event_t * ev_in)1328 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1329 dns_dispatch_t *disp = ev_in->ev_arg;
1330 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1331 dns_messageid_t id;
1332 isc_result_t dres;
1333 unsigned int flags;
1334 dns_dispentry_t *resp;
1335 dns_dispatchevent_t *rev;
1336 unsigned int bucket;
1337 bool killit;
1338 bool queue_response;
1339 dns_qid_t *qid;
1340 int level;
1341 char buf[ISC_SOCKADDR_FORMATSIZE];
1342
1343 UNUSED(task);
1344
1345 REQUIRE(VALID_DISPATCH(disp));
1346
1347 qid = disp->qid;
1348
1349 LOCK(&disp->lock);
1350
1351 dispatch_log(disp, LVL(90),
1352 "got TCP packet: requests %d, buffers %d, recvs %d",
1353 disp->requests, disp->tcpbuffers, disp->recv_pending);
1354
1355 INSIST(disp->recv_pending != 0);
1356 disp->recv_pending = 0;
1357
1358 if (disp->refcount == 0) {
1359 /*
1360 * This dispatcher is shutting down. Force cancellation.
1361 */
1362 tcpmsg->result = ISC_R_CANCELED;
1363 }
1364
1365 if (tcpmsg->result != ISC_R_SUCCESS) {
1366 switch (tcpmsg->result) {
1367 case ISC_R_CANCELED:
1368 break;
1369
1370 case ISC_R_EOF:
1371 dispatch_log(disp, LVL(90), "shutting down on EOF");
1372 do_cancel(disp);
1373 break;
1374
1375 case ISC_R_CONNECTIONRESET:
1376 level = ISC_LOG_INFO;
1377 goto logit;
1378
1379 default:
1380 level = ISC_LOG_ERROR;
1381 logit:
1382 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1383 dispatch_log(disp, level,
1384 "shutting down due to TCP "
1385 "receive error: %s: %s",
1386 buf, isc_result_totext(tcpmsg->result));
1387 do_cancel(disp);
1388 break;
1389 }
1390
1391 /*
1392 * The event is statically allocated in the tcpmsg
1393 * structure, and destroy_disp() frees the tcpmsg, so we must
1394 * free the event *before* calling destroy_disp().
1395 */
1396 isc_event_free(&ev_in);
1397
1398 disp->shutting_down = 1;
1399 disp->shutdown_why = tcpmsg->result;
1400
1401 /*
1402 * If the recv() was canceled pass the word on.
1403 */
1404 killit = destroy_disp_ok(disp);
1405 UNLOCK(&disp->lock);
1406 if (killit) {
1407 isc_task_send(disp->task[0], &disp->ctlevent);
1408 }
1409 return;
1410 }
1411
1412 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1413 tcpmsg->result, tcpmsg->buffer.length,
1414 tcpmsg->buffer.base);
1415
1416 /*
1417 * Peek into the buffer to see what we can see.
1418 */
1419 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1420 if (dres != ISC_R_SUCCESS) {
1421 dispatch_log(disp, LVL(10), "got garbage packet");
1422 goto restart;
1423 }
1424
1425 dispatch_log(disp, LVL(92),
1426 "got valid DNS message header, /QR %c, id %u",
1427 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
1428
1429 /*
1430 * Allocate an event to send to the query or response client, and
1431 * allocate a new buffer for our use.
1432 */
1433
1434 /*
1435 * Look at flags. If query, drop it. If response,
1436 * look to see where it goes.
1437 */
1438 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1439 /*
1440 * Query.
1441 */
1442 goto restart;
1443 }
1444
1445 /*
1446 * Response.
1447 */
1448 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1449 LOCK(&qid->lock);
1450 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1451 dispatch_log(disp, LVL(90), "search for response in bucket %d: %s",
1452 bucket, (resp == NULL ? "not found" : "found"));
1453
1454 if (resp == NULL) {
1455 goto unlock;
1456 }
1457 queue_response = resp->item_out;
1458 rev = allocate_devent(disp);
1459 if (rev == NULL) {
1460 goto unlock;
1461 }
1462
1463 /*
1464 * At this point, rev contains the event we want to fill in, and
1465 * resp contains the information on the place to send it to.
1466 * Send the event off.
1467 */
1468 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1469 disp->tcpbuffers++;
1470 rev->result = ISC_R_SUCCESS;
1471 rev->id = id;
1472 rev->addr = tcpmsg->address;
1473 if (queue_response) {
1474 ISC_LIST_APPEND(resp->items, rev, ev_link);
1475 } else {
1476 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1477 resp->action, resp->arg, resp, NULL, NULL);
1478 request_log(disp, resp, LVL(90),
1479 "[b] Sent event %p buffer %p len %d to task %p",
1480 rev, rev->buffer.base, rev->buffer.length,
1481 resp->task);
1482 resp->item_out = true;
1483 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1484 }
1485 unlock:
1486 UNLOCK(&qid->lock);
1487
1488 /*
1489 * Restart recv() to get the next packet.
1490 */
1491 restart:
1492 (void)startrecv(disp, NULL);
1493
1494 isc_event_free(&ev_in);
1495 UNLOCK(&disp->lock);
1496 }
1497
1498 /*
1499 * disp must be locked.
1500 */
1501 static isc_result_t
startrecv(dns_dispatch_t * disp,dispsocket_t * dispsock)1502 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1503 isc_result_t res;
1504 isc_region_t region;
1505 isc_socket_t *sock;
1506
1507 if (disp->shutting_down == 1) {
1508 return (ISC_R_SUCCESS);
1509 }
1510
1511 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
1512 return (ISC_R_SUCCESS);
1513 }
1514
1515 if (disp->recv_pending != 0 && dispsock == NULL) {
1516 return (ISC_R_SUCCESS);
1517 }
1518
1519 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1520 dispsock == NULL)
1521 {
1522 return (ISC_R_SUCCESS);
1523 }
1524
1525 if (dispsock != NULL) {
1526 sock = dispsock->socket;
1527 } else {
1528 sock = disp->socket;
1529 }
1530 INSIST(sock != NULL);
1531
1532 switch (disp->socktype) {
1533 /*
1534 * UDP reads are always maximal.
1535 */
1536 case isc_sockettype_udp:
1537 region.length = disp->mgr->buffersize;
1538 region.base = allocate_udp_buffer(disp);
1539 if (region.base == NULL) {
1540 return (ISC_R_NOMEMORY);
1541 }
1542 if (dispsock != NULL) {
1543 isc_task_t *dt = dispsock->task;
1544 isc_socketevent_t *sev = allocate_sevent(
1545 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_exrecv,
1546 dispsock);
1547 if (sev == NULL) {
1548 free_buffer(disp, region.base, region.length);
1549 return (ISC_R_NOMEMORY);
1550 }
1551
1552 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0);
1553 if (res != ISC_R_SUCCESS) {
1554 free_buffer(disp, region.base, region.length);
1555 return (res);
1556 }
1557 } else {
1558 isc_task_t *dt = disp->task[0];
1559 isc_socketevent_t *sev = allocate_sevent(
1560 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_shrecv,
1561 disp);
1562 if (sev == NULL) {
1563 free_buffer(disp, region.base, region.length);
1564 return (ISC_R_NOMEMORY);
1565 }
1566
1567 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0);
1568 if (res != ISC_R_SUCCESS) {
1569 free_buffer(disp, region.base, region.length);
1570 disp->shutdown_why = res;
1571 disp->shutting_down = 1;
1572 do_cancel(disp);
1573 return (ISC_R_SUCCESS); /* recover by cancel */
1574 }
1575 INSIST(disp->recv_pending == 0);
1576 disp->recv_pending = 1;
1577 }
1578 break;
1579
1580 case isc_sockettype_tcp:
1581 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1582 tcp_recv, disp);
1583 if (res != ISC_R_SUCCESS) {
1584 disp->shutdown_why = res;
1585 disp->shutting_down = 1;
1586 do_cancel(disp);
1587 return (ISC_R_SUCCESS); /* recover by cancel */
1588 }
1589 INSIST(disp->recv_pending == 0);
1590 disp->recv_pending = 1;
1591 break;
1592 default:
1593 UNREACHABLE();
1594 }
1595
1596 return (ISC_R_SUCCESS);
1597 }
1598
1599 /*
1600 * Mgr must be locked when calling this function.
1601 */
1602 static bool
destroy_mgr_ok(dns_dispatchmgr_t * mgr)1603 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1604 mgr_log(mgr, LVL(90),
1605 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, ",
1606 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list));
1607 if (!MGR_IS_SHUTTINGDOWN(mgr)) {
1608 return (false);
1609 }
1610 if (!ISC_LIST_EMPTY(mgr->list)) {
1611 return (false);
1612 }
1613 if (isc_refcount_current(&mgr->irefs) != 0) {
1614 return (false);
1615 }
1616
1617 return (true);
1618 }
1619
1620 /*
1621 * Mgr must be unlocked when calling this function.
1622 */
1623 static void
destroy_mgr(dns_dispatchmgr_t ** mgrp)1624 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1625 dns_dispatchmgr_t *mgr;
1626
1627 mgr = *mgrp;
1628 *mgrp = NULL;
1629
1630 mgr->magic = 0;
1631 isc_mutex_destroy(&mgr->lock);
1632 mgr->state = 0;
1633
1634 if (mgr->qid != NULL) {
1635 qid_destroy(mgr->mctx, &mgr->qid);
1636 }
1637
1638 isc_mutex_destroy(&mgr->buffer_lock);
1639
1640 if (mgr->blackhole != NULL) {
1641 dns_acl_detach(&mgr->blackhole);
1642 }
1643
1644 if (mgr->stats != NULL) {
1645 isc_stats_detach(&mgr->stats);
1646 }
1647
1648 if (mgr->v4ports != NULL) {
1649 isc_mem_put(mgr->mctx, mgr->v4ports,
1650 mgr->nv4ports * sizeof(in_port_t));
1651 }
1652 if (mgr->v6ports != NULL) {
1653 isc_mem_put(mgr->mctx, mgr->v6ports,
1654 mgr->nv6ports * sizeof(in_port_t));
1655 }
1656 isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t));
1657 }
1658
1659 static isc_result_t
open_socket(isc_socketmgr_t * mgr,const isc_sockaddr_t * local,unsigned int options,isc_socket_t ** sockp,isc_socket_t * dup_socket,bool duponly)1660 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local,
1661 unsigned int options, isc_socket_t **sockp,
1662 isc_socket_t *dup_socket, bool duponly) {
1663 isc_socket_t *sock;
1664 isc_result_t result;
1665
1666 sock = *sockp;
1667 if (sock != NULL) {
1668 result = isc_socket_open(sock);
1669 if (result != ISC_R_SUCCESS) {
1670 return (result);
1671 }
1672 } else if (dup_socket != NULL &&
1673 (!isc_socket_hasreuseport() || duponly))
1674 {
1675 result = isc_socket_dup(dup_socket, &sock);
1676 if (result != ISC_R_SUCCESS) {
1677 return (result);
1678 }
1679
1680 isc_socket_setname(sock, "dispatcher", NULL);
1681 *sockp = sock;
1682 return (ISC_R_SUCCESS);
1683 } else {
1684 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1685 isc_sockettype_udp, &sock);
1686 if (result != ISC_R_SUCCESS) {
1687 return (result);
1688 }
1689 }
1690
1691 isc_socket_setname(sock, "dispatcher", NULL);
1692
1693 #ifndef ISC_ALLOW_MAPPED
1694 isc_socket_ipv6only(sock, true);
1695 #endif /* ifndef ISC_ALLOW_MAPPED */
1696 result = isc_socket_bind(sock, local, options);
1697 if (result != ISC_R_SUCCESS) {
1698 if (*sockp == NULL) {
1699 isc_socket_detach(&sock);
1700 } else {
1701 isc_socket_close(sock);
1702 }
1703 return (result);
1704 }
1705
1706 *sockp = sock;
1707 return (ISC_R_SUCCESS);
1708 }
1709
1710 /*%
1711 * Create a temporary port list to set the initial default set of dispatch
1712 * ports: [1024, 65535]. This is almost meaningless as the application will
1713 * normally set the ports explicitly, but is provided to fill some minor corner
1714 * cases.
1715 */
1716 static isc_result_t
create_default_portset(isc_mem_t * mctx,isc_portset_t ** portsetp)1717 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1718 isc_result_t result;
1719
1720 result = isc_portset_create(mctx, portsetp);
1721 if (result != ISC_R_SUCCESS) {
1722 return (result);
1723 }
1724 isc_portset_addrange(*portsetp, 1024, 65535);
1725
1726 return (ISC_R_SUCCESS);
1727 }
1728
/*
 * Public functions.
 */
1732
1733 isc_result_t
dns_dispatchmgr_create(isc_mem_t * mctx,dns_dispatchmgr_t ** mgrp)1734 dns_dispatchmgr_create(isc_mem_t *mctx, dns_dispatchmgr_t **mgrp) {
1735 dns_dispatchmgr_t *mgr;
1736 isc_result_t result;
1737 isc_portset_t *v4portset = NULL;
1738 isc_portset_t *v6portset = NULL;
1739
1740 REQUIRE(mctx != NULL);
1741 REQUIRE(mgrp != NULL && *mgrp == NULL);
1742
1743 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1744 *mgr = (dns_dispatchmgr_t){ 0 };
1745
1746 isc_mem_attach(mctx, &mgr->mctx);
1747
1748 isc_mutex_init(&mgr->lock);
1749 isc_mutex_init(&mgr->buffer_lock);
1750
1751 isc_refcount_init(&mgr->irefs, 0);
1752
1753 ISC_LIST_INIT(mgr->list);
1754
1755 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1756
1757 result = create_default_portset(mctx, &v4portset);
1758 if (result == ISC_R_SUCCESS) {
1759 result = create_default_portset(mctx, &v6portset);
1760 if (result == ISC_R_SUCCESS) {
1761 result = dns_dispatchmgr_setavailports(mgr, v4portset,
1762 v6portset);
1763 }
1764 }
1765 if (v4portset != NULL) {
1766 isc_portset_destroy(mctx, &v4portset);
1767 }
1768 if (v6portset != NULL) {
1769 isc_portset_destroy(mctx, &v6portset);
1770 }
1771 if (result != ISC_R_SUCCESS) {
1772 goto kill_dpool;
1773 }
1774
1775 *mgrp = mgr;
1776 return (ISC_R_SUCCESS);
1777
1778 kill_dpool:
1779 isc_mutex_destroy(&mgr->buffer_lock);
1780 isc_mutex_destroy(&mgr->lock);
1781 isc_mem_putanddetach(&mctx, mgr, sizeof(dns_dispatchmgr_t));
1782
1783 return (result);
1784 }
1785
1786 void
dns_dispatchmgr_setblackhole(dns_dispatchmgr_t * mgr,dns_acl_t * blackhole)1787 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1788 REQUIRE(VALID_DISPATCHMGR(mgr));
1789 if (mgr->blackhole != NULL) {
1790 dns_acl_detach(&mgr->blackhole);
1791 }
1792 dns_acl_attach(blackhole, &mgr->blackhole);
1793 }
1794
1795 dns_acl_t *
dns_dispatchmgr_getblackhole(dns_dispatchmgr_t * mgr)1796 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1797 REQUIRE(VALID_DISPATCHMGR(mgr));
1798 return (mgr->blackhole);
1799 }
1800
1801 void
dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t * mgr,dns_portlist_t * portlist)1802 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1803 dns_portlist_t *portlist) {
1804 REQUIRE(VALID_DISPATCHMGR(mgr));
1805 UNUSED(portlist);
1806
1807 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
1808 return;
1809 }
1810
1811 dns_portlist_t *
dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t * mgr)1812 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1813 REQUIRE(VALID_DISPATCHMGR(mgr));
1814 return (NULL); /* this function is deprecated */
1815 }
1816
1817 isc_result_t
dns_dispatchmgr_setavailports(dns_dispatchmgr_t * mgr,isc_portset_t * v4portset,isc_portset_t * v6portset)1818 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1819 isc_portset_t *v6portset) {
1820 in_port_t *v4ports, *v6ports, p;
1821 unsigned int nv4ports, nv6ports, i4, i6;
1822
1823 REQUIRE(VALID_DISPATCHMGR(mgr));
1824
1825 nv4ports = isc_portset_nports(v4portset);
1826 nv6ports = isc_portset_nports(v6portset);
1827
1828 v4ports = NULL;
1829 if (nv4ports != 0) {
1830 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
1831 }
1832 v6ports = NULL;
1833 if (nv6ports != 0) {
1834 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
1835 }
1836
1837 p = 0;
1838 i4 = 0;
1839 i6 = 0;
1840 do {
1841 if (isc_portset_isset(v4portset, p)) {
1842 INSIST(i4 < nv4ports);
1843 v4ports[i4++] = p;
1844 }
1845 if (isc_portset_isset(v6portset, p)) {
1846 INSIST(i6 < nv6ports);
1847 v6ports[i6++] = p;
1848 }
1849 } while (p++ < 65535);
1850 INSIST(i4 == nv4ports && i6 == nv6ports);
1851
1852 PORTBUFLOCK(mgr);
1853 if (mgr->v4ports != NULL) {
1854 isc_mem_put(mgr->mctx, mgr->v4ports,
1855 mgr->nv4ports * sizeof(in_port_t));
1856 }
1857 mgr->v4ports = v4ports;
1858 mgr->nv4ports = nv4ports;
1859
1860 if (mgr->v6ports != NULL) {
1861 isc_mem_put(mgr->mctx, mgr->v6ports,
1862 mgr->nv6ports * sizeof(in_port_t));
1863 }
1864 mgr->v6ports = v6ports;
1865 mgr->nv6ports = nv6ports;
1866 PORTBUFUNLOCK(mgr);
1867
1868 return (ISC_R_SUCCESS);
1869 }
1870
1871 static isc_result_t
dns_dispatchmgr_setudp(dns_dispatchmgr_t * mgr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment)1872 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, unsigned int buffersize,
1873 unsigned int maxbuffers, unsigned int maxrequests,
1874 unsigned int buckets, unsigned int increment) {
1875 isc_result_t result;
1876
1877 REQUIRE(VALID_DISPATCHMGR(mgr));
1878 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1879 REQUIRE(maxbuffers > 0);
1880 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1881 REQUIRE(increment > buckets);
1882 UNUSED(maxrequests);
1883
1884 /*
1885 * Keep some number of items around. This should be a config
1886 * option. For now, keep 8, but later keep at least two even
1887 * if the caller wants less. This allows us to ensure certain
1888 * things, like an event can be "freed" and the next allocation
1889 * will always succeed.
1890 *
1891 * Note that if limits are placed on anything here, we use one
1892 * event internally, so the actual limit should be "wanted + 1."
1893 *
1894 * XXXMLG
1895 */
1896
1897 if (maxbuffers < 8) {
1898 maxbuffers = 8;
1899 }
1900
1901 LOCK(&mgr->buffer_lock);
1902
1903 if (maxbuffers > mgr->maxbuffers) {
1904 mgr->maxbuffers = maxbuffers;
1905 }
1906
1907 /* Create or adjust socket pool */
1908 if (mgr->qid != NULL) {
1909 UNLOCK(&mgr->buffer_lock);
1910 return (ISC_R_SUCCESS);
1911 }
1912
1913 result = qid_allocate(mgr, buckets, increment, &mgr->qid, true);
1914 if (result != ISC_R_SUCCESS) {
1915 goto cleanup;
1916 }
1917
1918 mgr->buffersize = buffersize;
1919 mgr->maxbuffers = maxbuffers;
1920 UNLOCK(&mgr->buffer_lock);
1921 return (ISC_R_SUCCESS);
1922
1923 cleanup:
1924 UNLOCK(&mgr->buffer_lock);
1925 return (result);
1926 }
1927
1928 void
dns_dispatchmgr_destroy(dns_dispatchmgr_t ** mgrp)1929 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1930 dns_dispatchmgr_t *mgr;
1931 bool killit;
1932
1933 REQUIRE(mgrp != NULL);
1934 REQUIRE(VALID_DISPATCHMGR(*mgrp));
1935
1936 mgr = *mgrp;
1937 *mgrp = NULL;
1938
1939 LOCK(&mgr->lock);
1940 mgr->state |= MGR_SHUTTINGDOWN;
1941 killit = destroy_mgr_ok(mgr);
1942 UNLOCK(&mgr->lock);
1943
1944 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1945
1946 if (killit) {
1947 destroy_mgr(&mgr);
1948 }
1949 }
1950
1951 void
dns_dispatchmgr_setstats(dns_dispatchmgr_t * mgr,isc_stats_t * stats)1952 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
1953 REQUIRE(VALID_DISPATCHMGR(mgr));
1954 REQUIRE(ISC_LIST_EMPTY(mgr->list));
1955 REQUIRE(mgr->stats == NULL);
1956
1957 isc_stats_attach(stats, &mgr->stats);
1958 }
1959
/*
 * bsearch() comparison callback for in_port_t values: returns -1, 0 or 1
 * as the key is less than, equal to, or greater than the array entry.
 */
static int
port_cmp(const void *key, const void *ent) {
	in_port_t p1 = *(const in_port_t *)key;
	in_port_t p2 = *(const in_port_t *)ent;

	if (p1 < p2) {
		return (-1);
	}
	if (p1 > p2) {
		return (1);
	}
	return (0);
}
1973
1974 static bool
portavailable(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_sockaddr_t * sockaddrp)1975 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1976 isc_sockaddr_t *sockaddrp) {
1977 isc_sockaddr_t sockaddr;
1978 isc_result_t result;
1979 in_port_t *ports, port;
1980 unsigned int nports;
1981 bool available = false;
1982
1983 REQUIRE(sock != NULL || sockaddrp != NULL);
1984
1985 PORTBUFLOCK(mgr);
1986 if (sock != NULL) {
1987 sockaddrp = &sockaddr;
1988 result = isc_socket_getsockname(sock, sockaddrp);
1989 if (result != ISC_R_SUCCESS) {
1990 goto unlock;
1991 }
1992 }
1993
1994 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
1995 ports = mgr->v4ports;
1996 nports = mgr->nv4ports;
1997 } else {
1998 ports = mgr->v6ports;
1999 nports = mgr->nv6ports;
2000 }
2001 if (ports == NULL) {
2002 goto unlock;
2003 }
2004
2005 port = isc_sockaddr_getport(sockaddrp);
2006 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2007 {
2008 available = true;
2009 }
2010
2011 unlock:
2012 PORTBUFUNLOCK(mgr);
2013 return (available);
2014 }
2015
/* True when attribute words _a1 and _a2 agree on every bit set in _mask. */
#define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2017
2018 static bool
local_addr_match(dns_dispatch_t * disp,const isc_sockaddr_t * addr)2019 local_addr_match(dns_dispatch_t *disp, const isc_sockaddr_t *addr) {
2020 isc_sockaddr_t sockaddr;
2021 isc_result_t result;
2022
2023 REQUIRE(disp->socket != NULL);
2024
2025 if (addr == NULL) {
2026 return (true);
2027 }
2028
2029 /*
2030 * Don't match wildcard ports unless the port is available in the
2031 * current configuration.
2032 */
2033 if (isc_sockaddr_getport(addr) == 0 &&
2034 isc_sockaddr_getport(&disp->local) == 0 &&
2035 !portavailable(disp->mgr, disp->socket, NULL))
2036 {
2037 return (false);
2038 }
2039
2040 /*
2041 * Check if we match the binding <address,port>.
2042 * Wildcard ports match/fail here.
2043 */
2044 if (isc_sockaddr_equal(&disp->local, addr)) {
2045 return (true);
2046 }
2047 if (isc_sockaddr_getport(addr) == 0) {
2048 return (false);
2049 }
2050
2051 /*
2052 * Check if we match a bound wildcard port <address,port>.
2053 */
2054 if (!isc_sockaddr_eqaddr(&disp->local, addr)) {
2055 return (false);
2056 }
2057 result = isc_socket_getsockname(disp->socket, &sockaddr);
2058 if (result != ISC_R_SUCCESS) {
2059 return (false);
2060 }
2061
2062 return (isc_sockaddr_equal(&sockaddr, addr));
2063 }
2064
2065 /*
2066 * Requires mgr be locked.
2067 *
2068 * No dispatcher can be locked by this thread when calling this function.
2069 *
2070 *
2071 * NOTE:
2072 * If a matching dispatcher is found, it is locked after this function
2073 * returns, and must be unlocked by the caller.
2074 */
2075 static isc_result_t
dispatch_find(dns_dispatchmgr_t * mgr,const isc_sockaddr_t * local,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2076 dispatch_find(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *local,
2077 unsigned int attributes, unsigned int mask,
2078 dns_dispatch_t **dispp) {
2079 dns_dispatch_t *disp;
2080 isc_result_t result;
2081
2082 /*
2083 * Make certain that we will not match a private or exclusive dispatch.
2084 */
2085 attributes &= ~(DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE);
2086 mask |= (DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE);
2087
2088 disp = ISC_LIST_HEAD(mgr->list);
2089 while (disp != NULL) {
2090 LOCK(&disp->lock);
2091 if ((disp->shutting_down == 0) &&
2092 ATTRMATCH(disp->attributes, attributes, mask) &&
2093 local_addr_match(disp, local))
2094 {
2095 break;
2096 }
2097 UNLOCK(&disp->lock);
2098 disp = ISC_LIST_NEXT(disp, link);
2099 }
2100
2101 if (disp == NULL) {
2102 result = ISC_R_NOTFOUND;
2103 goto out;
2104 }
2105
2106 *dispp = disp;
2107 result = ISC_R_SUCCESS;
2108 out:
2109
2110 return (result);
2111 }
2112
2113 static isc_result_t
qid_allocate(dns_dispatchmgr_t * mgr,unsigned int buckets,unsigned int increment,dns_qid_t ** qidp,bool needsocktable)2114 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2115 unsigned int increment, dns_qid_t **qidp, bool needsocktable) {
2116 dns_qid_t *qid;
2117 unsigned int i;
2118
2119 REQUIRE(VALID_DISPATCHMGR(mgr));
2120 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2121 REQUIRE(increment > buckets);
2122 REQUIRE(qidp != NULL && *qidp == NULL);
2123
2124 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2125
2126 qid->qid_table = isc_mem_get(mgr->mctx,
2127 buckets * sizeof(dns_displist_t));
2128
2129 qid->sock_table = NULL;
2130 if (needsocktable) {
2131 qid->sock_table = isc_mem_get(
2132 mgr->mctx, buckets * sizeof(dispsocketlist_t));
2133 }
2134
2135 isc_mutex_init(&qid->lock);
2136
2137 for (i = 0; i < buckets; i++) {
2138 ISC_LIST_INIT(qid->qid_table[i]);
2139 if (qid->sock_table != NULL) {
2140 ISC_LIST_INIT(qid->sock_table[i]);
2141 }
2142 }
2143
2144 qid->qid_nbuckets = buckets;
2145 qid->qid_increment = increment;
2146 qid->magic = QID_MAGIC;
2147 *qidp = qid;
2148 return (ISC_R_SUCCESS);
2149 }
2150
2151 static void
qid_destroy(isc_mem_t * mctx,dns_qid_t ** qidp)2152 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2153 dns_qid_t *qid;
2154
2155 REQUIRE(qidp != NULL);
2156 qid = *qidp;
2157 *qidp = NULL;
2158
2159 REQUIRE(VALID_QID(qid));
2160
2161 qid->magic = 0;
2162 isc_mem_put(mctx, qid->qid_table,
2163 qid->qid_nbuckets * sizeof(dns_displist_t));
2164 if (qid->sock_table != NULL) {
2165 isc_mem_put(mctx, qid->sock_table,
2166 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2167 }
2168 isc_mutex_destroy(&qid->lock);
2169 isc_mem_put(mctx, qid, sizeof(*qid));
2170 }
2171
2172 /*
2173 * Allocate and set important limits.
2174 */
2175 static isc_result_t
dispatch_allocate(dns_dispatchmgr_t * mgr,unsigned int maxrequests,dns_dispatch_t ** dispp)2176 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2177 dns_dispatch_t **dispp) {
2178 dns_dispatch_t *disp;
2179 isc_result_t result;
2180
2181 REQUIRE(VALID_DISPATCHMGR(mgr));
2182 REQUIRE(dispp != NULL && *dispp == NULL);
2183
2184 /*
2185 * Set up the dispatcher, mostly. Don't bother setting some of
2186 * the options that are controlled by tcp vs. udp, etc.
2187 */
2188
2189 disp = isc_mem_get(mgr->mctx, sizeof(*disp));
2190 isc_refcount_increment0(&mgr->irefs);
2191
2192 disp->magic = 0;
2193 disp->mgr = mgr;
2194 disp->maxrequests = maxrequests;
2195 disp->attributes = 0;
2196 ISC_LINK_INIT(disp, link);
2197 disp->refcount = 1;
2198 disp->recv_pending = 0;
2199 memset(&disp->local, 0, sizeof(disp->local));
2200 memset(&disp->peer, 0, sizeof(disp->peer));
2201 disp->localport = 0;
2202 disp->shutting_down = 0;
2203 disp->shutdown_out = 0;
2204 disp->connected = 0;
2205 disp->tcpmsg_valid = 0;
2206 disp->shutdown_why = ISC_R_UNEXPECTED;
2207 disp->requests = 0;
2208 disp->tcpbuffers = 0;
2209 disp->qid = NULL;
2210 ISC_LIST_INIT(disp->activesockets);
2211 ISC_LIST_INIT(disp->inactivesockets);
2212 disp->nsockets = 0;
2213 disp->port_table = NULL;
2214 disp->dscp = -1;
2215
2216 isc_mutex_init(&disp->lock);
2217
2218 disp->failsafe_ev = allocate_devent(disp);
2219 if (disp->failsafe_ev == NULL) {
2220 result = ISC_R_NOMEMORY;
2221 goto kill_lock;
2222 }
2223
2224 disp->magic = DISPATCH_MAGIC;
2225
2226 *dispp = disp;
2227 return (ISC_R_SUCCESS);
2228
2229 /*
2230 * error returns
2231 */
2232 kill_lock:
2233 isc_mutex_destroy(&disp->lock);
2234 isc_refcount_decrement(&mgr->irefs);
2235 isc_mem_put(mgr->mctx, disp, sizeof(*disp));
2236
2237 return (result);
2238 }
2239
2240 /*
2241 * MUST be unlocked, and not used by anything.
2242 */
2243 static void
dispatch_free(dns_dispatch_t ** dispp)2244 dispatch_free(dns_dispatch_t **dispp) {
2245 dns_dispatch_t *disp;
2246 dns_dispatchmgr_t *mgr;
2247
2248 REQUIRE(VALID_DISPATCH(*dispp));
2249 disp = *dispp;
2250 *dispp = NULL;
2251
2252 mgr = disp->mgr;
2253 REQUIRE(VALID_DISPATCHMGR(mgr));
2254
2255 if (disp->tcpmsg_valid) {
2256 dns_tcpmsg_invalidate(&disp->tcpmsg);
2257 disp->tcpmsg_valid = 0;
2258 }
2259
2260 INSIST(disp->tcpbuffers == 0);
2261 INSIST(disp->requests == 0);
2262 INSIST(disp->recv_pending == 0);
2263 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2264 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2265
2266 isc_refcount_decrement(&mgr->irefs);
2267 isc_mem_put(mgr->mctx, disp->failsafe_ev, sizeof(*disp->failsafe_ev));
2268 disp->failsafe_ev = NULL;
2269
2270 if (disp->qid != NULL) {
2271 qid_destroy(mgr->mctx, &disp->qid);
2272 }
2273
2274 if (disp->port_table != NULL) {
2275 for (int i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) {
2276 INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2277 }
2278 isc_mem_put(mgr->mctx, disp->port_table,
2279 sizeof(disp->port_table[0]) *
2280 DNS_DISPATCH_PORTTABLESIZE);
2281 }
2282
2283 disp->mgr = NULL;
2284 isc_mutex_destroy(&disp->lock);
2285 disp->magic = 0;
2286 isc_refcount_decrement(&mgr->irefs);
2287 isc_mem_put(mgr->mctx, disp, sizeof(*disp));
2288 }
2289
2290 isc_result_t
dns_dispatch_createtcp(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_taskmgr_t * taskmgr,const isc_sockaddr_t * localaddr,const isc_sockaddr_t * destaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,dns_dispatch_t ** dispp)2291 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2292 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr,
2293 const isc_sockaddr_t *destaddr, unsigned int buffersize,
2294 unsigned int maxbuffers, unsigned int maxrequests,
2295 unsigned int buckets, unsigned int increment,
2296 unsigned int attributes, dns_dispatch_t **dispp) {
2297 isc_result_t result;
2298 dns_dispatch_t *disp;
2299
2300 UNUSED(maxbuffers);
2301 UNUSED(buffersize);
2302
2303 REQUIRE(VALID_DISPATCHMGR(mgr));
2304 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2305 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2306 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2307
2308 if (destaddr == NULL) {
2309 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2310 }
2311
2312 LOCK(&mgr->lock);
2313
2314 /*
2315 * dispatch_allocate() checks mgr for us.
2316 * qid_allocate() checks buckets and increment for us.
2317 */
2318 disp = NULL;
2319 result = dispatch_allocate(mgr, maxrequests, &disp);
2320 if (result != ISC_R_SUCCESS) {
2321 UNLOCK(&mgr->lock);
2322 return (result);
2323 }
2324
2325 result = qid_allocate(mgr, buckets, increment, &disp->qid, false);
2326 if (result != ISC_R_SUCCESS) {
2327 goto deallocate_dispatch;
2328 }
2329
2330 disp->socktype = isc_sockettype_tcp;
2331 disp->socket = NULL;
2332 isc_socket_attach(sock, &disp->socket);
2333
2334 disp->sepool = NULL;
2335
2336 disp->ntasks = 1;
2337 disp->task[0] = NULL;
2338 result = isc_task_create(taskmgr, 50, &disp->task[0]);
2339 if (result != ISC_R_SUCCESS) {
2340 goto kill_socket;
2341 }
2342
2343 disp->ctlevent =
2344 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL,
2345 destroy_disp, disp, sizeof(isc_event_t));
2346
2347 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2348
2349 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2350 disp->tcpmsg_valid = 1;
2351
2352 disp->attributes = attributes;
2353
2354 if (localaddr == NULL) {
2355 if (destaddr != NULL) {
2356 switch (isc_sockaddr_pf(destaddr)) {
2357 case AF_INET:
2358 isc_sockaddr_any(&disp->local);
2359 break;
2360 case AF_INET6:
2361 isc_sockaddr_any6(&disp->local);
2362 break;
2363 }
2364 }
2365 } else {
2366 disp->local = *localaddr;
2367 }
2368
2369 if (destaddr != NULL) {
2370 disp->peer = *destaddr;
2371 }
2372
2373 /*
2374 * Append it to the dispatcher list.
2375 */
2376 ISC_LIST_APPEND(mgr->list, disp, link);
2377 UNLOCK(&mgr->lock);
2378
2379 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2380 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2381 *dispp = disp;
2382
2383 return (ISC_R_SUCCESS);
2384
2385 kill_socket:
2386 isc_socket_detach(&disp->socket);
2387 deallocate_dispatch:
2388 dispatch_free(&disp);
2389
2390 UNLOCK(&mgr->lock);
2391
2392 return (result);
2393 }
2394
2395 isc_result_t
dns_dispatch_gettcp(dns_dispatchmgr_t * mgr,const isc_sockaddr_t * destaddr,const isc_sockaddr_t * localaddr,bool * connected,dns_dispatch_t ** dispp)2396 dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr,
2397 const isc_sockaddr_t *localaddr, bool *connected,
2398 dns_dispatch_t **dispp) {
2399 dns_dispatch_t *disp;
2400 isc_result_t result;
2401 isc_sockaddr_t peeraddr;
2402 isc_sockaddr_t sockname;
2403 unsigned int attributes, mask;
2404 bool match = false;
2405
2406 REQUIRE(VALID_DISPATCHMGR(mgr));
2407 REQUIRE(destaddr != NULL);
2408 REQUIRE(dispp != NULL && *dispp == NULL);
2409
2410 /* First pass */
2411 attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED;
2412 mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE |
2413 DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED;
2414
2415 LOCK(&mgr->lock);
2416 disp = ISC_LIST_HEAD(mgr->list);
2417 while (disp != NULL && !match) {
2418 LOCK(&disp->lock);
2419 if ((disp->shutting_down == 0) &&
2420 ATTRMATCH(disp->attributes, attributes, mask) &&
2421 (localaddr == NULL ||
2422 isc_sockaddr_eqaddr(localaddr, &disp->local)))
2423 {
2424 result = isc_socket_getsockname(disp->socket,
2425 &sockname);
2426 if (result == ISC_R_SUCCESS) {
2427 result = isc_socket_getpeername(disp->socket,
2428 &peeraddr);
2429 }
2430 if (result == ISC_R_SUCCESS &&
2431 isc_sockaddr_equal(destaddr, &peeraddr) &&
2432 (localaddr == NULL ||
2433 isc_sockaddr_eqaddr(localaddr, &sockname)))
2434 {
2435 /* attach */
2436 disp->refcount++;
2437 *dispp = disp;
2438 match = true;
2439 if (connected != NULL) {
2440 *connected = true;
2441 }
2442 }
2443 }
2444 UNLOCK(&disp->lock);
2445 disp = ISC_LIST_NEXT(disp, link);
2446 }
2447 if (match || connected == NULL) {
2448 UNLOCK(&mgr->lock);
2449 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND);
2450 }
2451
2452 /* Second pass, only if connected != NULL */
2453 attributes = DNS_DISPATCHATTR_TCP;
2454
2455 disp = ISC_LIST_HEAD(mgr->list);
2456 while (disp != NULL && !match) {
2457 LOCK(&disp->lock);
2458 if ((disp->shutting_down == 0) &&
2459 ATTRMATCH(disp->attributes, attributes, mask) &&
2460 (localaddr == NULL ||
2461 isc_sockaddr_eqaddr(localaddr, &disp->local)) &&
2462 isc_sockaddr_equal(destaddr, &disp->peer))
2463 {
2464 /* attach */
2465 disp->refcount++;
2466 *dispp = disp;
2467 match = true;
2468 }
2469 UNLOCK(&disp->lock);
2470 disp = ISC_LIST_NEXT(disp, link);
2471 }
2472 UNLOCK(&mgr->lock);
2473 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND);
2474 }
2475
2476 isc_result_t
dns_dispatch_getudp_dup(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,const isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp,dns_dispatch_t * dup_dispatch)2477 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2478 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr,
2479 unsigned int buffersize, unsigned int maxbuffers,
2480 unsigned int maxrequests, unsigned int buckets,
2481 unsigned int increment, unsigned int attributes,
2482 unsigned int mask, dns_dispatch_t **dispp,
2483 dns_dispatch_t *dup_dispatch) {
2484 isc_result_t result;
2485 dns_dispatch_t *disp = NULL;
2486
2487 REQUIRE(VALID_DISPATCHMGR(mgr));
2488 REQUIRE(sockmgr != NULL);
2489 REQUIRE(localaddr != NULL);
2490 REQUIRE(taskmgr != NULL);
2491 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2492 REQUIRE(maxbuffers > 0);
2493 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2494 REQUIRE(increment > buckets);
2495 REQUIRE(dispp != NULL && *dispp == NULL);
2496 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2497
2498 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2499 maxrequests, buckets, increment);
2500 if (result != ISC_R_SUCCESS) {
2501 return (result);
2502 }
2503
2504 LOCK(&mgr->lock);
2505
2506 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2507 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2508 goto createudp;
2509 }
2510
2511 /*
2512 * See if we have a dispatcher that matches.
2513 */
2514 if (dup_dispatch == NULL) {
2515 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2516 if (result == ISC_R_SUCCESS) {
2517 disp->refcount++;
2518
2519 if (disp->maxrequests < maxrequests) {
2520 disp->maxrequests = maxrequests;
2521 }
2522
2523 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) ==
2524 0 &&
2525 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2526 {
2527 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2528 if (disp->recv_pending != 0) {
2529 isc_socket_cancel(disp->socket,
2530 disp->task[0],
2531 ISC_SOCKCANCEL_RECV);
2532 }
2533 }
2534
2535 UNLOCK(&disp->lock);
2536 UNLOCK(&mgr->lock);
2537
2538 *dispp = disp;
2539
2540 return (ISC_R_SUCCESS);
2541 }
2542 }
2543
2544 createudp:
2545 /*
2546 * Nope, create one.
2547 */
2548 result = dispatch_createudp(
2549 mgr, sockmgr, taskmgr, localaddr, maxrequests, attributes,
2550 &disp, dup_dispatch == NULL ? NULL : dup_dispatch->socket);
2551
2552 if (result != ISC_R_SUCCESS) {
2553 UNLOCK(&mgr->lock);
2554 return (result);
2555 }
2556
2557 UNLOCK(&mgr->lock);
2558 *dispp = disp;
2559
2560 return (ISC_R_SUCCESS);
2561 }
2562
2563 isc_result_t
dns_dispatch_getudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,const isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2564 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2565 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr,
2566 unsigned int buffersize, unsigned int maxbuffers,
2567 unsigned int maxrequests, unsigned int buckets,
2568 unsigned int increment, unsigned int attributes,
2569 unsigned int mask, dns_dispatch_t **dispp) {
2570 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2571 buffersize, maxbuffers, maxrequests,
2572 buckets, increment, attributes, mask,
2573 dispp, NULL));
2574 }
2575
/*
 * mgr should be locked.
 *
 * NOTE(review): this remark appears to describe get_udpsocket(), which
 * follows the macro below; its visible caller chain runs with mgr->lock
 * held.
 */

/*
 * Number of slots in the array of temporarily held sockets that
 * get_udpsocket() keeps while probing for an acceptable port.  The
 * #ifndef guard lets a definition made earlier take precedence.
 */
#ifndef DNS_DISPATCH_HELD
#define DNS_DISPATCH_HELD 20U
#endif /* ifndef DNS_DISPATCH_HELD */
2583
2584 static isc_result_t
get_udpsocket(dns_dispatchmgr_t * mgr,dns_dispatch_t * disp,isc_socketmgr_t * sockmgr,const isc_sockaddr_t * localaddr,isc_socket_t ** sockp,isc_socket_t * dup_socket,bool duponly)2585 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2586 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr,
2587 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly) {
2588 unsigned int i, j;
2589 isc_socket_t *held[DNS_DISPATCH_HELD];
2590 isc_sockaddr_t localaddr_bound;
2591 isc_socket_t *sock = NULL;
2592 isc_result_t result = ISC_R_SUCCESS;
2593 bool anyport;
2594
2595 INSIST(sockp != NULL && *sockp == NULL);
2596
2597 localaddr_bound = *localaddr;
2598 anyport = (isc_sockaddr_getport(localaddr) == 0);
2599
2600 if (anyport) {
2601 unsigned int nports;
2602 in_port_t *ports;
2603
2604 /*
2605 * If no port is specified, we first try to pick up a random
2606 * port by ourselves.
2607 */
2608 if (isc_sockaddr_pf(localaddr) == AF_INET) {
2609 nports = disp->mgr->nv4ports;
2610 ports = disp->mgr->v4ports;
2611 } else {
2612 nports = disp->mgr->nv6ports;
2613 ports = disp->mgr->v6ports;
2614 }
2615 if (nports == 0) {
2616 return (ISC_R_ADDRNOTAVAIL);
2617 }
2618
2619 for (i = 0; i < 1024; i++) {
2620 in_port_t prt;
2621
2622 prt = ports[isc_random_uniform(nports)];
2623 isc_sockaddr_setport(&localaddr_bound, prt);
2624 result = open_socket(sockmgr, &localaddr_bound, 0,
2625 &sock, NULL, false);
2626 /*
2627 * Continue if the port chosen is already in use
2628 * or the OS has reserved it.
2629 */
2630 if (result == ISC_R_NOPERM || result == ISC_R_ADDRINUSE)
2631 {
2632 continue;
2633 }
2634 disp->localport = prt;
2635 *sockp = sock;
2636 return (result);
2637 }
2638
2639 /*
2640 * If this fails 1024 times, we then ask the kernel for
2641 * choosing one.
2642 */
2643 } else {
2644 /* Allow to reuse address for non-random ports. */
2645 result = open_socket(sockmgr, localaddr,
2646 ISC_SOCKET_REUSEADDRESS, &sock, dup_socket,
2647 duponly);
2648
2649 if (result == ISC_R_SUCCESS) {
2650 *sockp = sock;
2651 }
2652
2653 return (result);
2654 }
2655
2656 memset(held, 0, sizeof(held));
2657 i = 0;
2658
2659 for (j = 0; j < 0xffffU; j++) {
2660 result = open_socket(sockmgr, localaddr, 0, &sock, NULL, false);
2661 if (result != ISC_R_SUCCESS) {
2662 goto end;
2663 } else if (portavailable(mgr, sock, NULL)) {
2664 break;
2665 }
2666 if (held[i] != NULL) {
2667 isc_socket_detach(&held[i]);
2668 }
2669 held[i++] = sock;
2670 sock = NULL;
2671 if (i == DNS_DISPATCH_HELD) {
2672 i = 0;
2673 }
2674 }
2675 if (j == 0xffffU) {
2676 mgr_log(mgr, ISC_LOG_ERROR,
2677 "avoid-v%s-udp-ports: unable to allocate "
2678 "an available port",
2679 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2680 result = ISC_R_FAILURE;
2681 goto end;
2682 }
2683 *sockp = sock;
2684
2685 end:
2686 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2687 if (held[i] != NULL) {
2688 isc_socket_detach(&held[i]);
2689 }
2690 }
2691
2692 return (result);
2693 }
2694
2695 static isc_result_t
dispatch_createudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,const isc_sockaddr_t * localaddr,unsigned int maxrequests,unsigned int attributes,dns_dispatch_t ** dispp,isc_socket_t * dup_socket)2696 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2697 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr,
2698 unsigned int maxrequests, unsigned int attributes,
2699 dns_dispatch_t **dispp, isc_socket_t *dup_socket) {
2700 isc_result_t result;
2701 dns_dispatch_t *disp;
2702 isc_socket_t *sock = NULL;
2703 int i = 0;
2704 bool duponly = ((attributes & DNS_DISPATCHATTR_CANREUSE) == 0);
2705
2706 /* This is an attribute needed only at creation time */
2707 attributes &= ~DNS_DISPATCHATTR_CANREUSE;
2708 /*
2709 * dispatch_allocate() checks mgr for us.
2710 */
2711 disp = NULL;
2712 result = dispatch_allocate(mgr, maxrequests, &disp);
2713 if (result != ISC_R_SUCCESS) {
2714 return (result);
2715 }
2716
2717 disp->socktype = isc_sockettype_udp;
2718
2719 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2720 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
2721 dup_socket, duponly);
2722 if (result != ISC_R_SUCCESS) {
2723 goto deallocate_dispatch;
2724 }
2725
2726 if (isc_log_wouldlog(dns_lctx, 90)) {
2727 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2728
2729 isc_sockaddr_format(localaddr, addrbuf,
2730 ISC_SOCKADDR_FORMATSIZE);
2731 mgr_log(mgr, LVL(90),
2732 "dns_dispatch_createudp: Created"
2733 " UDP dispatch for %s with socket fd %d",
2734 addrbuf, isc_socket_getfd(sock));
2735 }
2736 } else {
2737 isc_sockaddr_t sa_any;
2738
2739 /*
2740 * For dispatches using exclusive sockets with a specific
2741 * source address, we only check if the specified address is
2742 * available on the system. Query sockets will be created later
2743 * on demand.
2744 */
2745 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2746 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2747 result = open_socket(sockmgr, localaddr, 0, &sock, NULL,
2748 false);
2749 if (sock != NULL) {
2750 isc_socket_detach(&sock);
2751 }
2752 if (result != ISC_R_SUCCESS) {
2753 goto deallocate_dispatch;
2754 }
2755 }
2756
2757 disp->port_table = isc_mem_get(
2758 mgr->mctx, sizeof(disp->port_table[0]) *
2759 DNS_DISPATCH_PORTTABLESIZE);
2760 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) {
2761 ISC_LIST_INIT(disp->port_table[i]);
2762 }
2763 }
2764 disp->socket = sock;
2765 disp->local = *localaddr;
2766
2767 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2768 disp->ntasks = MAX_INTERNAL_TASKS;
2769 } else {
2770 disp->ntasks = 1;
2771 }
2772 for (i = 0; i < disp->ntasks; i++) {
2773 disp->task[i] = NULL;
2774 result = isc_task_create(taskmgr, 0, &disp->task[i]);
2775 if (result != ISC_R_SUCCESS) {
2776 while (--i >= 0) {
2777 isc_task_shutdown(disp->task[i]);
2778 isc_task_detach(&disp->task[i]);
2779 }
2780 goto kill_socket;
2781 }
2782 isc_task_setname(disp->task[i], "udpdispatch", disp);
2783 }
2784
2785 disp->ctlevent =
2786 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL,
2787 destroy_disp, disp, sizeof(isc_event_t));
2788
2789 disp->sepool = NULL;
2790 isc_mem_create(&disp->sepool);
2791 isc_mem_setname(disp->sepool, "disp_sepool", NULL);
2792
2793 attributes &= ~DNS_DISPATCHATTR_TCP;
2794 attributes |= DNS_DISPATCHATTR_UDP;
2795 disp->attributes = attributes;
2796
2797 /*
2798 * Append it to the dispatcher list.
2799 */
2800 ISC_LIST_APPEND(mgr->list, disp, link);
2801
2802 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2803 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2804 if (disp->socket != NULL) {
2805 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2806 }
2807
2808 *dispp = disp;
2809
2810 return (result);
2811
2812 /*
2813 * Error returns.
2814 */
2815 kill_socket:
2816 if (disp->socket != NULL) {
2817 isc_socket_detach(&disp->socket);
2818 }
2819 deallocate_dispatch:
2820 dispatch_free(&disp);
2821
2822 return (result);
2823 }
2824
2825 void
dns_dispatch_attach(dns_dispatch_t * disp,dns_dispatch_t ** dispp)2826 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2827 REQUIRE(VALID_DISPATCH(disp));
2828 REQUIRE(dispp != NULL && *dispp == NULL);
2829
2830 LOCK(&disp->lock);
2831 disp->refcount++;
2832 UNLOCK(&disp->lock);
2833
2834 *dispp = disp;
2835 }
2836
2837 /*
2838 * It is important to lock the manager while we are deleting the dispatch,
2839 * since dns_dispatch_getudp will call dispatch_find, which returns to
2840 * the caller a dispatch but does not attach to it until later. _getudp
2841 * locks the manager, however, so locking it here will keep us from attaching
2842 * to a dispatcher that is in the process of going away.
2843 */
2844 void
dns_dispatch_detach(dns_dispatch_t ** dispp)2845 dns_dispatch_detach(dns_dispatch_t **dispp) {
2846 dns_dispatch_t *disp;
2847 dispsocket_t *dispsock;
2848 bool killit;
2849
2850 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2851
2852 disp = *dispp;
2853 *dispp = NULL;
2854
2855 LOCK(&disp->lock);
2856
2857 INSIST(disp->refcount > 0);
2858 disp->refcount--;
2859 if (disp->refcount == 0) {
2860 if (disp->recv_pending > 0) {
2861 isc_socket_cancel(disp->socket, disp->task[0],
2862 ISC_SOCKCANCEL_RECV);
2863 }
2864 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2865 dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link))
2866 {
2867 isc_socket_cancel(dispsock->socket, dispsock->task,
2868 ISC_SOCKCANCEL_RECV);
2869 }
2870 disp->shutting_down = 1;
2871 }
2872
2873 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2874
2875 killit = destroy_disp_ok(disp);
2876 UNLOCK(&disp->lock);
2877 if (killit) {
2878 isc_task_send(disp->task[0], &disp->ctlevent);
2879 }
2880 }
2881
2882 isc_result_t
dns_dispatch_addresponse(dns_dispatch_t * disp,unsigned int options,const isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp,isc_socketmgr_t * sockmgr)2883 dns_dispatch_addresponse(dns_dispatch_t *disp, unsigned int options,
2884 const isc_sockaddr_t *dest, isc_task_t *task,
2885 isc_taskaction_t action, void *arg,
2886 dns_messageid_t *idp, dns_dispentry_t **resp,
2887 isc_socketmgr_t *sockmgr) {
2888 dns_dispentry_t *res;
2889 unsigned int bucket;
2890 in_port_t localport = 0;
2891 dns_messageid_t id;
2892 int i;
2893 bool ok;
2894 dns_qid_t *qid;
2895 dispsocket_t *dispsocket = NULL;
2896 isc_result_t result;
2897
2898 REQUIRE(VALID_DISPATCH(disp));
2899 REQUIRE(task != NULL);
2900 REQUIRE(dest != NULL);
2901 REQUIRE(resp != NULL && *resp == NULL);
2902 REQUIRE(idp != NULL);
2903 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2904 REQUIRE(sockmgr != NULL);
2905 }
2906
2907 LOCK(&disp->lock);
2908
2909 if (disp->shutting_down == 1) {
2910 UNLOCK(&disp->lock);
2911 return (ISC_R_SHUTTINGDOWN);
2912 }
2913
2914 if (disp->requests >= disp->maxrequests) {
2915 UNLOCK(&disp->lock);
2916 return (ISC_R_QUOTA);
2917 }
2918
2919 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
2920 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA)
2921 {
2922 dispsocket_t *oldestsocket;
2923 dns_dispentry_t *oldestresp;
2924 dns_dispatchevent_t *rev;
2925
2926 /*
2927 * Kill oldest outstanding query if the number of sockets
2928 * exceeds the quota to keep the room for new queries.
2929 */
2930 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
2931 oldestresp = oldestsocket->resp;
2932 if (oldestresp != NULL && !oldestresp->item_out) {
2933 rev = allocate_devent(oldestresp->disp);
2934 if (rev != NULL) {
2935 rev->buffer.base = NULL;
2936 rev->result = ISC_R_CANCELED;
2937 rev->id = oldestresp->id;
2938 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
2939 DNS_EVENT_DISPATCH,
2940 oldestresp->action,
2941 oldestresp->arg, oldestresp,
2942 NULL, NULL);
2943 oldestresp->item_out = true;
2944 isc_task_send(oldestresp->task,
2945 ISC_EVENT_PTR(&rev));
2946 inc_stats(disp->mgr,
2947 dns_resstatscounter_dispabort);
2948 }
2949 }
2950
2951 /*
2952 * Move this entry to the tail so that it won't (easily) be
2953 * examined before actually being canceled.
2954 */
2955 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
2956 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
2957 }
2958
2959 qid = DNS_QID(disp);
2960
2961 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2962 /*
2963 * Get a separate UDP socket with a random port number.
2964 */
2965 result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
2966 &localport);
2967 if (result != ISC_R_SUCCESS) {
2968 UNLOCK(&disp->lock);
2969 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
2970 return (result);
2971 }
2972 } else {
2973 localport = disp->localport;
2974 }
2975
2976 /*
2977 * Try somewhat hard to find an unique ID unless FIXEDID is set
2978 * in which case we use the id passed in via *idp.
2979 */
2980 LOCK(&qid->lock);
2981 if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) {
2982 id = *idp;
2983 } else {
2984 id = (dns_messageid_t)isc_random16();
2985 }
2986 ok = false;
2987 i = 0;
2988 do {
2989 bucket = dns_hash(qid, dest, id, localport);
2990 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
2991 ok = true;
2992 break;
2993 }
2994 if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0) {
2995 break;
2996 }
2997 id += qid->qid_increment;
2998 id &= 0x0000ffff;
2999 } while (i++ < 64);
3000 UNLOCK(&qid->lock);
3001
3002 if (!ok) {
3003 UNLOCK(&disp->lock);
3004 return (ISC_R_NOMORE);
3005 }
3006
3007 res = isc_mem_get(disp->mgr->mctx, sizeof(*res));
3008 isc_refcount_increment0(&disp->mgr->irefs);
3009
3010 disp->refcount++;
3011 disp->requests++;
3012 res->task = NULL;
3013 isc_task_attach(task, &res->task);
3014 res->disp = disp;
3015 res->id = id;
3016 res->port = localport;
3017 res->bucket = bucket;
3018 res->host = *dest;
3019 res->action = action;
3020 res->arg = arg;
3021 res->dispsocket = dispsocket;
3022 if (dispsocket != NULL) {
3023 dispsocket->resp = res;
3024 }
3025 res->item_out = false;
3026 ISC_LIST_INIT(res->items);
3027 ISC_LINK_INIT(res, link);
3028 res->magic = RESPONSE_MAGIC;
3029
3030 LOCK(&qid->lock);
3031 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3032 UNLOCK(&qid->lock);
3033
3034 inc_stats(disp->mgr, (qid == disp->mgr->qid)
3035 ? dns_resstatscounter_disprequdp
3036 : dns_resstatscounter_dispreqtcp);
3037
3038 request_log(disp, res, LVL(90), "attached to task %p", res->task);
3039
3040 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3041 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
3042 {
3043 result = startrecv(disp, dispsocket);
3044 if (result != ISC_R_SUCCESS) {
3045 LOCK(&qid->lock);
3046 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3047 UNLOCK(&qid->lock);
3048
3049 if (dispsocket != NULL) {
3050 destroy_dispsocket(disp, &dispsocket);
3051 }
3052
3053 disp->refcount--;
3054 disp->requests--;
3055
3056 dec_stats(disp->mgr,
3057 (qid == disp->mgr->qid)
3058 ? dns_resstatscounter_disprequdp
3059 : dns_resstatscounter_dispreqtcp);
3060
3061 UNLOCK(&disp->lock);
3062 isc_task_detach(&res->task);
3063 isc_refcount_decrement(&disp->mgr->irefs);
3064 isc_mem_put(disp->mgr->mctx, res, sizeof(*res));
3065 return (result);
3066 }
3067 }
3068
3069 if (dispsocket != NULL) {
3070 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3071 }
3072
3073 UNLOCK(&disp->lock);
3074
3075 *idp = id;
3076 *resp = res;
3077
3078 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3079 INSIST(res->dispsocket != NULL);
3080 }
3081
3082 return (ISC_R_SUCCESS);
3083 }
3084
3085 void
dns_dispatch_starttcp(dns_dispatch_t * disp)3086 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3087 REQUIRE(VALID_DISPATCH(disp));
3088
3089 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3090
3091 LOCK(&disp->lock);
3092 if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) {
3093 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3094 (void)startrecv(disp, NULL);
3095 }
3096 UNLOCK(&disp->lock);
3097 }
3098
3099 isc_result_t
dns_dispatch_getnext(dns_dispentry_t * resp,dns_dispatchevent_t ** sockevent)3100 dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) {
3101 dns_dispatch_t *disp;
3102 dns_dispatchevent_t *ev;
3103
3104 REQUIRE(VALID_RESPONSE(resp));
3105 REQUIRE(sockevent != NULL && *sockevent != NULL);
3106
3107 disp = resp->disp;
3108 REQUIRE(VALID_DISPATCH(disp));
3109
3110 ev = *sockevent;
3111 *sockevent = NULL;
3112
3113 LOCK(&disp->lock);
3114
3115 REQUIRE(resp->item_out);
3116 resp->item_out = false;
3117
3118 if (ev->buffer.base != NULL) {
3119 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3120 }
3121 free_devent(disp, ev);
3122
3123 if (disp->shutting_down == 1) {
3124 UNLOCK(&disp->lock);
3125 return (ISC_R_SHUTTINGDOWN);
3126 }
3127 ev = ISC_LIST_HEAD(resp->items);
3128 if (ev != NULL) {
3129 ISC_LIST_UNLINK(resp->items, ev, ev_link);
3130 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3131 resp->action, resp->arg, resp, NULL, NULL);
3132 request_log(disp, resp, LVL(90),
3133 "[c] Sent event %p buffer %p len %d to task %p", ev,
3134 ev->buffer.base, ev->buffer.length, resp->task);
3135 resp->item_out = true;
3136 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3137 }
3138 UNLOCK(&disp->lock);
3139 return (ISC_R_SUCCESS);
3140 }
3141
/*
 * Remove the response entry '*resp' from its dispatch and free it; the
 * caller's pointer is cleared.
 *
 * 'sockevent', when non-NULL, must point to the event the caller
 * currently holds for this response; it is freed and '*sockevent' is
 * cleared.  When 'sockevent' is NULL but an event has already been posted
 * to the response's task, that event is pulled back and freed.  Any
 * further events still queued on the response are freed as well.
 *
 * This drops one request and one reference from the dispatch.  If that
 * was the last reference the dispatch begins shutting down, and when
 * destroy_disp_ok() agrees the control event is posted to the dispatch's
 * first task.
 */
void
dns_dispatch_removeresponse(dns_dispentry_t **resp,
			    dns_dispatchevent_t **sockevent) {
	dns_dispatchmgr_t *mgr;
	dns_dispatch_t *disp;
	dns_dispentry_t *res;
	dispsocket_t *dispsock;
	dns_dispatchevent_t *ev;
	unsigned int bucket;
	bool killit;
	unsigned int n;
	isc_eventlist_t events;
	dns_qid_t *qid;

	REQUIRE(resp != NULL);
	REQUIRE(VALID_RESPONSE(*resp));

	res = *resp;
	*resp = NULL;

	disp = res->disp;
	REQUIRE(VALID_DISPATCH(disp));
	mgr = disp->mgr;
	REQUIRE(VALID_DISPATCHMGR(mgr));

	qid = DNS_QID(disp);

	/* Take ownership of the caller's event, if one was handed in. */
	if (sockevent != NULL) {
		REQUIRE(*sockevent != NULL);
		ev = *sockevent;
		*sockevent = NULL;
	} else {
		ev = NULL;
	}

	LOCK(&disp->lock);

	/* Give back the request slot and the reference this entry held. */
	INSIST(disp->requests > 0);
	disp->requests--;
	dec_stats(disp->mgr, (qid == disp->mgr->qid)
				     ? dns_resstatscounter_disprequdp
				     : dns_resstatscounter_dispreqtcp);
	INSIST(disp->refcount > 0);
	disp->refcount--;
	if (disp->refcount == 0) {
		/*
		 * Last reference: cancel all outstanding receives and mark
		 * the dispatch as shutting down.
		 */
		if (disp->recv_pending > 0) {
			isc_socket_cancel(disp->socket, disp->task[0],
					  ISC_SOCKCANCEL_RECV);
		}
		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
		     dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link))
		{
			isc_socket_cancel(dispsock->socket, dispsock->task,
					  ISC_SOCKCANCEL_RECV);
		}
		disp->shutting_down = 1;
	}

	bucket = res->bucket;

	/* Unhook the entry from the QID table. */
	LOCK(&qid->lock);
	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
	UNLOCK(&qid->lock);

	if (ev == NULL && res->item_out) {
		/*
		 * We've posted our event, but the caller hasn't gotten it
		 * yet.  Take it back.
		 */
		ISC_LIST_INIT(events);
		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, NULL,
				    &events);
		/*
		 * We had better have gotten it back.
		 */
		INSIST(n == 1);
		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
	}

	/* Free the outstanding event, whichever way it was obtained. */
	if (ev != NULL) {
		REQUIRE(res->item_out);
		res->item_out = false;
		if (ev->buffer.base != NULL) {
			free_buffer(disp, ev->buffer.base, ev->buffer.length);
		}
		free_devent(disp, ev);
	}

	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
	isc_task_detach(&res->task);

	/*
	 * For a per-query socket, cancel its receive and break the link
	 * back to this entry; the socket itself is not freed here.
	 */
	if (res->dispsocket != NULL) {
		isc_socket_cancel(res->dispsocket->socket,
				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
		res->dispsocket->resp = NULL;
	}

	/*
	 * Free any buffered responses as well
	 */
	ev = ISC_LIST_HEAD(res->items);
	while (ev != NULL) {
		ISC_LIST_UNLINK(res->items, ev, ev_link);
		if (ev->buffer.base != NULL) {
			free_buffer(disp, ev->buffer.base, ev->buffer.length);
		}
		free_devent(disp, ev);
		ev = ISC_LIST_HEAD(res->items);
	}
	res->magic = 0;
	isc_refcount_decrement(&disp->mgr->irefs);
	isc_mem_put(disp->mgr->mctx, res, sizeof(*res));

	/*
	 * A dispatch that is shutting down runs do_cancel(); otherwise
	 * receiving is (re)started, ignoring the result.
	 */
	if (disp->shutting_down == 1) {
		do_cancel(disp);
	} else {
		(void)startrecv(disp, NULL);
	}

	/* Decide under the lock, but post the control event after it. */
	killit = destroy_disp_ok(disp);
	UNLOCK(&disp->lock);
	if (killit) {
		isc_task_send(disp->task[0], &disp->ctlevent);
	}
}
3266
3267 /*
3268 * disp must be locked.
3269 */
3270 static void
do_cancel(dns_dispatch_t * disp)3271 do_cancel(dns_dispatch_t *disp) {
3272 dns_dispatchevent_t *ev;
3273 dns_dispentry_t *resp;
3274 dns_qid_t *qid;
3275
3276 if (disp->shutdown_out == 1) {
3277 return;
3278 }
3279
3280 qid = DNS_QID(disp);
3281
3282 /*
3283 * Search for the first response handler without packets outstanding
3284 * unless a specific handler is given.
3285 */
3286 LOCK(&qid->lock);
3287 for (resp = linear_first(qid); resp != NULL && resp->item_out;
3288 /* Empty. */)
3289 {
3290 resp = linear_next(qid, resp);
3291 }
3292
3293 /*
3294 * No one to send the cancel event to, so nothing to do.
3295 */
3296 if (resp == NULL) {
3297 goto unlock;
3298 }
3299
3300 /*
3301 * Send the shutdown failsafe event to this resp.
3302 */
3303 ev = disp->failsafe_ev;
3304 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3305 resp->action, resp->arg, resp, NULL, NULL);
3306 ev->result = disp->shutdown_why;
3307 ev->buffer.base = NULL;
3308 ev->buffer.length = 0;
3309 disp->shutdown_out = 1;
3310 request_log(disp, resp, LVL(10), "cancel: failsafe event %p -> task %p",
3311 ev, resp->task);
3312 resp->item_out = true;
3313 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3314 unlock:
3315 UNLOCK(&qid->lock);
3316 }
3317
3318 isc_socket_t *
dns_dispatch_getsocket(dns_dispatch_t * disp)3319 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3320 REQUIRE(VALID_DISPATCH(disp));
3321
3322 return (disp->socket);
3323 }
3324
3325 isc_socket_t *
dns_dispatch_getentrysocket(dns_dispentry_t * resp)3326 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3327 REQUIRE(VALID_RESPONSE(resp));
3328
3329 if (resp->dispsocket != NULL) {
3330 return (resp->dispsocket->socket);
3331 } else {
3332 return (NULL);
3333 }
3334 }
3335
3336 isc_result_t
dns_dispatch_getlocaladdress(dns_dispatch_t * disp,isc_sockaddr_t * addrp)3337 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3338 REQUIRE(VALID_DISPATCH(disp));
3339 REQUIRE(addrp != NULL);
3340
3341 if (disp->socktype == isc_sockettype_udp) {
3342 *addrp = disp->local;
3343 return (ISC_R_SUCCESS);
3344 }
3345 return (ISC_R_NOTIMPLEMENTED);
3346 }
3347
3348 void
dns_dispatch_cancel(dns_dispatch_t * disp)3349 dns_dispatch_cancel(dns_dispatch_t *disp) {
3350 REQUIRE(VALID_DISPATCH(disp));
3351
3352 LOCK(&disp->lock);
3353
3354 if (disp->shutting_down == 1) {
3355 UNLOCK(&disp->lock);
3356 return;
3357 }
3358
3359 disp->shutdown_why = ISC_R_CANCELED;
3360 disp->shutting_down = 1;
3361 do_cancel(disp);
3362
3363 UNLOCK(&disp->lock);
3364
3365 return;
3366 }
3367
3368 unsigned int
dns_dispatch_getattributes(dns_dispatch_t * disp)3369 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3370 REQUIRE(VALID_DISPATCH(disp));
3371
3372 /*
3373 * We don't bother locking disp here; it's the caller's responsibility
3374 * to use only non volatile flags.
3375 */
3376 return (disp->attributes);
3377 }
3378
3379 void
dns_dispatch_changeattributes(dns_dispatch_t * disp,unsigned int attributes,unsigned int mask)3380 dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes,
3381 unsigned int mask) {
3382 REQUIRE(VALID_DISPATCH(disp));
3383 /* Exclusive attribute can only be set on creation */
3384 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3385 /* Also, a dispatch with randomport specified cannot start listening */
3386 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3387 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3388
3389 /* XXXMLG
3390 * Should check for valid attributes here!
3391 */
3392
3393 LOCK(&disp->lock);
3394
3395 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3396 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3397 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0)
3398 {
3399 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3400 (void)startrecv(disp, NULL);
3401 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) ==
3402 0 &&
3403 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
3404 {
3405 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3406 if (disp->recv_pending != 0) {
3407 isc_socket_cancel(disp->socket, disp->task[0],
3408 ISC_SOCKCANCEL_RECV);
3409 }
3410 }
3411 }
3412
3413 disp->attributes &= ~mask;
3414 disp->attributes |= (attributes & mask);
3415 UNLOCK(&disp->lock);
3416 }
3417
3418 void
dns_dispatch_importrecv(dns_dispatch_t * disp,isc_event_t * event)3419 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3420 void *buf;
3421 isc_socketevent_t *sevent, *newsevent;
3422
3423 REQUIRE(VALID_DISPATCH(disp));
3424 REQUIRE(event != NULL);
3425
3426 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3427 return;
3428 }
3429
3430 sevent = (isc_socketevent_t *)event;
3431 INSIST(sevent->n <= disp->mgr->buffersize);
3432
3433 newsevent = (isc_socketevent_t *)isc_event_allocate(
3434 disp->mgr->mctx, NULL, DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3435 disp, sizeof(isc_socketevent_t));
3436
3437 buf = allocate_udp_buffer(disp);
3438 if (buf == NULL) {
3439 isc_event_free(ISC_EVENT_PTR(&newsevent));
3440 return;
3441 }
3442 memmove(buf, sevent->region.base, sevent->n);
3443 newsevent->region.base = buf;
3444 newsevent->region.length = disp->mgr->buffersize;
3445 newsevent->n = sevent->n;
3446 newsevent->result = sevent->result;
3447 newsevent->address = sevent->address;
3448 newsevent->timestamp = sevent->timestamp;
3449 newsevent->pktinfo = sevent->pktinfo;
3450 newsevent->attributes = sevent->attributes;
3451
3452 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3453 }
3454
3455 dns_dispatch_t *
dns_dispatchset_get(dns_dispatchset_t * dset)3456 dns_dispatchset_get(dns_dispatchset_t *dset) {
3457 dns_dispatch_t *disp;
3458
3459 /* check that dispatch set is configured */
3460 if (dset == NULL || dset->ndisp == 0) {
3461 return (NULL);
3462 }
3463
3464 LOCK(&dset->lock);
3465 disp = dset->dispatches[dset->cur];
3466 dset->cur++;
3467 if (dset->cur == dset->ndisp) {
3468 dset->cur = 0;
3469 }
3470 UNLOCK(&dset->lock);
3471
3472 return (disp);
3473 }
3474
3475 isc_result_t
dns_dispatchset_create(isc_mem_t * mctx,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,dns_dispatch_t * source,dns_dispatchset_t ** dsetp,int n)3476 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3477 isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3478 dns_dispatchset_t **dsetp, int n) {
3479 isc_result_t result;
3480 dns_dispatchset_t *dset;
3481 dns_dispatchmgr_t *mgr;
3482 int i, j;
3483
3484 REQUIRE(VALID_DISPATCH(source));
3485 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3486 REQUIRE(dsetp != NULL && *dsetp == NULL);
3487
3488 mgr = source->mgr;
3489
3490 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3491 memset(dset, 0, sizeof(*dset));
3492
3493 isc_mutex_init(&dset->lock);
3494
3495 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3496
3497 isc_mem_attach(mctx, &dset->mctx);
3498 dset->ndisp = n;
3499 dset->cur = 0;
3500
3501 dset->dispatches[0] = NULL;
3502 dns_dispatch_attach(source, &dset->dispatches[0]);
3503
3504 LOCK(&mgr->lock);
3505 for (i = 1; i < n; i++) {
3506 dset->dispatches[i] = NULL;
3507 result = dispatch_createudp(
3508 mgr, sockmgr, taskmgr, &source->local,
3509 source->maxrequests, source->attributes,
3510 &dset->dispatches[i], source->socket);
3511 if (result != ISC_R_SUCCESS) {
3512 goto fail;
3513 }
3514 }
3515
3516 UNLOCK(&mgr->lock);
3517 *dsetp = dset;
3518
3519 return (ISC_R_SUCCESS);
3520
3521 fail:
3522 UNLOCK(&mgr->lock);
3523
3524 for (j = 0; j < i; j++) {
3525 dns_dispatch_detach(&(dset->dispatches[j]));
3526 }
3527 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3528 if (dset->mctx == mctx) {
3529 isc_mem_detach(&dset->mctx);
3530 }
3531
3532 isc_mutex_destroy(&dset->lock);
3533 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3534 return (result);
3535 }
3536
3537 void
dns_dispatchset_cancelall(dns_dispatchset_t * dset,isc_task_t * task)3538 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3539 int i;
3540
3541 REQUIRE(dset != NULL);
3542
3543 for (i = 0; i < dset->ndisp; i++) {
3544 isc_socket_t *sock;
3545 sock = dns_dispatch_getsocket(dset->dispatches[i]);
3546 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3547 }
3548 }
3549
3550 void
dns_dispatchset_destroy(dns_dispatchset_t ** dsetp)3551 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3552 dns_dispatchset_t *dset;
3553 int i;
3554
3555 REQUIRE(dsetp != NULL && *dsetp != NULL);
3556
3557 dset = *dsetp;
3558 *dsetp = NULL;
3559 for (i = 0; i < dset->ndisp; i++) {
3560 dns_dispatch_detach(&(dset->dispatches[i]));
3561 }
3562 isc_mem_put(dset->mctx, dset->dispatches,
3563 sizeof(dns_dispatch_t *) * dset->ndisp);
3564 isc_mutex_destroy(&dset->lock);
3565 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3566 }
3567
3568 void
dns_dispatch_setdscp(dns_dispatch_t * disp,isc_dscp_t dscp)3569 dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) {
3570 REQUIRE(VALID_DISPATCH(disp));
3571 disp->dscp = dscp;
3572 }
3573
3574 isc_dscp_t
dns_dispatch_getdscp(dns_dispatch_t * disp)3575 dns_dispatch_getdscp(dns_dispatch_t *disp) {
3576 REQUIRE(VALID_DISPATCH(disp));
3577 return (disp->dscp);
3578 }
3579
#if 0
/*
 * Debugging aid, currently compiled out: print the pointer and formatted
 * local address of every dispatch on the manager's list to stdout.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *cursor;
	char addrtext[1024];

	for (cursor = ISC_LIST_HEAD(mgr->list); cursor != NULL;
	     cursor = ISC_LIST_NEXT(cursor, link))
	{
		isc_sockaddr_format(&cursor->local, addrtext,
				    sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", cursor, addrtext);
	}
}
#endif /* if 0 */
3594