1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7  *
8  * See the COPYRIGHT file distributed with this work for additional
9  * information regarding copyright ownership.
10  */
11 
12 /*! \file */
13 
14 #include <config.h>
15 
16 #include <inttypes.h>
17 #include <stdbool.h>
18 
19 #include <stdlib.h>
20 #include <sys/types.h>
21 #include <unistd.h>
22 #include <stdlib.h>
23 
24 #include <isc/entropy.h>
25 #include <isc/mem.h>
26 #include <isc/mutex.h>
27 #include <isc/portset.h>
28 #include <isc/print.h>
29 #include <isc/random.h>
30 #include <isc/socket.h>
31 #include <isc/stats.h>
32 #include <isc/string.h>
33 #include <isc/task.h>
34 #include <isc/time.h>
35 #include <isc/util.h>
36 
37 #include <dns/acl.h>
38 #include <dns/dispatch.h>
39 #include <dns/events.h>
40 #include <dns/log.h>
41 #include <dns/message.h>
42 #include <dns/portlist.h>
43 #include <dns/stats.h>
44 #include <dns/tcpmsg.h>
45 #include <dns/types.h>
46 
47 typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
48 
49 typedef struct dispsocket		dispsocket_t;
50 typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;
51 
52 typedef struct dispportentry		dispportentry_t;
53 typedef ISC_LIST(dispportentry_t)	dispportlist_t;
54 
55 typedef struct dns_qid {
56 	unsigned int	magic;
57 	unsigned int	qid_nbuckets;	/*%< hash table size */
58 	unsigned int	qid_increment;	/*%< id increment on collision */
59 	isc_mutex_t	lock;
60 	dns_displist_t	*qid_table;	/*%< the table itself */
61 	dispsocketlist_t *sock_table;	/*%< socket table */
62 } dns_qid_t;
63 
64 struct dns_dispatchmgr {
65 	/* Unlocked. */
66 	unsigned int			magic;
67 	isc_mem_t		       *mctx;
68 	dns_acl_t		       *blackhole;
69 	dns_portlist_t		       *portlist;
70 	isc_stats_t		       *stats;
71 	isc_entropy_t		       *entropy; /*%< entropy source */
72 
73 	/* Locked by "lock". */
74 	isc_mutex_t			lock;
75 	unsigned int			state;
76 	ISC_LIST(dns_dispatch_t)	list;
77 
78 	/* Locked by rng_lock. */
79 	isc_mutex_t			rng_lock;
80 	isc_rng_t		       *rngctx; /*%< RNG context for QID */
81 
82 	/* locked by buffer_lock */
83 	dns_qid_t			*qid;
84 	isc_mutex_t			buffer_lock;
85 	unsigned int			buffers;    /*%< allocated buffers */
86 	unsigned int			buffersize; /*%< size of each buffer */
87 	unsigned int			maxbuffers; /*%< max buffers */
88 
89 	/* Locked internally. */
90 	isc_mutex_t			depool_lock;
91 	isc_mempool_t		       *depool;	/*%< pool for dispatch events */
92 	isc_mutex_t			rpool_lock;
93 	isc_mempool_t		       *rpool;	/*%< pool for replies */
94 	isc_mutex_t			dpool_lock;
95 	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
96 	isc_mutex_t			bpool_lock;
97 	isc_mempool_t		       *bpool;	/*%< pool for buffers */
98 	isc_mutex_t			spool_lock;
99 	isc_mempool_t		       *spool;	/*%< pool for dispsocks */
100 
101 	/*%
102 	 * Locked by qid->lock if qid exists; otherwise, can be used without
103 	 * being locked.
104 	 * Memory footprint considerations: this is a simple implementation of
105 	 * available ports, i.e., an ordered array of the actual port numbers.
106 	 * This will require about 256KB of memory in the worst case (128KB for
107 	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
108 	 * more sophisticated way such as a list (or array) of ranges that are
109 	 * searched to identify a specific port.  Our decision here is the saved
110 	 * memory isn't worth the implementation complexity, considering the
111 	 * fact that the whole BIND9 process (which is mainly named) already
112 	 * requires a pretty large memory footprint.  We may, however, have to
113 	 * revisit the decision when we want to use it as a separate module for
114 	 * an environment where memory requirement is severer.
115 	 */
116 	in_port_t	*v4ports;	/*%< available ports for IPv4 */
117 	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
118 	in_port_t	*v6ports;	/*%< available ports for IPv4 */
119 	unsigned int	nv6ports;	/*%< # of available ports for IPv4 */
120 };
121 
/* Bit in dns_dispatchmgr.state marking a manager that is shutting down. */
#define MGR_SHUTTINGDOWN 0x00000001U
/* True when the MGR_SHUTTINGDOWN bit is set in (l)->state. */
#define MGR_IS_SHUTTINGDOWN(l) \
	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when DNS_DISPATCHATTR_PRIVATE is set in (d)->attributes. */
#define IS_PRIVATE(d) \
	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
126 
127 struct dns_dispentry {
128 	unsigned int			magic;
129 	dns_dispatch_t		       *disp;
130 	dns_messageid_t			id;
131 	in_port_t			port;
132 	unsigned int			bucket;
133 	isc_sockaddr_t			host;
134 	isc_task_t		       *task;
135 	isc_taskaction_t		action;
136 	void			       *arg;
137 	bool				item_out;
138 	dispsocket_t			*dispsocket;
139 	ISC_LIST(dns_dispatchevent_t)	items;
140 	ISC_LINK(dns_dispentry_t)	link;
141 };
142 
/*%
 * Upper bound on the number of dispatch sockets kept pooled for reuse.  The
 * right value may vary, but experiments have shown that a busy caching server
 * may need more than 1000 sockets open concurrently.  The maximum allowable
 * number of dispatch sockets (per manager) is set to twice this value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS 2048
#endif

/*%
 * Quota on the number of dispatch sockets.  Once a dispatch holds more
 * sockets than this, new queries purge the oldest ones, so that a massive
 * number of outstanding queries cannot block subsequent queries (especially
 * when the older ones are slow and end in a timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA 3072
#endif
163 
164 struct dispsocket {
165 	unsigned int			magic;
166 	isc_socket_t			*socket;
167 	dns_dispatch_t			*disp;
168 	isc_sockaddr_t			host;
169 	in_port_t			localport; /* XXX: should be removed later */
170 	dispportentry_t			*portentry;
171 	dns_dispentry_t			*resp;
172 	isc_task_t			*task;
173 	ISC_LINK(dispsocket_t)		link;
174 	unsigned int			bucket;
175 	ISC_LINK(dispsocket_t)		blink;
176 };
177 
178 /*%
179  * A port table entry.  We remember every port we first open in a table with a
180  * reference counter so that we can 'reuse' the same port (with different
181  * destination addresses) using the SO_REUSEADDR socket option.
182  */
183 struct dispportentry {
184 	in_port_t			port;
185 	unsigned int			refs;
186 	ISC_LINK(struct dispportentry)	link;
187 };
188 
/* Number of buckets in a dispatch's port table (ports hash by modulo). */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE 1024
#endif

/* Sentinel bucket number. */
#define INVALID_BUCKET (0xffffdead)

/*%
 * Number of tasks per dispatch when separate sockets are used for different
 * transactions.  This must be a power of 2, because it divides 32-bit random
 * numbers to obtain a uniformly random task selection; see get_dispsocket().
 */
#define MAX_INTERNAL_TASKS 64
201 
202 struct dns_dispatch {
203 	/* Unlocked. */
204 	unsigned int		magic;		/*%< magic */
205 	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
206 	int			ntasks;
207 	/*%
208 	 * internal task buckets.  We use multiple tasks to distribute various
209 	 * socket events well when using separate dispatch sockets.  We use the
210 	 * 1st task (task[0]) for internal control events.
211 	 */
212 	isc_task_t	       *task[MAX_INTERNAL_TASKS];
213 	isc_socket_t	       *socket;		/*%< isc socket attached to */
214 	isc_sockaddr_t		local;		/*%< local address */
215 	in_port_t		localport;	/*%< local UDP port */
216 	isc_sockaddr_t		peer;		/*%< peer address (TCP) */
217 	isc_dscp_t		dscp;		/*%< "listen-on" DSCP value */
218 	unsigned int		maxrequests;	/*%< max requests */
219 	isc_event_t	       *ctlevent;
220 
221 	isc_mutex_t		sepool_lock;
222 	isc_mempool_t	       *sepool;		/*%< pool for socket events */
223 
224 	/*% Locked by mgr->lock. */
225 	ISC_LINK(dns_dispatch_t) link;
226 
227 	/* Locked by "lock". */
228 	isc_mutex_t		lock;		/*%< locks all below */
229 	isc_sockettype_t	socktype;
230 	unsigned int		attributes;
231 	unsigned int		refcount;	/*%< number of users */
232 	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
233 	unsigned int		shutting_down : 1,
234 				shutdown_out : 1,
235 				connected : 1,
236 				tcpmsg_valid : 1,
237 				recv_pending : 1; /*%< is a recv() pending? */
238 	isc_result_t		shutdown_why;
239 	ISC_LIST(dispsocket_t)	activesockets;
240 	ISC_LIST(dispsocket_t)	inactivesockets;
241 	unsigned int		nsockets;
242 	unsigned int		requests;	/*%< how many requests we have */
243 	unsigned int		tcpbuffers;	/*%< allocated buffers */
244 	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
245 	dns_qid_t		*qid;
246 	isc_rng_t		*rngctx;	/*%< for QID/UDP port num */
247 	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
248 	isc_mempool_t		*portpool;	/*%< port table entries  */
249 };
250 
/* Magic numbers and validity checks for the structures in this file. */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)

/*%
 * Select the QID table for 'disp': a TCP dispatch uses its own table, any
 * other socket type uses the one owned by the manager.
 *
 * The whole conditional is parenthesized so the macro expands to a single
 * operand; without the outer parentheses an expression such as
 * 'DNS_QID(d) + 1' or 'x == DNS_QID(d)' would be parsed with the wrong
 * precedence.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)

/*%
 * Select the RNG context for 'disp': a UDP dispatch uses its own context,
 * any other socket type uses the manager's.  Parenthesized for the same
 * reason as DNS_QID().
 */
#define DISP_RNGCTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (disp)->rngctx : (disp)->mgr->rngctx)
270 
/*%
 * Locking a query port buffer is a bit tricky.  The buffer is accessed
 * without locking until qid has been created.  Technically there is a
 * possible race between the creation of qid and access to the port buffer;
 * in practice this should be safe, because qid is not created until the
 * first dispatch is created and nothing should be contending before then.
 *
 * Both macros are wrapped in do { } while (0) so that each one behaves as a
 * single statement: a bare 'if' here would silently capture an 'else' that
 * follows the macro invocation.  Invoke them with a trailing semicolon.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) { \
			LOCK(&((mgr)->qid->lock)); \
		} \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) { \
			UNLOCK((&(mgr)->qid->lock)); \
		} \
	} while (0)
280 
281 /*
282  * Statics.
283  */
284 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
285 				     dns_messageid_t, in_port_t, unsigned int);
286 static bool destroy_disp_ok(dns_dispatch_t *);
287 static void destroy_disp(isc_task_t *task, isc_event_t *event);
288 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
289 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
290 static void udp_exrecv(isc_task_t *, isc_event_t *);
291 static void udp_shrecv(isc_task_t *, isc_event_t *);
292 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
293 static void tcp_recv(isc_task_t *, isc_event_t *);
294 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
295 static uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
296 			 in_port_t);
297 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
298 static void *allocate_udp_buffer(dns_dispatch_t *disp);
299 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
300 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
301 static void do_cancel(dns_dispatch_t *disp);
302 static dns_dispentry_t *linear_first(dns_qid_t *disp);
303 static dns_dispentry_t *linear_next(dns_qid_t *disp,
304 				    dns_dispentry_t *resp);
305 static void dispatch_free(dns_dispatch_t **dispp);
306 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
307 				  dns_dispatch_t *disp,
308 				  isc_socketmgr_t *sockmgr,
309 				  isc_sockaddr_t *localaddr,
310 				  isc_socket_t **sockp,
311 				  isc_socket_t *dup_socket);
312 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
313 				       isc_socketmgr_t *sockmgr,
314 				       isc_taskmgr_t *taskmgr,
315 				       isc_sockaddr_t *localaddr,
316 				       unsigned int maxrequests,
317 				       unsigned int attributes,
318 				       dns_dispatch_t **dispp,
319 				       isc_socket_t *dup_socket);
320 static bool destroy_mgr_ok(dns_dispatchmgr_t *mgr);
321 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
322 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
323 				 unsigned int increment, dns_qid_t **qidp,
324 				 bool needaddrtable);
325 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
326 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
327 				unsigned int options, isc_socket_t **sockp,
328 				isc_socket_t *dup_socket);
329 static bool portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
330 				   isc_sockaddr_t *sockaddrp);
331 
332 #define LVL(x) ISC_LOG_DEBUG(x)
333 
334 static void
335 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
336      ISC_FORMAT_PRINTF(3, 4);
337 
338 static void
mgr_log(dns_dispatchmgr_t * mgr,int level,const char * fmt,...)339 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
340 	char msgbuf[2048];
341 	va_list ap;
342 
343 	if (! isc_log_wouldlog(dns_lctx, level))
344 		return;
345 
346 	va_start(ap, fmt);
347 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
348 	va_end(ap);
349 
350 	isc_log_write(dns_lctx,
351 		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
352 		      level, "dispatchmgr %p: %s", mgr, msgbuf);
353 }
354 
355 static inline void
inc_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)356 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
357 	if (mgr->stats != NULL)
358 		isc_stats_increment(mgr->stats, counter);
359 }
360 
361 static inline void
dec_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)362 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
363 	if (mgr->stats != NULL)
364 		isc_stats_decrement(mgr->stats, counter);
365 }
366 
367 static void
368 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
369      ISC_FORMAT_PRINTF(3, 4);
370 
371 static void
dispatch_log(dns_dispatch_t * disp,int level,const char * fmt,...)372 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
373 	char msgbuf[2048];
374 	va_list ap;
375 
376 	if (! isc_log_wouldlog(dns_lctx, level))
377 		return;
378 
379 	va_start(ap, fmt);
380 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
381 	va_end(ap);
382 
383 	isc_log_write(dns_lctx,
384 		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
385 		      level, "dispatch %p: %s", disp, msgbuf);
386 }
387 
388 static void
389 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
390 	    int level, const char *fmt, ...)
391      ISC_FORMAT_PRINTF(4, 5);
392 
393 static void
request_log(dns_dispatch_t * disp,dns_dispentry_t * resp,int level,const char * fmt,...)394 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
395 	    int level, const char *fmt, ...)
396 {
397 	char msgbuf[2048];
398 	char peerbuf[256];
399 	va_list ap;
400 
401 	if (! isc_log_wouldlog(dns_lctx, level))
402 		return;
403 
404 	va_start(ap, fmt);
405 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
406 	va_end(ap);
407 
408 	if (VALID_RESPONSE(resp)) {
409 		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
410 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
411 			      DNS_LOGMODULE_DISPATCH, level,
412 			      "dispatch %p response %p %s: %s", disp, resp,
413 			      peerbuf, msgbuf);
414 	} else {
415 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
416 			      DNS_LOGMODULE_DISPATCH, level,
417 			      "dispatch %p req/resp %p: %s", disp, resp,
418 			      msgbuf);
419 	}
420 }
421 
422 /*
423  * Return a hash of the destination and message id.
424  */
425 static uint32_t
dns_hash(dns_qid_t * qid,isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port)426 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
427 	 in_port_t port)
428 {
429 	uint32_t ret;
430 
431 	ret = isc_sockaddr_hash(dest, true);
432 	ret ^= ((uint32_t)id << 16) | port;
433 	ret %= qid->qid_nbuckets;
434 
435 	INSIST(ret < qid->qid_nbuckets);
436 
437 	return (ret);
438 }
439 
440 /*
441  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
442  */
443 static dns_dispentry_t *
linear_first(dns_qid_t * qid)444 linear_first(dns_qid_t *qid) {
445 	dns_dispentry_t *ret;
446 	unsigned int bucket;
447 
448 	bucket = 0;
449 
450 	while (bucket < qid->qid_nbuckets) {
451 		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
452 		if (ret != NULL)
453 			return (ret);
454 		bucket++;
455 	}
456 
457 	return (NULL);
458 }
459 
460 /*
461  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
462  * no more entries.
463  */
464 static dns_dispentry_t *
linear_next(dns_qid_t * qid,dns_dispentry_t * resp)465 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
466 	dns_dispentry_t *ret;
467 	unsigned int bucket;
468 
469 	ret = ISC_LIST_NEXT(resp, link);
470 	if (ret != NULL)
471 		return (ret);
472 
473 	bucket = resp->bucket;
474 	bucket++;
475 	while (bucket < qid->qid_nbuckets) {
476 		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
477 		if (ret != NULL)
478 			return (ret);
479 		bucket++;
480 	}
481 
482 	return (NULL);
483 }
484 
485 /*
486  * The dispatch must be locked.
487  */
488 static bool
destroy_disp_ok(dns_dispatch_t * disp)489 destroy_disp_ok(dns_dispatch_t *disp)
490 {
491 	if (disp->refcount != 0)
492 		return (false);
493 
494 	if (disp->recv_pending != 0)
495 		return (false);
496 
497 	if (!ISC_LIST_EMPTY(disp->activesockets))
498 		return (false);
499 
500 	if (disp->shutting_down == 0)
501 		return (false);
502 
503 	return (true);
504 }
505 
506 /*
507  * Called when refcount reaches 0 (and safe to destroy).
508  *
509  * The dispatcher must be locked.
510  * The manager must not be locked.
511  */
512 static void
destroy_disp(isc_task_t * task,isc_event_t * event)513 destroy_disp(isc_task_t *task, isc_event_t *event) {
514 	dns_dispatch_t *disp;
515 	dns_dispatchmgr_t *mgr;
516 	bool killmgr;
517 	dispsocket_t *dispsocket;
518 	int i;
519 
520 	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
521 
522 	UNUSED(task);
523 
524 	disp = event->ev_arg;
525 	mgr = disp->mgr;
526 
527 	LOCK(&mgr->lock);
528 	ISC_LIST_UNLINK(mgr->list, disp, link);
529 
530 	dispatch_log(disp, LVL(90),
531 		     "shutting down; detaching from sock %p, task %p",
532 		     disp->socket, disp->task[0]); /* XXXX */
533 
534 	if (disp->sepool != NULL) {
535 		isc_mempool_destroy(&disp->sepool);
536 		(void)isc_mutex_destroy(&disp->sepool_lock);
537 	}
538 
539 	if (disp->socket != NULL)
540 		isc_socket_detach(&disp->socket);
541 	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
542 		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
543 		destroy_dispsocket(disp, &dispsocket);
544 	}
545 	for (i = 0; i < disp->ntasks; i++)
546 		isc_task_detach(&disp->task[i]);
547 	isc_event_free(&event);
548 
549 	dispatch_free(&disp);
550 
551 	killmgr = destroy_mgr_ok(mgr);
552 	UNLOCK(&mgr->lock);
553 	if (killmgr)
554 		destroy_mgr(&mgr);
555 }
556 
557 /*%
558  * Manipulate port table per dispatch: find an entry for a given port number,
559  * create a new entry, and decrement a given entry with possible clean-up.
560  */
561 static dispportentry_t *
port_search(dns_dispatch_t * disp,in_port_t port)562 port_search(dns_dispatch_t *disp, in_port_t port) {
563 	dispportentry_t *portentry;
564 
565 	REQUIRE(disp->port_table != NULL);
566 
567 	portentry = ISC_LIST_HEAD(disp->port_table[port %
568 						   DNS_DISPATCH_PORTTABLESIZE]);
569 	while (portentry != NULL) {
570 		if (portentry->port == port)
571 			return (portentry);
572 		portentry = ISC_LIST_NEXT(portentry, link);
573 	}
574 
575 	return (NULL);
576 }
577 
578 static dispportentry_t *
new_portentry(dns_dispatch_t * disp,in_port_t port)579 new_portentry(dns_dispatch_t *disp, in_port_t port) {
580 	dispportentry_t *portentry;
581 	dns_qid_t *qid;
582 
583 	REQUIRE(disp->port_table != NULL);
584 
585 	portentry = isc_mempool_get(disp->portpool);
586 	if (portentry == NULL)
587 		return (portentry);
588 
589 	portentry->port = port;
590 	portentry->refs = 1;
591 	ISC_LINK_INIT(portentry, link);
592 	qid = DNS_QID(disp);
593 	LOCK(&qid->lock);
594 	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
595 			portentry, link);
596 	UNLOCK(&qid->lock);
597 
598 	return (portentry);
599 }
600 
601 /*%
602  * The caller must hold the qid->lock.
603  */
604 static void
deref_portentry(dns_dispatch_t * disp,dispportentry_t ** portentryp)605 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
606 	dispportentry_t *portentry = *portentryp;
607 	*portentryp = NULL;
608 
609 	REQUIRE(disp->port_table != NULL);
610 	REQUIRE(portentry != NULL && portentry->refs > 0);
611 
612 	if (--portentry->refs == 0) {
613 		ISC_LIST_UNLINK(disp->port_table[portentry->port %
614 						 DNS_DISPATCH_PORTTABLESIZE],
615 				portentry, link);
616 		isc_mempool_put(disp->portpool, portentry);
617 	}
618 }
619 
620 /*%
621  * Find a dispsocket for socket address 'dest', and port number 'port'.
622  * Return NULL if no such entry exists.  Requires qid->lock to be held.
623  */
624 static dispsocket_t *
socket_search(dns_qid_t * qid,isc_sockaddr_t * dest,in_port_t port,unsigned int bucket)625 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
626 	      unsigned int bucket)
627 {
628 	dispsocket_t *dispsock;
629 
630 	REQUIRE(VALID_QID(qid));
631 	REQUIRE(bucket < qid->qid_nbuckets);
632 
633 	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
634 
635 	while (dispsock != NULL) {
636 		if (dispsock->portentry != NULL &&
637 		    dispsock->portentry->port == port &&
638 		    isc_sockaddr_equal(dest, &dispsock->host))
639 			return (dispsock);
640 		dispsock = ISC_LIST_NEXT(dispsock, blink);
641 	}
642 
643 	return (NULL);
644 }
645 
646 /*%
647  * Make a new socket for a single dispatch with a random port number.
648  * The caller must hold the disp->lock
649  */
650 static isc_result_t
get_dispsocket(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_socketmgr_t * sockmgr,dispsocket_t ** dispsockp,in_port_t * portp)651 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
652 	       isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
653 	       in_port_t *portp)
654 {
655 	int i;
656 	uint32_t r;
657 	dns_dispatchmgr_t *mgr = disp->mgr;
658 	isc_socket_t *sock = NULL;
659 	isc_result_t result = ISC_R_FAILURE;
660 	in_port_t port;
661 	isc_sockaddr_t localaddr;
662 	unsigned int bucket = 0;
663 	dispsocket_t *dispsock;
664 	unsigned int nports;
665 	in_port_t *ports;
666 	unsigned int bindoptions;
667 	dispportentry_t *portentry = NULL;
668 	dns_qid_t *qid;
669 
670 	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
671 		nports = disp->mgr->nv4ports;
672 		ports = disp->mgr->v4ports;
673 	} else {
674 		nports = disp->mgr->nv6ports;
675 		ports = disp->mgr->v6ports;
676 	}
677 	if (nports == 0)
678 		return (ISC_R_ADDRNOTAVAIL);
679 
680 	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
681 	if (dispsock != NULL) {
682 		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
683 		sock = dispsock->socket;
684 		dispsock->socket = NULL;
685 	} else {
686 		dispsock = isc_mempool_get(mgr->spool);
687 		if (dispsock == NULL)
688 			return (ISC_R_NOMEMORY);
689 
690 		disp->nsockets++;
691 		dispsock->socket = NULL;
692 		dispsock->disp = disp;
693 		dispsock->resp = NULL;
694 		dispsock->portentry = NULL;
695 		isc_random_get(&r);
696 		dispsock->task = NULL;
697 		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
698 		ISC_LINK_INIT(dispsock, link);
699 		ISC_LINK_INIT(dispsock, blink);
700 		dispsock->magic = DISPSOCK_MAGIC;
701 	}
702 
703 	/*
704 	 * Pick up a random UDP port and open a new socket with it.  Avoid
705 	 * choosing ports that share the same destination because it will be
706 	 * very likely to fail in bind(2) or connect(2).
707 	 */
708 	localaddr = disp->local;
709 	qid = DNS_QID(disp);
710 
711 	for (i = 0; i < 64; i++) {
712 		port = ports[isc_rng_uniformrandom(DISP_RNGCTX(disp), nports)];
713 		isc_sockaddr_setport(&localaddr, port);
714 
715 		LOCK(&qid->lock);
716 		bucket = dns_hash(qid, dest, 0, port);
717 		if (socket_search(qid, dest, port, bucket) != NULL) {
718 			UNLOCK(&qid->lock);
719 			continue;
720 		}
721 		UNLOCK(&qid->lock);
722 		bindoptions = 0;
723 		portentry = port_search(disp, port);
724 
725 		if (portentry != NULL)
726 			bindoptions |= ISC_SOCKET_REUSEADDRESS;
727 		result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
728 				     NULL);
729 		if (result == ISC_R_SUCCESS) {
730 			if (portentry == NULL) {
731 				portentry = new_portentry(disp, port);
732 				if (portentry == NULL) {
733 					result = ISC_R_NOMEMORY;
734 					break;
735 				}
736 			} else {
737 				LOCK(&qid->lock);
738 				portentry->refs++;
739 				UNLOCK(&qid->lock);
740 			}
741 			break;
742 		} else if (result == ISC_R_NOPERM) {
743 			char buf[ISC_SOCKADDR_FORMATSIZE];
744 			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
745 			dispatch_log(disp, ISC_LOG_WARNING,
746 				     "open_socket(%s) -> %s: continuing",
747 				     buf, isc_result_totext(result));
748 		} else if (result != ISC_R_ADDRINUSE)
749 			break;
750 	}
751 
752 	if (result == ISC_R_SUCCESS) {
753 		dispsock->socket = sock;
754 		dispsock->host = *dest;
755 		dispsock->bucket = bucket;
756 		LOCK(&qid->lock);
757 		dispsock->portentry = portentry;
758 		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
759 		UNLOCK(&qid->lock);
760 		*dispsockp = dispsock;
761 		*portp = port;
762 	} else {
763 		/*
764 		 * We could keep it in the inactive list, but since this should
765 		 * be an exceptional case and might be resource shortage, we'd
766 		 * rather destroy it.
767 		 */
768 		if (sock != NULL)
769 			isc_socket_detach(&sock);
770 		destroy_dispsocket(disp, &dispsock);
771 	}
772 
773 	return (result);
774 }
775 
776 /*%
777  * Destroy a dedicated dispatch socket.
778  */
779 static void
destroy_dispsocket(dns_dispatch_t * disp,dispsocket_t ** dispsockp)780 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
781 	dispsocket_t *dispsock;
782 	dns_qid_t *qid = DNS_QID(disp);
783 
784 	/*
785 	 * The dispatch must be locked.
786 	 */
787 
788 	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
789 	dispsock = *dispsockp;
790 	REQUIRE(!ISC_LINK_LINKED(dispsock, link));
791 
792 	disp->nsockets--;
793 	dispsock->magic = 0;
794 	if (dispsock->portentry != NULL) {
795 		/* socket_search() tests and dereferences portentry. */
796 		LOCK(&qid->lock);
797 		deref_portentry(disp, &dispsock->portentry);
798 		UNLOCK(&qid->lock);
799 	}
800 	if (dispsock->socket != NULL) {
801 		isc_socket_detach(&dispsock->socket);
802 	}
803 	if (ISC_LINK_LINKED(dispsock, blink)) {
804 		LOCK(&qid->lock);
805 		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
806 				blink);
807 		UNLOCK(&qid->lock);
808 	}
809 	if (dispsock->task != NULL) {
810 		isc_task_detach(&dispsock->task);
811 	}
812 	isc_mempool_put(disp->mgr->spool, dispsock);
813 
814 	*dispsockp = NULL;
815 }
816 
817 /*%
818  * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
819  * future reuse unless the total number of sockets are exceeding the maximum.
820  */
821 static void
deactivate_dispsocket(dns_dispatch_t * disp,dispsocket_t * dispsock)822 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
823 	isc_result_t result;
824 	dns_qid_t *qid = DNS_QID(disp);
825 
826 	/*
827 	 * The dispatch must be locked.
828 	 */
829 	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
830 	if (dispsock->resp != NULL) {
831 		INSIST(dispsock->resp->dispsocket == dispsock);
832 		dispsock->resp->dispsocket = NULL;
833 	}
834 
835 	INSIST(dispsock->portentry != NULL);
836 	/* socket_search() tests and dereferences portentry. */
837 	LOCK(&qid->lock);
838 	deref_portentry(disp, &dispsock->portentry);
839 	UNLOCK(&qid->lock);
840 
841 	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
842 		destroy_dispsocket(disp, &dispsock);
843 	else {
844 		result = isc_socket_close(dispsock->socket);
845 
846 		LOCK(&qid->lock);
847 		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
848 				blink);
849 		UNLOCK(&qid->lock);
850 
851 		if (result == ISC_R_SUCCESS)
852 			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
853 		else {
854 			/*
855 			 * If the underlying system does not allow this
856 			 * optimization, destroy this temporary structure (and
857 			 * create a new one for a new transaction).
858 			 */
859 			INSIST(result == ISC_R_NOTIMPLEMENTED);
860 			destroy_dispsocket(disp, &dispsock);
861 		}
862 	}
863 }
864 
865 /*
866  * Find an entry for query ID 'id', socket address 'dest', and port number
867  * 'port'.
868  * Return NULL if no such entry exists.
869  */
870 static dns_dispentry_t *
entry_search(dns_qid_t * qid,isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port,unsigned int bucket)871 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
872 	     in_port_t port, unsigned int bucket)
873 {
874 	dns_dispentry_t *res;
875 
876 	REQUIRE(VALID_QID(qid));
877 	REQUIRE(bucket < qid->qid_nbuckets);
878 
879 	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
880 
881 	while (res != NULL) {
882 		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
883 		    res->port == port) {
884 			return (res);
885 		}
886 		res = ISC_LIST_NEXT(res, link);
887 	}
888 
889 	return (NULL);
890 }
891 
892 static void
free_buffer(dns_dispatch_t * disp,void * buf,unsigned int len)893 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
894 	isc_mempool_t *bpool;
895 	INSIST(buf != NULL && len != 0);
896 
897 
898 	switch (disp->socktype) {
899 	case isc_sockettype_tcp:
900 		INSIST(disp->tcpbuffers > 0);
901 		disp->tcpbuffers--;
902 		isc_mem_put(disp->mgr->mctx, buf, len);
903 		break;
904 	case isc_sockettype_udp:
905 		LOCK(&disp->mgr->buffer_lock);
906 		INSIST(disp->mgr->buffers > 0);
907 		INSIST(len == disp->mgr->buffersize);
908 		disp->mgr->buffers--;
909 		bpool = disp->mgr->bpool;
910 		UNLOCK(&disp->mgr->buffer_lock);
911 		isc_mempool_put(bpool, buf);
912 		break;
913 	default:
914 		INSIST(0);
915 		ISC_UNREACHABLE();
916 	}
917 }
918 
919 static void *
allocate_udp_buffer(dns_dispatch_t * disp)920 allocate_udp_buffer(dns_dispatch_t *disp) {
921 	isc_mempool_t *bpool;
922 	void *temp;
923 
924 	LOCK(&disp->mgr->buffer_lock);
925 	if (disp->mgr->buffers >= disp->mgr->maxbuffers) {
926 		UNLOCK(&disp->mgr->buffer_lock);
927 		return (NULL);
928 	}
929 	bpool = disp->mgr->bpool;
930 	disp->mgr->buffers++;
931 	UNLOCK(&disp->mgr->buffer_lock);
932 
933 	temp = isc_mempool_get(bpool);
934 
935 	if (temp == NULL) {
936 		LOCK(&disp->mgr->buffer_lock);
937 		disp->mgr->buffers--;
938 		UNLOCK(&disp->mgr->buffer_lock);
939 	}
940 
941 	return (temp);
942 }
943 
944 static inline void
free_sevent(isc_event_t * ev)945 free_sevent(isc_event_t *ev) {
946 	isc_mempool_t *pool = ev->ev_destroy_arg;
947 	isc_socketevent_t *sev = (isc_socketevent_t *) ev;
948 	isc_mempool_put(pool, sev);
949 }
950 
951 static inline isc_socketevent_t *
allocate_sevent(dns_dispatch_t * disp,isc_socket_t * sock,isc_eventtype_t type,isc_taskaction_t action,const void * arg)952 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock,
953 		isc_eventtype_t type, isc_taskaction_t action, const void *arg)
954 {
955 	isc_socketevent_t *ev;
956 	void *deconst_arg;
957 
958 	ev = isc_mempool_get(disp->sepool);
959 	if (ev == NULL)
960 		return (NULL);
961 	DE_CONST(arg, deconst_arg);
962 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
963 		       action, deconst_arg, sock,
964 		       free_sevent, disp->sepool);
965 	ev->result = ISC_R_UNSET;
966 	ISC_LINK_INIT(ev, ev_link);
967 	ISC_LIST_INIT(ev->bufferlist);
968 	ev->region.base = NULL;
969 	ev->n = 0;
970 	ev->offset = 0;
971 	ev->attributes = 0;
972 
973 	return (ev);
974 }
975 
976 
977 static inline void
free_devent(dns_dispatch_t * disp,dns_dispatchevent_t * ev)978 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
979 	if (disp->failsafe_ev == ev) {
980 		INSIST(disp->shutdown_out == 1);
981 		disp->shutdown_out = 0;
982 
983 		return;
984 	}
985 
986 	isc_mempool_put(disp->mgr->depool, ev);
987 }
988 
989 static inline dns_dispatchevent_t *
allocate_devent(dns_dispatch_t * disp)990 allocate_devent(dns_dispatch_t *disp) {
991 	dns_dispatchevent_t *ev;
992 
993 	ev = isc_mempool_get(disp->mgr->depool);
994 	if (ev == NULL)
995 		return (NULL);
996 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
997 		       NULL, NULL, NULL, NULL, NULL);
998 
999 	return (ev);
1000 }
1001 
1002 static void
udp_exrecv(isc_task_t * task,isc_event_t * ev)1003 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1004 	dispsocket_t *dispsock = ev->ev_arg;
1005 
1006 	UNUSED(task);
1007 
1008 	REQUIRE(VALID_DISPSOCK(dispsock));
1009 	udp_recv(ev, dispsock->disp, dispsock);
1010 }
1011 
1012 static void
udp_shrecv(isc_task_t * task,isc_event_t * ev)1013 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1014 	dns_dispatch_t *disp = ev->ev_arg;
1015 
1016 	UNUSED(task);
1017 
1018 	REQUIRE(VALID_DISPATCH(disp));
1019 	udp_recv(ev, disp, NULL);
1020 }
1021 
1022 /*
1023  * General flow:
1024  *
1025  * If I/O result == CANCELED or error, free the buffer.
1026  *
1027  * If query, free the buffer, restart.
1028  *
1029  * If response:
1030  *	Allocate event, fill in details.
1031  *		If cannot allocate, free buffer, restart.
1032  *	find target.  If not found, free buffer, restart.
1033  *	if event queue is not empty, queue.  else, send.
1034  *	restart.
1035  */
1036 static void
udp_recv(isc_event_t * ev_in,dns_dispatch_t * disp,dispsocket_t * dispsock)1037 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1038 	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1039 	dns_messageid_t id;
1040 	isc_result_t dres;
1041 	isc_buffer_t source;
1042 	unsigned int flags;
1043 	dns_dispentry_t *resp = NULL;
1044 	dns_dispatchevent_t *rev;
1045 	unsigned int bucket;
1046 	bool killit;
1047 	bool queue_response;
1048 	dns_dispatchmgr_t *mgr;
1049 	dns_qid_t *qid;
1050 	isc_netaddr_t netaddr;
1051 	int match;
1052 	int result;
1053 	bool qidlocked = false;
1054 
1055 	LOCK(&disp->lock);
1056 
1057 	mgr = disp->mgr;
1058 	qid = mgr->qid;
1059 
1060 	LOCK(&disp->mgr->buffer_lock);
1061 	dispatch_log(disp, LVL(90),
1062 		     "got packet: requests %d, buffers %d, recvs %d",
1063 		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1064 	UNLOCK(&disp->mgr->buffer_lock);
1065 
1066 	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1067 		/*
1068 		 * Unless the receive event was imported from a listening
1069 		 * interface, in which case the event type is
1070 		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1071 		 */
1072 		INSIST(disp->recv_pending != 0);
1073 		disp->recv_pending = 0;
1074 	}
1075 
1076 	if (dispsock != NULL &&
1077 	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1078 		/*
1079 		 * dispsock->resp can be NULL if this transaction was canceled
1080 		 * just after receiving a response.  Since this socket is
1081 		 * exclusively used and there should be at most one receive
1082 		 * event the canceled event should have been no effect.  So
1083 		 * we can (and should) deactivate the socket right now.
1084 		 */
1085 		deactivate_dispsocket(disp, dispsock);
1086 		dispsock = NULL;
1087 	}
1088 
1089 	if (disp->shutting_down) {
1090 		/*
1091 		 * This dispatcher is shutting down.
1092 		 */
1093 		free_buffer(disp, ev->region.base, ev->region.length);
1094 
1095 		isc_event_free(&ev_in);
1096 		ev = NULL;
1097 
1098 		killit = destroy_disp_ok(disp);
1099 		UNLOCK(&disp->lock);
1100 		if (killit)
1101 			isc_task_send(disp->task[0], &disp->ctlevent);
1102 
1103 		return;
1104 	}
1105 
1106 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1107 		if (dispsock != NULL) {
1108 			resp = dispsock->resp;
1109 			id = resp->id;
1110 			if (ev->result != ISC_R_SUCCESS) {
1111 				/*
1112 				 * This is most likely a network error on a
1113 				 * connected socket.  It makes no sense to
1114 				 * check the address or parse the packet, but it
1115 				 * will help to return the error to the caller.
1116 				 */
1117 				goto sendresponse;
1118 			}
1119 		} else {
1120 			free_buffer(disp, ev->region.base, ev->region.length);
1121 
1122 			isc_event_free(&ev_in);
1123 			UNLOCK(&disp->lock);
1124 			return;
1125 		}
1126 	} else if (ev->result != ISC_R_SUCCESS) {
1127 		free_buffer(disp, ev->region.base, ev->region.length);
1128 
1129 		if (ev->result != ISC_R_CANCELED)
1130 			dispatch_log(disp, ISC_LOG_ERROR,
1131 				     "odd socket result in udp_recv(): %s",
1132 				     isc_result_totext(ev->result));
1133 
1134 		isc_event_free(&ev_in);
1135 		UNLOCK(&disp->lock);
1136 		return;
1137 	}
1138 
1139 	/*
1140 	 * If this is from a blackholed address, drop it.
1141 	 */
1142 	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1143 	if (disp->mgr->blackhole != NULL &&
1144 	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1145 			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1146 	    match > 0)
1147 	{
1148 		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1149 			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1150 			isc_netaddr_format(&netaddr, netaddrstr,
1151 					   sizeof(netaddrstr));
1152 			dispatch_log(disp, LVL(10),
1153 				     "blackholed packet from %s",
1154 				     netaddrstr);
1155 		}
1156 		free_buffer(disp, ev->region.base, ev->region.length);
1157 		goto restart;
1158 	}
1159 
1160 	/*
1161 	 * Peek into the buffer to see what we can see.
1162 	 */
1163 	isc_buffer_init(&source, ev->region.base, ev->region.length);
1164 	isc_buffer_add(&source, ev->n);
1165 	dres = dns_message_peekheader(&source, &id, &flags);
1166 	if (dres != ISC_R_SUCCESS) {
1167 		free_buffer(disp, ev->region.base, ev->region.length);
1168 		dispatch_log(disp, LVL(10), "got garbage packet");
1169 		goto restart;
1170 	}
1171 
1172 	dispatch_log(disp, LVL(92),
1173 		     "got valid DNS message header, /QR %c, id %u",
1174 		     (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
1175 
1176 	/*
1177 	 * Look at flags.  If query, drop it. If response,
1178 	 * look to see where it goes.
1179 	 */
1180 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1181 		/* query */
1182 		free_buffer(disp, ev->region.base, ev->region.length);
1183 		goto restart;
1184 	}
1185 
1186 	/*
1187 	 * Search for the corresponding response.  If we are using an exclusive
1188 	 * socket, we've already identified it and we can skip the search; but
1189 	 * the ID and the address must match the expected ones.
1190 	 */
1191 	if (resp == NULL) {
1192 		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1193 		LOCK(&qid->lock);
1194 		qidlocked = true;
1195 		resp = entry_search(qid, &ev->address, id, disp->localport,
1196 				    bucket);
1197 		dispatch_log(disp, LVL(90),
1198 			     "search for response in bucket %d: %s",
1199 			     bucket, (resp == NULL ? "not found" : "found"));
1200 
1201 	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1202 							 &resp->host)) {
1203 		dispatch_log(disp, LVL(90),
1204 			     "response to an exclusive socket doesn't match");
1205 		inc_stats(mgr, dns_resstatscounter_mismatch);
1206 		free_buffer(disp, ev->region.base, ev->region.length);
1207 		goto unlock;
1208 	}
1209 
1210 	if (resp == NULL) {
1211 		inc_stats(mgr, dns_resstatscounter_mismatch);
1212 		free_buffer(disp, ev->region.base, ev->region.length);
1213 		goto unlock;
1214 	}
1215 
1216 	/*
1217 	 * Now that we have the original dispatch the query was sent
1218 	 * from check that the address and port the response was
1219 	 * sent to make sense.
1220 	 */
1221 	if (disp != resp->disp) {
1222 		isc_sockaddr_t a1;
1223 		isc_sockaddr_t a2;
1224 
1225 		/*
1226 		 * Check that the socket types and ports match.
1227 		 */
1228 		if (disp->socktype != resp->disp->socktype ||
1229 		    isc_sockaddr_getport(&disp->local) !=
1230 		    isc_sockaddr_getport(&resp->disp->local)) {
1231 			free_buffer(disp, ev->region.base, ev->region.length);
1232 			goto unlock;
1233 		}
1234 
1235 		/*
1236 		 * If each dispatch is bound to a different address
1237 		 * then fail.
1238 		 *
1239 		 * Note under Linux a packet can be sent out via IPv4 socket
1240 		 * and the response be received via a IPv6 socket.
1241 		 *
1242 		 * Requests sent out via IPv6 should always come back in
1243 		 * via IPv6.
1244 		 */
1245 		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1246 		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1247 			free_buffer(disp, ev->region.base, ev->region.length);
1248 			goto unlock;
1249 		}
1250 		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1251 		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1252 		if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1253 		    !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1254 		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1255 			free_buffer(disp, ev->region.base, ev->region.length);
1256 			goto unlock;
1257 		}
1258 	}
1259 
1260   sendresponse:
1261 	queue_response = resp->item_out;
1262 	rev = allocate_devent(resp->disp);
1263 	if (rev == NULL) {
1264 		free_buffer(disp, ev->region.base, ev->region.length);
1265 		goto unlock;
1266 	}
1267 
1268 	/*
1269 	 * At this point, rev contains the event we want to fill in, and
1270 	 * resp contains the information on the place to send it to.
1271 	 * Send the event off.
1272 	 */
1273 	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1274 	isc_buffer_add(&rev->buffer, ev->n);
1275 	rev->result = ev->result;
1276 	rev->id = id;
1277 	rev->addr = ev->address;
1278 	rev->pktinfo = ev->pktinfo;
1279 	rev->attributes = ev->attributes;
1280 	if (queue_response) {
1281 		ISC_LIST_APPEND(resp->items, rev, ev_link);
1282 	} else {
1283 		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1284 			       DNS_EVENT_DISPATCH,
1285 			       resp->action, resp->arg, resp, NULL, NULL);
1286 		request_log(disp, resp, LVL(90),
1287 			    "[a] Sent event %p buffer %p len %d to task %p",
1288 			    rev, rev->buffer.base, rev->buffer.length,
1289 			    resp->task);
1290 		resp->item_out = true;
1291 		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1292 	}
1293  unlock:
1294 	if (qidlocked)
1295 		UNLOCK(&qid->lock);
1296 
1297 	/*
1298 	 * Restart recv() to get the next packet.
1299 	 */
1300  restart:
1301 	result = startrecv(disp, dispsock);
1302 	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1303 		/*
1304 		 * XXX: wired. There seems to be no recovery process other than
1305 		 * deactivate this socket anyway (since we cannot start
1306 		 * receiving, we won't be able to receive a cancel event
1307 		 * from the user).
1308 		 */
1309 		deactivate_dispsocket(disp, dispsock);
1310 	}
1311 	isc_event_free(&ev_in);
1312 	UNLOCK(&disp->lock);
1313 }
1314 
1315 /*
1316  * General flow:
1317  *
1318  * If I/O result == CANCELED, EOF, or error, notify everyone as the
1319  * various queues drain.
1320  *
1321  * If query, restart.
1322  *
1323  * If response:
1324  *	Allocate event, fill in details.
1325  *		If cannot allocate, restart.
1326  *	find target.  If not found, restart.
1327  *	if event queue is not empty, queue.  else, send.
1328  *	restart.
1329  */
1330 static void
tcp_recv(isc_task_t * task,isc_event_t * ev_in)1331 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1332 	dns_dispatch_t *disp = ev_in->ev_arg;
1333 	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1334 	dns_messageid_t id;
1335 	isc_result_t dres;
1336 	unsigned int flags;
1337 	dns_dispentry_t *resp;
1338 	dns_dispatchevent_t *rev;
1339 	unsigned int bucket;
1340 	bool killit;
1341 	bool queue_response;
1342 	dns_qid_t *qid;
1343 	int level;
1344 	char buf[ISC_SOCKADDR_FORMATSIZE];
1345 
1346 	UNUSED(task);
1347 
1348 	REQUIRE(VALID_DISPATCH(disp));
1349 
1350 	qid = disp->qid;
1351 
1352 	LOCK(&disp->lock);
1353 
1354 	dispatch_log(disp, LVL(90),
1355 		     "got TCP packet: requests %d, buffers %d, recvs %d",
1356 		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1357 
1358 	INSIST(disp->recv_pending != 0);
1359 	disp->recv_pending = 0;
1360 
1361 	if (disp->refcount == 0) {
1362 		/*
1363 		 * This dispatcher is shutting down.  Force cancellation.
1364 		 */
1365 		tcpmsg->result = ISC_R_CANCELED;
1366 	}
1367 
1368 	if (tcpmsg->result != ISC_R_SUCCESS) {
1369 		switch (tcpmsg->result) {
1370 		case ISC_R_CANCELED:
1371 			break;
1372 
1373 		case ISC_R_EOF:
1374 			dispatch_log(disp, LVL(90), "shutting down on EOF");
1375 			do_cancel(disp);
1376 			break;
1377 
1378 		case ISC_R_CONNECTIONRESET:
1379 			level = ISC_LOG_INFO;
1380 			goto logit;
1381 
1382 		default:
1383 			level = ISC_LOG_ERROR;
1384 		logit:
1385 			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1386 			dispatch_log(disp, level, "shutting down due to TCP "
1387 				     "receive error: %s: %s", buf,
1388 				     isc_result_totext(tcpmsg->result));
1389 			do_cancel(disp);
1390 			break;
1391 		}
1392 
1393 		/*
1394 		 * The event is statically allocated in the tcpmsg
1395 		 * structure, and destroy_disp() frees the tcpmsg, so we must
1396 		 * free the event *before* calling destroy_disp().
1397 		 */
1398 		isc_event_free(&ev_in);
1399 
1400 		disp->shutting_down = 1;
1401 		disp->shutdown_why = tcpmsg->result;
1402 
1403 		/*
1404 		 * If the recv() was canceled pass the word on.
1405 		 */
1406 		killit = destroy_disp_ok(disp);
1407 		UNLOCK(&disp->lock);
1408 		if (killit)
1409 			isc_task_send(disp->task[0], &disp->ctlevent);
1410 		return;
1411 	}
1412 
1413 	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1414 		     tcpmsg->result,
1415 		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1416 
1417 	/*
1418 	 * Peek into the buffer to see what we can see.
1419 	 */
1420 	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1421 	if (dres != ISC_R_SUCCESS) {
1422 		dispatch_log(disp, LVL(10), "got garbage packet");
1423 		goto restart;
1424 	}
1425 
1426 	dispatch_log(disp, LVL(92),
1427 		     "got valid DNS message header, /QR %c, id %u",
1428 		     (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
1429 
1430 	/*
1431 	 * Allocate an event to send to the query or response client, and
1432 	 * allocate a new buffer for our use.
1433 	 */
1434 
1435 	/*
1436 	 * Look at flags.  If query, drop it. If response,
1437 	 * look to see where it goes.
1438 	 */
1439 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1440 		/*
1441 		 * Query.
1442 		 */
1443 		goto restart;
1444 	}
1445 
1446 	/*
1447 	 * Response.
1448 	 */
1449 	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1450 	LOCK(&qid->lock);
1451 	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1452 	dispatch_log(disp, LVL(90),
1453 		     "search for response in bucket %d: %s",
1454 		     bucket, (resp == NULL ? "not found" : "found"));
1455 
1456 	if (resp == NULL)
1457 		goto unlock;
1458 	queue_response = resp->item_out;
1459 	rev = allocate_devent(disp);
1460 	if (rev == NULL)
1461 		goto unlock;
1462 
1463 	/*
1464 	 * At this point, rev contains the event we want to fill in, and
1465 	 * resp contains the information on the place to send it to.
1466 	 * Send the event off.
1467 	 */
1468 	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1469 	disp->tcpbuffers++;
1470 	rev->result = ISC_R_SUCCESS;
1471 	rev->id = id;
1472 	rev->addr = tcpmsg->address;
1473 	if (queue_response) {
1474 		ISC_LIST_APPEND(resp->items, rev, ev_link);
1475 	} else {
1476 		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1477 			       resp->action, resp->arg, resp, NULL, NULL);
1478 		request_log(disp, resp, LVL(90),
1479 			    "[b] Sent event %p buffer %p len %d to task %p",
1480 			    rev, rev->buffer.base, rev->buffer.length,
1481 			    resp->task);
1482 		resp->item_out = true;
1483 		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1484 	}
1485  unlock:
1486 	UNLOCK(&qid->lock);
1487 
1488 	/*
1489 	 * Restart recv() to get the next packet.
1490 	 */
1491  restart:
1492 	(void)startrecv(disp, NULL);
1493 
1494 	isc_event_free(&ev_in);
1495 	UNLOCK(&disp->lock);
1496 }
1497 
1498 /*
1499  * disp must be locked.
1500  */
1501 static isc_result_t
startrecv(dns_dispatch_t * disp,dispsocket_t * dispsock)1502 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1503 	isc_result_t res;
1504 	isc_region_t region;
1505 	isc_socket_t *sock;
1506 
1507 	if (disp->shutting_down == 1)
1508 		return (ISC_R_SUCCESS);
1509 
1510 	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1511 		return (ISC_R_SUCCESS);
1512 
1513 	if (disp->recv_pending != 0 && dispsock == NULL)
1514 		return (ISC_R_SUCCESS);
1515 
1516 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1517 	    dispsock == NULL)
1518 		return (ISC_R_SUCCESS);
1519 
1520 	if (dispsock != NULL)
1521 		sock = dispsock->socket;
1522 	else
1523 		sock = disp->socket;
1524 	INSIST(sock != NULL);
1525 
1526 	switch (disp->socktype) {
1527 		/*
1528 		 * UDP reads are always maximal.
1529 		 */
1530 	case isc_sockettype_udp:
1531 		region.length = disp->mgr->buffersize;
1532 		region.base = allocate_udp_buffer(disp);
1533 		if (region.base == NULL)
1534 			return (ISC_R_NOMEMORY);
1535 		if (dispsock != NULL) {
1536 			isc_task_t *dt = dispsock->task;
1537 			isc_socketevent_t *sev =
1538 				allocate_sevent(disp, sock,
1539 						ISC_SOCKEVENT_RECVDONE,
1540 						udp_exrecv, dispsock);
1541 			if (sev == NULL) {
1542 				free_buffer(disp, region.base, region.length);
1543 				return (ISC_R_NOMEMORY);
1544 			}
1545 
1546 			res = isc_socket_recv2(sock, &region, 1, dt, sev, 0);
1547 			if (res != ISC_R_SUCCESS) {
1548 				free_buffer(disp, region.base, region.length);
1549 				return (res);
1550 			}
1551 		} else {
1552 			isc_task_t *dt = disp->task[0];
1553 			isc_socketevent_t *sev =
1554 				allocate_sevent(disp, sock,
1555 						ISC_SOCKEVENT_RECVDONE,
1556 						udp_shrecv, disp);
1557 			if (sev == NULL) {
1558 				free_buffer(disp, region.base, region.length);
1559 				return (ISC_R_NOMEMORY);
1560 			}
1561 
1562 			res = isc_socket_recv2(sock, &region, 1, dt, sev, 0);
1563 			if (res != ISC_R_SUCCESS) {
1564 				free_buffer(disp, region.base, region.length);
1565 				disp->shutdown_why = res;
1566 				disp->shutting_down = 1;
1567 				do_cancel(disp);
1568 				return (ISC_R_SUCCESS); /* recover by cancel */
1569 			}
1570 			INSIST(disp->recv_pending == 0);
1571 			disp->recv_pending = 1;
1572 		}
1573 		break;
1574 
1575 	case isc_sockettype_tcp:
1576 		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1577 					     tcp_recv, disp);
1578 		if (res != ISC_R_SUCCESS) {
1579 			disp->shutdown_why = res;
1580 			disp->shutting_down = 1;
1581 			do_cancel(disp);
1582 			return (ISC_R_SUCCESS); /* recover by cancel */
1583 		}
1584 		INSIST(disp->recv_pending == 0);
1585 		disp->recv_pending = 1;
1586 		break;
1587 	default:
1588 		INSIST(0);
1589 		ISC_UNREACHABLE();
1590 	}
1591 
1592 	return (ISC_R_SUCCESS);
1593 }
1594 
1595 /*
1596  * Mgr must be locked when calling this function.
1597  */
1598 static bool
destroy_mgr_ok(dns_dispatchmgr_t * mgr)1599 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1600 	mgr_log(mgr, LVL(90),
1601 		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1602 		"depool=%d, rpool=%d, dpool=%d",
1603 		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1604 		isc_mempool_getallocated(mgr->depool),
1605 		isc_mempool_getallocated(mgr->rpool),
1606 		isc_mempool_getallocated(mgr->dpool));
1607 	if (!MGR_IS_SHUTTINGDOWN(mgr))
1608 		return (false);
1609 	if (!ISC_LIST_EMPTY(mgr->list))
1610 		return (false);
1611 	if (isc_mempool_getallocated(mgr->depool) != 0)
1612 		return (false);
1613 	if (isc_mempool_getallocated(mgr->rpool) != 0)
1614 		return (false);
1615 	if (isc_mempool_getallocated(mgr->dpool) != 0)
1616 		return (false);
1617 
1618 	return (true);
1619 }
1620 
1621 /*
1622  * Mgr must be unlocked when calling this function.
1623  */
1624 static void
destroy_mgr(dns_dispatchmgr_t ** mgrp)1625 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1626 	isc_mem_t *mctx;
1627 	dns_dispatchmgr_t *mgr;
1628 
1629 	mgr = *mgrp;
1630 	*mgrp = NULL;
1631 
1632 	mctx = mgr->mctx;
1633 
1634 	mgr->magic = 0;
1635 	mgr->mctx = NULL;
1636 	DESTROYLOCK(&mgr->lock);
1637 	mgr->state = 0;
1638 
1639 	if (mgr->rngctx != NULL)
1640 		isc_rng_detach(&mgr->rngctx);
1641 	DESTROYLOCK(&mgr->rng_lock);
1642 
1643 	isc_mempool_destroy(&mgr->depool);
1644 	isc_mempool_destroy(&mgr->rpool);
1645 	isc_mempool_destroy(&mgr->dpool);
1646 	if (mgr->bpool != NULL)
1647 		isc_mempool_destroy(&mgr->bpool);
1648 	if (mgr->spool != NULL)
1649 		isc_mempool_destroy(&mgr->spool);
1650 
1651 	DESTROYLOCK(&mgr->spool_lock);
1652 	DESTROYLOCK(&mgr->bpool_lock);
1653 	DESTROYLOCK(&mgr->dpool_lock);
1654 	DESTROYLOCK(&mgr->rpool_lock);
1655 	DESTROYLOCK(&mgr->depool_lock);
1656 
1657 	if (mgr->entropy != NULL)
1658 		isc_entropy_detach(&mgr->entropy);
1659 	if (mgr->qid != NULL)
1660 		qid_destroy(mctx, &mgr->qid);
1661 
1662 	DESTROYLOCK(&mgr->buffer_lock);
1663 
1664 	if (mgr->blackhole != NULL)
1665 		dns_acl_detach(&mgr->blackhole);
1666 
1667 	if (mgr->stats != NULL)
1668 		isc_stats_detach(&mgr->stats);
1669 
1670 	if (mgr->v4ports != NULL) {
1671 		isc_mem_put(mctx, mgr->v4ports,
1672 			    mgr->nv4ports * sizeof(in_port_t));
1673 	}
1674 	if (mgr->v6ports != NULL) {
1675 		isc_mem_put(mctx, mgr->v6ports,
1676 			    mgr->nv6ports * sizeof(in_port_t));
1677 	}
1678 	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1679 	isc_mem_detach(&mctx);
1680 }
1681 
1682 static isc_result_t
open_socket(isc_socketmgr_t * mgr,isc_sockaddr_t * local,unsigned int options,isc_socket_t ** sockp,isc_socket_t * dup_socket)1683 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1684 	    unsigned int options, isc_socket_t **sockp,
1685 	    isc_socket_t *dup_socket)
1686 {
1687 	isc_socket_t *sock;
1688 	isc_result_t result;
1689 
1690 	sock = *sockp;
1691 	if (sock != NULL) {
1692 		result = isc_socket_open(sock);
1693 		if (result != ISC_R_SUCCESS)
1694 			return (result);
1695 	} else if (dup_socket != NULL) {
1696 		result = isc_socket_dup(dup_socket, &sock);
1697 		if (result != ISC_R_SUCCESS)
1698 			return (result);
1699 
1700 		isc_socket_setname(sock, "dispatcher", NULL);
1701 		*sockp = sock;
1702 		return (ISC_R_SUCCESS);
1703 	} else {
1704 		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1705 					   isc_sockettype_udp, &sock);
1706 		if (result != ISC_R_SUCCESS)
1707 			return (result);
1708 	}
1709 
1710 	isc_socket_setname(sock, "dispatcher", NULL);
1711 
1712 #ifndef ISC_ALLOW_MAPPED
1713 	isc_socket_ipv6only(sock, true);
1714 #endif
1715 	result = isc_socket_bind(sock, local, options);
1716 	if (result != ISC_R_SUCCESS) {
1717 		if (*sockp == NULL)
1718 			isc_socket_detach(&sock);
1719 		else {
1720 			isc_socket_close(sock);
1721 		}
1722 		return (result);
1723 	}
1724 
1725 	*sockp = sock;
1726 	return (ISC_R_SUCCESS);
1727 }
1728 
1729 /*%
1730  * Create a temporary port list to set the initial default set of dispatch
1731  * ports: [1024, 65535].  This is almost meaningless as the application will
1732  * normally set the ports explicitly, but is provided to fill some minor corner
1733  * cases.
1734  */
1735 static isc_result_t
create_default_portset(isc_mem_t * mctx,isc_portset_t ** portsetp)1736 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1737 	isc_result_t result;
1738 
1739 	result = isc_portset_create(mctx, portsetp);
1740 	if (result != ISC_R_SUCCESS)
1741 		return (result);
1742 	isc_portset_addrange(*portsetp, 1024, 65535);
1743 
1744 	return (ISC_R_SUCCESS);
1745 }
1746 
1747 /*
1748  * Publics.
1749  */
1750 
1751 isc_result_t
dns_dispatchmgr_create(isc_mem_t * mctx,isc_entropy_t * entropy,dns_dispatchmgr_t ** mgrp)1752 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1753 		       dns_dispatchmgr_t **mgrp)
1754 {
1755 	dns_dispatchmgr_t *mgr;
1756 	isc_result_t result;
1757 	isc_portset_t *v4portset = NULL;
1758 	isc_portset_t *v6portset = NULL;
1759 
1760 	REQUIRE(mctx != NULL);
1761 	REQUIRE(mgrp != NULL && *mgrp == NULL);
1762 
1763 	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1764 	if (mgr == NULL)
1765 		return (ISC_R_NOMEMORY);
1766 
1767 	mgr->mctx = NULL;
1768 	isc_mem_attach(mctx, &mgr->mctx);
1769 
1770 	mgr->blackhole = NULL;
1771 	mgr->stats = NULL;
1772 	mgr->rngctx = NULL;
1773 
1774 	result = isc_mutex_init(&mgr->lock);
1775 	if (result != ISC_R_SUCCESS)
1776 		goto deallocate;
1777 
1778 	result = isc_mutex_init(&mgr->rng_lock);
1779 	if (result != ISC_R_SUCCESS)
1780 		goto kill_lock;
1781 
1782 	result = isc_mutex_init(&mgr->buffer_lock);
1783 	if (result != ISC_R_SUCCESS)
1784 		goto kill_rng_lock;
1785 
1786 	result = isc_mutex_init(&mgr->depool_lock);
1787 	if (result != ISC_R_SUCCESS)
1788 		goto kill_buffer_lock;
1789 
1790 	result = isc_mutex_init(&mgr->rpool_lock);
1791 	if (result != ISC_R_SUCCESS)
1792 		goto kill_depool_lock;
1793 
1794 	result = isc_mutex_init(&mgr->dpool_lock);
1795 	if (result != ISC_R_SUCCESS)
1796 		goto kill_rpool_lock;
1797 
1798 	result = isc_mutex_init(&mgr->bpool_lock);
1799 	if (result != ISC_R_SUCCESS)
1800 		goto kill_dpool_lock;
1801 
1802 	result = isc_mutex_init(&mgr->spool_lock);
1803 	if (result != ISC_R_SUCCESS)
1804 		goto kill_bpool_lock;
1805 
1806 	mgr->depool = NULL;
1807 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1808 			       &mgr->depool) != ISC_R_SUCCESS) {
1809 		result = ISC_R_NOMEMORY;
1810 		goto kill_spool_lock;
1811 	}
1812 
1813 	mgr->rpool = NULL;
1814 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1815 			       &mgr->rpool) != ISC_R_SUCCESS) {
1816 		result = ISC_R_NOMEMORY;
1817 		goto kill_depool;
1818 	}
1819 
1820 	mgr->dpool = NULL;
1821 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1822 			       &mgr->dpool) != ISC_R_SUCCESS) {
1823 		result = ISC_R_NOMEMORY;
1824 		goto kill_rpool;
1825 	}
1826 
1827 	isc_mempool_setname(mgr->depool, "dispmgr_depool");
1828 	isc_mempool_setmaxalloc(mgr->depool, 32768);
1829 	isc_mempool_setfreemax(mgr->depool, 32768);
1830 	isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
1831 	isc_mempool_setfillcount(mgr->depool, 32);
1832 
1833 	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1834 	isc_mempool_setmaxalloc(mgr->rpool, 32768);
1835 	isc_mempool_setfreemax(mgr->rpool, 32768);
1836 	isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
1837 	isc_mempool_setfillcount(mgr->rpool, 32);
1838 
1839 	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1840 	isc_mempool_setmaxalloc(mgr->dpool, 32768);
1841 	isc_mempool_setfreemax(mgr->dpool, 32768);
1842 	isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
1843 	isc_mempool_setfillcount(mgr->dpool, 32);
1844 
1845 	mgr->buffers = 0;
1846 	mgr->buffersize = 0;
1847 	mgr->maxbuffers = 0;
1848 	mgr->bpool = NULL;
1849 	mgr->spool = NULL;
1850 	mgr->entropy = NULL;
1851 	mgr->qid = NULL;
1852 	mgr->state = 0;
1853 	ISC_LIST_INIT(mgr->list);
1854 	mgr->v4ports = NULL;
1855 	mgr->v6ports = NULL;
1856 	mgr->nv4ports = 0;
1857 	mgr->nv6ports = 0;
1858 	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1859 
1860 	result = create_default_portset(mctx, &v4portset);
1861 	if (result == ISC_R_SUCCESS) {
1862 		result = create_default_portset(mctx, &v6portset);
1863 		if (result == ISC_R_SUCCESS) {
1864 			result = dns_dispatchmgr_setavailports(mgr,
1865 							       v4portset,
1866 							       v6portset);
1867 		}
1868 	}
1869 	if (v4portset != NULL)
1870 		isc_portset_destroy(mctx, &v4portset);
1871 	if (v6portset != NULL)
1872 		isc_portset_destroy(mctx, &v6portset);
1873 	if (result != ISC_R_SUCCESS)
1874 		goto kill_dpool;
1875 
1876 	if (entropy != NULL)
1877 		isc_entropy_attach(entropy, &mgr->entropy);
1878 
1879 	result = isc_rng_create(mctx, mgr->entropy, &mgr->rngctx);
1880 	if (result != ISC_R_SUCCESS)
1881 		goto kill_dpool;
1882 
1883 	*mgrp = mgr;
1884 	return (ISC_R_SUCCESS);
1885 
1886  kill_dpool:
1887 	isc_mempool_destroy(&mgr->dpool);
1888  kill_rpool:
1889 	isc_mempool_destroy(&mgr->rpool);
1890  kill_depool:
1891 	isc_mempool_destroy(&mgr->depool);
1892  kill_spool_lock:
1893 	DESTROYLOCK(&mgr->spool_lock);
1894  kill_bpool_lock:
1895 	DESTROYLOCK(&mgr->bpool_lock);
1896  kill_dpool_lock:
1897 	DESTROYLOCK(&mgr->dpool_lock);
1898  kill_rpool_lock:
1899 	DESTROYLOCK(&mgr->rpool_lock);
1900  kill_depool_lock:
1901 	DESTROYLOCK(&mgr->depool_lock);
1902  kill_buffer_lock:
1903 	DESTROYLOCK(&mgr->buffer_lock);
1904  kill_rng_lock:
1905 	DESTROYLOCK(&mgr->rng_lock);
1906  kill_lock:
1907 	DESTROYLOCK(&mgr->lock);
1908  deallocate:
1909 	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1910 	isc_mem_detach(&mctx);
1911 
1912 	return (result);
1913 }
1914 
1915 void
dns_dispatchmgr_setblackhole(dns_dispatchmgr_t * mgr,dns_acl_t * blackhole)1916 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1917 	REQUIRE(VALID_DISPATCHMGR(mgr));
1918 	if (mgr->blackhole != NULL)
1919 		dns_acl_detach(&mgr->blackhole);
1920 	dns_acl_attach(blackhole, &mgr->blackhole);
1921 }
1922 
1923 dns_acl_t *
dns_dispatchmgr_getblackhole(dns_dispatchmgr_t * mgr)1924 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1925 	REQUIRE(VALID_DISPATCHMGR(mgr));
1926 	return (mgr->blackhole);
1927 }
1928 
1929 void
dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t * mgr,dns_portlist_t * portlist)1930 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1931 				 dns_portlist_t *portlist)
1932 {
1933 	REQUIRE(VALID_DISPATCHMGR(mgr));
1934 	UNUSED(portlist);
1935 
1936 	/* This function is deprecated: use dns_dispatchmgr_setavailports(). */
1937 	return;
1938 }
1939 
1940 dns_portlist_t *
dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t * mgr)1941 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1942 	REQUIRE(VALID_DISPATCHMGR(mgr));
1943 	return (NULL);		/* this function is deprecated */
1944 }
1945 
1946 isc_result_t
dns_dispatchmgr_setavailports(dns_dispatchmgr_t * mgr,isc_portset_t * v4portset,isc_portset_t * v6portset)1947 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1948 			      isc_portset_t *v6portset)
1949 {
1950 	in_port_t *v4ports, *v6ports, p;
1951 	unsigned int nv4ports, nv6ports, i4, i6;
1952 
1953 	REQUIRE(VALID_DISPATCHMGR(mgr));
1954 
1955 	nv4ports = isc_portset_nports(v4portset);
1956 	nv6ports = isc_portset_nports(v6portset);
1957 
1958 	v4ports = NULL;
1959 	if (nv4ports != 0) {
1960 		v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
1961 		if (v4ports == NULL)
1962 			return (ISC_R_NOMEMORY);
1963 	}
1964 	v6ports = NULL;
1965 	if (nv6ports != 0) {
1966 		v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
1967 		if (v6ports == NULL) {
1968 			if (v4ports != NULL) {
1969 				isc_mem_put(mgr->mctx, v4ports,
1970 					    sizeof(in_port_t) *
1971 					    isc_portset_nports(v4portset));
1972 			}
1973 			return (ISC_R_NOMEMORY);
1974 		}
1975 	}
1976 
1977 	p = 0;
1978 	i4 = 0;
1979 	i6 = 0;
1980 	do {
1981 		if (isc_portset_isset(v4portset, p)) {
1982 			INSIST(i4 < nv4ports);
1983 			v4ports[i4++] = p;
1984 		}
1985 		if (isc_portset_isset(v6portset, p)) {
1986 			INSIST(i6 < nv6ports);
1987 			v6ports[i6++] = p;
1988 		}
1989 	} while (p++ < 65535);
1990 	INSIST(i4 == nv4ports && i6 == nv6ports);
1991 
1992 	PORTBUFLOCK(mgr);
1993 	if (mgr->v4ports != NULL) {
1994 		isc_mem_put(mgr->mctx, mgr->v4ports,
1995 			    mgr->nv4ports * sizeof(in_port_t));
1996 	}
1997 	mgr->v4ports = v4ports;
1998 	mgr->nv4ports = nv4ports;
1999 
2000 	if (mgr->v6ports != NULL) {
2001 		isc_mem_put(mgr->mctx, mgr->v6ports,
2002 			    mgr->nv6ports * sizeof(in_port_t));
2003 	}
2004 	mgr->v6ports = v6ports;
2005 	mgr->nv6ports = nv6ports;
2006 	PORTBUFUNLOCK(mgr);
2007 
2008 	return (ISC_R_SUCCESS);
2009 }
2010 
2011 static isc_result_t
dns_dispatchmgr_setudp(dns_dispatchmgr_t * mgr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment)2012 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2013 		       unsigned int buffersize, unsigned int maxbuffers,
2014 		       unsigned int maxrequests, unsigned int buckets,
2015 		       unsigned int increment)
2016 {
2017 	isc_result_t result;
2018 
2019 	REQUIRE(VALID_DISPATCHMGR(mgr));
2020 	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2021 	REQUIRE(maxbuffers > 0);
2022 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2023 	REQUIRE(increment > buckets);
2024 
2025 	/*
2026 	 * Keep some number of items around.  This should be a config
2027 	 * option.  For now, keep 8, but later keep at least two even
2028 	 * if the caller wants less.  This allows us to ensure certain
2029 	 * things, like an event can be "freed" and the next allocation
2030 	 * will always succeed.
2031 	 *
2032 	 * Note that if limits are placed on anything here, we use one
2033 	 * event internally, so the actual limit should be "wanted + 1."
2034 	 *
2035 	 * XXXMLG
2036 	 */
2037 
2038 	if (maxbuffers < 8)
2039 		maxbuffers = 8;
2040 
2041 	LOCK(&mgr->buffer_lock);
2042 
2043 	/* Create or adjust buffer pool */
2044 	if (mgr->bpool != NULL) {
2045 		/*
2046 		 * We only increase the maxbuffers to avoid accidental buffer
2047 		 * shortage.  Ideally we'd separate the manager-wide maximum
2048 		 * from per-dispatch limits and respect the latter within the
2049 		 * global limit.  But at this moment that's deemed to be
2050 		 * overkilling and isn't worth additional implementation
2051 		 * complexity.
2052 		 */
2053 		if (maxbuffers > mgr->maxbuffers) {
2054 			isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2055 			isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2056 			mgr->maxbuffers = maxbuffers;
2057 		}
2058 	} else {
2059 		result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2060 		if (result != ISC_R_SUCCESS) {
2061 			UNLOCK(&mgr->buffer_lock);
2062 			return (result);
2063 		}
2064 		isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2065 		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2066 		isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2067 		isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2068 		isc_mempool_setfillcount(mgr->bpool, 32);
2069 	}
2070 
2071 	/* Create or adjust socket pool */
2072 	if (mgr->spool != NULL) {
2073 		if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) {
2074 			isc_mempool_setmaxalloc(mgr->spool,
2075 						DNS_DISPATCH_POOLSOCKS * 2);
2076 			isc_mempool_setfreemax(mgr->spool,
2077 					       DNS_DISPATCH_POOLSOCKS * 2);
2078 		}
2079 		UNLOCK(&mgr->buffer_lock);
2080 		return (ISC_R_SUCCESS);
2081 	}
2082 	result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2083 				    &mgr->spool);
2084 	if (result != ISC_R_SUCCESS)
2085 		goto cleanup;
2086 
2087 	isc_mempool_setname(mgr->spool, "dispmgr_spool");
2088 	isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2089 	isc_mempool_setfreemax(mgr->spool, maxrequests);
2090 	isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2091 	isc_mempool_setfillcount(mgr->spool, 32);
2092 
2093 	result = qid_allocate(mgr, buckets, increment, &mgr->qid, true);
2094 	if (result != ISC_R_SUCCESS)
2095 		goto cleanup;
2096 
2097 	mgr->buffersize = buffersize;
2098 	mgr->maxbuffers = maxbuffers;
2099 	UNLOCK(&mgr->buffer_lock);
2100 	return (ISC_R_SUCCESS);
2101 
2102  cleanup:
2103 	isc_mempool_destroy(&mgr->bpool);
2104 	if (mgr->spool != NULL)
2105 		isc_mempool_destroy(&mgr->spool);
2106 	UNLOCK(&mgr->buffer_lock);
2107 	return (result);
2108 }
2109 
2110 void
dns_dispatchmgr_destroy(dns_dispatchmgr_t ** mgrp)2111 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2112 	dns_dispatchmgr_t *mgr;
2113 	bool killit;
2114 
2115 	REQUIRE(mgrp != NULL);
2116 	REQUIRE(VALID_DISPATCHMGR(*mgrp));
2117 
2118 	mgr = *mgrp;
2119 	*mgrp = NULL;
2120 
2121 	LOCK(&mgr->lock);
2122 	mgr->state |= MGR_SHUTTINGDOWN;
2123 	killit = destroy_mgr_ok(mgr);
2124 	UNLOCK(&mgr->lock);
2125 
2126 	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2127 
2128 	if (killit)
2129 		destroy_mgr(&mgr);
2130 }
2131 
2132 void
dns_dispatchmgr_setstats(dns_dispatchmgr_t * mgr,isc_stats_t * stats)2133 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2134 	REQUIRE(VALID_DISPATCHMGR(mgr));
2135 	REQUIRE(ISC_LIST_EMPTY(mgr->list));
2136 	REQUIRE(mgr->stats == NULL);
2137 
2138 	isc_stats_attach(stats, &mgr->stats);
2139 }
2140 
/*
 * Three-way comparison of two in_port_t values, with the signature
 * expected by bsearch(): returns -1, 0 or 1 when the port at 'key' is
 * less than, equal to or greater than the port at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	/* (a > b) - (a < b) yields exactly -1, 0 or 1. */
	return ((wanted > entry) - (wanted < entry));
}
2153 
2154 static bool
portavailable(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_sockaddr_t * sockaddrp)2155 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2156 	      isc_sockaddr_t *sockaddrp)
2157 {
2158 	isc_sockaddr_t sockaddr;
2159 	isc_result_t result;
2160 	in_port_t *ports, port;
2161 	unsigned int nports;
2162 	bool available = false;
2163 
2164 	REQUIRE(sock != NULL || sockaddrp != NULL);
2165 
2166 	PORTBUFLOCK(mgr);
2167 	if (sock != NULL) {
2168 		sockaddrp = &sockaddr;
2169 		result = isc_socket_getsockname(sock, sockaddrp);
2170 		if (result != ISC_R_SUCCESS)
2171 			goto unlock;
2172 	}
2173 
2174 	if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2175 		ports = mgr->v4ports;
2176 		nports = mgr->nv4ports;
2177 	} else {
2178 		ports = mgr->v6ports;
2179 		nports = mgr->nv6ports;
2180 	}
2181 	if (ports == NULL)
2182 		goto unlock;
2183 
2184 	port = isc_sockaddr_getport(sockaddrp);
2185 	if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2186 		available = true;
2187 
2188 unlock:
2189 	PORTBUFUNLOCK(mgr);
2190 	return (available);
2191 }
2192 
2193 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2194 
2195 static bool
local_addr_match(dns_dispatch_t * disp,isc_sockaddr_t * addr)2196 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2197 	isc_sockaddr_t sockaddr;
2198 	isc_result_t result;
2199 
2200 	REQUIRE(disp->socket != NULL);
2201 
2202 	if (addr == NULL)
2203 		return (true);
2204 
2205 	/*
2206 	 * Don't match wildcard ports unless the port is available in the
2207 	 * current configuration.
2208 	 */
2209 	if (isc_sockaddr_getport(addr) == 0 &&
2210 	    isc_sockaddr_getport(&disp->local) == 0 &&
2211 	    !portavailable(disp->mgr, disp->socket, NULL)) {
2212 		return (false);
2213 	}
2214 
2215 	/*
2216 	 * Check if we match the binding <address,port>.
2217 	 * Wildcard ports match/fail here.
2218 	 */
2219 	if (isc_sockaddr_equal(&disp->local, addr))
2220 		return (true);
2221 	if (isc_sockaddr_getport(addr) == 0)
2222 		return (false);
2223 
2224 	/*
2225 	 * Check if we match a bound wildcard port <address,port>.
2226 	 */
2227 	if (!isc_sockaddr_eqaddr(&disp->local, addr))
2228 		return (false);
2229 	result = isc_socket_getsockname(disp->socket, &sockaddr);
2230 	if (result != ISC_R_SUCCESS)
2231 		return (false);
2232 
2233 	return (isc_sockaddr_equal(&sockaddr, addr));
2234 }
2235 
2236 /*
2237  * Requires mgr be locked.
2238  *
2239  * No dispatcher can be locked by this thread when calling this function.
2240  *
2241  *
2242  * NOTE:
2243  *	If a matching dispatcher is found, it is locked after this function
2244  *	returns, and must be unlocked by the caller.
2245  */
2246 static isc_result_t
dispatch_find(dns_dispatchmgr_t * mgr,isc_sockaddr_t * local,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2247 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2248 	      unsigned int attributes, unsigned int mask,
2249 	      dns_dispatch_t **dispp)
2250 {
2251 	dns_dispatch_t *disp;
2252 	isc_result_t result;
2253 
2254 	/*
2255 	 * Make certain that we will not match a private or exclusive dispatch.
2256 	 */
2257 	attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2258 	mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2259 
2260 	disp = ISC_LIST_HEAD(mgr->list);
2261 	while (disp != NULL) {
2262 		LOCK(&disp->lock);
2263 		if ((disp->shutting_down == 0)
2264 		    && ATTRMATCH(disp->attributes, attributes, mask)
2265 		    && local_addr_match(disp, local))
2266 			break;
2267 		UNLOCK(&disp->lock);
2268 		disp = ISC_LIST_NEXT(disp, link);
2269 	}
2270 
2271 	if (disp == NULL) {
2272 		result = ISC_R_NOTFOUND;
2273 		goto out;
2274 	}
2275 
2276 	*dispp = disp;
2277 	result = ISC_R_SUCCESS;
2278  out:
2279 
2280 	return (result);
2281 }
2282 
2283 static isc_result_t
qid_allocate(dns_dispatchmgr_t * mgr,unsigned int buckets,unsigned int increment,dns_qid_t ** qidp,bool needsocktable)2284 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2285 	     unsigned int increment, dns_qid_t **qidp,
2286 	     bool needsocktable)
2287 {
2288 	dns_qid_t *qid;
2289 	unsigned int i;
2290 	isc_result_t result;
2291 
2292 	REQUIRE(VALID_DISPATCHMGR(mgr));
2293 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2294 	REQUIRE(increment > buckets);
2295 	REQUIRE(qidp != NULL && *qidp == NULL);
2296 
2297 	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2298 	if (qid == NULL)
2299 		return (ISC_R_NOMEMORY);
2300 
2301 	qid->qid_table = isc_mem_get(mgr->mctx,
2302 				     buckets * sizeof(dns_displist_t));
2303 	if (qid->qid_table == NULL) {
2304 		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2305 		return (ISC_R_NOMEMORY);
2306 	}
2307 
2308 	qid->sock_table = NULL;
2309 	if (needsocktable) {
2310 		qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2311 					      sizeof(dispsocketlist_t));
2312 		if (qid->sock_table == NULL) {
2313 			isc_mem_put(mgr->mctx, qid->qid_table,
2314 				    buckets * sizeof(dns_displist_t));
2315 			isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2316 			return (ISC_R_NOMEMORY);
2317 		}
2318 	}
2319 
2320 	result = isc_mutex_init(&qid->lock);
2321 	if (result != ISC_R_SUCCESS) {
2322 		if (qid->sock_table != NULL) {
2323 			isc_mem_put(mgr->mctx, qid->sock_table,
2324 				    buckets * sizeof(dispsocketlist_t));
2325 		}
2326 		isc_mem_put(mgr->mctx, qid->qid_table,
2327 			    buckets * sizeof(dns_displist_t));
2328 		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2329 		return (result);
2330 	}
2331 
2332 	for (i = 0; i < buckets; i++) {
2333 		ISC_LIST_INIT(qid->qid_table[i]);
2334 		if (qid->sock_table != NULL)
2335 			ISC_LIST_INIT(qid->sock_table[i]);
2336 	}
2337 
2338 	qid->qid_nbuckets = buckets;
2339 	qid->qid_increment = increment;
2340 	qid->magic = QID_MAGIC;
2341 	*qidp = qid;
2342 	return (ISC_R_SUCCESS);
2343 }
2344 
2345 static void
qid_destroy(isc_mem_t * mctx,dns_qid_t ** qidp)2346 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2347 	dns_qid_t *qid;
2348 
2349 	REQUIRE(qidp != NULL);
2350 	qid = *qidp;
2351 
2352 	REQUIRE(VALID_QID(qid));
2353 
2354 	*qidp = NULL;
2355 	qid->magic = 0;
2356 	isc_mem_put(mctx, qid->qid_table,
2357 		    qid->qid_nbuckets * sizeof(dns_displist_t));
2358 	if (qid->sock_table != NULL) {
2359 		isc_mem_put(mctx, qid->sock_table,
2360 			    qid->qid_nbuckets * sizeof(dispsocketlist_t));
2361 	}
2362 	DESTROYLOCK(&qid->lock);
2363 	isc_mem_put(mctx, qid, sizeof(*qid));
2364 }
2365 
2366 /*
2367  * Allocate and set important limits.
2368  */
2369 static isc_result_t
dispatch_allocate(dns_dispatchmgr_t * mgr,unsigned int maxrequests,dns_dispatch_t ** dispp)2370 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2371 		  dns_dispatch_t **dispp)
2372 {
2373 	dns_dispatch_t *disp;
2374 	isc_result_t result;
2375 
2376 	REQUIRE(VALID_DISPATCHMGR(mgr));
2377 	REQUIRE(dispp != NULL && *dispp == NULL);
2378 
2379 	/*
2380 	 * Set up the dispatcher, mostly.  Don't bother setting some of
2381 	 * the options that are controlled by tcp vs. udp, etc.
2382 	 */
2383 
2384 	disp = isc_mempool_get(mgr->dpool);
2385 	if (disp == NULL)
2386 		return (ISC_R_NOMEMORY);
2387 
2388 	disp->magic = 0;
2389 	disp->mgr = mgr;
2390 	disp->maxrequests = maxrequests;
2391 	disp->attributes = 0;
2392 	ISC_LINK_INIT(disp, link);
2393 	disp->refcount = 1;
2394 	disp->recv_pending = 0;
2395 	memset(&disp->local, 0, sizeof(disp->local));
2396 	memset(&disp->peer, 0, sizeof(disp->peer));
2397 	disp->localport = 0;
2398 	disp->shutting_down = 0;
2399 	disp->shutdown_out = 0;
2400 	disp->connected = 0;
2401 	disp->tcpmsg_valid = 0;
2402 	disp->shutdown_why = ISC_R_UNEXPECTED;
2403 	disp->requests = 0;
2404 	disp->tcpbuffers = 0;
2405 	disp->qid = NULL;
2406 	ISC_LIST_INIT(disp->activesockets);
2407 	ISC_LIST_INIT(disp->inactivesockets);
2408 	disp->nsockets = 0;
2409 	disp->rngctx = NULL;
2410 	isc_rng_attach(mgr->rngctx, &disp->rngctx);
2411 	disp->port_table = NULL;
2412 	disp->portpool = NULL;
2413 	disp->dscp = -1;
2414 
2415 	result = isc_mutex_init(&disp->lock);
2416 	if (result != ISC_R_SUCCESS)
2417 		goto deallocate;
2418 
2419 	disp->failsafe_ev = allocate_devent(disp);
2420 	if (disp->failsafe_ev == NULL) {
2421 		result = ISC_R_NOMEMORY;
2422 		goto kill_lock;
2423 	}
2424 
2425 	disp->magic = DISPATCH_MAGIC;
2426 
2427 	*dispp = disp;
2428 	return (ISC_R_SUCCESS);
2429 
2430 	/*
2431 	 * error returns
2432 	 */
2433  kill_lock:
2434 	DESTROYLOCK(&disp->lock);
2435  deallocate:
2436 	if (disp->rngctx != NULL)
2437 		isc_rng_detach(&disp->rngctx);
2438 	isc_mempool_put(mgr->dpool, disp);
2439 
2440 	return (result);
2441 }
2442 
2443 
2444 /*
2445  * MUST be unlocked, and not used by anything.
2446  */
2447 static void
dispatch_free(dns_dispatch_t ** dispp)2448 dispatch_free(dns_dispatch_t **dispp) {
2449 	dns_dispatch_t *disp;
2450 	dns_dispatchmgr_t *mgr;
2451 	int i;
2452 
2453 	REQUIRE(VALID_DISPATCH(*dispp));
2454 	disp = *dispp;
2455 	*dispp = NULL;
2456 
2457 	mgr = disp->mgr;
2458 	REQUIRE(VALID_DISPATCHMGR(mgr));
2459 
2460 	if (disp->tcpmsg_valid) {
2461 		dns_tcpmsg_invalidate(&disp->tcpmsg);
2462 		disp->tcpmsg_valid = 0;
2463 	}
2464 
2465 	INSIST(disp->tcpbuffers == 0);
2466 	INSIST(disp->requests == 0);
2467 	INSIST(disp->recv_pending == 0);
2468 	INSIST(ISC_LIST_EMPTY(disp->activesockets));
2469 	INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2470 
2471 	isc_mempool_put(mgr->depool, disp->failsafe_ev);
2472 	disp->failsafe_ev = NULL;
2473 
2474 	if (disp->qid != NULL)
2475 		qid_destroy(mgr->mctx, &disp->qid);
2476 
2477 	if (disp->port_table != NULL) {
2478 		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2479 			INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2480 		isc_mem_put(mgr->mctx, disp->port_table,
2481 			    sizeof(disp->port_table[0]) *
2482 			    DNS_DISPATCH_PORTTABLESIZE);
2483 	}
2484 
2485 	if (disp->portpool != NULL)
2486 		isc_mempool_destroy(&disp->portpool);
2487 
2488 	if (disp->rngctx != NULL)
2489 		isc_rng_detach(&disp->rngctx);
2490 
2491 	disp->mgr = NULL;
2492 	DESTROYLOCK(&disp->lock);
2493 	disp->magic = 0;
2494 	isc_mempool_put(mgr->dpool, disp);
2495 }
2496 
2497 isc_result_t
dns_dispatch_createtcp(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_taskmgr_t * taskmgr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,dns_dispatch_t ** dispp)2498 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2499 		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
2500 		       unsigned int maxbuffers, unsigned int maxrequests,
2501 		       unsigned int buckets, unsigned int increment,
2502 		       unsigned int attributes, dns_dispatch_t **dispp)
2503 {
2504 
2505 	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2506 
2507 	return (dns_dispatch_createtcp2(mgr, sock, taskmgr, NULL, NULL,
2508 					buffersize, maxbuffers, maxrequests,
2509 					buckets, increment, attributes,
2510 					dispp));
2511 }
2512 
2513 isc_result_t
dns_dispatch_createtcp2(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,isc_sockaddr_t * destaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,dns_dispatch_t ** dispp)2514 dns_dispatch_createtcp2(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2515 			isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2516 			isc_sockaddr_t *destaddr, unsigned int buffersize,
2517 			unsigned int maxbuffers, unsigned int maxrequests,
2518 			unsigned int buckets, unsigned int increment,
2519 			unsigned int attributes, dns_dispatch_t **dispp)
2520 {
2521 	isc_result_t result;
2522 	dns_dispatch_t *disp;
2523 
2524 	UNUSED(maxbuffers);
2525 	UNUSED(buffersize);
2526 
2527 	REQUIRE(VALID_DISPATCHMGR(mgr));
2528 	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2529 	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2530 	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2531 
2532 	if (destaddr == NULL)
2533 		attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2534 
2535 	LOCK(&mgr->lock);
2536 
2537 	/*
2538 	 * dispatch_allocate() checks mgr for us.
2539 	 * qid_allocate() checks buckets and increment for us.
2540 	 */
2541 	disp = NULL;
2542 	result = dispatch_allocate(mgr, maxrequests, &disp);
2543 	if (result != ISC_R_SUCCESS) {
2544 		UNLOCK(&mgr->lock);
2545 		return (result);
2546 	}
2547 
2548 	result = qid_allocate(mgr, buckets, increment, &disp->qid, false);
2549 	if (result != ISC_R_SUCCESS)
2550 		goto deallocate_dispatch;
2551 
2552 	disp->socktype = isc_sockettype_tcp;
2553 	disp->socket = NULL;
2554 	isc_socket_attach(sock, &disp->socket);
2555 
2556 	disp->sepool = NULL;
2557 
2558 	disp->ntasks = 1;
2559 	disp->task[0] = NULL;
2560 	result = isc_task_create(taskmgr, 0, &disp->task[0]);
2561 	if (result != ISC_R_SUCCESS)
2562 		goto kill_socket;
2563 
2564 	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2565 					    DNS_EVENT_DISPATCHCONTROL,
2566 					    destroy_disp, disp,
2567 					    sizeof(isc_event_t));
2568 	if (disp->ctlevent == NULL) {
2569 		result = ISC_R_NOMEMORY;
2570 		goto kill_task;
2571 	}
2572 
2573 	isc_task_setname(disp->task[0], "tcpdispatch", disp);
2574 
2575 	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2576 	disp->tcpmsg_valid = 1;
2577 
2578 	disp->attributes = attributes;
2579 
2580 	if (localaddr == NULL) {
2581 		if (destaddr != NULL) {
2582 			switch (isc_sockaddr_pf(destaddr)) {
2583 			case AF_INET:
2584 				isc_sockaddr_any(&disp->local);
2585 				break;
2586 			case AF_INET6:
2587 				isc_sockaddr_any6(&disp->local);
2588 				break;
2589 			}
2590 		}
2591 	} else
2592 		disp->local = *localaddr;
2593 
2594 	if (destaddr != NULL)
2595 		disp->peer = *destaddr;
2596 
2597 	/*
2598 	 * Append it to the dispatcher list.
2599 	 */
2600 	ISC_LIST_APPEND(mgr->list, disp, link);
2601 	UNLOCK(&mgr->lock);
2602 
2603 	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2604 	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2605 	*dispp = disp;
2606 
2607 	return (ISC_R_SUCCESS);
2608 
2609 	/*
2610 	 * Error returns.
2611 	 */
2612  kill_task:
2613 	isc_task_detach(&disp->task[0]);
2614  kill_socket:
2615 	isc_socket_detach(&disp->socket);
2616  deallocate_dispatch:
2617 	dispatch_free(&disp);
2618 
2619 	UNLOCK(&mgr->lock);
2620 
2621 	return (result);
2622 }
2623 
2624 isc_result_t
dns_dispatch_gettcp(dns_dispatchmgr_t * mgr,isc_sockaddr_t * destaddr,isc_sockaddr_t * localaddr,dns_dispatch_t ** dispp)2625 dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, isc_sockaddr_t *destaddr,
2626 		    isc_sockaddr_t *localaddr, dns_dispatch_t **dispp)
2627 {
2628 	dns_dispatch_t *disp;
2629 	isc_result_t result;
2630 	isc_sockaddr_t peeraddr;
2631 	isc_sockaddr_t sockname;
2632 	unsigned int attributes, mask;
2633 	bool match = false;
2634 
2635 	REQUIRE(VALID_DISPATCHMGR(mgr));
2636 	REQUIRE(destaddr != NULL);
2637 	REQUIRE(dispp != NULL && *dispp == NULL);
2638 
2639 	attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED;
2640 	mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE |
2641 	       DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED;
2642 
2643 	LOCK(&mgr->lock);
2644 	disp = ISC_LIST_HEAD(mgr->list);
2645 	while (disp != NULL && !match) {
2646 		LOCK(&disp->lock);
2647 		if ((disp->shutting_down == 0) &&
2648 		    ATTRMATCH(disp->attributes, attributes, mask) &&
2649 		    (localaddr == NULL ||
2650 		     isc_sockaddr_eqaddr(localaddr, &disp->local))) {
2651 			result = isc_socket_getsockname(disp->socket,
2652 							&sockname);
2653 			if (result == ISC_R_SUCCESS)
2654 				result = isc_socket_getpeername(disp->socket,
2655 								&peeraddr);
2656 			if (result == ISC_R_SUCCESS &&
2657 			    isc_sockaddr_equal(destaddr, &peeraddr) &&
2658 			    (localaddr == NULL ||
2659 			     isc_sockaddr_eqaddr(localaddr, &sockname))) {
2660 				/* attach */
2661 				disp->refcount++;
2662 				*dispp = disp;
2663 				match = true;
2664 			}
2665 		}
2666 		UNLOCK(&disp->lock);
2667 		disp = ISC_LIST_NEXT(disp, link);
2668 	}
2669 	UNLOCK(&mgr->lock);
2670 	return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND);
2671 }
2672 
2673 isc_result_t
dns_dispatch_gettcp2(dns_dispatchmgr_t * mgr,isc_sockaddr_t * destaddr,isc_sockaddr_t * localaddr,bool * connected,dns_dispatch_t ** dispp)2674 dns_dispatch_gettcp2(dns_dispatchmgr_t *mgr, isc_sockaddr_t *destaddr,
2675 		     isc_sockaddr_t *localaddr, bool *connected,
2676 		     dns_dispatch_t **dispp)
2677 {
2678 	dns_dispatch_t *disp;
2679 	isc_result_t result;
2680 	isc_sockaddr_t peeraddr;
2681 	isc_sockaddr_t sockname;
2682 	unsigned int attributes, mask;
2683 	bool match = false;
2684 
2685 	REQUIRE(VALID_DISPATCHMGR(mgr));
2686 	REQUIRE(destaddr != NULL);
2687 	REQUIRE(dispp != NULL && *dispp == NULL);
2688 	REQUIRE(connected != NULL);
2689 
2690 	/* First pass (same as dns_dispatch_gettcp()) */
2691 	attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED;
2692 	mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE |
2693 	       DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED;
2694 
2695 	LOCK(&mgr->lock);
2696 	disp = ISC_LIST_HEAD(mgr->list);
2697 	while (disp != NULL && !match) {
2698 		LOCK(&disp->lock);
2699 		if ((disp->shutting_down == 0) &&
2700 		    ATTRMATCH(disp->attributes, attributes, mask) &&
2701 		    (localaddr == NULL ||
2702 		     isc_sockaddr_eqaddr(localaddr, &disp->local))) {
2703 			result = isc_socket_getsockname(disp->socket,
2704 							&sockname);
2705 			if (result == ISC_R_SUCCESS)
2706 				result = isc_socket_getpeername(disp->socket,
2707 								&peeraddr);
2708 			if (result == ISC_R_SUCCESS &&
2709 			    isc_sockaddr_equal(destaddr, &peeraddr) &&
2710 			    (localaddr == NULL ||
2711 			     isc_sockaddr_eqaddr(localaddr, &sockname))) {
2712 				/* attach */
2713 				disp->refcount++;
2714 				*dispp = disp;
2715 				match = true;
2716 				*connected = true;
2717 			}
2718 		}
2719 		UNLOCK(&disp->lock);
2720 		disp = ISC_LIST_NEXT(disp, link);
2721 	}
2722 	if (match) {
2723 		UNLOCK(&mgr->lock);
2724 		return (ISC_R_SUCCESS);
2725 	}
2726 
2727 	/* Second pass */
2728 	attributes = DNS_DISPATCHATTR_TCP;
2729 
2730 	disp = ISC_LIST_HEAD(mgr->list);
2731 	while (disp != NULL && !match) {
2732 		LOCK(&disp->lock);
2733 		if ((disp->shutting_down == 0) &&
2734 		    ATTRMATCH(disp->attributes, attributes, mask) &&
2735 		    (localaddr == NULL ||
2736 		     isc_sockaddr_eqaddr(localaddr, &disp->local)) &&
2737 		    isc_sockaddr_equal(destaddr, &disp->peer)) {
2738 			/* attach */
2739 			disp->refcount++;
2740 			*dispp = disp;
2741 			match = true;
2742 		}
2743 		UNLOCK(&disp->lock);
2744 		disp = ISC_LIST_NEXT(disp, link);
2745 	}
2746 	UNLOCK(&mgr->lock);
2747 	return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND);
2748 }
2749 
2750 isc_result_t
dns_dispatch_getudp_dup(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp,dns_dispatch_t * dup_dispatch)2751 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2752 		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2753 		    unsigned int buffersize,
2754 		    unsigned int maxbuffers, unsigned int maxrequests,
2755 		    unsigned int buckets, unsigned int increment,
2756 		    unsigned int attributes, unsigned int mask,
2757 		    dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2758 {
2759 	isc_result_t result;
2760 	dns_dispatch_t *disp = NULL;
2761 
2762 	REQUIRE(VALID_DISPATCHMGR(mgr));
2763 	REQUIRE(sockmgr != NULL);
2764 	REQUIRE(localaddr != NULL);
2765 	REQUIRE(taskmgr != NULL);
2766 	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2767 	REQUIRE(maxbuffers > 0);
2768 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2769 	REQUIRE(increment > buckets);
2770 	REQUIRE(dispp != NULL && *dispp == NULL);
2771 	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2772 
2773 	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2774 					maxrequests, buckets, increment);
2775 	if (result != ISC_R_SUCCESS)
2776 		return (result);
2777 
2778 	LOCK(&mgr->lock);
2779 
2780 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2781 		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2782 		goto createudp;
2783 	}
2784 
2785 	/*
2786 	 * See if we have a dispatcher that matches.
2787 	 */
2788 	if (dup_dispatch == NULL) {
2789 		result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2790 		if (result == ISC_R_SUCCESS) {
2791 			disp->refcount++;
2792 
2793 			if (disp->maxrequests < maxrequests)
2794 				disp->maxrequests = maxrequests;
2795 
2796 			if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2797 			    && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2798 			{
2799 				disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2800 				if (disp->recv_pending != 0)
2801 					isc_socket_cancel(disp->socket,
2802 							  disp->task[0],
2803 							  ISC_SOCKCANCEL_RECV);
2804 			}
2805 
2806 			UNLOCK(&disp->lock);
2807 			UNLOCK(&mgr->lock);
2808 
2809 			*dispp = disp;
2810 
2811 			return (ISC_R_SUCCESS);
2812 		}
2813 	}
2814 
2815  createudp:
2816 	/*
2817 	 * Nope, create one.
2818 	 */
2819 	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2820 				    maxrequests, attributes, &disp,
2821 				    dup_dispatch == NULL
2822 					    ? NULL
2823 					    : dup_dispatch->socket);
2824 
2825 	if (result != ISC_R_SUCCESS) {
2826 		UNLOCK(&mgr->lock);
2827 		return (result);
2828 	}
2829 
2830 	UNLOCK(&mgr->lock);
2831 	*dispp = disp;
2832 
2833 	return (ISC_R_SUCCESS);
2834 }
2835 
2836 isc_result_t
dns_dispatch_getudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2837 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2838 		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2839 		    unsigned int buffersize,
2840 		    unsigned int maxbuffers, unsigned int maxrequests,
2841 		    unsigned int buckets, unsigned int increment,
2842 		    unsigned int attributes, unsigned int mask,
2843 		    dns_dispatch_t **dispp)
2844 {
2845 	return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2846 					buffersize, maxbuffers, maxrequests,
2847 					buckets, increment, attributes,
2848 					mask, dispp, NULL));
2849 }
2850 
2851 /*
2852  * mgr should be locked.
2853  */
2854 
2855 #ifndef DNS_DISPATCH_HELD
2856 #define DNS_DISPATCH_HELD 20U
2857 #endif
2858 
2859 static isc_result_t
get_udpsocket(dns_dispatchmgr_t * mgr,dns_dispatch_t * disp,isc_socketmgr_t * sockmgr,isc_sockaddr_t * localaddr,isc_socket_t ** sockp,isc_socket_t * dup_socket)2860 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2861 	      isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2862 	      isc_socket_t **sockp, isc_socket_t *dup_socket)
2863 {
2864 	unsigned int i, j;
2865 	isc_socket_t *held[DNS_DISPATCH_HELD];
2866 	isc_sockaddr_t localaddr_bound;
2867 	isc_socket_t *sock = NULL;
2868 	isc_result_t result = ISC_R_SUCCESS;
2869 	bool anyport;
2870 
2871 	INSIST(sockp != NULL && *sockp == NULL);
2872 
2873 	localaddr_bound = *localaddr;
2874 	anyport = (isc_sockaddr_getport(localaddr) == 0);
2875 
2876 	if (anyport) {
2877 		unsigned int nports;
2878 		in_port_t *ports;
2879 
2880 		/*
2881 		 * If no port is specified, we first try to pick up a random
2882 		 * port by ourselves.
2883 		 */
2884 		if (isc_sockaddr_pf(localaddr) == AF_INET) {
2885 			nports = disp->mgr->nv4ports;
2886 			ports = disp->mgr->v4ports;
2887 		} else {
2888 			nports = disp->mgr->nv6ports;
2889 			ports = disp->mgr->v6ports;
2890 		}
2891 		if (nports == 0)
2892 			return (ISC_R_ADDRNOTAVAIL);
2893 
2894 		for (i = 0; i < 1024; i++) {
2895 			in_port_t prt;
2896 
2897 			prt = ports[isc_rng_uniformrandom(DISP_RNGCTX(disp),
2898 							  nports)];
2899 			isc_sockaddr_setport(&localaddr_bound, prt);
2900 			result = open_socket(sockmgr, &localaddr_bound,
2901 					     0, &sock, NULL);
2902 			/*
2903 			 * Continue if the port chosen is already in use
2904 			 * or the OS has reserved it.
2905 			 */
2906 			if (result == ISC_R_NOPERM ||
2907 			    result == ISC_R_ADDRINUSE)
2908 				continue;
2909 			disp->localport = prt;
2910 			*sockp = sock;
2911 			return (result);
2912 		}
2913 
2914 		/*
2915 		 * If this fails 1024 times, we then ask the kernel for
2916 		 * choosing one.
2917 		 */
2918 	} else {
2919 		/* Allow to reuse address for non-random ports. */
2920 		result = open_socket(sockmgr, localaddr,
2921 				     ISC_SOCKET_REUSEADDRESS, &sock,
2922 				     dup_socket);
2923 
2924 		if (result == ISC_R_SUCCESS)
2925 			*sockp = sock;
2926 
2927 		return (result);
2928 	}
2929 
2930 	memset(held, 0, sizeof(held));
2931 	i = 0;
2932 
2933 	for (j = 0; j < 0xffffU; j++) {
2934 		result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2935 		if (result != ISC_R_SUCCESS)
2936 			goto end;
2937 		else if (portavailable(mgr, sock, NULL))
2938 			break;
2939 		if (held[i] != NULL)
2940 			isc_socket_detach(&held[i]);
2941 		held[i++] = sock;
2942 		sock = NULL;
2943 		if (i == DNS_DISPATCH_HELD)
2944 			i = 0;
2945 	}
2946 	if (j == 0xffffU) {
2947 		mgr_log(mgr, ISC_LOG_ERROR,
2948 			"avoid-v%s-udp-ports: unable to allocate "
2949 			"an available port",
2950 			isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2951 		result = ISC_R_FAILURE;
2952 		goto end;
2953 	}
2954 	*sockp = sock;
2955 
2956 end:
2957 	for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2958 		if (held[i] != NULL)
2959 			isc_socket_detach(&held[i]);
2960 	}
2961 
2962 	return (result);
2963 }
2964 
2965 static isc_result_t
dispatch_createudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int maxrequests,unsigned int attributes,dns_dispatch_t ** dispp,isc_socket_t * dup_socket)2966 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2967 		   isc_taskmgr_t *taskmgr,
2968 		   isc_sockaddr_t *localaddr,
2969 		   unsigned int maxrequests,
2970 		   unsigned int attributes,
2971 		   dns_dispatch_t **dispp,
2972 		   isc_socket_t *dup_socket)
2973 {
2974 	isc_result_t result;
2975 	dns_dispatch_t *disp;
2976 	isc_socket_t *sock = NULL;
2977 	int i = 0;
2978 
2979 	/*
2980 	 * dispatch_allocate() checks mgr for us.
2981 	 */
2982 	disp = NULL;
2983 	result = dispatch_allocate(mgr, maxrequests, &disp);
2984 	if (result != ISC_R_SUCCESS)
2985 		return (result);
2986 
2987 	disp->socktype = isc_sockettype_udp;
2988 
2989 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2990 		result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
2991 				       dup_socket);
2992 		if (result != ISC_R_SUCCESS)
2993 			goto deallocate_dispatch;
2994 
2995 		if (isc_log_wouldlog(dns_lctx, 90)) {
2996 			char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2997 
2998 			isc_sockaddr_format(localaddr, addrbuf,
2999 					    ISC_SOCKADDR_FORMATSIZE);
3000 			mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3001 				" UDP dispatch for %s with socket fd %d",
3002 				addrbuf, isc_socket_getfd(sock));
3003 		}
3004 
3005 	} else {
3006 		isc_sockaddr_t sa_any;
3007 
3008 		/*
3009 		 * For dispatches using exclusive sockets with a specific
3010 		 * source address, we only check if the specified address is
3011 		 * available on the system.  Query sockets will be created later
3012 		 * on demand.
3013 		 */
3014 		isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3015 		if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3016 			result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3017 			if (sock != NULL)
3018 				isc_socket_detach(&sock);
3019 			if (result != ISC_R_SUCCESS)
3020 				goto deallocate_dispatch;
3021 		}
3022 
3023 		disp->port_table = isc_mem_get(mgr->mctx,
3024 					       sizeof(disp->port_table[0]) *
3025 					       DNS_DISPATCH_PORTTABLESIZE);
3026 		if (disp->port_table == NULL)
3027 			goto deallocate_dispatch;
3028 		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3029 			ISC_LIST_INIT(disp->port_table[i]);
3030 
3031 		result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3032 					    &disp->portpool);
3033 		if (result != ISC_R_SUCCESS)
3034 			goto deallocate_dispatch;
3035 		isc_mempool_setname(disp->portpool, "disp_portpool");
3036 		isc_mempool_setfreemax(disp->portpool, 128);
3037 	}
3038 	disp->socket = sock;
3039 	disp->local = *localaddr;
3040 
3041 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3042 		disp->ntasks = MAX_INTERNAL_TASKS;
3043 	else
3044 		disp->ntasks = 1;
3045 	for (i = 0; i < disp->ntasks; i++) {
3046 		disp->task[i] = NULL;
3047 		result = isc_task_create(taskmgr, 0, &disp->task[i]);
3048 		if (result != ISC_R_SUCCESS) {
3049 			while (--i >= 0) {
3050 				isc_task_shutdown(disp->task[i]);
3051 				isc_task_detach(&disp->task[i]);
3052 			}
3053 			goto kill_socket;
3054 		}
3055 		isc_task_setname(disp->task[i], "udpdispatch", disp);
3056 	}
3057 
3058 	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3059 					    DNS_EVENT_DISPATCHCONTROL,
3060 					    destroy_disp, disp,
3061 					    sizeof(isc_event_t));
3062 	if (disp->ctlevent == NULL) {
3063 		result = ISC_R_NOMEMORY;
3064 		goto kill_task;
3065 	}
3066 
3067 	disp->sepool = NULL;
3068 	if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3069 			       &disp->sepool) != ISC_R_SUCCESS)
3070 	{
3071 		result = ISC_R_NOMEMORY;
3072 		goto kill_ctlevent;
3073 	}
3074 
3075 	result = isc_mutex_init(&disp->sepool_lock);
3076 	if (result != ISC_R_SUCCESS)
3077 		goto kill_sepool;
3078 
3079 	isc_mempool_setname(disp->sepool, "disp_sepool");
3080 	isc_mempool_setmaxalloc(disp->sepool, 32768);
3081 	isc_mempool_setfreemax(disp->sepool, 32768);
3082 	isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3083 	isc_mempool_setfillcount(disp->sepool, 16);
3084 
3085 	attributes &= ~DNS_DISPATCHATTR_TCP;
3086 	attributes |= DNS_DISPATCHATTR_UDP;
3087 	disp->attributes = attributes;
3088 
3089 	/*
3090 	 * Append it to the dispatcher list.
3091 	 */
3092 	ISC_LIST_APPEND(mgr->list, disp, link);
3093 
3094 	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3095 	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3096 	if (disp->socket != NULL)
3097 		dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3098 
3099 	*dispp = disp;
3100 
3101 	return (result);
3102 
3103 	/*
3104 	 * Error returns.
3105 	 */
3106  kill_sepool:
3107 	isc_mempool_destroy(&disp->sepool);
3108  kill_ctlevent:
3109 	isc_event_free(&disp->ctlevent);
3110  kill_task:
3111 	for (i = 0; i < disp->ntasks; i++)
3112 		isc_task_detach(&disp->task[i]);
3113  kill_socket:
3114 	if (disp->socket != NULL)
3115 		isc_socket_detach(&disp->socket);
3116  deallocate_dispatch:
3117 	dispatch_free(&disp);
3118 
3119 	return (result);
3120 }
3121 
3122 void
dns_dispatch_attach(dns_dispatch_t * disp,dns_dispatch_t ** dispp)3123 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3124 	REQUIRE(VALID_DISPATCH(disp));
3125 	REQUIRE(dispp != NULL && *dispp == NULL);
3126 
3127 	LOCK(&disp->lock);
3128 	disp->refcount++;
3129 	UNLOCK(&disp->lock);
3130 
3131 	*dispp = disp;
3132 }
3133 
3134 /*
3135  * It is important to lock the manager while we are deleting the dispatch,
3136  * since dns_dispatch_getudp will call dispatch_find, which returns to
3137  * the caller a dispatch but does not attach to it until later.  _getudp
3138  * locks the manager, however, so locking it here will keep us from attaching
3139  * to a dispatcher that is in the process of going away.
3140  */
3141 void
dns_dispatch_detach(dns_dispatch_t ** dispp)3142 dns_dispatch_detach(dns_dispatch_t **dispp) {
3143 	dns_dispatch_t *disp;
3144 	dispsocket_t *dispsock;
3145 	bool killit;
3146 
3147 	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3148 
3149 	disp = *dispp;
3150 	*dispp = NULL;
3151 
3152 	LOCK(&disp->lock);
3153 
3154 	INSIST(disp->refcount > 0);
3155 	disp->refcount--;
3156 	if (disp->refcount == 0) {
3157 		if (disp->recv_pending > 0)
3158 			isc_socket_cancel(disp->socket, disp->task[0],
3159 					  ISC_SOCKCANCEL_RECV);
3160 		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3161 		     dispsock != NULL;
3162 		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3163 			isc_socket_cancel(dispsock->socket, dispsock->task,
3164 					  ISC_SOCKCANCEL_RECV);
3165 		}
3166 		disp->shutting_down = 1;
3167 	}
3168 
3169 	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3170 
3171 	killit = destroy_disp_ok(disp);
3172 	UNLOCK(&disp->lock);
3173 	if (killit)
3174 		isc_task_send(disp->task[0], &disp->ctlevent);
3175 }
3176 
3177 isc_result_t
dns_dispatch_addresponse2(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp,isc_socketmgr_t * sockmgr)3178 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3179 			  isc_task_t *task, isc_taskaction_t action, void *arg,
3180 			  dns_messageid_t *idp, dns_dispentry_t **resp,
3181 			  isc_socketmgr_t *sockmgr)
3182 {
3183 	return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3184 					  idp, resp, sockmgr));
3185 }
3186 
3187 isc_result_t
dns_dispatch_addresponse3(dns_dispatch_t * disp,unsigned int options,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp,isc_socketmgr_t * sockmgr)3188 dns_dispatch_addresponse3(dns_dispatch_t *disp, unsigned int options,
3189 			  isc_sockaddr_t *dest, isc_task_t *task,
3190 			  isc_taskaction_t action, void *arg,
3191 			  dns_messageid_t *idp, dns_dispentry_t **resp,
3192 			  isc_socketmgr_t *sockmgr)
3193 {
3194 	dns_dispentry_t *res;
3195 	unsigned int bucket;
3196 	in_port_t localport = 0;
3197 	dns_messageid_t id;
3198 	int i;
3199 	bool ok;
3200 	dns_qid_t *qid;
3201 	dispsocket_t *dispsocket = NULL;
3202 	isc_result_t result;
3203 
3204 	REQUIRE(VALID_DISPATCH(disp));
3205 	REQUIRE(task != NULL);
3206 	REQUIRE(dest != NULL);
3207 	REQUIRE(resp != NULL && *resp == NULL);
3208 	REQUIRE(idp != NULL);
3209 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3210 		REQUIRE(sockmgr != NULL);
3211 
3212 	LOCK(&disp->lock);
3213 
3214 	if (disp->shutting_down == 1) {
3215 		UNLOCK(&disp->lock);
3216 		return (ISC_R_SHUTTINGDOWN);
3217 	}
3218 
3219 	if (disp->requests >= disp->maxrequests) {
3220 		UNLOCK(&disp->lock);
3221 		return (ISC_R_QUOTA);
3222 	}
3223 
3224 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3225 	    disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3226 		dispsocket_t *oldestsocket;
3227 		dns_dispentry_t *oldestresp;
3228 		dns_dispatchevent_t *rev;
3229 
3230 		/*
3231 		 * Kill oldest outstanding query if the number of sockets
3232 		 * exceeds the quota to keep the room for new queries.
3233 		 */
3234 		oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3235 		oldestresp = oldestsocket->resp;
3236 		if (oldestresp != NULL && !oldestresp->item_out) {
3237 			rev = allocate_devent(oldestresp->disp);
3238 			if (rev != NULL) {
3239 				rev->buffer.base = NULL;
3240 				rev->result = ISC_R_CANCELED;
3241 				rev->id = oldestresp->id;
3242 				ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3243 					       NULL, DNS_EVENT_DISPATCH,
3244 					       oldestresp->action,
3245 					       oldestresp->arg, oldestresp,
3246 					       NULL, NULL);
3247 				oldestresp->item_out = true;
3248 				isc_task_send(oldestresp->task,
3249 					      ISC_EVENT_PTR(&rev));
3250 				inc_stats(disp->mgr,
3251 					  dns_resstatscounter_dispabort);
3252 			}
3253 		}
3254 
3255 		/*
3256 		 * Move this entry to the tail so that it won't (easily) be
3257 		 * examined before actually being canceled.
3258 		 */
3259 		ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3260 		ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3261 	}
3262 
3263 	qid = DNS_QID(disp);
3264 
3265 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3266 		/*
3267 		 * Get a separate UDP socket with a random port number.
3268 		 */
3269 		result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3270 					&localport);
3271 		if (result != ISC_R_SUCCESS) {
3272 			UNLOCK(&disp->lock);
3273 			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3274 			return (result);
3275 		}
3276 	} else {
3277 		localport = disp->localport;
3278 	}
3279 
3280 	/*
3281 	 * Try somewhat hard to find an unique ID unless FIXEDID is set
3282 	 * in which case we use the id passed in via *idp.
3283 	 */
3284 	LOCK(&qid->lock);
3285 	if ((options & DNS_DISPATCHOPT_FIXEDID) != 0)
3286 		id = *idp;
3287 	else
3288 		id = (dns_messageid_t)isc_rng_random(DISP_RNGCTX(disp));
3289 	ok = false;
3290 	i = 0;
3291 	do {
3292 		bucket = dns_hash(qid, dest, id, localport);
3293 		if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3294 			ok = true;
3295 			break;
3296 		}
3297 		if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0)
3298 			break;
3299 		id += qid->qid_increment;
3300 		id &= 0x0000ffff;
3301 	} while (i++ < 64);
3302 	UNLOCK(&qid->lock);
3303 
3304 	if (!ok) {
3305 		UNLOCK(&disp->lock);
3306 		return (ISC_R_NOMORE);
3307 	}
3308 
3309 	res = isc_mempool_get(disp->mgr->rpool);
3310 	if (res == NULL) {
3311 		if (dispsocket != NULL)
3312 			destroy_dispsocket(disp, &dispsocket);
3313 		UNLOCK(&disp->lock);
3314 		return (ISC_R_NOMEMORY);
3315 	}
3316 
3317 	disp->refcount++;
3318 	disp->requests++;
3319 	res->task = NULL;
3320 	isc_task_attach(task, &res->task);
3321 	res->disp = disp;
3322 	res->id = id;
3323 	res->port = localport;
3324 	res->bucket = bucket;
3325 	res->host = *dest;
3326 	res->action = action;
3327 	res->arg = arg;
3328 	res->dispsocket = dispsocket;
3329 	if (dispsocket != NULL)
3330 		dispsocket->resp = res;
3331 	res->item_out = false;
3332 	ISC_LIST_INIT(res->items);
3333 	ISC_LINK_INIT(res, link);
3334 	res->magic = RESPONSE_MAGIC;
3335 
3336 	LOCK(&qid->lock);
3337 	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3338 	UNLOCK(&qid->lock);
3339 
3340 	inc_stats(disp->mgr, (qid == disp->mgr->qid) ?
3341 			     dns_resstatscounter_disprequdp :
3342 			     dns_resstatscounter_dispreqtcp);
3343 
3344 	request_log(disp, res, LVL(90),
3345 		    "attached to task %p", res->task);
3346 
3347 	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3348 	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3349 		result = startrecv(disp, dispsocket);
3350 		if (result != ISC_R_SUCCESS) {
3351 			LOCK(&qid->lock);
3352 			ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3353 			UNLOCK(&qid->lock);
3354 
3355 			if (dispsocket != NULL)
3356 				destroy_dispsocket(disp, &dispsocket);
3357 
3358 			disp->refcount--;
3359 			disp->requests--;
3360 
3361 			dec_stats(disp->mgr, (qid == disp->mgr->qid) ?
3362 					     dns_resstatscounter_disprequdp :
3363 					     dns_resstatscounter_dispreqtcp);
3364 
3365 			UNLOCK(&disp->lock);
3366 			isc_task_detach(&res->task);
3367 			isc_mempool_put(disp->mgr->rpool, res);
3368 			return (result);
3369 		}
3370 	}
3371 
3372 	if (dispsocket != NULL)
3373 		ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3374 
3375 	UNLOCK(&disp->lock);
3376 
3377 	*idp = id;
3378 	*resp = res;
3379 
3380 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3381 		INSIST(res->dispsocket != NULL);
3382 
3383 	return (ISC_R_SUCCESS);
3384 }
3385 
3386 isc_result_t
dns_dispatch_addresponse(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp)3387 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3388 			 isc_task_t *task, isc_taskaction_t action, void *arg,
3389 			 dns_messageid_t *idp, dns_dispentry_t **resp)
3390 {
3391 	REQUIRE(VALID_DISPATCH(disp));
3392 	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3393 
3394 	return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3395 					  idp, resp, NULL));
3396 }
3397 
3398 void
dns_dispatch_starttcp(dns_dispatch_t * disp)3399 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3400 
3401 	REQUIRE(VALID_DISPATCH(disp));
3402 
3403 	dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3404 
3405 	LOCK(&disp->lock);
3406 	if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) {
3407 		disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3408 		(void)startrecv(disp, NULL);
3409 	}
3410 	UNLOCK(&disp->lock);
3411 }
3412 
3413 isc_result_t
dns_dispatch_getnext(dns_dispentry_t * resp,dns_dispatchevent_t ** sockevent)3414 dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) {
3415 	dns_dispatch_t *disp;
3416 	dns_dispatchevent_t *ev;
3417 
3418 	REQUIRE(VALID_RESPONSE(resp));
3419 	REQUIRE(sockevent != NULL && *sockevent != NULL);
3420 
3421 	disp = resp->disp;
3422 	REQUIRE(VALID_DISPATCH(disp));
3423 
3424 	ev = *sockevent;
3425 	*sockevent = NULL;
3426 
3427 	LOCK(&disp->lock);
3428 
3429 	REQUIRE(resp->item_out == true);
3430 	resp->item_out = false;
3431 
3432 	if (ev->buffer.base != NULL)
3433 		free_buffer(disp, ev->buffer.base, ev->buffer.length);
3434 	free_devent(disp, ev);
3435 
3436 	if (disp->shutting_down == 1) {
3437 		UNLOCK(&disp->lock);
3438 		return (ISC_R_SHUTTINGDOWN);
3439 	}
3440 	ev = ISC_LIST_HEAD(resp->items);
3441 	if (ev != NULL) {
3442 		ISC_LIST_UNLINK(resp->items, ev, ev_link);
3443 		ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3444 			       resp->action, resp->arg, resp, NULL, NULL);
3445 		request_log(disp, resp, LVL(90),
3446 			    "[c] Sent event %p buffer %p len %d to task %p",
3447 			    ev, ev->buffer.base, ev->buffer.length,
3448 			    resp->task);
3449 		resp->item_out = true;
3450 		isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3451 	}
3452 	UNLOCK(&disp->lock);
3453 	return (ISC_R_SUCCESS);
3454 }
3455 
3456 void
dns_dispatch_removeresponse(dns_dispentry_t ** resp,dns_dispatchevent_t ** sockevent)3457 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3458 			    dns_dispatchevent_t **sockevent)
3459 {
3460 	dns_dispatchmgr_t *mgr;
3461 	dns_dispatch_t *disp;
3462 	dns_dispentry_t *res;
3463 	dispsocket_t *dispsock;
3464 	dns_dispatchevent_t *ev;
3465 	unsigned int bucket;
3466 	bool killit;
3467 	unsigned int n;
3468 	isc_eventlist_t events;
3469 	dns_qid_t *qid;
3470 
3471 	REQUIRE(resp != NULL);
3472 	REQUIRE(VALID_RESPONSE(*resp));
3473 
3474 	res = *resp;
3475 	*resp = NULL;
3476 
3477 	disp = res->disp;
3478 	REQUIRE(VALID_DISPATCH(disp));
3479 	mgr = disp->mgr;
3480 	REQUIRE(VALID_DISPATCHMGR(mgr));
3481 
3482 	qid = DNS_QID(disp);
3483 
3484 	if (sockevent != NULL) {
3485 		REQUIRE(*sockevent != NULL);
3486 		ev = *sockevent;
3487 		*sockevent = NULL;
3488 	} else {
3489 		ev = NULL;
3490 	}
3491 
3492 	LOCK(&disp->lock);
3493 
3494 	INSIST(disp->requests > 0);
3495 	disp->requests--;
3496 	dec_stats(disp->mgr, (qid == disp->mgr->qid) ?
3497 			     dns_resstatscounter_disprequdp :
3498 			     dns_resstatscounter_dispreqtcp);
3499 	INSIST(disp->refcount > 0);
3500 	disp->refcount--;
3501 	if (disp->refcount == 0) {
3502 		if (disp->recv_pending > 0)
3503 			isc_socket_cancel(disp->socket, disp->task[0],
3504 					  ISC_SOCKCANCEL_RECV);
3505 		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3506 		     dispsock != NULL;
3507 		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3508 			isc_socket_cancel(dispsock->socket, dispsock->task,
3509 					  ISC_SOCKCANCEL_RECV);
3510 		}
3511 		disp->shutting_down = 1;
3512 	}
3513 
3514 	bucket = res->bucket;
3515 
3516 	LOCK(&qid->lock);
3517 	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3518 	UNLOCK(&qid->lock);
3519 
3520 	if (ev == NULL && res->item_out) {
3521 		/*
3522 		 * We've posted our event, but the caller hasn't gotten it
3523 		 * yet.  Take it back.
3524 		 */
3525 		ISC_LIST_INIT(events);
3526 		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3527 				    NULL, &events);
3528 		/*
3529 		 * We had better have gotten it back.
3530 		 */
3531 		INSIST(n == 1);
3532 		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3533 	}
3534 
3535 	if (ev != NULL) {
3536 		REQUIRE(res->item_out == true);
3537 		res->item_out = false;
3538 		if (ev->buffer.base != NULL)
3539 			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3540 		free_devent(disp, ev);
3541 	}
3542 
3543 	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3544 	isc_task_detach(&res->task);
3545 
3546 	if (res->dispsocket != NULL) {
3547 		isc_socket_cancel(res->dispsocket->socket,
3548 				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3549 		res->dispsocket->resp = NULL;
3550 	}
3551 
3552 	/*
3553 	 * Free any buffered responses as well
3554 	 */
3555 	ev = ISC_LIST_HEAD(res->items);
3556 	while (ev != NULL) {
3557 		ISC_LIST_UNLINK(res->items, ev, ev_link);
3558 		if (ev->buffer.base != NULL)
3559 			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3560 		free_devent(disp, ev);
3561 		ev = ISC_LIST_HEAD(res->items);
3562 	}
3563 	res->magic = 0;
3564 	isc_mempool_put(disp->mgr->rpool, res);
3565 	if (disp->shutting_down == 1)
3566 		do_cancel(disp);
3567 	else
3568 		(void)startrecv(disp, NULL);
3569 
3570 	killit = destroy_disp_ok(disp);
3571 	UNLOCK(&disp->lock);
3572 	if (killit)
3573 		isc_task_send(disp->task[0], &disp->ctlevent);
3574 }
3575 
3576 /*
3577  * disp must be locked.
3578  */
3579 static void
do_cancel(dns_dispatch_t * disp)3580 do_cancel(dns_dispatch_t *disp) {
3581 	dns_dispatchevent_t *ev;
3582 	dns_dispentry_t *resp;
3583 	dns_qid_t *qid;
3584 
3585 	if (disp->shutdown_out == 1)
3586 		return;
3587 
3588 	qid = DNS_QID(disp);
3589 
3590 	/*
3591 	 * Search for the first response handler without packets outstanding
3592 	 * unless a specific handler is given.
3593 	 */
3594 	LOCK(&qid->lock);
3595 	for (resp = linear_first(qid);
3596 	     resp != NULL && resp->item_out;
3597 	     /* Empty. */)
3598 		resp = linear_next(qid, resp);
3599 
3600 	/*
3601 	 * No one to send the cancel event to, so nothing to do.
3602 	 */
3603 	if (resp == NULL)
3604 		goto unlock;
3605 
3606 	/*
3607 	 * Send the shutdown failsafe event to this resp.
3608 	 */
3609 	ev = disp->failsafe_ev;
3610 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3611 		       resp->action, resp->arg, resp, NULL, NULL);
3612 	ev->result = disp->shutdown_why;
3613 	ev->buffer.base = NULL;
3614 	ev->buffer.length = 0;
3615 	disp->shutdown_out = 1;
3616 	request_log(disp, resp, LVL(10),
3617 		    "cancel: failsafe event %p -> task %p",
3618 		    ev, resp->task);
3619 	resp->item_out = true;
3620 	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3621  unlock:
3622 	UNLOCK(&qid->lock);
3623 }
3624 
3625 isc_socket_t *
dns_dispatch_getsocket(dns_dispatch_t * disp)3626 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3627 	REQUIRE(VALID_DISPATCH(disp));
3628 
3629 	return (disp->socket);
3630 }
3631 
3632 isc_socket_t *
dns_dispatch_getentrysocket(dns_dispentry_t * resp)3633 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3634 	REQUIRE(VALID_RESPONSE(resp));
3635 
3636 	if (resp->dispsocket != NULL)
3637 		return (resp->dispsocket->socket);
3638 	else
3639 		return (NULL);
3640 }
3641 
3642 isc_result_t
dns_dispatch_getlocaladdress(dns_dispatch_t * disp,isc_sockaddr_t * addrp)3643 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3644 
3645 	REQUIRE(VALID_DISPATCH(disp));
3646 	REQUIRE(addrp != NULL);
3647 
3648 	if (disp->socktype == isc_sockettype_udp) {
3649 		*addrp = disp->local;
3650 		return (ISC_R_SUCCESS);
3651 	}
3652 	return (ISC_R_NOTIMPLEMENTED);
3653 }
3654 
3655 void
dns_dispatch_cancel(dns_dispatch_t * disp)3656 dns_dispatch_cancel(dns_dispatch_t *disp) {
3657 	REQUIRE(VALID_DISPATCH(disp));
3658 
3659 	LOCK(&disp->lock);
3660 
3661 	if (disp->shutting_down == 1) {
3662 		UNLOCK(&disp->lock);
3663 		return;
3664 	}
3665 
3666 	disp->shutdown_why = ISC_R_CANCELED;
3667 	disp->shutting_down = 1;
3668 	do_cancel(disp);
3669 
3670 	UNLOCK(&disp->lock);
3671 
3672 	return;
3673 }
3674 
3675 unsigned int
dns_dispatch_getattributes(dns_dispatch_t * disp)3676 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3677 	REQUIRE(VALID_DISPATCH(disp));
3678 
3679 	/*
3680 	 * We don't bother locking disp here; it's the caller's responsibility
3681 	 * to use only non volatile flags.
3682 	 */
3683 	return (disp->attributes);
3684 }
3685 
3686 void
dns_dispatch_changeattributes(dns_dispatch_t * disp,unsigned int attributes,unsigned int mask)3687 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3688 			      unsigned int attributes, unsigned int mask)
3689 {
3690 	REQUIRE(VALID_DISPATCH(disp));
3691 	/* Exclusive attribute can only be set on creation */
3692 	REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3693 	/* Also, a dispatch with randomport specified cannot start listening */
3694 	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3695 		(attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3696 
3697 	/* XXXMLG
3698 	 * Should check for valid attributes here!
3699 	 */
3700 
3701 	LOCK(&disp->lock);
3702 
3703 	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3704 		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3705 		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3706 			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3707 			(void)startrecv(disp, NULL);
3708 		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3709 			   == 0 &&
3710 			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3711 			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3712 			if (disp->recv_pending != 0)
3713 				isc_socket_cancel(disp->socket, disp->task[0],
3714 						  ISC_SOCKCANCEL_RECV);
3715 		}
3716 	}
3717 
3718 	disp->attributes &= ~mask;
3719 	disp->attributes |= (attributes & mask);
3720 	UNLOCK(&disp->lock);
3721 }
3722 
3723 void
dns_dispatch_importrecv(dns_dispatch_t * disp,isc_event_t * event)3724 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3725 	void *buf;
3726 	isc_socketevent_t *sevent, *newsevent;
3727 
3728 	REQUIRE(VALID_DISPATCH(disp));
3729 	REQUIRE(event != NULL);
3730 
3731 	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0)
3732 		return;
3733 
3734 	sevent = (isc_socketevent_t *)event;
3735 	INSIST(sevent->n <= disp->mgr->buffersize);
3736 
3737 	newsevent = (isc_socketevent_t *)
3738 		    isc_event_allocate(disp->mgr->mctx, NULL,
3739 				      DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3740 				      disp, sizeof(isc_socketevent_t));
3741 	if (newsevent == NULL)
3742 		return;
3743 
3744 	buf = allocate_udp_buffer(disp);
3745 	if (buf == NULL) {
3746 		isc_event_free(ISC_EVENT_PTR(&newsevent));
3747 		return;
3748 	}
3749 	memmove(buf, sevent->region.base, sevent->n);
3750 	newsevent->region.base = buf;
3751 	newsevent->region.length = disp->mgr->buffersize;
3752 	newsevent->n = sevent->n;
3753 	newsevent->result = sevent->result;
3754 	newsevent->address = sevent->address;
3755 	newsevent->timestamp = sevent->timestamp;
3756 	newsevent->pktinfo = sevent->pktinfo;
3757 	newsevent->attributes = sevent->attributes;
3758 
3759 	isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3760 }
3761 
3762 dns_dispatch_t *
dns_dispatchset_get(dns_dispatchset_t * dset)3763 dns_dispatchset_get(dns_dispatchset_t *dset) {
3764 	dns_dispatch_t *disp;
3765 
3766 	/* check that dispatch set is configured */
3767 	if (dset == NULL || dset->ndisp == 0)
3768 		return (NULL);
3769 
3770 	LOCK(&dset->lock);
3771 	disp = dset->dispatches[dset->cur];
3772 	dset->cur++;
3773 	if (dset->cur == dset->ndisp)
3774 		dset->cur = 0;
3775 	UNLOCK(&dset->lock);
3776 
3777 	return (disp);
3778 }
3779 
3780 isc_result_t
dns_dispatchset_create(isc_mem_t * mctx,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,dns_dispatch_t * source,dns_dispatchset_t ** dsetp,int n)3781 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3782 		       isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3783 		       dns_dispatchset_t **dsetp, int n)
3784 {
3785 	isc_result_t result;
3786 	dns_dispatchset_t *dset;
3787 	dns_dispatchmgr_t *mgr;
3788 	int i, j;
3789 
3790 	REQUIRE(VALID_DISPATCH(source));
3791 	REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3792 	REQUIRE(dsetp != NULL && *dsetp == NULL);
3793 
3794 	mgr = source->mgr;
3795 
3796 	dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3797 	if (dset == NULL)
3798 		return (ISC_R_NOMEMORY);
3799 	memset(dset, 0, sizeof(*dset));
3800 
3801 	result = isc_mutex_init(&dset->lock);
3802 	if (result != ISC_R_SUCCESS)
3803 		goto fail_alloc;
3804 
3805 	dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3806 	if (dset->dispatches == NULL) {
3807 		result = ISC_R_NOMEMORY;
3808 		goto fail_lock;
3809 	}
3810 
3811 	isc_mem_attach(mctx, &dset->mctx);
3812 	dset->ndisp = n;
3813 	dset->cur = 0;
3814 
3815 	dset->dispatches[0] = NULL;
3816 	dns_dispatch_attach(source, &dset->dispatches[0]);
3817 
3818 	LOCK(&mgr->lock);
3819 	for (i = 1; i < n; i++) {
3820 		dset->dispatches[i] = NULL;
3821 		result = dispatch_createudp(mgr, sockmgr, taskmgr,
3822 					    &source->local,
3823 					    source->maxrequests,
3824 					    source->attributes,
3825 					    &dset->dispatches[i],
3826 					    source->socket);
3827 		if (result != ISC_R_SUCCESS)
3828 			goto fail;
3829 	}
3830 
3831 	UNLOCK(&mgr->lock);
3832 	*dsetp = dset;
3833 
3834 	return (ISC_R_SUCCESS);
3835 
3836  fail:
3837 	UNLOCK(&mgr->lock);
3838 
3839 	for (j = 0; j < i; j++)
3840 		dns_dispatch_detach(&(dset->dispatches[j]));
3841 	isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3842 	if (dset->mctx == mctx)
3843 		isc_mem_detach(&dset->mctx);
3844 
3845  fail_lock:
3846 	DESTROYLOCK(&dset->lock);
3847 
3848  fail_alloc:
3849 	isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3850 	return (result);
3851 }
3852 
3853 void
dns_dispatchset_cancelall(dns_dispatchset_t * dset,isc_task_t * task)3854 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3855 	int i;
3856 
3857 	REQUIRE(dset != NULL);
3858 
3859 	for (i = 0; i < dset->ndisp; i++) {
3860 		isc_socket_t *sock;
3861 		sock = dns_dispatch_getsocket(dset->dispatches[i]);
3862 		isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3863 	}
3864 }
3865 
3866 void
dns_dispatchset_destroy(dns_dispatchset_t ** dsetp)3867 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3868 	dns_dispatchset_t *dset;
3869 	int i;
3870 
3871 	REQUIRE(dsetp != NULL && *dsetp != NULL);
3872 
3873 	dset = *dsetp;
3874 	for (i = 0; i < dset->ndisp; i++)
3875 		dns_dispatch_detach(&(dset->dispatches[i]));
3876 	isc_mem_put(dset->mctx, dset->dispatches,
3877 		    sizeof(dns_dispatch_t *) * dset->ndisp);
3878 	DESTROYLOCK(&dset->lock);
3879 	isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3880 
3881 	*dsetp = NULL;
3882 }
3883 
3884 void
dns_dispatch_setdscp(dns_dispatch_t * disp,isc_dscp_t dscp)3885 dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) {
3886 	REQUIRE(VALID_DISPATCH(disp));
3887 	disp->dscp = dscp;
3888 }
3889 
3890 isc_dscp_t
dns_dispatch_getdscp(dns_dispatch_t * disp)3891 dns_dispatch_getdscp(dns_dispatch_t *disp) {
3892 	REQUIRE(VALID_DISPATCH(disp));
3893 	return (disp->dscp);
3894 }
3895 
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on the manager's
 * list together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr)
{
	dns_dispatch_t *disp;
	char addrtext[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link))
	{
		isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", disp, addrtext);
	}
}
#endif
3910