xref: /minix/external/bsd/bind/dist/lib/dns/dispatch.c (revision 00b67f09)
1 /*	$NetBSD: dispatch.c,v 1.10 2015/07/08 17:28:58 christos Exp $	*/
2 
3 /*
4  * Copyright (C) 2004-2009, 2011-2015  Internet Systems Consortium, Inc. ("ISC")
5  * Copyright (C) 1999-2003  Internet Software Consortium.
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /* Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp  */
21 
22 /*! \file */
23 
24 #include <config.h>
25 
26 #include <stdlib.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29 #include <stdlib.h>
30 
31 #include <isc/entropy.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/portset.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/socket.h>
38 #include <isc/stats.h>
39 #include <isc/string.h>
40 #include <isc/task.h>
41 #include <isc/time.h>
42 #include <isc/util.h>
43 
44 #include <dns/acl.h>
45 #include <dns/dispatch.h>
46 #include <dns/events.h>
47 #include <dns/log.h>
48 #include <dns/message.h>
49 #include <dns/portlist.h>
50 #include <dns/stats.h>
51 #include <dns/tcpmsg.h>
52 #include <dns/types.h>
53 
54 typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
55 
56 typedef struct dispsocket		dispsocket_t;
57 typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;
58 
59 typedef struct dispportentry		dispportentry_t;
60 typedef ISC_LIST(dispportentry_t)	dispportlist_t;
61 
62 /* ARC4 Random generator state */
63 typedef struct arc4ctx {
64 	isc_uint8_t	i;
65 	isc_uint8_t	j;
66 	isc_uint8_t	s[256];
67 	int		count;
68 	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
69 	isc_mutex_t	*lock;
70 } arc4ctx_t;
71 
72 typedef struct dns_qid {
73 	unsigned int	magic;
74 	unsigned int	qid_nbuckets;	/*%< hash table size */
75 	unsigned int	qid_increment;	/*%< id increment on collision */
76 	isc_mutex_t	lock;
77 	dns_displist_t	*qid_table;	/*%< the table itself */
78 	dispsocketlist_t *sock_table;	/*%< socket table */
79 } dns_qid_t;
80 
81 struct dns_dispatchmgr {
82 	/* Unlocked. */
83 	unsigned int			magic;
84 	isc_mem_t		       *mctx;
85 	dns_acl_t		       *blackhole;
86 	dns_portlist_t		       *portlist;
87 	isc_stats_t		       *stats;
88 	isc_entropy_t		       *entropy; /*%< entropy source */
89 
90 	/* Locked by "lock". */
91 	isc_mutex_t			lock;
92 	unsigned int			state;
93 	ISC_LIST(dns_dispatch_t)	list;
94 
95 	/* Locked by arc4_lock. */
96 	isc_mutex_t			arc4_lock;
97 	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */
98 
99 	/* locked by buffer lock */
100 	dns_qid_t			*qid;
101 	isc_mutex_t			buffer_lock;
102 	unsigned int			buffers;    /*%< allocated buffers */
103 	unsigned int			buffersize; /*%< size of each buffer */
104 	unsigned int			maxbuffers; /*%< max buffers */
105 
106 	/* Locked internally. */
107 	isc_mutex_t			depool_lock;
108 	isc_mempool_t		       *depool;	/*%< pool for dispatch events */
109 	isc_mutex_t			rpool_lock;
110 	isc_mempool_t		       *rpool;	/*%< pool for replies */
111 	isc_mutex_t			dpool_lock;
112 	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
113 	isc_mutex_t			bpool_lock;
114 	isc_mempool_t		       *bpool;	/*%< pool for buffers */
115 	isc_mutex_t			spool_lock;
116 	isc_mempool_t		       *spool;	/*%< pool for dispsocks */
117 
118 	/*%
119 	 * Locked by qid->lock if qid exists; otherwise, can be used without
120 	 * being locked.
121 	 * Memory footprint considerations: this is a simple implementation of
122 	 * available ports, i.e., an ordered array of the actual port numbers.
123 	 * This will require about 256KB of memory in the worst case (128KB for
124 	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
125 	 * more sophisticated way such as a list (or array) of ranges that are
126 	 * searched to identify a specific port.  Our decision here is the saved
127 	 * memory isn't worth the implementation complexity, considering the
128 	 * fact that the whole BIND9 process (which is mainly named) already
129 	 * requires a pretty large memory footprint.  We may, however, have to
130 	 * revisit the decision when we want to use it as a separate module for
131 	 * an environment where memory requirement is severer.
132 	 */
133 	in_port_t	*v4ports;	/*%< available ports for IPv4 */
134 	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
135 	in_port_t	*v6ports;	/*%< available ports for IPv4 */
136 	unsigned int	nv6ports;	/*%< # of available ports for IPv4 */
137 };
138 
/* Bit in dns_dispatchmgr 'state', and the predicate that tests it. */
#define MGR_SHUTTINGDOWN		0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when dispatch 'd' has the DNS_DISPATCHATTR_PRIVATE attribute set. */
#define IS_PRIVATE(d)	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
143 
144 struct dns_dispentry {
145 	unsigned int			magic;
146 	dns_dispatch_t		       *disp;
147 	dns_messageid_t			id;
148 	in_port_t			port;
149 	unsigned int			bucket;
150 	isc_sockaddr_t			host;
151 	isc_task_t		       *task;
152 	isc_taskaction_t		action;
153 	void			       *arg;
154 	isc_boolean_t			item_out;
155 	dispsocket_t			*dispsocket;
156 	ISC_LIST(dns_dispatchevent_t)	items;
157 	ISC_LINK(dns_dispentry_t)	link;
158 };
159 
/*%
 * Upper limit on the number of dispatch sockets kept pooled for reuse.
 * The right value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) is set to twice this
 * value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS			2048
#endif

/*%
 * Quota on the number of dispatch sockets.  Once a dispatch holds more
 * sockets than this, new queries purge the oldest ones, so that a massive
 * number of outstanding queries cannot starve subsequent queries
 * (especially when the older ones are slow and end in a timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif
180 
181 struct dispsocket {
182 	unsigned int			magic;
183 	isc_socket_t			*socket;
184 	dns_dispatch_t			*disp;
185 	isc_sockaddr_t			host;
186 	in_port_t			localport; /* XXX: should be removed later */
187 	dispportentry_t			*portentry;
188 	dns_dispentry_t			*resp;
189 	isc_task_t			*task;
190 	ISC_LINK(dispsocket_t)		link;
191 	unsigned int			bucket;
192 	ISC_LINK(dispsocket_t)		blink;
193 };
194 
195 /*%
196  * A port table entry.  We remember every port we first open in a table with a
197  * reference counter so that we can 'reuse' the same port (with different
198  * destination addresses) using the SO_REUSEADDR socket option.
199  */
200 struct dispportentry {
201 	in_port_t			port;
202 	unsigned int			refs;
203 	ISC_LINK(struct dispportentry)	link;
204 };
205 
/* Number of buckets in each dispatch's port table. */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE	1024
#endif

/* Sentinel for a bucket number that refers to no bucket. */
#define INVALID_BUCKET		(0xffffdead)

/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2, as it will divide 32 bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64
218 
219 struct dns_dispatch {
220 	/* Unlocked. */
221 	unsigned int		magic;		/*%< magic */
222 	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
223 	int			ntasks;
224 	/*%
225 	 * internal task buckets.  We use multiple tasks to distribute various
226 	 * socket events well when using separate dispatch sockets.  We use the
227 	 * 1st task (task[0]) for internal control events.
228 	 */
229 	isc_task_t	       *task[MAX_INTERNAL_TASKS];
230 	isc_socket_t	       *socket;		/*%< isc socket attached to */
231 	isc_sockaddr_t		local;		/*%< local address */
232 	in_port_t		localport;	/*%< local UDP port */
233 	isc_dscp_t		dscp;		/*%< "listen-on" DSCP value */
234 	unsigned int		maxrequests;	/*%< max requests */
235 	isc_event_t	       *ctlevent;
236 
237 	isc_mutex_t		sepool_lock;
238 	isc_mempool_t	       *sepool;		/*%< pool for socket events */
239 
240 	/*% Locked by mgr->lock. */
241 	ISC_LINK(dns_dispatch_t) link;
242 
243 	/* Locked by "lock". */
244 	isc_mutex_t		lock;		/*%< locks all below */
245 	isc_sockettype_t	socktype;
246 	unsigned int		attributes;
247 	unsigned int		refcount;	/*%< number of users */
248 	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
249 	unsigned int		shutting_down : 1,
250 				shutdown_out : 1,
251 				connected : 1,
252 				tcpmsg_valid : 1,
253 				recv_pending : 1; /*%< is a recv() pending? */
254 	isc_result_t		shutdown_why;
255 	ISC_LIST(dispsocket_t)	activesockets;
256 	ISC_LIST(dispsocket_t)	inactivesockets;
257 	unsigned int		nsockets;
258 	unsigned int		requests;	/*%< how many requests we have */
259 	unsigned int		tcpbuffers;	/*%< allocated buffers */
260 	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
261 	dns_qid_t		*qid;
262 	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
263 	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
264 	isc_mempool_t		*portpool;	/*%< port table entries  */
265 };
266 
/*
 * Magic numbers and validity checks, one pair per structure type.
 */

/* dns_qid_t */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

/* dns_dispentry_t */
#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

/* dispsocket_t */
#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

/* dns_dispatch_t */
#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

/* dns_dispatchmgr_t */
#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
281 
/*%
 * Select the QID table for a dispatch: a TCP dispatch uses its own table,
 * any other socket type uses the manager-wide one.
 *
 * The whole conditional is parenthesized so that the macro can be used
 * safely as an operand of a larger expression.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)

/*%
 * Select the ARC4 context for a dispatch: a UDP dispatch uses its own
 * context, any other socket type uses the manager-wide one.
 */
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
286 
/*%
 * Locking a query port buffer is a bit tricky.  The buffer is accessed
 * without locking until qid is created.  Technically, there is a possibility
 * of a race between the creation of qid and access to the port buffer; in
 * practice, however, this should be safe because qid isn't created until the
 * first dispatch is created and there should be no contention until then.
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement; a bare 'if' would capture a following 'else'.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
296 
297 /*
298  * Statics.
299  */
300 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
301 				     dns_messageid_t, in_port_t, unsigned int);
302 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
303 static void destroy_disp(isc_task_t *task, isc_event_t *event);
304 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
305 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
306 static void udp_exrecv(isc_task_t *, isc_event_t *);
307 static void udp_shrecv(isc_task_t *, isc_event_t *);
308 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
309 static void tcp_recv(isc_task_t *, isc_event_t *);
310 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
311 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
312 			     in_port_t);
313 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
314 static void *allocate_udp_buffer(dns_dispatch_t *disp);
315 static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
316 static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
317 static void do_cancel(dns_dispatch_t *disp);
318 static dns_dispentry_t *linear_first(dns_qid_t *disp);
319 static dns_dispentry_t *linear_next(dns_qid_t *disp,
320 				    dns_dispentry_t *resp);
321 static void dispatch_free(dns_dispatch_t **dispp);
322 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
323 				  dns_dispatch_t *disp,
324 				  isc_socketmgr_t *sockmgr,
325 				  isc_sockaddr_t *localaddr,
326 				  isc_socket_t **sockp,
327 				  isc_socket_t *dup_socket);
328 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
329 				       isc_socketmgr_t *sockmgr,
330 				       isc_taskmgr_t *taskmgr,
331 				       isc_sockaddr_t *localaddr,
332 				       unsigned int maxrequests,
333 				       unsigned int attributes,
334 				       dns_dispatch_t **dispp,
335 				       isc_socket_t *dup_socket);
336 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
337 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
338 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
339 				 unsigned int increment, dns_qid_t **qidp,
340 				 isc_boolean_t needaddrtable);
341 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
342 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
343 				unsigned int options, isc_socket_t **sockp,
344 				isc_socket_t *dup_socket);
345 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
346 				   isc_sockaddr_t *sockaddrp);
347 
348 #define LVL(x) ISC_LOG_DEBUG(x)
349 
350 static void
351 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
352      ISC_FORMAT_PRINTF(3, 4);
353 
354 static void
mgr_log(dns_dispatchmgr_t * mgr,int level,const char * fmt,...)355 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
356 	char msgbuf[2048];
357 	va_list ap;
358 
359 	if (! isc_log_wouldlog(dns_lctx, level))
360 		return;
361 
362 	va_start(ap, fmt);
363 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
364 	va_end(ap);
365 
366 	isc_log_write(dns_lctx,
367 		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
368 		      level, "dispatchmgr %p: %s", mgr, msgbuf);
369 }
370 
371 static inline void
inc_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)372 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
373 	if (mgr->stats != NULL)
374 		isc_stats_increment(mgr->stats, counter);
375 }
376 
377 static inline void
dec_stats(dns_dispatchmgr_t * mgr,isc_statscounter_t counter)378 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
379 	if (mgr->stats != NULL)
380 		isc_stats_decrement(mgr->stats, counter);
381 }
382 
383 static void
384 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
385      ISC_FORMAT_PRINTF(3, 4);
386 
387 static void
dispatch_log(dns_dispatch_t * disp,int level,const char * fmt,...)388 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
389 	char msgbuf[2048];
390 	va_list ap;
391 
392 	if (! isc_log_wouldlog(dns_lctx, level))
393 		return;
394 
395 	va_start(ap, fmt);
396 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
397 	va_end(ap);
398 
399 	isc_log_write(dns_lctx,
400 		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
401 		      level, "dispatch %p: %s", disp, msgbuf);
402 }
403 
404 static void
405 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
406 	    int level, const char *fmt, ...)
407      ISC_FORMAT_PRINTF(4, 5);
408 
409 static void
request_log(dns_dispatch_t * disp,dns_dispentry_t * resp,int level,const char * fmt,...)410 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
411 	    int level, const char *fmt, ...)
412 {
413 	char msgbuf[2048];
414 	char peerbuf[256];
415 	va_list ap;
416 
417 	if (! isc_log_wouldlog(dns_lctx, level))
418 		return;
419 
420 	va_start(ap, fmt);
421 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
422 	va_end(ap);
423 
424 	if (VALID_RESPONSE(resp)) {
425 		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
426 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
427 			      DNS_LOGMODULE_DISPATCH, level,
428 			      "dispatch %p response %p %s: %s", disp, resp,
429 			      peerbuf, msgbuf);
430 	} else {
431 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
432 			      DNS_LOGMODULE_DISPATCH, level,
433 			      "dispatch %p req/resp %p: %s", disp, resp,
434 			      msgbuf);
435 	}
436 }
437 
438 /*%
439  * ARC4 random number generator derived from OpenBSD.
440  * Only dispatch_random() and dispatch_uniformrandom() are expected
441  * to be called from general dispatch routines; the rest of them are subroutines
442  * for these two.
443  *
444  * The original copyright follows:
445  * Copyright (c) 1996, David Mazieres <dm@uun.org>
446  * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
447  *
448  * Permission to use, copy, modify, and distribute this software for any
449  * purpose with or without fee is hereby granted, provided that the above
450  * copyright notice and this permission notice appear in all copies.
451  *
452  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
453  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
454  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
455  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
456  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
457  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
458  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
459  */
460 static void
dispatch_initrandom(arc4ctx_t * actx,isc_entropy_t * entropy,isc_mutex_t * lock)461 dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
462 		    isc_mutex_t *lock)
463 {
464 	int n;
465 	for (n = 0; n < 256; n++)
466 		actx->s[n] = n;
467 	actx->i = 0;
468 	actx->j = 0;
469 	actx->count = 0;
470 	actx->entropy = entropy; /* don't have to attach */
471 	actx->lock = lock;
472 }
473 
474 static void
dispatch_arc4addrandom(arc4ctx_t * actx,unsigned char * dat,int datlen)475 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
476 	int n;
477 	isc_uint8_t si;
478 
479 	actx->i--;
480 	for (n = 0; n < 256; n++) {
481 		actx->i = (actx->i + 1);
482 		si = actx->s[actx->i];
483 		actx->j = (actx->j + si + dat[n % datlen]);
484 		actx->s[actx->i] = actx->s[actx->j];
485 		actx->s[actx->j] = si;
486 	}
487 	actx->j = actx->i;
488 }
489 
490 static inline isc_uint8_t
dispatch_arc4get8(arc4ctx_t * actx)491 dispatch_arc4get8(arc4ctx_t *actx) {
492 	isc_uint8_t si, sj;
493 
494 	actx->i = (actx->i + 1);
495 	si = actx->s[actx->i];
496 	actx->j = (actx->j + si);
497 	sj = actx->s[actx->j];
498 	actx->s[actx->i] = sj;
499 	actx->s[actx->j] = si;
500 
501 	return (actx->s[(si + sj) & 0xff]);
502 }
503 
504 static inline isc_uint16_t
dispatch_arc4get16(arc4ctx_t * actx)505 dispatch_arc4get16(arc4ctx_t *actx) {
506 	isc_uint16_t val;
507 
508 	val = dispatch_arc4get8(actx) << 8;
509 	val |= dispatch_arc4get8(actx);
510 
511 	return (val);
512 }
513 
514 static void
dispatch_arc4stir(arc4ctx_t * actx)515 dispatch_arc4stir(arc4ctx_t *actx) {
516 	int i;
517 	union {
518 		unsigned char rnd[128];
519 		isc_uint32_t rnd32[32];
520 	} rnd;
521 	isc_result_t result;
522 
523 	if (actx->entropy != NULL) {
524 		/*
525 		 * We accept any quality of random data to avoid blocking.
526 		 */
527 		result = isc_entropy_getdata(actx->entropy, rnd.rnd,
528 					     sizeof(rnd), NULL, 0);
529 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
530 	} else {
531 		for (i = 0; i < 32; i++)
532 			isc_random_get(&rnd.rnd32[i]);
533 	}
534 	dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
535 
536 	/*
537 	 * Discard early keystream, as per recommendations in:
538 	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
539 	 */
540 	for (i = 0; i < 256; i++)
541 		(void)dispatch_arc4get8(actx);
542 
543 	/*
544 	 * Derived from OpenBSD's implementation.  The rationale is not clear,
545 	 * but should be conservative enough in safety, and reasonably large
546 	 * for efficiency.
547 	 */
548 	actx->count = 1600000;
549 }
550 
551 static isc_uint16_t
dispatch_random(arc4ctx_t * actx)552 dispatch_random(arc4ctx_t *actx) {
553 	isc_uint16_t result;
554 
555 	if (actx->lock != NULL)
556 		LOCK(actx->lock);
557 
558 	actx->count -= sizeof(isc_uint16_t);
559 	if (actx->count <= 0)
560 		dispatch_arc4stir(actx);
561 	result = dispatch_arc4get16(actx);
562 
563 	if (actx->lock != NULL)
564 		UNLOCK(actx->lock);
565 
566 	return (result);
567 }
568 
569 static isc_uint16_t
dispatch_uniformrandom(arc4ctx_t * actx,isc_uint16_t upper_bound)570 dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
571 	isc_uint16_t min, r;
572 
573 	if (upper_bound < 2)
574 		return (0);
575 
576 	/*
577 	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
578 	 * upper_bound and contain at least a half of the 16 bit range.
579 	 */
580 
581 	if (upper_bound > 0x8000)
582 		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
583 	else
584 		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
585 
586 	/*
587 	 * This could theoretically loop forever but each retry has
588 	 * p > 0.5 (worst case, usually far better) of selecting a
589 	 * number inside the range we need, so it should rarely need
590 	 * to re-roll.
591 	 */
592 	for (;;) {
593 		r = dispatch_random(actx);
594 		if (r >= min)
595 			break;
596 	}
597 
598 	return (r % upper_bound);
599 }
600 
601 /*
602  * Return a hash of the destination and message id.
603  */
604 static isc_uint32_t
dns_hash(dns_qid_t * qid,isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port)605 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
606 	 in_port_t port)
607 {
608 	unsigned int ret;
609 
610 	ret = isc_sockaddr_hash(dest, ISC_TRUE);
611 	ret ^= (id << 16) | port;
612 	ret %= qid->qid_nbuckets;
613 
614 	INSIST(ret < qid->qid_nbuckets);
615 
616 	return (ret);
617 }
618 
619 /*
620  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
621  */
622 static dns_dispentry_t *
linear_first(dns_qid_t * qid)623 linear_first(dns_qid_t *qid) {
624 	dns_dispentry_t *ret;
625 	unsigned int bucket;
626 
627 	bucket = 0;
628 
629 	while (bucket < qid->qid_nbuckets) {
630 		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
631 		if (ret != NULL)
632 			return (ret);
633 		bucket++;
634 	}
635 
636 	return (NULL);
637 }
638 
639 /*
640  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
641  * no more entries.
642  */
643 static dns_dispentry_t *
linear_next(dns_qid_t * qid,dns_dispentry_t * resp)644 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
645 	dns_dispentry_t *ret;
646 	unsigned int bucket;
647 
648 	ret = ISC_LIST_NEXT(resp, link);
649 	if (ret != NULL)
650 		return (ret);
651 
652 	bucket = resp->bucket;
653 	bucket++;
654 	while (bucket < qid->qid_nbuckets) {
655 		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
656 		if (ret != NULL)
657 			return (ret);
658 		bucket++;
659 	}
660 
661 	return (NULL);
662 }
663 
664 /*
665  * The dispatch must be locked.
666  */
667 static isc_boolean_t
destroy_disp_ok(dns_dispatch_t * disp)668 destroy_disp_ok(dns_dispatch_t *disp)
669 {
670 	if (disp->refcount != 0)
671 		return (ISC_FALSE);
672 
673 	if (disp->recv_pending != 0)
674 		return (ISC_FALSE);
675 
676 	if (!ISC_LIST_EMPTY(disp->activesockets))
677 		return (ISC_FALSE);
678 
679 	if (disp->shutting_down == 0)
680 		return (ISC_FALSE);
681 
682 	return (ISC_TRUE);
683 }
684 
685 /*
686  * Called when refcount reaches 0 (and safe to destroy).
687  *
688  * The dispatcher must be locked.
689  * The manager must not be locked.
690  */
691 static void
destroy_disp(isc_task_t * task,isc_event_t * event)692 destroy_disp(isc_task_t *task, isc_event_t *event) {
693 	dns_dispatch_t *disp;
694 	dns_dispatchmgr_t *mgr;
695 	isc_boolean_t killmgr;
696 	dispsocket_t *dispsocket;
697 	int i;
698 
699 	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
700 
701 	UNUSED(task);
702 
703 	disp = event->ev_arg;
704 	mgr = disp->mgr;
705 
706 	LOCK(&mgr->lock);
707 	ISC_LIST_UNLINK(mgr->list, disp, link);
708 
709 	dispatch_log(disp, LVL(90),
710 		     "shutting down; detaching from sock %p, task %p",
711 		     disp->socket, disp->task[0]); /* XXXX */
712 
713 	if (disp->sepool != NULL) {
714 		isc_mempool_destroy(&disp->sepool);
715 		(void)isc_mutex_destroy(&disp->sepool_lock);
716 	}
717 
718 	if (disp->socket != NULL)
719 		isc_socket_detach(&disp->socket);
720 	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
721 		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
722 		destroy_dispsocket(disp, &dispsocket);
723 	}
724 	for (i = 0; i < disp->ntasks; i++)
725 		isc_task_detach(&disp->task[i]);
726 	isc_event_free(&event);
727 
728 	dispatch_free(&disp);
729 
730 	killmgr = destroy_mgr_ok(mgr);
731 	UNLOCK(&mgr->lock);
732 	if (killmgr)
733 		destroy_mgr(&mgr);
734 }
735 
736 /*%
737  * Manipulate port table per dispatch: find an entry for a given port number,
738  * create a new entry, and decrement a given entry with possible clean-up.
739  */
740 static dispportentry_t *
port_search(dns_dispatch_t * disp,in_port_t port)741 port_search(dns_dispatch_t *disp, in_port_t port) {
742 	dispportentry_t *portentry;
743 
744 	REQUIRE(disp->port_table != NULL);
745 
746 	portentry = ISC_LIST_HEAD(disp->port_table[port %
747 						   DNS_DISPATCH_PORTTABLESIZE]);
748 	while (portentry != NULL) {
749 		if (portentry->port == port)
750 			return (portentry);
751 		portentry = ISC_LIST_NEXT(portentry, link);
752 	}
753 
754 	return (NULL);
755 }
756 
757 static dispportentry_t *
new_portentry(dns_dispatch_t * disp,in_port_t port)758 new_portentry(dns_dispatch_t *disp, in_port_t port) {
759 	dispportentry_t *portentry;
760 	dns_qid_t *qid;
761 
762 	REQUIRE(disp->port_table != NULL);
763 
764 	portentry = isc_mempool_get(disp->portpool);
765 	if (portentry == NULL)
766 		return (portentry);
767 
768 	portentry->port = port;
769 	portentry->refs = 1;
770 	ISC_LINK_INIT(portentry, link);
771 	qid = DNS_QID(disp);
772 	LOCK(&qid->lock);
773 	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
774 			portentry, link);
775 	UNLOCK(&qid->lock);
776 
777 	return (portentry);
778 }
779 
780 /*%
781  * The caller must not hold the qid->lock.
782  */
783 static void
deref_portentry(dns_dispatch_t * disp,dispportentry_t ** portentryp)784 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
785 	dispportentry_t *portentry = *portentryp;
786 	dns_qid_t *qid;
787 
788 	REQUIRE(disp->port_table != NULL);
789 	REQUIRE(portentry != NULL && portentry->refs > 0);
790 
791 	qid = DNS_QID(disp);
792 	LOCK(&qid->lock);
793 	portentry->refs--;
794 
795 	if (portentry->refs == 0) {
796 		ISC_LIST_UNLINK(disp->port_table[portentry->port %
797 						 DNS_DISPATCH_PORTTABLESIZE],
798 				portentry, link);
799 		isc_mempool_put(disp->portpool, portentry);
800 	}
801 
802 	/*
803 	 * Set '*portentryp' to NULL inside the lock so that
804 	 * dispsock->portentry does not change in socket_search.
805 	 */
806 	*portentryp = NULL;
807 
808 	UNLOCK(&qid->lock);
809 }
810 
811 /*%
812  * Find a dispsocket for socket address 'dest', and port number 'port'.
813  * Return NULL if no such entry exists.  Requires qid->lock to be held.
814  */
815 static dispsocket_t *
socket_search(dns_qid_t * qid,isc_sockaddr_t * dest,in_port_t port,unsigned int bucket)816 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
817 	      unsigned int bucket)
818 {
819 	dispsocket_t *dispsock;
820 
821 	REQUIRE(VALID_QID(qid));
822 	REQUIRE(bucket < qid->qid_nbuckets);
823 
824 	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
825 
826 	while (dispsock != NULL) {
827 		if (dispsock->portentry != NULL &&
828 		    dispsock->portentry->port == port &&
829 		    isc_sockaddr_equal(dest, &dispsock->host))
830 			return (dispsock);
831 		dispsock = ISC_LIST_NEXT(dispsock, blink);
832 	}
833 
834 	return (NULL);
835 }
836 
837 /*%
838  * Make a new socket for a single dispatch with a random port number.
839  * The caller must hold the disp->lock
840  */
841 static isc_result_t
get_dispsocket(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_socketmgr_t * sockmgr,dispsocket_t ** dispsockp,in_port_t * portp)842 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
843 	       isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
844 	       in_port_t *portp)
845 {
846 	int i;
847 	isc_uint32_t r;
848 	dns_dispatchmgr_t *mgr = disp->mgr;
849 	isc_socket_t *sock = NULL;
850 	isc_result_t result = ISC_R_FAILURE;
851 	in_port_t port;
852 	isc_sockaddr_t localaddr;
853 	unsigned int bucket = 0;
854 	dispsocket_t *dispsock;
855 	unsigned int nports;
856 	in_port_t *ports;
857 	unsigned int bindoptions;
858 	dispportentry_t *portentry = NULL;
859 	dns_qid_t *qid;
860 
861 	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
862 		nports = disp->mgr->nv4ports;
863 		ports = disp->mgr->v4ports;
864 	} else {
865 		nports = disp->mgr->nv6ports;
866 		ports = disp->mgr->v6ports;
867 	}
868 	if (nports == 0)
869 		return (ISC_R_ADDRNOTAVAIL);
870 
871 	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
872 	if (dispsock != NULL) {
873 		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
874 		sock = dispsock->socket;
875 		dispsock->socket = NULL;
876 	} else {
877 		dispsock = isc_mempool_get(mgr->spool);
878 		if (dispsock == NULL)
879 			return (ISC_R_NOMEMORY);
880 
881 		disp->nsockets++;
882 		dispsock->socket = NULL;
883 		dispsock->disp = disp;
884 		dispsock->resp = NULL;
885 		dispsock->portentry = NULL;
886 		isc_random_get(&r);
887 		dispsock->task = NULL;
888 		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
889 		ISC_LINK_INIT(dispsock, link);
890 		ISC_LINK_INIT(dispsock, blink);
891 		dispsock->magic = DISPSOCK_MAGIC;
892 	}
893 
894 	/*
895 	 * Pick up a random UDP port and open a new socket with it.  Avoid
896 	 * choosing ports that share the same destination because it will be
897 	 * very likely to fail in bind(2) or connect(2).
898 	 */
899 	localaddr = disp->local;
900 	qid = DNS_QID(disp);
901 
902 	for (i = 0; i < 64; i++) {
903 		port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
904 							nports)];
905 		isc_sockaddr_setport(&localaddr, port);
906 
907 		LOCK(&qid->lock);
908 		bucket = dns_hash(qid, dest, 0, port);
909 		if (socket_search(qid, dest, port, bucket) != NULL) {
910 			UNLOCK(&qid->lock);
911 			continue;
912 		}
913 		UNLOCK(&qid->lock);
914 		bindoptions = 0;
915 		portentry = port_search(disp, port);
916 
917 		if (portentry != NULL)
918 			bindoptions |= ISC_SOCKET_REUSEADDRESS;
919 		result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
920 				     NULL);
921 		if (result == ISC_R_SUCCESS) {
922 			if (portentry == NULL) {
923 				portentry = new_portentry(disp, port);
924 				if (portentry == NULL) {
925 					result = ISC_R_NOMEMORY;
926 					break;
927 				}
928 			} else {
929 				LOCK(&qid->lock);
930 				portentry->refs++;
931 				UNLOCK(&qid->lock);
932 			}
933 			break;
934 		} else if (result == ISC_R_NOPERM) {
935 			char buf[ISC_SOCKADDR_FORMATSIZE];
936 			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
937 			dispatch_log(disp, ISC_LOG_WARNING,
938 				     "open_socket(%s) -> %s: continuing",
939 				     buf, isc_result_totext(result));
940 		} else if (result != ISC_R_ADDRINUSE)
941 			break;
942 	}
943 
944 	if (result == ISC_R_SUCCESS) {
945 		dispsock->socket = sock;
946 		dispsock->host = *dest;
947 		dispsock->portentry = portentry;
948 		dispsock->bucket = bucket;
949 		LOCK(&qid->lock);
950 		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
951 		UNLOCK(&qid->lock);
952 		*dispsockp = dispsock;
953 		*portp = port;
954 	} else {
955 		/*
956 		 * We could keep it in the inactive list, but since this should
957 		 * be an exceptional case and might be resource shortage, we'd
958 		 * rather destroy it.
959 		 */
960 		if (sock != NULL)
961 			isc_socket_detach(&sock);
962 		destroy_dispsocket(disp, &dispsock);
963 	}
964 
965 	return (result);
966 }
967 
968 /*%
969  * Destroy a dedicated dispatch socket.
970  */
971 static void
destroy_dispsocket(dns_dispatch_t * disp,dispsocket_t ** dispsockp)972 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
973 	dispsocket_t *dispsock;
974 	dns_qid_t *qid;
975 
976 	/*
977 	 * The dispatch must be locked.
978 	 */
979 
980 	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
981 	dispsock = *dispsockp;
982 	REQUIRE(!ISC_LINK_LINKED(dispsock, link));
983 
984 	disp->nsockets--;
985 	dispsock->magic = 0;
986 	if (dispsock->portentry != NULL)
987 		deref_portentry(disp, &dispsock->portentry);
988 	if (dispsock->socket != NULL)
989 		isc_socket_detach(&dispsock->socket);
990 	if (ISC_LINK_LINKED(dispsock, blink)) {
991 		qid = DNS_QID(disp);
992 		LOCK(&qid->lock);
993 		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
994 				blink);
995 		UNLOCK(&qid->lock);
996 	}
997 	if (dispsock->task != NULL)
998 		isc_task_detach(&dispsock->task);
999 	isc_mempool_put(disp->mgr->spool, dispsock);
1000 
1001 	*dispsockp = NULL;
1002 }
1003 
1004 /*%
1005  * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
1006  * future reuse unless the total number of sockets are exceeding the maximum.
1007  */
1008 static void
deactivate_dispsocket(dns_dispatch_t * disp,dispsocket_t * dispsock)1009 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1010 	isc_result_t result;
1011 	dns_qid_t *qid;
1012 
1013 	/*
1014 	 * The dispatch must be locked.
1015 	 */
1016 	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
1017 	if (dispsock->resp != NULL) {
1018 		INSIST(dispsock->resp->dispsocket == dispsock);
1019 		dispsock->resp->dispsocket = NULL;
1020 	}
1021 
1022 	INSIST(dispsock->portentry != NULL);
1023 	deref_portentry(disp, &dispsock->portentry);
1024 
1025 	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1026 		destroy_dispsocket(disp, &dispsock);
1027 	else {
1028 		result = isc_socket_close(dispsock->socket);
1029 
1030 		qid = DNS_QID(disp);
1031 		LOCK(&qid->lock);
1032 		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1033 				blink);
1034 		UNLOCK(&qid->lock);
1035 
1036 		if (result == ISC_R_SUCCESS)
1037 			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1038 		else {
1039 			/*
1040 			 * If the underlying system does not allow this
1041 			 * optimization, destroy this temporary structure (and
1042 			 * create a new one for a new transaction).
1043 			 */
1044 			INSIST(result == ISC_R_NOTIMPLEMENTED);
1045 			destroy_dispsocket(disp, &dispsock);
1046 		}
1047 	}
1048 }
1049 
1050 /*
1051  * Find an entry for query ID 'id', socket address 'dest', and port number
1052  * 'port'.
1053  * Return NULL if no such entry exists.
1054  */
1055 static dns_dispentry_t *
entry_search(dns_qid_t * qid,isc_sockaddr_t * dest,dns_messageid_t id,in_port_t port,unsigned int bucket)1056 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1057 	     in_port_t port, unsigned int bucket)
1058 {
1059 	dns_dispentry_t *res;
1060 
1061 	REQUIRE(VALID_QID(qid));
1062 	REQUIRE(bucket < qid->qid_nbuckets);
1063 
1064 	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1065 
1066 	while (res != NULL) {
1067 		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1068 		    res->port == port) {
1069 			return (res);
1070 		}
1071 		res = ISC_LIST_NEXT(res, link);
1072 	}
1073 
1074 	return (NULL);
1075 }
1076 
1077 static void
free_buffer(dns_dispatch_t * disp,void * buf,unsigned int len)1078 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1079 	isc_mempool_t *bpool;
1080 	INSIST(buf != NULL && len != 0);
1081 
1082 
1083 	switch (disp->socktype) {
1084 	case isc_sockettype_tcp:
1085 		INSIST(disp->tcpbuffers > 0);
1086 		disp->tcpbuffers--;
1087 		isc_mem_put(disp->mgr->mctx, buf, len);
1088 		break;
1089 	case isc_sockettype_udp:
1090 		LOCK(&disp->mgr->buffer_lock);
1091 		INSIST(disp->mgr->buffers > 0);
1092 		INSIST(len == disp->mgr->buffersize);
1093 		disp->mgr->buffers--;
1094 		bpool = disp->mgr->bpool;
1095 		UNLOCK(&disp->mgr->buffer_lock);
1096 		isc_mempool_put(bpool, buf);
1097 		break;
1098 	default:
1099 		INSIST(0);
1100 		break;
1101 	}
1102 }
1103 
1104 static void *
allocate_udp_buffer(dns_dispatch_t * disp)1105 allocate_udp_buffer(dns_dispatch_t *disp) {
1106 	isc_mempool_t *bpool;
1107 	void *temp;
1108 
1109 	LOCK(&disp->mgr->buffer_lock);
1110 	bpool = disp->mgr->bpool;
1111 	disp->mgr->buffers++;
1112 	UNLOCK(&disp->mgr->buffer_lock);
1113 
1114 	temp = isc_mempool_get(bpool);
1115 
1116 	if (temp == NULL) {
1117 		LOCK(&disp->mgr->buffer_lock);
1118 		disp->mgr->buffers--;
1119 		UNLOCK(&disp->mgr->buffer_lock);
1120 	}
1121 
1122 	return (temp);
1123 }
1124 
1125 static inline void
free_sevent(isc_event_t * ev)1126 free_sevent(isc_event_t *ev) {
1127 	isc_mempool_t *pool = ev->ev_destroy_arg;
1128 	isc_socketevent_t *sev = (isc_socketevent_t *) ev;
1129 	isc_mempool_put(pool, sev);
1130 }
1131 
1132 static inline isc_socketevent_t *
allocate_sevent(dns_dispatch_t * disp,isc_socket_t * socket,isc_eventtype_t type,isc_taskaction_t action,const void * arg)1133 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *socket,
1134 		isc_eventtype_t type, isc_taskaction_t action, const void *arg)
1135 {
1136 	isc_socketevent_t *ev;
1137 	void *deconst_arg;
1138 
1139 	ev = isc_mempool_get(disp->sepool);
1140 	if (ev == NULL)
1141 		return (NULL);
1142 	DE_CONST(arg, deconst_arg);
1143 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
1144 		       action, deconst_arg, socket,
1145 		       free_sevent, disp->sepool);
1146 	ev->result = ISC_R_UNSET;
1147 	ISC_LINK_INIT(ev, ev_link);
1148 	ISC_LIST_INIT(ev->bufferlist);
1149 	ev->region.base = NULL;
1150 	ev->n = 0;
1151 	ev->offset = 0;
1152 	ev->attributes = 0;
1153 
1154 	return (ev);
1155 }
1156 
1157 
1158 static inline void
free_devent(dns_dispatch_t * disp,dns_dispatchevent_t * ev)1159 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1160 	if (disp->failsafe_ev == ev) {
1161 		INSIST(disp->shutdown_out == 1);
1162 		disp->shutdown_out = 0;
1163 
1164 		return;
1165 	}
1166 
1167 	isc_mempool_put(disp->mgr->depool, ev);
1168 }
1169 
1170 static inline dns_dispatchevent_t *
allocate_devent(dns_dispatch_t * disp)1171 allocate_devent(dns_dispatch_t *disp) {
1172 	dns_dispatchevent_t *ev;
1173 
1174 	ev = isc_mempool_get(disp->mgr->depool);
1175 	if (ev == NULL)
1176 		return (NULL);
1177 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1178 		       NULL, NULL, NULL, NULL, NULL);
1179 
1180 	return (ev);
1181 }
1182 
1183 static void
udp_exrecv(isc_task_t * task,isc_event_t * ev)1184 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1185 	dispsocket_t *dispsock = ev->ev_arg;
1186 
1187 	UNUSED(task);
1188 
1189 	REQUIRE(VALID_DISPSOCK(dispsock));
1190 	udp_recv(ev, dispsock->disp, dispsock);
1191 }
1192 
1193 static void
udp_shrecv(isc_task_t * task,isc_event_t * ev)1194 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1195 	dns_dispatch_t *disp = ev->ev_arg;
1196 
1197 	UNUSED(task);
1198 
1199 	REQUIRE(VALID_DISPATCH(disp));
1200 	udp_recv(ev, disp, NULL);
1201 }
1202 
1203 /*
1204  * General flow:
1205  *
1206  * If I/O result == CANCELED or error, free the buffer.
1207  *
1208  * If query, free the buffer, restart.
1209  *
1210  * If response:
1211  *	Allocate event, fill in details.
1212  *		If cannot allocate, free buffer, restart.
1213  *	find target.  If not found, free buffer, restart.
1214  *	if event queue is not empty, queue.  else, send.
1215  *	restart.
1216  */
1217 static void
udp_recv(isc_event_t * ev_in,dns_dispatch_t * disp,dispsocket_t * dispsock)1218 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1219 	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1220 	dns_messageid_t id;
1221 	isc_result_t dres;
1222 	isc_buffer_t source;
1223 	unsigned int flags;
1224 	dns_dispentry_t *resp = NULL;
1225 	dns_dispatchevent_t *rev;
1226 	unsigned int bucket;
1227 	isc_boolean_t killit;
1228 	isc_boolean_t queue_response;
1229 	dns_dispatchmgr_t *mgr;
1230 	dns_qid_t *qid;
1231 	isc_netaddr_t netaddr;
1232 	int match;
1233 	int result;
1234 	isc_boolean_t qidlocked = ISC_FALSE;
1235 
1236 	LOCK(&disp->lock);
1237 
1238 	mgr = disp->mgr;
1239 	qid = mgr->qid;
1240 
1241 	dispatch_log(disp, LVL(90),
1242 		     "got packet: requests %d, buffers %d, recvs %d",
1243 		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1244 
1245 	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1246 		/*
1247 		 * Unless the receive event was imported from a listening
1248 		 * interface, in which case the event type is
1249 		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1250 		 */
1251 		INSIST(disp->recv_pending != 0);
1252 		disp->recv_pending = 0;
1253 	}
1254 
1255 	if (dispsock != NULL &&
1256 	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1257 		/*
1258 		 * dispsock->resp can be NULL if this transaction was canceled
1259 		 * just after receiving a response.  Since this socket is
1260 		 * exclusively used and there should be at most one receive
1261 		 * event the canceled event should have been no effect.  So
1262 		 * we can (and should) deactivate the socket right now.
1263 		 */
1264 		deactivate_dispsocket(disp, dispsock);
1265 		dispsock = NULL;
1266 	}
1267 
1268 	if (disp->shutting_down) {
1269 		/*
1270 		 * This dispatcher is shutting down.
1271 		 */
1272 		free_buffer(disp, ev->region.base, ev->region.length);
1273 
1274 		isc_event_free(&ev_in);
1275 		ev = NULL;
1276 
1277 		killit = destroy_disp_ok(disp);
1278 		UNLOCK(&disp->lock);
1279 		if (killit)
1280 			isc_task_send(disp->task[0], &disp->ctlevent);
1281 
1282 		return;
1283 	}
1284 
1285 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1286 		if (dispsock != NULL) {
1287 			resp = dispsock->resp;
1288 			id = resp->id;
1289 			if (ev->result != ISC_R_SUCCESS) {
1290 				/*
1291 				 * This is most likely a network error on a
1292 				 * connected socket.  It makes no sense to
1293 				 * check the address or parse the packet, but it
1294 				 * will help to return the error to the caller.
1295 				 */
1296 				goto sendresponse;
1297 			}
1298 		} else {
1299 			free_buffer(disp, ev->region.base, ev->region.length);
1300 
1301 			isc_event_free(&ev_in);
1302 			UNLOCK(&disp->lock);
1303 			return;
1304 		}
1305 	} else if (ev->result != ISC_R_SUCCESS) {
1306 		free_buffer(disp, ev->region.base, ev->region.length);
1307 
1308 		if (ev->result != ISC_R_CANCELED)
1309 			dispatch_log(disp, ISC_LOG_ERROR,
1310 				     "odd socket result in udp_recv(): %s",
1311 				     isc_result_totext(ev->result));
1312 
1313 		isc_event_free(&ev_in);
1314 		UNLOCK(&disp->lock);
1315 		return;
1316 	}
1317 
1318 	/*
1319 	 * If this is from a blackholed address, drop it.
1320 	 */
1321 	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1322 	if (disp->mgr->blackhole != NULL &&
1323 	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1324 			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1325 	    match > 0)
1326 	{
1327 		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1328 			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1329 			isc_netaddr_format(&netaddr, netaddrstr,
1330 					   sizeof(netaddrstr));
1331 			dispatch_log(disp, LVL(10),
1332 				     "blackholed packet from %s",
1333 				     netaddrstr);
1334 		}
1335 		free_buffer(disp, ev->region.base, ev->region.length);
1336 		goto restart;
1337 	}
1338 
1339 	/*
1340 	 * Peek into the buffer to see what we can see.
1341 	 */
1342 	isc_buffer_init(&source, ev->region.base, ev->region.length);
1343 	isc_buffer_add(&source, ev->n);
1344 	dres = dns_message_peekheader(&source, &id, &flags);
1345 	if (dres != ISC_R_SUCCESS) {
1346 		free_buffer(disp, ev->region.base, ev->region.length);
1347 		dispatch_log(disp, LVL(10), "got garbage packet");
1348 		goto restart;
1349 	}
1350 
1351 	dispatch_log(disp, LVL(92),
1352 		     "got valid DNS message header, /QR %c, id %u",
1353 		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1354 
1355 	/*
1356 	 * Look at flags.  If query, drop it. If response,
1357 	 * look to see where it goes.
1358 	 */
1359 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1360 		/* query */
1361 		free_buffer(disp, ev->region.base, ev->region.length);
1362 		goto restart;
1363 	}
1364 
1365 	/*
1366 	 * Search for the corresponding response.  If we are using an exclusive
1367 	 * socket, we've already identified it and we can skip the search; but
1368 	 * the ID and the address must match the expected ones.
1369 	 */
1370 	if (resp == NULL) {
1371 		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1372 		LOCK(&qid->lock);
1373 		qidlocked = ISC_TRUE;
1374 		resp = entry_search(qid, &ev->address, id, disp->localport,
1375 				    bucket);
1376 		dispatch_log(disp, LVL(90),
1377 			     "search for response in bucket %d: %s",
1378 			     bucket, (resp == NULL ? "not found" : "found"));
1379 
1380 		if (resp == NULL) {
1381 			inc_stats(mgr, dns_resstatscounter_mismatch);
1382 			free_buffer(disp, ev->region.base, ev->region.length);
1383 			goto unlock;
1384 		}
1385 	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1386 							 &resp->host)) {
1387 		dispatch_log(disp, LVL(90),
1388 			     "response to an exclusive socket doesn't match");
1389 		inc_stats(mgr, dns_resstatscounter_mismatch);
1390 		free_buffer(disp, ev->region.base, ev->region.length);
1391 		goto unlock;
1392 	}
1393 
1394 	/*
1395 	 * Now that we have the original dispatch the query was sent
1396 	 * from check that the address and port the response was
1397 	 * sent to make sense.
1398 	 */
1399 	if (disp != resp->disp) {
1400 		isc_sockaddr_t a1;
1401 		isc_sockaddr_t a2;
1402 
1403 		/*
1404 		 * Check that the socket types and ports match.
1405 		 */
1406 		if (disp->socktype != resp->disp->socktype ||
1407 		    isc_sockaddr_getport(&disp->local) !=
1408 		    isc_sockaddr_getport(&resp->disp->local)) {
1409 			free_buffer(disp, ev->region.base, ev->region.length);
1410 			goto unlock;
1411 		}
1412 
1413 		/*
1414 		 * If each dispatch is bound to a different address
1415 		 * then fail.
1416 		 *
1417 		 * Note under Linux a packet can be sent out via IPv4 socket
1418 		 * and the response be received via a IPv6 socket.
1419 		 *
1420 		 * Requests sent out via IPv6 should always come back in
1421 		 * via IPv6.
1422 		 */
1423 		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1424 		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1425 			free_buffer(disp, ev->region.base, ev->region.length);
1426 			goto unlock;
1427 		}
1428 		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1429 		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1430 		if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) &&
1431 		    !isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1432 		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1433 			free_buffer(disp, ev->region.base, ev->region.length);
1434 			goto unlock;
1435 		}
1436 	}
1437 
1438   sendresponse:
1439 	queue_response = resp->item_out;
1440 	rev = allocate_devent(resp->disp);
1441 	if (rev == NULL) {
1442 		free_buffer(disp, ev->region.base, ev->region.length);
1443 		goto unlock;
1444 	}
1445 
1446 	/*
1447 	 * At this point, rev contains the event we want to fill in, and
1448 	 * resp contains the information on the place to send it to.
1449 	 * Send the event off.
1450 	 */
1451 	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1452 	isc_buffer_add(&rev->buffer, ev->n);
1453 	rev->result = ev->result;
1454 	rev->id = id;
1455 	rev->addr = ev->address;
1456 	rev->pktinfo = ev->pktinfo;
1457 	rev->attributes = ev->attributes;
1458 	if (queue_response) {
1459 		ISC_LIST_APPEND(resp->items, rev, ev_link);
1460 	} else {
1461 		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1462 			       DNS_EVENT_DISPATCH,
1463 			       resp->action, resp->arg, resp, NULL, NULL);
1464 		request_log(disp, resp, LVL(90),
1465 			    "[a] Sent event %p buffer %p len %d to task %p",
1466 			    rev, rev->buffer.base, rev->buffer.length,
1467 			    resp->task);
1468 		resp->item_out = ISC_TRUE;
1469 		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1470 	}
1471  unlock:
1472 	if (qidlocked)
1473 		UNLOCK(&qid->lock);
1474 
1475 	/*
1476 	 * Restart recv() to get the next packet.
1477 	 */
1478  restart:
1479 	result = startrecv(disp, dispsock);
1480 	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1481 		/*
1482 		 * XXX: wired. There seems to be no recovery process other than
1483 		 * deactivate this socket anyway (since we cannot start
1484 		 * receiving, we won't be able to receive a cancel event
1485 		 * from the user).
1486 		 */
1487 		deactivate_dispsocket(disp, dispsock);
1488 	}
1489 	isc_event_free(&ev_in);
1490 	UNLOCK(&disp->lock);
1491 }
1492 
1493 /*
1494  * General flow:
1495  *
1496  * If I/O result == CANCELED, EOF, or error, notify everyone as the
1497  * various queues drain.
1498  *
1499  * If query, restart.
1500  *
1501  * If response:
1502  *	Allocate event, fill in details.
1503  *		If cannot allocate, restart.
1504  *	find target.  If not found, restart.
1505  *	if event queue is not empty, queue.  else, send.
1506  *	restart.
1507  */
1508 static void
tcp_recv(isc_task_t * task,isc_event_t * ev_in)1509 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1510 	dns_dispatch_t *disp = ev_in->ev_arg;
1511 	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1512 	dns_messageid_t id;
1513 	isc_result_t dres;
1514 	unsigned int flags;
1515 	dns_dispentry_t *resp;
1516 	dns_dispatchevent_t *rev;
1517 	unsigned int bucket;
1518 	isc_boolean_t killit;
1519 	isc_boolean_t queue_response;
1520 	dns_qid_t *qid;
1521 	int level;
1522 	char buf[ISC_SOCKADDR_FORMATSIZE];
1523 
1524 	UNUSED(task);
1525 
1526 	REQUIRE(VALID_DISPATCH(disp));
1527 
1528 	qid = disp->qid;
1529 
1530 	dispatch_log(disp, LVL(90),
1531 		     "got TCP packet: requests %d, buffers %d, recvs %d",
1532 		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1533 
1534 	LOCK(&disp->lock);
1535 
1536 	INSIST(disp->recv_pending != 0);
1537 	disp->recv_pending = 0;
1538 
1539 	if (disp->refcount == 0) {
1540 		/*
1541 		 * This dispatcher is shutting down.  Force cancelation.
1542 		 */
1543 		tcpmsg->result = ISC_R_CANCELED;
1544 	}
1545 
1546 	if (tcpmsg->result != ISC_R_SUCCESS) {
1547 		switch (tcpmsg->result) {
1548 		case ISC_R_CANCELED:
1549 			break;
1550 
1551 		case ISC_R_EOF:
1552 			dispatch_log(disp, LVL(90), "shutting down on EOF");
1553 			do_cancel(disp);
1554 			break;
1555 
1556 		case ISC_R_CONNECTIONRESET:
1557 			level = ISC_LOG_INFO;
1558 			goto logit;
1559 
1560 		default:
1561 			level = ISC_LOG_ERROR;
1562 		logit:
1563 			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1564 			dispatch_log(disp, level, "shutting down due to TCP "
1565 				     "receive error: %s: %s", buf,
1566 				     isc_result_totext(tcpmsg->result));
1567 			do_cancel(disp);
1568 			break;
1569 		}
1570 
1571 		/*
1572 		 * The event is statically allocated in the tcpmsg
1573 		 * structure, and destroy_disp() frees the tcpmsg, so we must
1574 		 * free the event *before* calling destroy_disp().
1575 		 */
1576 		isc_event_free(&ev_in);
1577 
1578 		disp->shutting_down = 1;
1579 		disp->shutdown_why = tcpmsg->result;
1580 
1581 		/*
1582 		 * If the recv() was canceled pass the word on.
1583 		 */
1584 		killit = destroy_disp_ok(disp);
1585 		UNLOCK(&disp->lock);
1586 		if (killit)
1587 			isc_task_send(disp->task[0], &disp->ctlevent);
1588 		return;
1589 	}
1590 
1591 	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1592 		     tcpmsg->result,
1593 		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1594 
1595 	/*
1596 	 * Peek into the buffer to see what we can see.
1597 	 */
1598 	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1599 	if (dres != ISC_R_SUCCESS) {
1600 		dispatch_log(disp, LVL(10), "got garbage packet");
1601 		goto restart;
1602 	}
1603 
1604 	dispatch_log(disp, LVL(92),
1605 		     "got valid DNS message header, /QR %c, id %u",
1606 		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1607 
1608 	/*
1609 	 * Allocate an event to send to the query or response client, and
1610 	 * allocate a new buffer for our use.
1611 	 */
1612 
1613 	/*
1614 	 * Look at flags.  If query, drop it. If response,
1615 	 * look to see where it goes.
1616 	 */
1617 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1618 		/*
1619 		 * Query.
1620 		 */
1621 		goto restart;
1622 	}
1623 
1624 	/*
1625 	 * Response.
1626 	 */
1627 	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1628 	LOCK(&qid->lock);
1629 	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1630 	dispatch_log(disp, LVL(90),
1631 		     "search for response in bucket %d: %s",
1632 		     bucket, (resp == NULL ? "not found" : "found"));
1633 
1634 	if (resp == NULL)
1635 		goto unlock;
1636 	queue_response = resp->item_out;
1637 	rev = allocate_devent(disp);
1638 	if (rev == NULL)
1639 		goto unlock;
1640 
1641 	/*
1642 	 * At this point, rev contains the event we want to fill in, and
1643 	 * resp contains the information on the place to send it to.
1644 	 * Send the event off.
1645 	 */
1646 	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1647 	disp->tcpbuffers++;
1648 	rev->result = ISC_R_SUCCESS;
1649 	rev->id = id;
1650 	rev->addr = tcpmsg->address;
1651 	if (queue_response) {
1652 		ISC_LIST_APPEND(resp->items, rev, ev_link);
1653 	} else {
1654 		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1655 			       resp->action, resp->arg, resp, NULL, NULL);
1656 		request_log(disp, resp, LVL(90),
1657 			    "[b] Sent event %p buffer %p len %d to task %p",
1658 			    rev, rev->buffer.base, rev->buffer.length,
1659 			    resp->task);
1660 		resp->item_out = ISC_TRUE;
1661 		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1662 	}
1663  unlock:
1664 	UNLOCK(&qid->lock);
1665 
1666 	/*
1667 	 * Restart recv() to get the next packet.
1668 	 */
1669  restart:
1670 	(void)startrecv(disp, NULL);
1671 
1672 	isc_event_free(&ev_in);
1673 	UNLOCK(&disp->lock);
1674 }
1675 
1676 /*
1677  * disp must be locked.
1678  */
1679 static isc_result_t
startrecv(dns_dispatch_t * disp,dispsocket_t * dispsock)1680 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1681 	isc_result_t res;
1682 	isc_region_t region;
1683 	isc_socket_t *socket;
1684 
1685 	if (disp->shutting_down == 1)
1686 		return (ISC_R_SUCCESS);
1687 
1688 	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1689 		return (ISC_R_SUCCESS);
1690 
1691 	if (disp->recv_pending != 0 && dispsock == NULL)
1692 		return (ISC_R_SUCCESS);
1693 
1694 	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1695 		return (ISC_R_NOMEMORY);
1696 
1697 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1698 	    dispsock == NULL)
1699 		return (ISC_R_SUCCESS);
1700 
1701 	if (dispsock != NULL)
1702 		socket = dispsock->socket;
1703 	else
1704 		socket = disp->socket;
1705 	INSIST(socket != NULL);
1706 
1707 	switch (disp->socktype) {
1708 		/*
1709 		 * UDP reads are always maximal.
1710 		 */
1711 	case isc_sockettype_udp:
1712 		region.length = disp->mgr->buffersize;
1713 		region.base = allocate_udp_buffer(disp);
1714 		if (region.base == NULL)
1715 			return (ISC_R_NOMEMORY);
1716 		if (dispsock != NULL) {
1717 			isc_task_t *dt = dispsock->task;
1718 			isc_socketevent_t *sev =
1719 				allocate_sevent(disp, socket,
1720 						ISC_SOCKEVENT_RECVDONE,
1721 						udp_exrecv, dispsock);
1722 			if (sev == NULL) {
1723 				free_buffer(disp, region.base, region.length);
1724 				return (ISC_R_NOMEMORY);
1725 			}
1726 
1727 			res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1728 			if (res != ISC_R_SUCCESS) {
1729 				free_buffer(disp, region.base, region.length);
1730 				return (res);
1731 			}
1732 		} else {
1733 			isc_task_t *dt = disp->task[0];
1734 			isc_socketevent_t *sev =
1735 				allocate_sevent(disp, socket,
1736 						ISC_SOCKEVENT_RECVDONE,
1737 						udp_shrecv, disp);
1738 			if (sev == NULL) {
1739 				free_buffer(disp, region.base, region.length);
1740 				return (ISC_R_NOMEMORY);
1741 			}
1742 
1743 			res = isc_socket_recv2(socket, &region, 1, dt, sev, 0);
1744 			if (res != ISC_R_SUCCESS) {
1745 				free_buffer(disp, region.base, region.length);
1746 				disp->shutdown_why = res;
1747 				disp->shutting_down = 1;
1748 				do_cancel(disp);
1749 				return (ISC_R_SUCCESS); /* recover by cancel */
1750 			}
1751 			INSIST(disp->recv_pending == 0);
1752 			disp->recv_pending = 1;
1753 		}
1754 		break;
1755 
1756 	case isc_sockettype_tcp:
1757 		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1758 					     tcp_recv, disp);
1759 		if (res != ISC_R_SUCCESS) {
1760 			disp->shutdown_why = res;
1761 			disp->shutting_down = 1;
1762 			do_cancel(disp);
1763 			return (ISC_R_SUCCESS); /* recover by cancel */
1764 		}
1765 		INSIST(disp->recv_pending == 0);
1766 		disp->recv_pending = 1;
1767 		break;
1768 	default:
1769 		INSIST(0);
1770 		break;
1771 	}
1772 
1773 	return (ISC_R_SUCCESS);
1774 }
1775 
1776 /*
1777  * Mgr must be locked when calling this function.
1778  */
1779 static isc_boolean_t
destroy_mgr_ok(dns_dispatchmgr_t * mgr)1780 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1781 	mgr_log(mgr, LVL(90),
1782 		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1783 		"depool=%d, rpool=%d, dpool=%d",
1784 		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1785 		isc_mempool_getallocated(mgr->depool),
1786 		isc_mempool_getallocated(mgr->rpool),
1787 		isc_mempool_getallocated(mgr->dpool));
1788 	if (!MGR_IS_SHUTTINGDOWN(mgr))
1789 		return (ISC_FALSE);
1790 	if (!ISC_LIST_EMPTY(mgr->list))
1791 		return (ISC_FALSE);
1792 	if (isc_mempool_getallocated(mgr->depool) != 0)
1793 		return (ISC_FALSE);
1794 	if (isc_mempool_getallocated(mgr->rpool) != 0)
1795 		return (ISC_FALSE);
1796 	if (isc_mempool_getallocated(mgr->dpool) != 0)
1797 		return (ISC_FALSE);
1798 
1799 	return (ISC_TRUE);
1800 }
1801 
1802 /*
1803  * Mgr must be unlocked when calling this function.
1804  */
1805 static void
destroy_mgr(dns_dispatchmgr_t ** mgrp)1806 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1807 	isc_mem_t *mctx;
1808 	dns_dispatchmgr_t *mgr;
1809 
1810 	mgr = *mgrp;
1811 	*mgrp = NULL;
1812 
1813 	mctx = mgr->mctx;
1814 
1815 	mgr->magic = 0;
1816 	mgr->mctx = NULL;
1817 	DESTROYLOCK(&mgr->lock);
1818 	mgr->state = 0;
1819 
1820 	DESTROYLOCK(&mgr->arc4_lock);
1821 
1822 	isc_mempool_destroy(&mgr->depool);
1823 	isc_mempool_destroy(&mgr->rpool);
1824 	isc_mempool_destroy(&mgr->dpool);
1825 	if (mgr->bpool != NULL)
1826 		isc_mempool_destroy(&mgr->bpool);
1827 	if (mgr->spool != NULL)
1828 		isc_mempool_destroy(&mgr->spool);
1829 
1830 	DESTROYLOCK(&mgr->spool_lock);
1831 	DESTROYLOCK(&mgr->bpool_lock);
1832 	DESTROYLOCK(&mgr->dpool_lock);
1833 	DESTROYLOCK(&mgr->rpool_lock);
1834 	DESTROYLOCK(&mgr->depool_lock);
1835 
1836 	if (mgr->entropy != NULL)
1837 		isc_entropy_detach(&mgr->entropy);
1838 	if (mgr->qid != NULL)
1839 		qid_destroy(mctx, &mgr->qid);
1840 
1841 	DESTROYLOCK(&mgr->buffer_lock);
1842 
1843 	if (mgr->blackhole != NULL)
1844 		dns_acl_detach(&mgr->blackhole);
1845 
1846 	if (mgr->stats != NULL)
1847 		isc_stats_detach(&mgr->stats);
1848 
1849 	if (mgr->v4ports != NULL) {
1850 		isc_mem_put(mctx, mgr->v4ports,
1851 			    mgr->nv4ports * sizeof(in_port_t));
1852 	}
1853 	if (mgr->v6ports != NULL) {
1854 		isc_mem_put(mctx, mgr->v6ports,
1855 			    mgr->nv6ports * sizeof(in_port_t));
1856 	}
1857 	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1858 	isc_mem_detach(&mctx);
1859 }
1860 
1861 static isc_result_t
open_socket(isc_socketmgr_t * mgr,isc_sockaddr_t * local,unsigned int options,isc_socket_t ** sockp,isc_socket_t * dup_socket)1862 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1863 	    unsigned int options, isc_socket_t **sockp,
1864 	    isc_socket_t *dup_socket)
1865 {
1866 	isc_socket_t *sock;
1867 	isc_result_t result;
1868 
1869 	sock = *sockp;
1870 	if (sock != NULL) {
1871 		result = isc_socket_open(sock);
1872 		if (result != ISC_R_SUCCESS)
1873 			return (result);
1874 	} else if (dup_socket != NULL) {
1875 		result = isc_socket_dup(dup_socket, &sock);
1876 		if (result != ISC_R_SUCCESS)
1877 			return (result);
1878 
1879 		isc_socket_setname(sock, "dispatcher", NULL);
1880 		*sockp = sock;
1881 		return (ISC_R_SUCCESS);
1882 	} else {
1883 		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1884 					isc_sockettype_udp, &sock);
1885 		if (result != ISC_R_SUCCESS)
1886 			return (result);
1887 	}
1888 
1889 	isc_socket_setname(sock, "dispatcher", NULL);
1890 
1891 #ifndef ISC_ALLOW_MAPPED
1892 	isc_socket_ipv6only(sock, ISC_TRUE);
1893 #endif
1894 	result = isc_socket_bind(sock, local, options);
1895 	if (result != ISC_R_SUCCESS) {
1896 		if (*sockp == NULL)
1897 			isc_socket_detach(&sock);
1898 		else {
1899 			isc_socket_close(sock);
1900 		}
1901 		return (result);
1902 	}
1903 
1904 	*sockp = sock;
1905 	return (ISC_R_SUCCESS);
1906 }
1907 
1908 /*%
1909  * Create a temporary port list to set the initial default set of dispatch
1910  * ports: [1024, 65535].  This is almost meaningless as the application will
1911  * normally set the ports explicitly, but is provided to fill some minor corner
1912  * cases.
1913  */
1914 static isc_result_t
create_default_portset(isc_mem_t * mctx,isc_portset_t ** portsetp)1915 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1916 	isc_result_t result;
1917 
1918 	result = isc_portset_create(mctx, portsetp);
1919 	if (result != ISC_R_SUCCESS)
1920 		return (result);
1921 	isc_portset_addrange(*portsetp, 1024, 65535);
1922 
1923 	return (ISC_R_SUCCESS);
1924 }
1925 
1926 /*
1927  * Publics.
1928  */
1929 
1930 isc_result_t
dns_dispatchmgr_create(isc_mem_t * mctx,isc_entropy_t * entropy,dns_dispatchmgr_t ** mgrp)1931 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1932 		       dns_dispatchmgr_t **mgrp)
1933 {
1934 	dns_dispatchmgr_t *mgr;
1935 	isc_result_t result;
1936 	isc_portset_t *v4portset = NULL;
1937 	isc_portset_t *v6portset = NULL;
1938 
1939 	REQUIRE(mctx != NULL);
1940 	REQUIRE(mgrp != NULL && *mgrp == NULL);
1941 
1942 	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1943 	if (mgr == NULL)
1944 		return (ISC_R_NOMEMORY);
1945 
1946 	mgr->mctx = NULL;
1947 	isc_mem_attach(mctx, &mgr->mctx);
1948 
1949 	mgr->blackhole = NULL;
1950 	mgr->stats = NULL;
1951 
1952 	result = isc_mutex_init(&mgr->lock);
1953 	if (result != ISC_R_SUCCESS)
1954 		goto deallocate;
1955 
1956 	result = isc_mutex_init(&mgr->arc4_lock);
1957 	if (result != ISC_R_SUCCESS)
1958 		goto kill_lock;
1959 
1960 	result = isc_mutex_init(&mgr->buffer_lock);
1961 	if (result != ISC_R_SUCCESS)
1962 		goto kill_arc4_lock;
1963 
1964 	result = isc_mutex_init(&mgr->depool_lock);
1965 	if (result != ISC_R_SUCCESS)
1966 		goto kill_buffer_lock;
1967 
1968 	result = isc_mutex_init(&mgr->rpool_lock);
1969 	if (result != ISC_R_SUCCESS)
1970 		goto kill_depool_lock;
1971 
1972 	result = isc_mutex_init(&mgr->dpool_lock);
1973 	if (result != ISC_R_SUCCESS)
1974 		goto kill_rpool_lock;
1975 
1976 	result = isc_mutex_init(&mgr->bpool_lock);
1977 	if (result != ISC_R_SUCCESS)
1978 		goto kill_dpool_lock;
1979 
1980 	result = isc_mutex_init(&mgr->spool_lock);
1981 	if (result != ISC_R_SUCCESS)
1982 		goto kill_bpool_lock;
1983 
1984 	mgr->depool = NULL;
1985 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1986 			       &mgr->depool) != ISC_R_SUCCESS) {
1987 		result = ISC_R_NOMEMORY;
1988 		goto kill_spool_lock;
1989 	}
1990 
1991 	mgr->rpool = NULL;
1992 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1993 			       &mgr->rpool) != ISC_R_SUCCESS) {
1994 		result = ISC_R_NOMEMORY;
1995 		goto kill_depool;
1996 	}
1997 
1998 	mgr->dpool = NULL;
1999 	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
2000 			       &mgr->dpool) != ISC_R_SUCCESS) {
2001 		result = ISC_R_NOMEMORY;
2002 		goto kill_rpool;
2003 	}
2004 
2005 	isc_mempool_setname(mgr->depool, "dispmgr_depool");
2006 	isc_mempool_setmaxalloc(mgr->depool, 32768);
2007 	isc_mempool_setfreemax(mgr->depool, 32768);
2008 	isc_mempool_associatelock(mgr->depool, &mgr->depool_lock);
2009 	isc_mempool_setfillcount(mgr->depool, 32);
2010 
2011 	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
2012 	isc_mempool_setmaxalloc(mgr->rpool, 32768);
2013 	isc_mempool_setfreemax(mgr->rpool, 32768);
2014 	isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock);
2015 	isc_mempool_setfillcount(mgr->rpool, 32);
2016 
2017 	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
2018 	isc_mempool_setmaxalloc(mgr->dpool, 32768);
2019 	isc_mempool_setfreemax(mgr->dpool, 32768);
2020 	isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock);
2021 	isc_mempool_setfillcount(mgr->dpool, 32);
2022 
2023 	mgr->buffers = 0;
2024 	mgr->buffersize = 0;
2025 	mgr->maxbuffers = 0;
2026 	mgr->bpool = NULL;
2027 	mgr->spool = NULL;
2028 	mgr->entropy = NULL;
2029 	mgr->qid = NULL;
2030 	mgr->state = 0;
2031 	ISC_LIST_INIT(mgr->list);
2032 	mgr->v4ports = NULL;
2033 	mgr->v6ports = NULL;
2034 	mgr->nv4ports = 0;
2035 	mgr->nv6ports = 0;
2036 	mgr->magic = DNS_DISPATCHMGR_MAGIC;
2037 
2038 	result = create_default_portset(mctx, &v4portset);
2039 	if (result == ISC_R_SUCCESS) {
2040 		result = create_default_portset(mctx, &v6portset);
2041 		if (result == ISC_R_SUCCESS) {
2042 			result = dns_dispatchmgr_setavailports(mgr,
2043 							       v4portset,
2044 							       v6portset);
2045 		}
2046 	}
2047 	if (v4portset != NULL)
2048 		isc_portset_destroy(mctx, &v4portset);
2049 	if (v6portset != NULL)
2050 		isc_portset_destroy(mctx, &v6portset);
2051 	if (result != ISC_R_SUCCESS)
2052 		goto kill_dpool;
2053 
2054 	if (entropy != NULL)
2055 		isc_entropy_attach(entropy, &mgr->entropy);
2056 
2057 	dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
2058 
2059 	*mgrp = mgr;
2060 	return (ISC_R_SUCCESS);
2061 
2062  kill_dpool:
2063 	isc_mempool_destroy(&mgr->dpool);
2064  kill_rpool:
2065 	isc_mempool_destroy(&mgr->rpool);
2066  kill_depool:
2067 	isc_mempool_destroy(&mgr->depool);
2068  kill_spool_lock:
2069 	DESTROYLOCK(&mgr->spool_lock);
2070  kill_bpool_lock:
2071 	DESTROYLOCK(&mgr->bpool_lock);
2072  kill_dpool_lock:
2073 	DESTROYLOCK(&mgr->dpool_lock);
2074  kill_rpool_lock:
2075 	DESTROYLOCK(&mgr->rpool_lock);
2076  kill_depool_lock:
2077 	DESTROYLOCK(&mgr->depool_lock);
2078  kill_buffer_lock:
2079 	DESTROYLOCK(&mgr->buffer_lock);
2080  kill_arc4_lock:
2081 	DESTROYLOCK(&mgr->arc4_lock);
2082  kill_lock:
2083 	DESTROYLOCK(&mgr->lock);
2084  deallocate:
2085 	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
2086 	isc_mem_detach(&mctx);
2087 
2088 	return (result);
2089 }
2090 
2091 void
dns_dispatchmgr_setblackhole(dns_dispatchmgr_t * mgr,dns_acl_t * blackhole)2092 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
2093 	REQUIRE(VALID_DISPATCHMGR(mgr));
2094 	if (mgr->blackhole != NULL)
2095 		dns_acl_detach(&mgr->blackhole);
2096 	dns_acl_attach(blackhole, &mgr->blackhole);
2097 }
2098 
2099 dns_acl_t *
dns_dispatchmgr_getblackhole(dns_dispatchmgr_t * mgr)2100 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2101 	REQUIRE(VALID_DISPATCHMGR(mgr));
2102 	return (mgr->blackhole);
2103 }
2104 
2105 void
dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t * mgr,dns_portlist_t * portlist)2106 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2107 				 dns_portlist_t *portlist)
2108 {
2109 	REQUIRE(VALID_DISPATCHMGR(mgr));
2110 	UNUSED(portlist);
2111 
2112 	/* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2113 	return;
2114 }
2115 
2116 dns_portlist_t *
dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t * mgr)2117 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2118 	REQUIRE(VALID_DISPATCHMGR(mgr));
2119 	return (NULL);		/* this function is deprecated */
2120 }
2121 
2122 isc_result_t
dns_dispatchmgr_setavailports(dns_dispatchmgr_t * mgr,isc_portset_t * v4portset,isc_portset_t * v6portset)2123 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2124 			      isc_portset_t *v6portset)
2125 {
2126 	in_port_t *v4ports, *v6ports, p;
2127 	unsigned int nv4ports, nv6ports, i4, i6;
2128 
2129 	REQUIRE(VALID_DISPATCHMGR(mgr));
2130 
2131 	nv4ports = isc_portset_nports(v4portset);
2132 	nv6ports = isc_portset_nports(v6portset);
2133 
2134 	v4ports = NULL;
2135 	if (nv4ports != 0) {
2136 		v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2137 		if (v4ports == NULL)
2138 			return (ISC_R_NOMEMORY);
2139 	}
2140 	v6ports = NULL;
2141 	if (nv6ports != 0) {
2142 		v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2143 		if (v6ports == NULL) {
2144 			if (v4ports != NULL) {
2145 				isc_mem_put(mgr->mctx, v4ports,
2146 					    sizeof(in_port_t) *
2147 					    isc_portset_nports(v4portset));
2148 			}
2149 			return (ISC_R_NOMEMORY);
2150 		}
2151 	}
2152 
2153 	p = 0;
2154 	i4 = 0;
2155 	i6 = 0;
2156 	do {
2157 		if (isc_portset_isset(v4portset, p)) {
2158 			INSIST(i4 < nv4ports);
2159 			v4ports[i4++] = p;
2160 		}
2161 		if (isc_portset_isset(v6portset, p)) {
2162 			INSIST(i6 < nv6ports);
2163 			v6ports[i6++] = p;
2164 		}
2165 	} while (p++ < 65535);
2166 	INSIST(i4 == nv4ports && i6 == nv6ports);
2167 
2168 	PORTBUFLOCK(mgr);
2169 	if (mgr->v4ports != NULL) {
2170 		isc_mem_put(mgr->mctx, mgr->v4ports,
2171 			    mgr->nv4ports * sizeof(in_port_t));
2172 	}
2173 	mgr->v4ports = v4ports;
2174 	mgr->nv4ports = nv4ports;
2175 
2176 	if (mgr->v6ports != NULL) {
2177 		isc_mem_put(mgr->mctx, mgr->v6ports,
2178 			    mgr->nv6ports * sizeof(in_port_t));
2179 	}
2180 	mgr->v6ports = v6ports;
2181 	mgr->nv6ports = nv6ports;
2182 	PORTBUFUNLOCK(mgr);
2183 
2184 	return (ISC_R_SUCCESS);
2185 }
2186 
2187 static isc_result_t
dns_dispatchmgr_setudp(dns_dispatchmgr_t * mgr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment)2188 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2189 		       unsigned int buffersize, unsigned int maxbuffers,
2190 		       unsigned int maxrequests, unsigned int buckets,
2191 		       unsigned int increment)
2192 {
2193 	isc_result_t result;
2194 
2195 	REQUIRE(VALID_DISPATCHMGR(mgr));
2196 	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2197 	REQUIRE(maxbuffers > 0);
2198 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2199 	REQUIRE(increment > buckets);
2200 
2201 	/*
2202 	 * Keep some number of items around.  This should be a config
2203 	 * option.  For now, keep 8, but later keep at least two even
2204 	 * if the caller wants less.  This allows us to ensure certain
2205 	 * things, like an event can be "freed" and the next allocation
2206 	 * will always succeed.
2207 	 *
2208 	 * Note that if limits are placed on anything here, we use one
2209 	 * event internally, so the actual limit should be "wanted + 1."
2210 	 *
2211 	 * XXXMLG
2212 	 */
2213 
2214 	if (maxbuffers < 8)
2215 		maxbuffers = 8;
2216 
2217 	LOCK(&mgr->buffer_lock);
2218 
2219 	/* Create or adjust buffer pool */
2220 	if (mgr->bpool != NULL) {
2221 		/*
2222 		 * We only increase the maxbuffers to avoid accidental buffer
2223 		 * shortage.  Ideally we'd separate the manager-wide maximum
2224 		 * from per-dispatch limits and respect the latter within the
2225 		 * global limit.  But at this moment that's deemed to be
2226 		 * overkilling and isn't worth additional implementation
2227 		 * complexity.
2228 		 */
2229 		if (maxbuffers > mgr->maxbuffers) {
2230 			isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2231 			isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2232 			mgr->maxbuffers = maxbuffers;
2233 		}
2234 	} else {
2235 		result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2236 		if (result != ISC_R_SUCCESS) {
2237 			UNLOCK(&mgr->buffer_lock);
2238 			return (result);
2239 		}
2240 		isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2241 		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2242 		isc_mempool_setfreemax(mgr->bpool, maxbuffers);
2243 		isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock);
2244 		isc_mempool_setfillcount(mgr->bpool, 32);
2245 	}
2246 
2247 	/* Create or adjust socket pool */
2248 	if (mgr->spool != NULL) {
2249 		if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) {
2250 			isc_mempool_setmaxalloc(mgr->spool,
2251 						DNS_DISPATCH_POOLSOCKS * 2);
2252 			isc_mempool_setfreemax(mgr->spool,
2253 					       DNS_DISPATCH_POOLSOCKS * 2);
2254 		}
2255 		UNLOCK(&mgr->buffer_lock);
2256 		return (ISC_R_SUCCESS);
2257 	}
2258 	result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2259 				    &mgr->spool);
2260 	if (result != ISC_R_SUCCESS) {
2261 		UNLOCK(&mgr->buffer_lock);
2262 		goto cleanup;
2263 	}
2264 	isc_mempool_setname(mgr->spool, "dispmgr_spool");
2265 	isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2266 	isc_mempool_setfreemax(mgr->spool, maxrequests);
2267 	isc_mempool_associatelock(mgr->spool, &mgr->spool_lock);
2268 	isc_mempool_setfillcount(mgr->spool, 32);
2269 
2270 	result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2271 	if (result != ISC_R_SUCCESS)
2272 		goto cleanup;
2273 
2274 	mgr->buffersize = buffersize;
2275 	mgr->maxbuffers = maxbuffers;
2276 	UNLOCK(&mgr->buffer_lock);
2277 	return (ISC_R_SUCCESS);
2278 
2279  cleanup:
2280 	isc_mempool_destroy(&mgr->bpool);
2281 	if (mgr->spool != NULL)
2282 		isc_mempool_destroy(&mgr->spool);
2283 	UNLOCK(&mgr->buffer_lock);
2284 	return (result);
2285 }
2286 
2287 void
dns_dispatchmgr_destroy(dns_dispatchmgr_t ** mgrp)2288 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2289 	dns_dispatchmgr_t *mgr;
2290 	isc_boolean_t killit;
2291 
2292 	REQUIRE(mgrp != NULL);
2293 	REQUIRE(VALID_DISPATCHMGR(*mgrp));
2294 
2295 	mgr = *mgrp;
2296 	*mgrp = NULL;
2297 
2298 	LOCK(&mgr->lock);
2299 	mgr->state |= MGR_SHUTTINGDOWN;
2300 
2301 	killit = destroy_mgr_ok(mgr);
2302 	UNLOCK(&mgr->lock);
2303 
2304 	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2305 
2306 	if (killit)
2307 		destroy_mgr(&mgr);
2308 }
2309 
2310 void
dns_dispatchmgr_setstats(dns_dispatchmgr_t * mgr,isc_stats_t * stats)2311 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2312 	REQUIRE(VALID_DISPATCHMGR(mgr));
2313 	REQUIRE(ISC_LIST_EMPTY(mgr->list));
2314 	REQUIRE(mgr->stats == NULL);
2315 
2316 	isc_stats_attach(stats, &mgr->stats);
2317 }
2318 
/*
 * Comparison callback in the form expected by bsearch(): 'key' and 'ent'
 * each point to an in_port_t.  Returns -1, 0 or 1 when the key is less
 * than, equal to, or greater than the entry, respectively.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((wanted > entry) - (wanted < entry));
}
2331 
2332 static isc_boolean_t
portavailable(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_sockaddr_t * sockaddrp)2333 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2334 	      isc_sockaddr_t *sockaddrp)
2335 {
2336 	isc_sockaddr_t sockaddr;
2337 	isc_result_t result;
2338 	in_port_t *ports, port;
2339 	unsigned int nports;
2340 	isc_boolean_t available = ISC_FALSE;
2341 
2342 	REQUIRE(sock != NULL || sockaddrp != NULL);
2343 
2344 	PORTBUFLOCK(mgr);
2345 	if (sock != NULL) {
2346 		sockaddrp = &sockaddr;
2347 		result = isc_socket_getsockname(sock, sockaddrp);
2348 		if (result != ISC_R_SUCCESS)
2349 			goto unlock;
2350 	}
2351 
2352 	if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2353 		ports = mgr->v4ports;
2354 		nports = mgr->nv4ports;
2355 	} else {
2356 		ports = mgr->v6ports;
2357 		nports = mgr->nv6ports;
2358 	}
2359 	if (ports == NULL)
2360 		goto unlock;
2361 
2362 	port = isc_sockaddr_getport(sockaddrp);
2363 	if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2364 		available = ISC_TRUE;
2365 
2366 unlock:
2367 	PORTBUFUNLOCK(mgr);
2368 	return (available);
2369 }
2370 
2371 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2372 
2373 static isc_boolean_t
local_addr_match(dns_dispatch_t * disp,isc_sockaddr_t * addr)2374 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2375 	isc_sockaddr_t sockaddr;
2376 	isc_result_t result;
2377 
2378 	REQUIRE(disp->socket != NULL);
2379 
2380 	if (addr == NULL)
2381 		return (ISC_TRUE);
2382 
2383 	/*
2384 	 * Don't match wildcard ports unless the port is available in the
2385 	 * current configuration.
2386 	 */
2387 	if (isc_sockaddr_getport(addr) == 0 &&
2388 	    isc_sockaddr_getport(&disp->local) == 0 &&
2389 	    !portavailable(disp->mgr, disp->socket, NULL)) {
2390 		return (ISC_FALSE);
2391 	}
2392 
2393 	/*
2394 	 * Check if we match the binding <address,port>.
2395 	 * Wildcard ports match/fail here.
2396 	 */
2397 	if (isc_sockaddr_equal(&disp->local, addr))
2398 		return (ISC_TRUE);
2399 	if (isc_sockaddr_getport(addr) == 0)
2400 		return (ISC_FALSE);
2401 
2402 	/*
2403 	 * Check if we match a bound wildcard port <address,port>.
2404 	 */
2405 	if (!isc_sockaddr_eqaddr(&disp->local, addr))
2406 		return (ISC_FALSE);
2407 	result = isc_socket_getsockname(disp->socket, &sockaddr);
2408 	if (result != ISC_R_SUCCESS)
2409 		return (ISC_FALSE);
2410 
2411 	return (isc_sockaddr_equal(&sockaddr, addr));
2412 }
2413 
2414 /*
2415  * Requires mgr be locked.
2416  *
2417  * No dispatcher can be locked by this thread when calling this function.
2418  *
2419  *
2420  * NOTE:
2421  *	If a matching dispatcher is found, it is locked after this function
2422  *	returns, and must be unlocked by the caller.
2423  */
2424 static isc_result_t
dispatch_find(dns_dispatchmgr_t * mgr,isc_sockaddr_t * local,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2425 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2426 	      unsigned int attributes, unsigned int mask,
2427 	      dns_dispatch_t **dispp)
2428 {
2429 	dns_dispatch_t *disp;
2430 	isc_result_t result;
2431 
2432 	/*
2433 	 * Make certain that we will not match a private or exclusive dispatch.
2434 	 */
2435 	attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2436 	mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2437 
2438 	disp = ISC_LIST_HEAD(mgr->list);
2439 	while (disp != NULL) {
2440 		LOCK(&disp->lock);
2441 		if ((disp->shutting_down == 0)
2442 		    && ATTRMATCH(disp->attributes, attributes, mask)
2443 		    && local_addr_match(disp, local))
2444 			break;
2445 		UNLOCK(&disp->lock);
2446 		disp = ISC_LIST_NEXT(disp, link);
2447 	}
2448 
2449 	if (disp == NULL) {
2450 		result = ISC_R_NOTFOUND;
2451 		goto out;
2452 	}
2453 
2454 	*dispp = disp;
2455 	result = ISC_R_SUCCESS;
2456  out:
2457 
2458 	return (result);
2459 }
2460 
2461 static isc_result_t
qid_allocate(dns_dispatchmgr_t * mgr,unsigned int buckets,unsigned int increment,dns_qid_t ** qidp,isc_boolean_t needsocktable)2462 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2463 	     unsigned int increment, dns_qid_t **qidp,
2464 	     isc_boolean_t needsocktable)
2465 {
2466 	dns_qid_t *qid;
2467 	unsigned int i;
2468 	isc_result_t result;
2469 
2470 	REQUIRE(VALID_DISPATCHMGR(mgr));
2471 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2472 	REQUIRE(increment > buckets);
2473 	REQUIRE(qidp != NULL && *qidp == NULL);
2474 
2475 	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2476 	if (qid == NULL)
2477 		return (ISC_R_NOMEMORY);
2478 
2479 	qid->qid_table = isc_mem_get(mgr->mctx,
2480 				     buckets * sizeof(dns_displist_t));
2481 	if (qid->qid_table == NULL) {
2482 		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2483 		return (ISC_R_NOMEMORY);
2484 	}
2485 
2486 	qid->sock_table = NULL;
2487 	if (needsocktable) {
2488 		qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2489 					      sizeof(dispsocketlist_t));
2490 		if (qid->sock_table == NULL) {
2491 			isc_mem_put(mgr->mctx, qid->qid_table,
2492 				    buckets * sizeof(dns_displist_t));
2493 			isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2494 			return (ISC_R_NOMEMORY);
2495 		}
2496 	}
2497 
2498 	result = isc_mutex_init(&qid->lock);
2499 	if (result != ISC_R_SUCCESS) {
2500 		if (qid->sock_table != NULL) {
2501 			isc_mem_put(mgr->mctx, qid->sock_table,
2502 				    buckets * sizeof(dispsocketlist_t));
2503 		}
2504 		isc_mem_put(mgr->mctx, qid->qid_table,
2505 			    buckets * sizeof(dns_displist_t));
2506 		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2507 		return (result);
2508 	}
2509 
2510 	for (i = 0; i < buckets; i++) {
2511 		ISC_LIST_INIT(qid->qid_table[i]);
2512 		if (qid->sock_table != NULL)
2513 			ISC_LIST_INIT(qid->sock_table[i]);
2514 	}
2515 
2516 	qid->qid_nbuckets = buckets;
2517 	qid->qid_increment = increment;
2518 	qid->magic = QID_MAGIC;
2519 	*qidp = qid;
2520 	return (ISC_R_SUCCESS);
2521 }
2522 
2523 static void
qid_destroy(isc_mem_t * mctx,dns_qid_t ** qidp)2524 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2525 	dns_qid_t *qid;
2526 
2527 	REQUIRE(qidp != NULL);
2528 	qid = *qidp;
2529 
2530 	REQUIRE(VALID_QID(qid));
2531 
2532 	*qidp = NULL;
2533 	qid->magic = 0;
2534 	isc_mem_put(mctx, qid->qid_table,
2535 		    qid->qid_nbuckets * sizeof(dns_displist_t));
2536 	if (qid->sock_table != NULL) {
2537 		isc_mem_put(mctx, qid->sock_table,
2538 			    qid->qid_nbuckets * sizeof(dispsocketlist_t));
2539 	}
2540 	DESTROYLOCK(&qid->lock);
2541 	isc_mem_put(mctx, qid, sizeof(*qid));
2542 }
2543 
2544 /*
2545  * Allocate and set important limits.
2546  */
2547 static isc_result_t
dispatch_allocate(dns_dispatchmgr_t * mgr,unsigned int maxrequests,dns_dispatch_t ** dispp)2548 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2549 		  dns_dispatch_t **dispp)
2550 {
2551 	dns_dispatch_t *disp;
2552 	isc_result_t result;
2553 
2554 	REQUIRE(VALID_DISPATCHMGR(mgr));
2555 	REQUIRE(dispp != NULL && *dispp == NULL);
2556 
2557 	/*
2558 	 * Set up the dispatcher, mostly.  Don't bother setting some of
2559 	 * the options that are controlled by tcp vs. udp, etc.
2560 	 */
2561 
2562 	disp = isc_mempool_get(mgr->dpool);
2563 	if (disp == NULL)
2564 		return (ISC_R_NOMEMORY);
2565 
2566 	disp->magic = 0;
2567 	disp->mgr = mgr;
2568 	disp->maxrequests = maxrequests;
2569 	disp->attributes = 0;
2570 	ISC_LINK_INIT(disp, link);
2571 	disp->refcount = 1;
2572 	disp->recv_pending = 0;
2573 	memset(&disp->local, 0, sizeof(disp->local));
2574 	disp->localport = 0;
2575 	disp->shutting_down = 0;
2576 	disp->shutdown_out = 0;
2577 	disp->connected = 0;
2578 	disp->tcpmsg_valid = 0;
2579 	disp->shutdown_why = ISC_R_UNEXPECTED;
2580 	disp->requests = 0;
2581 	disp->tcpbuffers = 0;
2582 	disp->qid = NULL;
2583 	ISC_LIST_INIT(disp->activesockets);
2584 	ISC_LIST_INIT(disp->inactivesockets);
2585 	disp->nsockets = 0;
2586 	dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2587 	disp->port_table = NULL;
2588 	disp->portpool = NULL;
2589 	disp->dscp = -1;
2590 
2591 	result = isc_mutex_init(&disp->lock);
2592 	if (result != ISC_R_SUCCESS)
2593 		goto deallocate;
2594 
2595 	disp->failsafe_ev = allocate_devent(disp);
2596 	if (disp->failsafe_ev == NULL) {
2597 		result = ISC_R_NOMEMORY;
2598 		goto kill_lock;
2599 	}
2600 
2601 	disp->magic = DISPATCH_MAGIC;
2602 
2603 	*dispp = disp;
2604 	return (ISC_R_SUCCESS);
2605 
2606 	/*
2607 	 * error returns
2608 	 */
2609  kill_lock:
2610 	DESTROYLOCK(&disp->lock);
2611  deallocate:
2612 	isc_mempool_put(mgr->dpool, disp);
2613 
2614 	return (result);
2615 }
2616 
2617 
2618 /*
2619  * MUST be unlocked, and not used by anything.
2620  */
2621 static void
dispatch_free(dns_dispatch_t ** dispp)2622 dispatch_free(dns_dispatch_t **dispp) {
2623 	dns_dispatch_t *disp;
2624 	dns_dispatchmgr_t *mgr;
2625 	int i;
2626 
2627 	REQUIRE(VALID_DISPATCH(*dispp));
2628 	disp = *dispp;
2629 	*dispp = NULL;
2630 
2631 	mgr = disp->mgr;
2632 	REQUIRE(VALID_DISPATCHMGR(mgr));
2633 
2634 	if (disp->tcpmsg_valid) {
2635 		dns_tcpmsg_invalidate(&disp->tcpmsg);
2636 		disp->tcpmsg_valid = 0;
2637 	}
2638 
2639 	INSIST(disp->tcpbuffers == 0);
2640 	INSIST(disp->requests == 0);
2641 	INSIST(disp->recv_pending == 0);
2642 	INSIST(ISC_LIST_EMPTY(disp->activesockets));
2643 	INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2644 
2645 	isc_mempool_put(mgr->depool, disp->failsafe_ev);
2646 	disp->failsafe_ev = NULL;
2647 
2648 	if (disp->qid != NULL)
2649 		qid_destroy(mgr->mctx, &disp->qid);
2650 
2651 	if (disp->port_table != NULL) {
2652 		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2653 			INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2654 		isc_mem_put(mgr->mctx, disp->port_table,
2655 			    sizeof(disp->port_table[0]) *
2656 			    DNS_DISPATCH_PORTTABLESIZE);
2657 	}
2658 
2659 	if (disp->portpool != NULL)
2660 		isc_mempool_destroy(&disp->portpool);
2661 
2662 	disp->mgr = NULL;
2663 	DESTROYLOCK(&disp->lock);
2664 	disp->magic = 0;
2665 	isc_mempool_put(mgr->dpool, disp);
2666 }
2667 
2668 isc_result_t
dns_dispatch_createtcp(dns_dispatchmgr_t * mgr,isc_socket_t * sock,isc_taskmgr_t * taskmgr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,dns_dispatch_t ** dispp)2669 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2670 		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
2671 		       unsigned int maxbuffers, unsigned int maxrequests,
2672 		       unsigned int buckets, unsigned int increment,
2673 		       unsigned int attributes, dns_dispatch_t **dispp)
2674 {
2675 	isc_result_t result;
2676 	dns_dispatch_t *disp;
2677 
2678 	UNUSED(maxbuffers);
2679 	UNUSED(buffersize);
2680 
2681 	REQUIRE(VALID_DISPATCHMGR(mgr));
2682 	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2683 	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2684 	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2685 
2686 	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2687 
2688 	LOCK(&mgr->lock);
2689 
2690 	/*
2691 	 * dispatch_allocate() checks mgr for us.
2692 	 * qid_allocate() checks buckets and increment for us.
2693 	 */
2694 	disp = NULL;
2695 	result = dispatch_allocate(mgr, maxrequests, &disp);
2696 	if (result != ISC_R_SUCCESS) {
2697 		UNLOCK(&mgr->lock);
2698 		return (result);
2699 	}
2700 
2701 	result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2702 	if (result != ISC_R_SUCCESS)
2703 		goto deallocate_dispatch;
2704 
2705 	disp->socktype = isc_sockettype_tcp;
2706 	disp->socket = NULL;
2707 	isc_socket_attach(sock, &disp->socket);
2708 
2709 	disp->sepool = NULL;
2710 
2711 	disp->ntasks = 1;
2712 	disp->task[0] = NULL;
2713 	result = isc_task_create(taskmgr, 0, &disp->task[0]);
2714 	if (result != ISC_R_SUCCESS)
2715 		goto kill_socket;
2716 
2717 	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2718 					    DNS_EVENT_DISPATCHCONTROL,
2719 					    destroy_disp, disp,
2720 					    sizeof(isc_event_t));
2721 	if (disp->ctlevent == NULL) {
2722 		result = ISC_R_NOMEMORY;
2723 		goto kill_task;
2724 	}
2725 
2726 	isc_task_setname(disp->task[0], "tcpdispatch", disp);
2727 
2728 	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2729 	disp->tcpmsg_valid = 1;
2730 
2731 	disp->attributes = attributes;
2732 
2733 	/*
2734 	 * Append it to the dispatcher list.
2735 	 */
2736 	ISC_LIST_APPEND(mgr->list, disp, link);
2737 	UNLOCK(&mgr->lock);
2738 
2739 	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2740 	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2741 
2742 	*dispp = disp;
2743 
2744 	return (ISC_R_SUCCESS);
2745 
2746 	/*
2747 	 * Error returns.
2748 	 */
2749  kill_task:
2750 	isc_task_detach(&disp->task[0]);
2751  kill_socket:
2752 	isc_socket_detach(&disp->socket);
2753  deallocate_dispatch:
2754 	dispatch_free(&disp);
2755 
2756 	UNLOCK(&mgr->lock);
2757 
2758 	return (result);
2759 }
2760 
2761 isc_result_t
dns_dispatch_getudp_dup(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp,dns_dispatch_t * dup_dispatch)2762 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2763 		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2764 		    unsigned int buffersize,
2765 		    unsigned int maxbuffers, unsigned int maxrequests,
2766 		    unsigned int buckets, unsigned int increment,
2767 		    unsigned int attributes, unsigned int mask,
2768 		    dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch)
2769 {
2770 	isc_result_t result;
2771 	dns_dispatch_t *disp = NULL;
2772 
2773 	REQUIRE(VALID_DISPATCHMGR(mgr));
2774 	REQUIRE(sockmgr != NULL);
2775 	REQUIRE(localaddr != NULL);
2776 	REQUIRE(taskmgr != NULL);
2777 	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2778 	REQUIRE(maxbuffers > 0);
2779 	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2780 	REQUIRE(increment > buckets);
2781 	REQUIRE(dispp != NULL && *dispp == NULL);
2782 	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2783 
2784 	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2785 					maxrequests, buckets, increment);
2786 	if (result != ISC_R_SUCCESS)
2787 		return (result);
2788 
2789 	LOCK(&mgr->lock);
2790 
2791 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2792 		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2793 		goto createudp;
2794 	}
2795 
2796 	/*
2797 	 * See if we have a dispatcher that matches.
2798 	 */
2799 	if (dup_dispatch == NULL) {
2800 		result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2801 		if (result == ISC_R_SUCCESS) {
2802 			disp->refcount++;
2803 
2804 			if (disp->maxrequests < maxrequests)
2805 				disp->maxrequests = maxrequests;
2806 
2807 			if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0
2808 			    && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2809 			{
2810 				disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2811 				if (disp->recv_pending != 0)
2812 					isc_socket_cancel(disp->socket,
2813 							  disp->task[0],
2814 							  ISC_SOCKCANCEL_RECV);
2815 			}
2816 
2817 			UNLOCK(&disp->lock);
2818 			UNLOCK(&mgr->lock);
2819 
2820 			*dispp = disp;
2821 
2822 			return (ISC_R_SUCCESS);
2823 		}
2824 	}
2825 
2826  createudp:
2827 	/*
2828 	 * Nope, create one.
2829 	 */
2830 	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2831 				    maxrequests, attributes, &disp,
2832 				    dup_dispatch == NULL
2833 					    ? NULL
2834 					    : dup_dispatch->socket);
2835 
2836 	if (result != ISC_R_SUCCESS) {
2837 		UNLOCK(&mgr->lock);
2838 		return (result);
2839 	}
2840 
2841 	UNLOCK(&mgr->lock);
2842 	*dispp = disp;
2843 
2844 	return (ISC_R_SUCCESS);
2845 }
2846 
2847 isc_result_t
dns_dispatch_getudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int buffersize,unsigned int maxbuffers,unsigned int maxrequests,unsigned int buckets,unsigned int increment,unsigned int attributes,unsigned int mask,dns_dispatch_t ** dispp)2848 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2849 		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2850 		    unsigned int buffersize,
2851 		    unsigned int maxbuffers, unsigned int maxrequests,
2852 		    unsigned int buckets, unsigned int increment,
2853 		    unsigned int attributes, unsigned int mask,
2854 		    dns_dispatch_t **dispp)
2855 {
2856 	return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr,
2857 					buffersize, maxbuffers, maxrequests,
2858 					buckets, increment, attributes,
2859 					mask, dispp, NULL));
2860 }
2861 
2862 /*
2863  * mgr should be locked.
2864  */
2865 
2866 #ifndef DNS_DISPATCH_HELD
2867 #define DNS_DISPATCH_HELD 20U
2868 #endif
2869 
2870 static isc_result_t
get_udpsocket(dns_dispatchmgr_t * mgr,dns_dispatch_t * disp,isc_socketmgr_t * sockmgr,isc_sockaddr_t * localaddr,isc_socket_t ** sockp,isc_socket_t * dup_socket)2871 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2872 	      isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2873 	      isc_socket_t **sockp, isc_socket_t *dup_socket)
2874 {
2875 	unsigned int i, j;
2876 	isc_socket_t *held[DNS_DISPATCH_HELD];
2877 	isc_sockaddr_t localaddr_bound;
2878 	isc_socket_t *sock = NULL;
2879 	isc_result_t result = ISC_R_SUCCESS;
2880 	isc_boolean_t anyport;
2881 
2882 	INSIST(sockp != NULL && *sockp == NULL);
2883 
2884 	localaddr_bound = *localaddr;
2885 	anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2886 
2887 	if (anyport) {
2888 		unsigned int nports;
2889 		in_port_t *ports;
2890 
2891 		/*
2892 		 * If no port is specified, we first try to pick up a random
2893 		 * port by ourselves.
2894 		 */
2895 		if (isc_sockaddr_pf(localaddr) == AF_INET) {
2896 			nports = disp->mgr->nv4ports;
2897 			ports = disp->mgr->v4ports;
2898 		} else {
2899 			nports = disp->mgr->nv6ports;
2900 			ports = disp->mgr->v6ports;
2901 		}
2902 		if (nports == 0)
2903 			return (ISC_R_ADDRNOTAVAIL);
2904 
2905 		for (i = 0; i < 1024; i++) {
2906 			in_port_t prt;
2907 
2908 			prt = ports[dispatch_uniformrandom(
2909 					DISP_ARC4CTX(disp),
2910 					nports)];
2911 			isc_sockaddr_setport(&localaddr_bound, prt);
2912 			result = open_socket(sockmgr, &localaddr_bound,
2913 					     0, &sock, NULL);
2914 			/*
2915 			 * Continue if the port choosen is already in use
2916 			 * or the OS has reserved it.
2917 			 */
2918 			if (result == ISC_R_NOPERM ||
2919 			    result == ISC_R_ADDRINUSE)
2920 				continue;
2921 			disp->localport = prt;
2922 			*sockp = sock;
2923 			return (result);
2924 		}
2925 
2926 		/*
2927 		 * If this fails 1024 times, we then ask the kernel for
2928 		 * choosing one.
2929 		 */
2930 	} else {
2931 		/* Allow to reuse address for non-random ports. */
2932 		result = open_socket(sockmgr, localaddr,
2933 				     ISC_SOCKET_REUSEADDRESS, &sock,
2934 				     dup_socket);
2935 
2936 		if (result == ISC_R_SUCCESS)
2937 			*sockp = sock;
2938 
2939 		return (result);
2940 	}
2941 
2942 	memset(held, 0, sizeof(held));
2943 	i = 0;
2944 
2945 	for (j = 0; j < 0xffffU; j++) {
2946 		result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
2947 		if (result != ISC_R_SUCCESS)
2948 			goto end;
2949 		else if (portavailable(mgr, sock, NULL))
2950 			break;
2951 		if (held[i] != NULL)
2952 			isc_socket_detach(&held[i]);
2953 		held[i++] = sock;
2954 		sock = NULL;
2955 		if (i == DNS_DISPATCH_HELD)
2956 			i = 0;
2957 	}
2958 	if (j == 0xffffU) {
2959 		mgr_log(mgr, ISC_LOG_ERROR,
2960 			"avoid-v%s-udp-ports: unable to allocate "
2961 			"an available port",
2962 			isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2963 		result = ISC_R_FAILURE;
2964 		goto end;
2965 	}
2966 	*sockp = sock;
2967 
2968 end:
2969 	for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2970 		if (held[i] != NULL)
2971 			isc_socket_detach(&held[i]);
2972 	}
2973 
2974 	return (result);
2975 }
2976 
2977 static isc_result_t
dispatch_createudp(dns_dispatchmgr_t * mgr,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,isc_sockaddr_t * localaddr,unsigned int maxrequests,unsigned int attributes,dns_dispatch_t ** dispp,isc_socket_t * dup_socket)2978 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2979 		   isc_taskmgr_t *taskmgr,
2980 		   isc_sockaddr_t *localaddr,
2981 		   unsigned int maxrequests,
2982 		   unsigned int attributes,
2983 		   dns_dispatch_t **dispp,
2984 		   isc_socket_t *dup_socket)
2985 {
2986 	isc_result_t result;
2987 	dns_dispatch_t *disp;
2988 	isc_socket_t *sock = NULL;
2989 	int i = 0;
2990 
2991 	/*
2992 	 * dispatch_allocate() checks mgr for us.
2993 	 */
2994 	disp = NULL;
2995 	result = dispatch_allocate(mgr, maxrequests, &disp);
2996 	if (result != ISC_R_SUCCESS)
2997 		return (result);
2998 
2999 	disp->socktype = isc_sockettype_udp;
3000 
3001 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
3002 		result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock,
3003 				       dup_socket);
3004 		if (result != ISC_R_SUCCESS)
3005 			goto deallocate_dispatch;
3006 
3007 		if (isc_log_wouldlog(dns_lctx, 90)) {
3008 			char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3009 
3010 			isc_sockaddr_format(localaddr, addrbuf,
3011 					    ISC_SOCKADDR_FORMATSIZE);
3012 			mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created"
3013 				" UDP dispatch for %s with socket fd %d\n",
3014 				addrbuf, isc_socket_getfd(sock));
3015 		}
3016 
3017 	} else {
3018 		isc_sockaddr_t sa_any;
3019 
3020 		/*
3021 		 * For dispatches using exclusive sockets with a specific
3022 		 * source address, we only check if the specified address is
3023 		 * available on the system.  Query sockets will be created later
3024 		 * on demand.
3025 		 */
3026 		isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
3027 		if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
3028 			result = open_socket(sockmgr, localaddr, 0, &sock, NULL);
3029 			if (sock != NULL)
3030 				isc_socket_detach(&sock);
3031 			if (result != ISC_R_SUCCESS)
3032 				goto deallocate_dispatch;
3033 		}
3034 
3035 		disp->port_table = isc_mem_get(mgr->mctx,
3036 					       sizeof(disp->port_table[0]) *
3037 					       DNS_DISPATCH_PORTTABLESIZE);
3038 		if (disp->port_table == NULL)
3039 			goto deallocate_dispatch;
3040 		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
3041 			ISC_LIST_INIT(disp->port_table[i]);
3042 
3043 		result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
3044 					    &disp->portpool);
3045 		if (result != ISC_R_SUCCESS)
3046 			goto deallocate_dispatch;
3047 		isc_mempool_setname(disp->portpool, "disp_portpool");
3048 		isc_mempool_setfreemax(disp->portpool, 128);
3049 	}
3050 	disp->socket = sock;
3051 	disp->local = *localaddr;
3052 
3053 	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3054 		disp->ntasks = MAX_INTERNAL_TASKS;
3055 	else
3056 		disp->ntasks = 1;
3057 	for (i = 0; i < disp->ntasks; i++) {
3058 		disp->task[i] = NULL;
3059 		result = isc_task_create(taskmgr, 0, &disp->task[i]);
3060 		if (result != ISC_R_SUCCESS) {
3061 			while (--i >= 0) {
3062 				isc_task_shutdown(disp->task[i]);
3063 				isc_task_detach(&disp->task[i]);
3064 			}
3065 			goto kill_socket;
3066 		}
3067 		isc_task_setname(disp->task[i], "udpdispatch", disp);
3068 	}
3069 
3070 	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
3071 					    DNS_EVENT_DISPATCHCONTROL,
3072 					    destroy_disp, disp,
3073 					    sizeof(isc_event_t));
3074 	if (disp->ctlevent == NULL) {
3075 		result = ISC_R_NOMEMORY;
3076 		goto kill_task;
3077 	}
3078 
3079 	disp->sepool = NULL;
3080 	if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t),
3081 			       &disp->sepool) != ISC_R_SUCCESS)
3082 	{
3083 		result = ISC_R_NOMEMORY;
3084 		goto kill_ctlevent;
3085 	}
3086 
3087 	result = isc_mutex_init(&disp->sepool_lock);
3088 	if (result != ISC_R_SUCCESS)
3089 		goto kill_sepool;
3090 
3091 	isc_mempool_setname(disp->sepool, "disp_sepool");
3092 	isc_mempool_setmaxalloc(disp->sepool, 32768);
3093 	isc_mempool_setfreemax(disp->sepool, 32768);
3094 	isc_mempool_associatelock(disp->sepool, &disp->sepool_lock);
3095 	isc_mempool_setfillcount(disp->sepool, 16);
3096 
3097 	attributes &= ~DNS_DISPATCHATTR_TCP;
3098 	attributes |= DNS_DISPATCHATTR_UDP;
3099 	disp->attributes = attributes;
3100 
3101 	/*
3102 	 * Append it to the dispatcher list.
3103 	 */
3104 	ISC_LIST_APPEND(mgr->list, disp, link);
3105 
3106 	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
3107 	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
3108 	if (disp->socket != NULL)
3109 		dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
3110 
3111 	*dispp = disp;
3112 
3113 	return (result);
3114 
3115 	/*
3116 	 * Error returns.
3117 	 */
3118  kill_sepool:
3119 	isc_mempool_destroy(&disp->sepool);
3120  kill_ctlevent:
3121 	isc_event_free(&disp->ctlevent);
3122  kill_task:
3123 	for (i = 0; i < disp->ntasks; i++)
3124 		isc_task_detach(&disp->task[i]);
3125  kill_socket:
3126 	if (disp->socket != NULL)
3127 		isc_socket_detach(&disp->socket);
3128  deallocate_dispatch:
3129 	dispatch_free(&disp);
3130 
3131 	return (result);
3132 }
3133 
3134 void
dns_dispatch_attach(dns_dispatch_t * disp,dns_dispatch_t ** dispp)3135 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
3136 	REQUIRE(VALID_DISPATCH(disp));
3137 	REQUIRE(dispp != NULL && *dispp == NULL);
3138 
3139 	LOCK(&disp->lock);
3140 	disp->refcount++;
3141 	UNLOCK(&disp->lock);
3142 
3143 	*dispp = disp;
3144 }
3145 
3146 /*
3147  * It is important to lock the manager while we are deleting the dispatch,
3148  * since dns_dispatch_getudp will call dispatch_find, which returns to
3149  * the caller a dispatch but does not attach to it until later.  _getudp
3150  * locks the manager, however, so locking it here will keep us from attaching
3151  * to a dispatcher that is in the process of going away.
3152  */
3153 void
dns_dispatch_detach(dns_dispatch_t ** dispp)3154 dns_dispatch_detach(dns_dispatch_t **dispp) {
3155 	dns_dispatch_t *disp;
3156 	dispsocket_t *dispsock;
3157 	isc_boolean_t killit;
3158 
3159 	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
3160 
3161 	disp = *dispp;
3162 	*dispp = NULL;
3163 
3164 	LOCK(&disp->lock);
3165 
3166 	INSIST(disp->refcount > 0);
3167 	disp->refcount--;
3168 	if (disp->refcount == 0) {
3169 		if (disp->recv_pending > 0)
3170 			isc_socket_cancel(disp->socket, disp->task[0],
3171 					  ISC_SOCKCANCEL_RECV);
3172 		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3173 		     dispsock != NULL;
3174 		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3175 			isc_socket_cancel(dispsock->socket, dispsock->task,
3176 					  ISC_SOCKCANCEL_RECV);
3177 		}
3178 		disp->shutting_down = 1;
3179 	}
3180 
3181 	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3182 
3183 	killit = destroy_disp_ok(disp);
3184 	UNLOCK(&disp->lock);
3185 	if (killit)
3186 		isc_task_send(disp->task[0], &disp->ctlevent);
3187 }
3188 
3189 isc_result_t
dns_dispatch_addresponse2(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp,isc_socketmgr_t * sockmgr)3190 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3191 			  isc_task_t *task, isc_taskaction_t action, void *arg,
3192 			  dns_messageid_t *idp, dns_dispentry_t **resp,
3193 			  isc_socketmgr_t *sockmgr)
3194 {
3195 	return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3196 					  idp, resp, sockmgr));
3197 }
3198 
3199 isc_result_t
dns_dispatch_addresponse3(dns_dispatch_t * disp,unsigned int options,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp,isc_socketmgr_t * sockmgr)3200 dns_dispatch_addresponse3(dns_dispatch_t *disp, unsigned int options,
3201 			  isc_sockaddr_t *dest, isc_task_t *task,
3202 			  isc_taskaction_t action, void *arg,
3203 			  dns_messageid_t *idp, dns_dispentry_t **resp,
3204 			  isc_socketmgr_t *sockmgr)
3205 {
3206 	dns_dispentry_t *res;
3207 	unsigned int bucket;
3208 	in_port_t localport = 0;
3209 	dns_messageid_t id;
3210 	int i;
3211 	isc_boolean_t ok;
3212 	dns_qid_t *qid;
3213 	dispsocket_t *dispsocket = NULL;
3214 	isc_result_t result;
3215 
3216 	REQUIRE(VALID_DISPATCH(disp));
3217 	REQUIRE(task != NULL);
3218 	REQUIRE(dest != NULL);
3219 	REQUIRE(resp != NULL && *resp == NULL);
3220 	REQUIRE(idp != NULL);
3221 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3222 		REQUIRE(sockmgr != NULL);
3223 
3224 	LOCK(&disp->lock);
3225 
3226 	if (disp->shutting_down == 1) {
3227 		UNLOCK(&disp->lock);
3228 		return (ISC_R_SHUTTINGDOWN);
3229 	}
3230 
3231 	if (disp->requests >= disp->maxrequests) {
3232 		UNLOCK(&disp->lock);
3233 		return (ISC_R_QUOTA);
3234 	}
3235 
3236 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3237 	    disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3238 		dispsocket_t *oldestsocket;
3239 		dns_dispentry_t *oldestresp;
3240 		dns_dispatchevent_t *rev;
3241 
3242 		/*
3243 		 * Kill oldest outstanding query if the number of sockets
3244 		 * exceeds the quota to keep the room for new queries.
3245 		 */
3246 		oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3247 		oldestresp = oldestsocket->resp;
3248 		if (oldestresp != NULL && !oldestresp->item_out) {
3249 			rev = allocate_devent(oldestresp->disp);
3250 			if (rev != NULL) {
3251 				rev->buffer.base = NULL;
3252 				rev->result = ISC_R_CANCELED;
3253 				rev->id = oldestresp->id;
3254 				ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3255 					       NULL, DNS_EVENT_DISPATCH,
3256 					       oldestresp->action,
3257 					       oldestresp->arg, oldestresp,
3258 					       NULL, NULL);
3259 				oldestresp->item_out = ISC_TRUE;
3260 				isc_task_send(oldestresp->task,
3261 					      ISC_EVENT_PTR(&rev));
3262 				inc_stats(disp->mgr,
3263 					  dns_resstatscounter_dispabort);
3264 			}
3265 		}
3266 
3267 		/*
3268 		 * Move this entry to the tail so that it won't (easily) be
3269 		 * examined before actually being canceled.
3270 		 */
3271 		ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3272 		ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3273 	}
3274 
3275 	qid = DNS_QID(disp);
3276 
3277 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3278 		/*
3279 		 * Get a separate UDP socket with a random port number.
3280 		 */
3281 		result = get_dispsocket(disp, dest, sockmgr, &dispsocket,
3282 					&localport);
3283 		if (result != ISC_R_SUCCESS) {
3284 			UNLOCK(&disp->lock);
3285 			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3286 			return (result);
3287 		}
3288 	} else {
3289 		localport = disp->localport;
3290 	}
3291 
3292 	/*
3293 	 * Try somewhat hard to find an unique ID unless FIXEDID is set
3294 	 * in which case we use the id passed in via *idp.
3295 	 */
3296 	LOCK(&qid->lock);
3297 	if ((options & DNS_DISPATCHOPT_FIXEDID) != 0)
3298 		id = *idp;
3299 	else
3300 		id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3301 	ok = ISC_FALSE;
3302 	i = 0;
3303 	do {
3304 		bucket = dns_hash(qid, dest, id, localport);
3305 		if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3306 			ok = ISC_TRUE;
3307 			break;
3308 		}
3309 		if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0)
3310 			break;
3311 		id += qid->qid_increment;
3312 		id &= 0x0000ffff;
3313 	} while (i++ < 64);
3314 	UNLOCK(&qid->lock);
3315 
3316 	if (!ok) {
3317 		UNLOCK(&disp->lock);
3318 		return (ISC_R_NOMORE);
3319 	}
3320 
3321 	res = isc_mempool_get(disp->mgr->rpool);
3322 	if (res == NULL) {
3323 		if (dispsocket != NULL)
3324 			destroy_dispsocket(disp, &dispsocket);
3325 		UNLOCK(&disp->lock);
3326 		return (ISC_R_NOMEMORY);
3327 	}
3328 
3329 	disp->refcount++;
3330 	disp->requests++;
3331 	res->task = NULL;
3332 	isc_task_attach(task, &res->task);
3333 	res->disp = disp;
3334 	res->id = id;
3335 	res->port = localport;
3336 	res->bucket = bucket;
3337 	res->host = *dest;
3338 	res->action = action;
3339 	res->arg = arg;
3340 	res->dispsocket = dispsocket;
3341 	if (dispsocket != NULL)
3342 		dispsocket->resp = res;
3343 	res->item_out = ISC_FALSE;
3344 	ISC_LIST_INIT(res->items);
3345 	ISC_LINK_INIT(res, link);
3346 	res->magic = RESPONSE_MAGIC;
3347 
3348 	LOCK(&qid->lock);
3349 	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3350 	UNLOCK(&qid->lock);
3351 
3352 	inc_stats(disp->mgr, (qid == disp->mgr->qid) ?
3353 			     dns_resstatscounter_disprequdp :
3354 			     dns_resstatscounter_dispreqtcp);
3355 
3356 	request_log(disp, res, LVL(90),
3357 		    "attached to task %p", res->task);
3358 
3359 	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3360 	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3361 		result = startrecv(disp, dispsocket);
3362 		if (result != ISC_R_SUCCESS) {
3363 			LOCK(&qid->lock);
3364 			ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3365 			UNLOCK(&qid->lock);
3366 
3367 			if (dispsocket != NULL)
3368 				destroy_dispsocket(disp, &dispsocket);
3369 
3370 			disp->refcount--;
3371 			disp->requests--;
3372 
3373 			dec_stats(disp->mgr, (qid == disp->mgr->qid) ?
3374 					     dns_resstatscounter_disprequdp :
3375 					     dns_resstatscounter_dispreqtcp);
3376 
3377 			UNLOCK(&disp->lock);
3378 			isc_task_detach(&res->task);
3379 			isc_mempool_put(disp->mgr->rpool, res);
3380 			return (result);
3381 		}
3382 	}
3383 
3384 	if (dispsocket != NULL)
3385 		ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3386 
3387 	UNLOCK(&disp->lock);
3388 
3389 	*idp = id;
3390 	*resp = res;
3391 
3392 	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3393 		INSIST(res->dispsocket != NULL);
3394 
3395 	return (ISC_R_SUCCESS);
3396 }
3397 
3398 isc_result_t
dns_dispatch_addresponse(dns_dispatch_t * disp,isc_sockaddr_t * dest,isc_task_t * task,isc_taskaction_t action,void * arg,dns_messageid_t * idp,dns_dispentry_t ** resp)3399 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3400 			 isc_task_t *task, isc_taskaction_t action, void *arg,
3401 			 dns_messageid_t *idp, dns_dispentry_t **resp)
3402 {
3403 	REQUIRE(VALID_DISPATCH(disp));
3404 	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3405 
3406 	return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg,
3407 					  idp, resp, NULL));
3408 }
3409 
3410 void
dns_dispatch_starttcp(dns_dispatch_t * disp)3411 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3412 
3413 	REQUIRE(VALID_DISPATCH(disp));
3414 
3415 	dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3416 
3417 	LOCK(&disp->lock);
3418 	disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3419 	(void)startrecv(disp, NULL);
3420 	UNLOCK(&disp->lock);
3421 }
3422 
3423 void
dns_dispatch_removeresponse(dns_dispentry_t ** resp,dns_dispatchevent_t ** sockevent)3424 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3425 			    dns_dispatchevent_t **sockevent)
3426 {
3427 	dns_dispatchmgr_t *mgr;
3428 	dns_dispatch_t *disp;
3429 	dns_dispentry_t *res;
3430 	dispsocket_t *dispsock;
3431 	dns_dispatchevent_t *ev;
3432 	unsigned int bucket;
3433 	isc_boolean_t killit;
3434 	unsigned int n;
3435 	isc_eventlist_t events;
3436 	dns_qid_t *qid;
3437 
3438 	REQUIRE(resp != NULL);
3439 	REQUIRE(VALID_RESPONSE(*resp));
3440 
3441 	res = *resp;
3442 	*resp = NULL;
3443 
3444 	disp = res->disp;
3445 	REQUIRE(VALID_DISPATCH(disp));
3446 	mgr = disp->mgr;
3447 	REQUIRE(VALID_DISPATCHMGR(mgr));
3448 
3449 	qid = DNS_QID(disp);
3450 
3451 	if (sockevent != NULL) {
3452 		REQUIRE(*sockevent != NULL);
3453 		ev = *sockevent;
3454 		*sockevent = NULL;
3455 	} else {
3456 		ev = NULL;
3457 	}
3458 
3459 	LOCK(&disp->lock);
3460 
3461 	INSIST(disp->requests > 0);
3462 	disp->requests--;
3463 	dec_stats(disp->mgr, (qid == disp->mgr->qid) ?
3464 			     dns_resstatscounter_disprequdp :
3465 			     dns_resstatscounter_dispreqtcp);
3466 	INSIST(disp->refcount > 0);
3467 	disp->refcount--;
3468 	if (disp->refcount == 0) {
3469 		if (disp->recv_pending > 0)
3470 			isc_socket_cancel(disp->socket, disp->task[0],
3471 					  ISC_SOCKCANCEL_RECV);
3472 		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3473 		     dispsock != NULL;
3474 		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3475 			isc_socket_cancel(dispsock->socket, dispsock->task,
3476 					  ISC_SOCKCANCEL_RECV);
3477 		}
3478 		disp->shutting_down = 1;
3479 	}
3480 
3481 	bucket = res->bucket;
3482 
3483 	LOCK(&qid->lock);
3484 	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3485 	UNLOCK(&qid->lock);
3486 
3487 	if (ev == NULL && res->item_out) {
3488 		/*
3489 		 * We've posted our event, but the caller hasn't gotten it
3490 		 * yet.  Take it back.
3491 		 */
3492 		ISC_LIST_INIT(events);
3493 		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3494 				    NULL, &events);
3495 		/*
3496 		 * We had better have gotten it back.
3497 		 */
3498 		INSIST(n == 1);
3499 		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3500 	}
3501 
3502 	if (ev != NULL) {
3503 		REQUIRE(res->item_out == ISC_TRUE);
3504 		res->item_out = ISC_FALSE;
3505 		if (ev->buffer.base != NULL)
3506 			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3507 		free_devent(disp, ev);
3508 	}
3509 
3510 	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3511 	isc_task_detach(&res->task);
3512 
3513 	if (res->dispsocket != NULL) {
3514 		isc_socket_cancel(res->dispsocket->socket,
3515 				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3516 		res->dispsocket->resp = NULL;
3517 	}
3518 
3519 	/*
3520 	 * Free any buffered requests as well
3521 	 */
3522 	ev = ISC_LIST_HEAD(res->items);
3523 	while (ev != NULL) {
3524 		ISC_LIST_UNLINK(res->items, ev, ev_link);
3525 		if (ev->buffer.base != NULL)
3526 			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3527 		free_devent(disp, ev);
3528 		ev = ISC_LIST_HEAD(res->items);
3529 	}
3530 	res->magic = 0;
3531 	isc_mempool_put(disp->mgr->rpool, res);
3532 	if (disp->shutting_down == 1)
3533 		do_cancel(disp);
3534 	else
3535 		(void)startrecv(disp, NULL);
3536 
3537 	killit = destroy_disp_ok(disp);
3538 	UNLOCK(&disp->lock);
3539 	if (killit)
3540 		isc_task_send(disp->task[0], &disp->ctlevent);
3541 }
3542 
3543 static void
do_cancel(dns_dispatch_t * disp)3544 do_cancel(dns_dispatch_t *disp) {
3545 	dns_dispatchevent_t *ev;
3546 	dns_dispentry_t *resp;
3547 	dns_qid_t *qid;
3548 
3549 	if (disp->shutdown_out == 1)
3550 		return;
3551 
3552 	qid = DNS_QID(disp);
3553 
3554 	/*
3555 	 * Search for the first response handler without packets outstanding
3556 	 * unless a specific hander is given.
3557 	 */
3558 	LOCK(&qid->lock);
3559 	for (resp = linear_first(qid);
3560 	     resp != NULL && resp->item_out;
3561 	     /* Empty. */)
3562 		resp = linear_next(qid, resp);
3563 
3564 	/*
3565 	 * No one to send the cancel event to, so nothing to do.
3566 	 */
3567 	if (resp == NULL)
3568 		goto unlock;
3569 
3570 	/*
3571 	 * Send the shutdown failsafe event to this resp.
3572 	 */
3573 	ev = disp->failsafe_ev;
3574 	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3575 		       resp->action, resp->arg, resp, NULL, NULL);
3576 	ev->result = disp->shutdown_why;
3577 	ev->buffer.base = NULL;
3578 	ev->buffer.length = 0;
3579 	disp->shutdown_out = 1;
3580 	request_log(disp, resp, LVL(10),
3581 		    "cancel: failsafe event %p -> task %p",
3582 		    ev, resp->task);
3583 	resp->item_out = ISC_TRUE;
3584 	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3585  unlock:
3586 	UNLOCK(&qid->lock);
3587 }
3588 
3589 isc_socket_t *
dns_dispatch_getsocket(dns_dispatch_t * disp)3590 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3591 	REQUIRE(VALID_DISPATCH(disp));
3592 
3593 	return (disp->socket);
3594 }
3595 
3596 isc_socket_t *
dns_dispatch_getentrysocket(dns_dispentry_t * resp)3597 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3598 	REQUIRE(VALID_RESPONSE(resp));
3599 
3600 	if (resp->dispsocket != NULL)
3601 		return (resp->dispsocket->socket);
3602 	else
3603 		return (NULL);
3604 }
3605 
3606 isc_result_t
dns_dispatch_getlocaladdress(dns_dispatch_t * disp,isc_sockaddr_t * addrp)3607 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3608 
3609 	REQUIRE(VALID_DISPATCH(disp));
3610 	REQUIRE(addrp != NULL);
3611 
3612 	if (disp->socktype == isc_sockettype_udp) {
3613 		*addrp = disp->local;
3614 		return (ISC_R_SUCCESS);
3615 	}
3616 	return (ISC_R_NOTIMPLEMENTED);
3617 }
3618 
3619 void
dns_dispatch_cancel(dns_dispatch_t * disp)3620 dns_dispatch_cancel(dns_dispatch_t *disp) {
3621 	REQUIRE(VALID_DISPATCH(disp));
3622 
3623 	LOCK(&disp->lock);
3624 
3625 	if (disp->shutting_down == 1) {
3626 		UNLOCK(&disp->lock);
3627 		return;
3628 	}
3629 
3630 	disp->shutdown_why = ISC_R_CANCELED;
3631 	disp->shutting_down = 1;
3632 	do_cancel(disp);
3633 
3634 	UNLOCK(&disp->lock);
3635 
3636 	return;
3637 }
3638 
3639 unsigned int
dns_dispatch_getattributes(dns_dispatch_t * disp)3640 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3641 	REQUIRE(VALID_DISPATCH(disp));
3642 
3643 	/*
3644 	 * We don't bother locking disp here; it's the caller's responsibility
3645 	 * to use only non volatile flags.
3646 	 */
3647 	return (disp->attributes);
3648 }
3649 
3650 void
dns_dispatch_changeattributes(dns_dispatch_t * disp,unsigned int attributes,unsigned int mask)3651 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3652 			      unsigned int attributes, unsigned int mask)
3653 {
3654 	REQUIRE(VALID_DISPATCH(disp));
3655 	/* Exclusive attribute can only be set on creation */
3656 	REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3657 	/* Also, a dispatch with randomport specified cannot start listening */
3658 	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3659 		(attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3660 
3661 	/* XXXMLG
3662 	 * Should check for valid attributes here!
3663 	 */
3664 
3665 	LOCK(&disp->lock);
3666 
3667 	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3668 		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3669 		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3670 			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3671 			(void)startrecv(disp, NULL);
3672 		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3673 			   == 0 &&
3674 			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3675 			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3676 			if (disp->recv_pending != 0)
3677 				isc_socket_cancel(disp->socket, disp->task[0],
3678 						  ISC_SOCKCANCEL_RECV);
3679 		}
3680 	}
3681 
3682 	disp->attributes &= ~mask;
3683 	disp->attributes |= (attributes & mask);
3684 	UNLOCK(&disp->lock);
3685 }
3686 
3687 void
dns_dispatch_importrecv(dns_dispatch_t * disp,isc_event_t * event)3688 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3689 	void *buf;
3690 	isc_socketevent_t *sevent, *newsevent;
3691 
3692 	REQUIRE(VALID_DISPATCH(disp));
3693 	REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3694 	REQUIRE(event != NULL);
3695 
3696 	sevent = (isc_socketevent_t *)event;
3697 
3698 	INSIST(sevent->n <= disp->mgr->buffersize);
3699 	newsevent = (isc_socketevent_t *)
3700 		    isc_event_allocate(disp->mgr->mctx, NULL,
3701 				      DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3702 				      disp, sizeof(isc_socketevent_t));
3703 	if (newsevent == NULL)
3704 		return;
3705 
3706 	buf = allocate_udp_buffer(disp);
3707 	if (buf == NULL) {
3708 		isc_event_free(ISC_EVENT_PTR(&newsevent));
3709 		return;
3710 	}
3711 	memmove(buf, sevent->region.base, sevent->n);
3712 	newsevent->region.base = buf;
3713 	newsevent->region.length = disp->mgr->buffersize;
3714 	newsevent->n = sevent->n;
3715 	newsevent->result = sevent->result;
3716 	newsevent->address = sevent->address;
3717 	newsevent->timestamp = sevent->timestamp;
3718 	newsevent->pktinfo = sevent->pktinfo;
3719 	newsevent->attributes = sevent->attributes;
3720 
3721 	isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3722 }
3723 
3724 dns_dispatch_t *
dns_dispatchset_get(dns_dispatchset_t * dset)3725 dns_dispatchset_get(dns_dispatchset_t *dset) {
3726 	dns_dispatch_t *disp;
3727 
3728 	/* check that dispatch set is configured */
3729 	if (dset == NULL || dset->ndisp == 0)
3730 		return (NULL);
3731 
3732 	LOCK(&dset->lock);
3733 	disp = dset->dispatches[dset->cur];
3734 	dset->cur++;
3735 	if (dset->cur == dset->ndisp)
3736 		dset->cur = 0;
3737 	UNLOCK(&dset->lock);
3738 
3739 	return (disp);
3740 }
3741 
3742 isc_result_t
dns_dispatchset_create(isc_mem_t * mctx,isc_socketmgr_t * sockmgr,isc_taskmgr_t * taskmgr,dns_dispatch_t * source,dns_dispatchset_t ** dsetp,int n)3743 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr,
3744 		       isc_taskmgr_t *taskmgr, dns_dispatch_t *source,
3745 		       dns_dispatchset_t **dsetp, int n)
3746 {
3747 	isc_result_t result;
3748 	dns_dispatchset_t *dset;
3749 	dns_dispatchmgr_t *mgr;
3750 	int i, j;
3751 
3752 	REQUIRE(VALID_DISPATCH(source));
3753 	REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0);
3754 	REQUIRE(dsetp != NULL && *dsetp == NULL);
3755 
3756 	mgr = source->mgr;
3757 
3758 	dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
3759 	if (dset == NULL)
3760 		return (ISC_R_NOMEMORY);
3761 	memset(dset, 0, sizeof(*dset));
3762 
3763 	result = isc_mutex_init(&dset->lock);
3764 	if (result != ISC_R_SUCCESS)
3765 		goto fail_alloc;
3766 
3767 	dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n);
3768 	if (dset->dispatches == NULL) {
3769 		result = ISC_R_NOMEMORY;
3770 		goto fail_lock;
3771 	}
3772 
3773 	isc_mem_attach(mctx, &dset->mctx);
3774 	dset->ndisp = n;
3775 	dset->cur = 0;
3776 
3777 	dset->dispatches[0] = NULL;
3778 	dns_dispatch_attach(source, &dset->dispatches[0]);
3779 
3780 	LOCK(&mgr->lock);
3781 	for (i = 1; i < n; i++) {
3782 		dset->dispatches[i] = NULL;
3783 		result = dispatch_createudp(mgr, sockmgr, taskmgr,
3784 					    &source->local,
3785 					    source->maxrequests,
3786 					    source->attributes,
3787 					    &dset->dispatches[i],
3788 					    source->socket);
3789 		if (result != ISC_R_SUCCESS)
3790 			goto fail;
3791 	}
3792 
3793 	UNLOCK(&mgr->lock);
3794 	*dsetp = dset;
3795 
3796 	return (ISC_R_SUCCESS);
3797 
3798  fail:
3799 	UNLOCK(&mgr->lock);
3800 
3801 	for (j = 0; j < i; j++)
3802 		dns_dispatch_detach(&(dset->dispatches[j]));
3803 	isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n);
3804 	if (dset->mctx == mctx)
3805 		isc_mem_detach(&dset->mctx);
3806 
3807  fail_lock:
3808 	DESTROYLOCK(&dset->lock);
3809 
3810  fail_alloc:
3811 	isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t));
3812 	return (result);
3813 }
3814 
3815 void
dns_dispatchset_cancelall(dns_dispatchset_t * dset,isc_task_t * task)3816 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) {
3817 	int i;
3818 
3819 	REQUIRE(dset != NULL);
3820 
3821 	for (i = 0; i < dset->ndisp; i++) {
3822 		isc_socket_t *sock;
3823 		sock = dns_dispatch_getsocket(dset->dispatches[i]);
3824 		isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL);
3825 	}
3826 }
3827 
3828 void
dns_dispatchset_destroy(dns_dispatchset_t ** dsetp)3829 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
3830 	dns_dispatchset_t *dset;
3831 	int i;
3832 
3833 	REQUIRE(dsetp != NULL && *dsetp != NULL);
3834 
3835 	dset = *dsetp;
3836 	for (i = 0; i < dset->ndisp; i++)
3837 		dns_dispatch_detach(&(dset->dispatches[i]));
3838 	isc_mem_put(dset->mctx, dset->dispatches,
3839 		    sizeof(dns_dispatch_t *) * dset->ndisp);
3840 	DESTROYLOCK(&dset->lock);
3841 	isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
3842 
3843 	*dsetp = NULL;
3844 }
3845 
3846 void
dns_dispatch_setdscp(dns_dispatch_t * disp,isc_dscp_t dscp)3847 dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) {
3848 	REQUIRE(VALID_DISPATCH(disp));
3849 	disp->dscp = dscp;
3850 }
3851 
3852 isc_dscp_t
dns_dispatch_getdscp(dns_dispatch_t * disp)3853 dns_dispatch_getdscp(dns_dispatch_t *disp) {
3854 	REQUIRE(VALID_DISPATCH(disp));
3855 	return (disp->dscp);
3856 }
3857 
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on mgr->list together
 * with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *disp;
	char foo[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link))
	{
		isc_sockaddr_format(&disp->local, foo, sizeof(foo));
		printf("\tdispatch %p, addr %s\n", disp, foo);
	}
}
#endif
3872