1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include "services/listen_dnsport.h"
47 #include "services/outside_network.h"
48 #include "util/netevent.h"
49 #include "util/log.h"
50 #include "util/config_file.h"
51 #include "util/net_help.h"
52 #include "sldns/sbuffer.h"
53 
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57 #include <fcntl.h>
58 
59 #ifdef HAVE_SYS_UN_H
60 #include <sys/un.h>
61 #endif
62 
63 /** number of queued TCP connections for listen() */
64 #define TCP_BACKLOG 256
65 
66 /**
67  * Debug print of the getaddrinfo returned address.
68  * @param addr: the address returned.
69  */
70 static void
71 verbose_print_addr(struct addrinfo *addr)
72 {
73 	if(verbosity >= VERB_ALGO) {
74 		char buf[100];
75 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76 #ifdef INET6
77 		if(addr->ai_family == AF_INET6)
78 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79 				sin6_addr;
80 #endif /* INET6 */
81 		if(inet_ntop(addr->ai_family, sinaddr, buf,
82 			(socklen_t)sizeof(buf)) == 0) {
83 			(void)strlcpy(buf, "(null)", sizeof(buf));
84 		}
85 		buf[sizeof(buf)-1] = 0;
86 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
87 			addr->ai_socktype==SOCK_DGRAM?"udp":
88 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89 			addr->ai_family==AF_INET?"4":
90 			addr->ai_family==AF_INET6?"6":
91 			"_otherfam", buf,
92 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93 	}
94 }
95 
/**
 * Create a socket of the given family and type, apply socket options,
 * bind it to addr and set it nonblocking.
 * Which options are applied depends on what the platform headers define;
 * see the preprocessor conditionals in the body.
 * @param family: address family passed to socket(). AF_INET6 and AF_INET
 *	get family specific (MTU related) options applied.
 * @param socktype: socket type passed to socket().
 * @param addr: address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: only used for AF_INET6 when IPV6_V6ONLY exists.
 *	0 leaves the option alone, 2 sets IPV6_V6ONLY to 0, any other
 *	value sets IPV6_V6ONLY to 1.
 * @param inuse: written on failure only. On non-winsock builds it is true
 *	if bind() failed with EADDRINUSE, otherwise it is 0.
 * @param noproto: written on failure only. Set to 1 if socket() failed
 *	because the address family or protocol is not supported, or (on
 *	non-winsock builds) if bind() on AF_INET6 failed with EINVAL.
 *	Otherwise it is 0.
 * @param rcv: requested receive buffer size, 0 to leave it unchanged.
 * @param snd: requested send buffer size, 0 to leave it unchanged.
 * @param listen: if nonzero, SO_REUSEADDR, SO_REUSEPORT and IP_TRANSPARENT
 *	are attempted (where available and requested).
 * @param reuseport: if not NULL and true, SO_REUSEPORT is attempted.
 *	Set to 0 if that setsockopt fails.
 * @param transparent: if nonzero, IP_TRANSPARENT is attempted; a failure
 *	is only logged as a warning.
 * @return the socket file descriptor, or -1 on failure (the socket is
 *	closed in that case).
 */
int
create_udp_sock(int family, int socktype, struct sockaddr* addr,
        socklen_t addrlen, int v6only, int* inuse, int* noproto,
	int rcv, int snd, int listen, int* reuseport, int transparent)
{
	int s;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT)
	int on=1;
#endif
#ifdef IPV6_MTU
	int mtu = IPV6_MIN_MTU;
#endif
	/* silence unused parameter warnings on platforms that lack the
	 * corresponding socket options */
#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
	(void)rcv;
#endif
#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
	(void)snd;
#endif
#ifndef IPV6_V6ONLY
	(void)v6only;
#endif
#ifndef IP_TRANSPARENT
	(void)transparent;
#endif
	if((s = socket(family, socktype, 0)) == -1) {
		*inuse = 0;
#ifndef USE_WINSOCK
		/* unsupported family/protocol is reported via noproto,
		 * without logging an error */
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s", strerror(errno));
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s",
			wsa_strerror(WSAGetLastError()));
#endif
		*noproto = 0;
		return -1;
	}
	if(listen) {
#ifdef SO_REUSEADDR
		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifndef USE_WINSOCK
			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
				strerror(errno));
			/* ENOSYS is tolerated: logged, but setup continues */
			if(errno != ENOSYS) {
				close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#else
			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
				wsa_strerror(WSAGetLastError()));
			closesocket(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
#endif
		}
#endif /* SO_REUSEADDR */
#ifdef SO_REUSEPORT
		/* try to set SO_REUSEPORT so that incoming
		 * queries are distributed evenly among the receiving threads.
		 * Each thread must have its own socket bound to the same port,
		 * with SO_REUSEPORT set on each socket.
		 */
		if (reuseport && *reuseport &&
		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
			/* ENOPROTOOPT is only reported at verbosity 3 or up */
			if(errno != ENOPROTOOPT || verbosity >= 3)
				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
					strerror(errno));
#endif
			/* this option is not essential, we can continue */
			*reuseport = 0;
		}
#else
		(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#ifdef IP_TRANSPARENT
		/* failure here is a warning only, the socket is kept */
		if (transparent &&
		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
		    (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
		}
#endif /* IP_TRANSPARENT */
	}
	if(rcv) {
#ifdef SO_RCVBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_RCVBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on rcvbuf. The limit is stored in
		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
			(socklen_t)sizeof(rcv)) < 0) {
			/* EPERM falls through to the plain SO_RCVBUF attempt
			 * below; any other error is fatal */
			if(errno != EPERM) {
#    ifndef USE_WINSOCK
				log_err("setsockopt(..., SO_RCVBUFFORCE, "
					"...) failed: %s", strerror(errno));
				close(s);
#    else
				log_err("setsockopt(..., SO_RCVBUFFORCE, "
					"...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				closesocket(s);
#    endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_RCVBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
				(socklen_t)sizeof(rcv)) < 0) {
#  ifndef USE_WINSOCK
				log_err("setsockopt(..., SO_RCVBUF, "
					"...) failed: %s", strerror(errno));
				close(s);
#  else
				log_err("setsockopt(..., SO_RCVBUF, "
					"...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				closesocket(s);
#  endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
				&slen) >= 0 && got < rcv/2) {
				log_warn("so-rcvbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.rmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)rcv, (unsigned)got);
			}
#  ifdef SO_RCVBUFFORCE
		}
#  endif
#endif /* SO_RCVBUF */
	}
	/* first do RCVBUF as the receive buffer is more important */
	if(snd) {
#ifdef SO_SNDBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_SNDBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on sndbuf. The limit is stored in
		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
			(socklen_t)sizeof(snd)) < 0) {
			/* EPERM falls through to the plain SO_SNDBUF attempt
			 * below; any other error is fatal */
			if(errno != EPERM) {
#    ifndef USE_WINSOCK
				log_err("setsockopt(..., SO_SNDBUFFORCE, "
					"...) failed: %s", strerror(errno));
				close(s);
#    else
				log_err("setsockopt(..., SO_SNDBUFFORCE, "
					"...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				closesocket(s);
#    endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_SNDBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
				(socklen_t)sizeof(snd)) < 0) {
#  ifndef USE_WINSOCK
				log_err("setsockopt(..., SO_SNDBUF, "
					"...) failed: %s", strerror(errno));
				close(s);
#  else
				log_err("setsockopt(..., SO_SNDBUF, "
					"...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				closesocket(s);
#  endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
				&slen) >= 0 && got < snd/2) {
				log_warn("so-sndbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.wmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)snd, (unsigned)got);
			}
#  ifdef SO_SNDBUFFORCE
		}
#  endif
#endif /* SO_SNDBUF */
	}
	if(family == AF_INET6) {
# if defined(IPV6_V6ONLY)
		if(v6only) {
			/* v6only==2 explicitly turns the option off,
			 * other nonzero values turn it on */
			int val=(v6only==2)?0:1;
			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
				(void*)&val, (socklen_t)sizeof(val)) < 0) {
#ifndef USE_WINSOCK
				log_err("setsockopt(..., IPV6_V6ONLY"
					", ...) failed: %s", strerror(errno));
				close(s);
#else
				log_err("setsockopt(..., IPV6_V6ONLY"
					", ...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				closesocket(s);
#endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
# endif
# if defined(IPV6_USE_MIN_MTU)
		/*
		 * There is no fragmentation of IPv6 datagrams
		 * during forwarding in the network. Therefore
		 * we do not send UDP datagrams larger than
		 * the minimum IPv6 MTU of 1280 octets. The
		 * EDNS0 message length can be larger if the
		 * network stack supports IPV6_USE_MIN_MTU.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
#  ifndef USE_WINSOCK
			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
				"...) failed: %s", strerror(errno));
			close(s);
#  else
			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
				"...) failed: %s",
				wsa_strerror(WSAGetLastError()));
			closesocket(s);
#  endif
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
# elif defined(IPV6_MTU)
		/*
		 * On Linux, to send no larger than 1280, the PMTUD is
		 * disabled by default for datagrams anyway, so we set
		 * the MTU to use.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
#  ifndef USE_WINSOCK
			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
				strerror(errno));
			close(s);
#  else
			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
				wsa_strerror(WSAGetLastError()));
			closesocket(s);
#  endif
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
# endif /* IPv6 MTU */
	} else if(family == AF_INET) {
#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
 * PMTU information is not accepted, but fragmentation is allowed
 * if and only if the packet size exceeds the outgoing interface MTU
 * (and also uses the interface mtu to determine the size of the packets).
 * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
 * FreeBSD already has same semantics without setting the option. */
		/* omit_set records whether IP_PMTUDISC_OMIT was accepted;
		 * if not, IP_PMTUDISC_DONT is used as the fallback */
		int omit_set = 0;
		int action;
#   if defined(IP_PMTUDISC_OMIT)
		action = IP_PMTUDISC_OMIT;
		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
			&action, (socklen_t)sizeof(action)) < 0) {

			/* EINVAL is not fatal here: omit_set stays 0 and
			 * the IP_PMTUDISC_DONT fallback below is tried */
			if (errno != EINVAL) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
					strerror(errno));

#    ifndef USE_WINSOCK
				close(s);
#    else
				closesocket(s);
#    endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
		else
		{
		    omit_set = 1;
		}
#   endif
		if (omit_set == 0) {
   			action = IP_PMTUDISC_DONT;
			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
				&action, (socklen_t)sizeof(action)) < 0) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
#    ifndef USE_WINSOCK
				close(s);
#    else
				closesocket(s);
#    endif
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
#  elif defined(IP_DONTFRAG)
		/* set IP_DONTFRAG to 0 on this socket */
		int off = 0;
		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
			&off, (socklen_t)sizeof(off)) < 0) {
			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
				strerror(errno));
#    ifndef USE_WINSOCK
			close(s);
#    else
			closesocket(s);
#    endif
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#  endif /* IPv4 MTU */
	}
	if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
		*noproto = 0;
		*inuse = 0;
#ifndef USE_WINSOCK
#ifdef EADDRINUSE
		/* errno is inspected before close() can change it.
		 * EADDRINUSE is reported through inuse and not logged here */
		*inuse = (errno == EADDRINUSE);
		/* detect freebsd jail with no ipv6 permission */
		if(family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else if(errno != EADDRINUSE) {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr, addrlen);
		}
#endif /* EADDRINUSE */
		close(s);
#else /* USE_WINSOCK */
		if(WSAGetLastError() != WSAEADDRINUSE &&
			WSAGetLastError() != WSAEADDRNOTAVAIL) {
			log_err_addr("can't bind socket",
				wsa_strerror(WSAGetLastError()),
				(struct sockaddr_storage*)addr, addrlen);
		}
		closesocket(s);
#endif
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		*noproto = 0;
		*inuse = 0;
#ifndef USE_WINSOCK
		close(s);
#else
		closesocket(s);
#endif
		return -1;
	}
	return s;
}
483 
/**
 * Create a socket for the given address, apply socket options, bind it,
 * set it nonblocking and start listening with a backlog of TCP_BACKLOG.
 * Which options are applied depends on what the platform headers define.
 * @param addr: address info; family, socktype, address and address length
 *	are taken from it.
 * @param v6only: if nonzero and the family is AF_INET6, IPV6_V6ONLY is
 *	turned on (where that option exists).
 * @param noproto: set to 0 at entry. Set to 1 if socket() failed because
 *	the address family or protocol is not supported, or (on non-winsock
 *	builds) if bind() on AF_INET6 failed with EINVAL.
 * @param reuseport: if not NULL and true, SO_REUSEPORT is attempted.
 *	Set to 0 if that setsockopt fails.
 * @param transparent: if nonzero, IP_TRANSPARENT is attempted; a failure
 *	is only logged as a warning.
 * @return the listening socket file descriptor, or -1 on failure (the
 *	socket is closed in that case).
 */
int
create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
	int* reuseport, int transparent)
{
	int s;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT)
	int on = 1;
#endif
#ifndef IP_TRANSPARENT
	(void)transparent;
#endif
	verbose_print_addr(addr);
	*noproto = 0;
	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
#ifndef USE_WINSOCK
		/* unsupported family/protocol is reported via noproto,
		 * without logging an error */
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s", strerror(errno));
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
		log_err("can't create socket: %s",
			wsa_strerror(WSAGetLastError()));
#endif
		return -1;
	}
#ifdef SO_REUSEADDR
	/* a SO_REUSEADDR failure is fatal for this socket */
	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifndef USE_WINSOCK
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			strerror(errno));
		close(s);
#else
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			wsa_strerror(WSAGetLastError()));
		closesocket(s);
#endif
		return -1;
	}
#endif /* SO_REUSEADDR */
#ifdef SO_REUSEPORT
	/* try to set SO_REUSEPORT so that incoming
	 * connections are distributed evenly among the receiving threads.
	 * Each thread must have its own socket bound to the same port,
	 * with SO_REUSEPORT set on each socket.
	 */
	if (reuseport && *reuseport &&
		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
		/* ENOPROTOOPT is only reported at verbosity 3 or up */
		if(errno != ENOPROTOOPT || verbosity >= 3)
			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
				strerror(errno));
#endif
		/* this option is not essential, we can continue */
		*reuseport = 0;
	}
#else
	(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#if defined(IPV6_V6ONLY)
	if(addr->ai_family == AF_INET6 && v6only) {
		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
#ifndef USE_WINSOCK
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				strerror(errno));
			close(s);
#else
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				wsa_strerror(WSAGetLastError()));
			closesocket(s);
#endif
			return -1;
		}
	}
#else
	(void)v6only;
#endif /* IPV6_V6ONLY */
#ifdef IP_TRANSPARENT
	/* failure here is a warning only, the socket is kept */
	if (transparent &&
	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
	}
#endif /* IP_TRANSPARENT */
	if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
#ifndef USE_WINSOCK
		/* errno is inspected before close() can change it */
		/* detect freebsd jail with no ipv6 permission */
		if(addr->ai_family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr->ai_addr,
				addr->ai_addrlen);
		}
		close(s);
#else
		log_err_addr("can't bind socket",
			wsa_strerror(WSAGetLastError()),
			(struct sockaddr_storage*)addr->ai_addr,
			addr->ai_addrlen);
		closesocket(s);
#endif
		return -1;
	}
	if(!fd_set_nonblock(s)) {
#ifndef USE_WINSOCK
		close(s);
#else
		closesocket(s);
#endif
		return -1;
	}
	if(listen(s, TCP_BACKLOG) == -1) {
#ifndef USE_WINSOCK
		log_err("can't listen: %s", strerror(errno));
		close(s);
#else
		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
		closesocket(s);
#endif
		return -1;
	}
	return s;
}
617 
/**
 * Create a listening local (unix domain) stream socket at a filesystem path.
 * An existing file at the path is unlinked first. The socket is bound,
 * set nonblocking and put in listening state with backlog TCP_BACKLOG.
 * @param path: filesystem path for the socket. It is copied into sun_path
 *	with strlcpy, so a path longer than sun_path is truncated.
 * @param noproto: set to 1 only when local sockets are not supported by
 *	this build (no sys/un.h); otherwise it is not written.
 * @return the listening socket file descriptor, or -1 on failure. On
 *	failure the socket that was created is closed again, so no file
 *	descriptor is leaked.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* from here on every failure must release s, via the err label.
	 * errors are logged before the jump so errno is still intact */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
673 
674 
/**
 * Resolve ifname and port with getaddrinfo and create a socket for the
 * first result: a UDP socket for SOCK_DGRAM, a TCP accept socket otherwise.
 * hints->ai_socktype is overwritten with stype.
 * *noip6 is set to 1 if creation failed because IPv6 is not available
 * (and the hints asked for AF_INET6), otherwise it is 0.
 * Returns the socket fd or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	struct addrinfo *ai = NULL;
	int gai, fd, inuse, noproto;

	*noip6 = 0;
	hints->ai_socktype = stype;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || ai == NULL) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* EAI_SYSTEM means the real cause is in errno */
		if(gai == EAI_SYSTEM)
			syserr = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent);
		if(fd == -1) {
			/* inuse and noproto are only valid on failure */
			if(inuse)
				log_err("bind: address already in use");
			else if(noproto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}
	freeaddrinfo(ai);
	return fd;
}
725 
/**
 * Make a socket, honouring an optional port override in the interface
 * name. An ifname of the form "addr@port" is split: the part before the
 * '@' is used as interface and the part after it replaces port.
 * Returns the socket fd, or -1 on failure (with *noip6 set to 0 when the
 * interface or port part is too long).
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	char portpart[16];
	char ifpart[128];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	/* no override present: use the arguments as given */
	if(at == NULL)
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent);

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifpart)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portpart)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* copy and cut off at the '@' */
	(void)strlcpy(ifpart, ifname, sizeof(ifpart));
	ifpart[iflen] = 0;
	(void)strlcpy(portpart, at+1, sizeof(portpart));
	portpart[strlen(at+1)] = 0;
	return make_sock(stype, ifpart, portpart, hints, v6only, noip6,
		rcv, snd, reuseport, transparent);
}
757 
758 /**
759  * Add port to open ports list.
760  * @param list: list head. changed.
761  * @param s: fd.
762  * @param ftype: if fd is UDP.
763  * @return false on failure. list in unchanged then.
764  */
765 static int
766 port_insert(struct listen_port** list, int s, enum listen_type ftype)
767 {
768 	struct listen_port* item = (struct listen_port*)malloc(
769 		sizeof(struct listen_port));
770 	if(!item)
771 		return 0;
772 	item->next = *list;
773 	item->fd = s;
774 	item->ftype = ftype;
775 	*list = item;
776 	return 1;
777 }
778 
/**
 * Enable reception of packet info (ancillary data) on a socket.
 * Which socket option is used depends on what the platform defines.
 * Families other than AF_INET6 and AF_INET are left untouched.
 * Returns false (after logging) if the option cannot be set or if the
 * platform has no suitable option for the family, true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#endif /* IPv6 packet info option */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#endif /* IPv4 packet info option */

	default:
		/* other families: nothing to set */
		break;
	}
	return 1;
}
833 
834 /**
835  * Helper for ports_open. Creates one interface (or NULL for default).
836  * @param ifname: The interface ip address.
837  * @param do_auto: use automatic interface detection.
838  * 	If enabled, then ifname must be the wildcard name.
839  * @param do_udp: if udp should be used.
840  * @param do_tcp: if udp should be used.
841  * @param hints: for getaddrinfo. family and flags have to be set by caller.
842  * @param port: Port number to use (as string).
843  * @param list: list of open ports, appended to, changed to point to list head.
844  * @param rcv: receive buffer size for UDP
845  * @param snd: send buffer size for UDP
846  * @param ssl_port: ssl service port number
847  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
848  * 	set to false on exit if reuseport failed due to no kernel support.
849  * @param transparent: set IP_TRANSPARENT socket option.
850  * @return: returns false on error.
851  */
852 static int
853 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
854 	struct addrinfo *hints, const char* port, struct listen_port** list,
855 	size_t rcv, size_t snd, int ssl_port, int* reuseport, int transparent)
856 {
857 	int s, noip6=0;
858 	if(!do_udp && !do_tcp)
859 		return 0;
860 	if(do_auto) {
861 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
862 			&noip6, rcv, snd, reuseport, transparent)) == -1) {
863 			if(noip6) {
864 				log_warn("IPv6 protocol not available");
865 				return 1;
866 			}
867 			return 0;
868 		}
869 		/* getting source addr packet info is highly non-portable */
870 		if(!set_recvpktinfo(s, hints->ai_family)) {
871 #ifndef USE_WINSOCK
872 			close(s);
873 #else
874 			closesocket(s);
875 #endif
876 			return 0;
877 		}
878 		if(!port_insert(list, s, listen_type_udpancil)) {
879 #ifndef USE_WINSOCK
880 			close(s);
881 #else
882 			closesocket(s);
883 #endif
884 			return 0;
885 		}
886 	} else if(do_udp) {
887 		/* regular udp socket */
888 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
889 			&noip6, rcv, snd, reuseport, transparent)) == -1) {
890 			if(noip6) {
891 				log_warn("IPv6 protocol not available");
892 				return 1;
893 			}
894 			return 0;
895 		}
896 		if(!port_insert(list, s, listen_type_udp)) {
897 #ifndef USE_WINSOCK
898 			close(s);
899 #else
900 			closesocket(s);
901 #endif
902 			return 0;
903 		}
904 	}
905 	if(do_tcp) {
906 		int is_ssl = ((strchr(ifname, '@') &&
907 			atoi(strchr(ifname, '@')+1) == ssl_port) ||
908 			(!strchr(ifname, '@') && atoi(port) == ssl_port));
909 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
910 			&noip6, 0, 0, reuseport, transparent)) == -1) {
911 			if(noip6) {
912 				/*log_warn("IPv6 protocol not available");*/
913 				return 1;
914 			}
915 			return 0;
916 		}
917 		if(is_ssl)
918 			verbose(VERB_ALGO, "setup TCP for SSL service");
919 		if(!port_insert(list, s, is_ssl?listen_type_ssl:
920 			listen_type_tcp)) {
921 #ifndef USE_WINSOCK
922 			close(s);
923 #else
924 			closesocket(s);
925 #endif
926 			return 0;
927 		}
928 	}
929 	return 1;
930 }
931 
932 /**
933  * Add items to commpoint list in front.
934  * @param c: commpoint to add.
935  * @param front: listen struct.
936  * @return: false on failure.
937  */
938 static int
939 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
940 {
941 	struct listen_list* item = (struct listen_list*)malloc(
942 		sizeof(struct listen_list));
943 	if(!item)
944 		return 0;
945 	item->com = c;
946 	item->next = front->cps;
947 	front->cps = item;
948 	return 1;
949 }
950 
951 struct listen_dnsport*
952 listen_create(struct comm_base* base, struct listen_port* ports,
953 	size_t bufsize, int tcp_accept_count, void* sslctx,
954 	struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
955 {
956 	struct listen_dnsport* front = (struct listen_dnsport*)
957 		malloc(sizeof(struct listen_dnsport));
958 	if(!front)
959 		return NULL;
960 	front->cps = NULL;
961 	front->udp_buff = sldns_buffer_new(bufsize);
962 	if(!front->udp_buff) {
963 		free(front);
964 		return NULL;
965 	}
966 
967 	/* create comm points as needed */
968 	while(ports) {
969 		struct comm_point* cp = NULL;
970 		if(ports->ftype == listen_type_udp)
971 			cp = comm_point_create_udp(base, ports->fd,
972 				front->udp_buff, cb, cb_arg);
973 		else if(ports->ftype == listen_type_tcp)
974 			cp = comm_point_create_tcp(base, ports->fd,
975 				tcp_accept_count, bufsize, cb, cb_arg);
976 		else if(ports->ftype == listen_type_ssl) {
977 			cp = comm_point_create_tcp(base, ports->fd,
978 				tcp_accept_count, bufsize, cb, cb_arg);
979 			cp->ssl = sslctx;
980 		} else if(ports->ftype == listen_type_udpancil)
981 			cp = comm_point_create_udp_ancil(base, ports->fd,
982 				front->udp_buff, cb, cb_arg);
983 		if(!cp) {
984 			log_err("can't create commpoint");
985 			listen_delete(front);
986 			return NULL;
987 		}
988 		cp->dtenv = dtenv;
989 		cp->do_not_close = 1;
990 		if(!listen_cp_insert(cp, front)) {
991 			log_err("malloc failed");
992 			comm_point_delete(cp);
993 			listen_delete(front);
994 			return NULL;
995 		}
996 		ports = ports->next;
997 	}
998 	if(!front->cps) {
999 		log_err("Could not open sockets to accept queries.");
1000 		listen_delete(front);
1001 		return NULL;
1002 	}
1003 
1004 	return front;
1005 }
1006 
1007 void
1008 listen_list_delete(struct listen_list* list)
1009 {
1010 	struct listen_list *p = list, *pn;
1011 	while(p) {
1012 		pn = p->next;
1013 		comm_point_delete(p->com);
1014 		free(p);
1015 		p = pn;
1016 	}
1017 }
1018 
1019 void
1020 listen_delete(struct listen_dnsport* front)
1021 {
1022 	if(!front)
1023 		return;
1024 	listen_list_delete(front->cps);
1025 	sldns_buffer_free(front->udp_buff);
1026 	free(front);
1027 }
1028 
1029 struct listen_port*
1030 listening_ports_open(struct config_file* cfg, int* reuseport)
1031 {
1032 	struct listen_port* list = NULL;
1033 	struct addrinfo hints;
1034 	int i, do_ip4, do_ip6;
1035 	int do_tcp, do_auto;
1036 	char portbuf[32];
1037 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1038 	do_ip4 = cfg->do_ip4;
1039 	do_ip6 = cfg->do_ip6;
1040 	do_tcp = cfg->do_tcp;
1041 	do_auto = cfg->if_automatic && cfg->do_udp;
1042 	if(cfg->incoming_num_tcp == 0)
1043 		do_tcp = 0;
1044 
1045 	/* getaddrinfo */
1046 	memset(&hints, 0, sizeof(hints));
1047 	hints.ai_flags = AI_PASSIVE;
1048 	/* no name lookups on our listening ports */
1049 	if(cfg->num_ifs > 0)
1050 		hints.ai_flags |= AI_NUMERICHOST;
1051 	hints.ai_family = AF_UNSPEC;
1052 #ifndef INET6
1053 	do_ip6 = 0;
1054 #endif
1055 	if(!do_ip4 && !do_ip6) {
1056 		return NULL;
1057 	}
1058 	/* create ip4 and ip6 ports so that return addresses are nice. */
1059 	if(do_auto || cfg->num_ifs == 0) {
1060 		if(do_ip6) {
1061 			hints.ai_family = AF_INET6;
1062 			if(!ports_create_if(do_auto?"::0":"::1",
1063 				do_auto, cfg->do_udp, do_tcp,
1064 				&hints, portbuf, &list,
1065 				cfg->so_rcvbuf, cfg->so_sndbuf,
1066 				cfg->ssl_port, reuseport,
1067 				cfg->ip_transparent)) {
1068 				listening_ports_free(list);
1069 				return NULL;
1070 			}
1071 		}
1072 		if(do_ip4) {
1073 			hints.ai_family = AF_INET;
1074 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1075 				do_auto, cfg->do_udp, do_tcp,
1076 				&hints, portbuf, &list,
1077 				cfg->so_rcvbuf, cfg->so_sndbuf,
1078 				cfg->ssl_port, reuseport,
1079 				cfg->ip_transparent)) {
1080 				listening_ports_free(list);
1081 				return NULL;
1082 			}
1083 		}
1084 	} else for(i = 0; i<cfg->num_ifs; i++) {
1085 		if(str_is_ip6(cfg->ifs[i])) {
1086 			if(!do_ip6)
1087 				continue;
1088 			hints.ai_family = AF_INET6;
1089 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1090 				do_tcp, &hints, portbuf, &list,
1091 				cfg->so_rcvbuf, cfg->so_sndbuf,
1092 				cfg->ssl_port, reuseport,
1093 				cfg->ip_transparent)) {
1094 				listening_ports_free(list);
1095 				return NULL;
1096 			}
1097 		} else {
1098 			if(!do_ip4)
1099 				continue;
1100 			hints.ai_family = AF_INET;
1101 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1102 				do_tcp, &hints, portbuf, &list,
1103 				cfg->so_rcvbuf, cfg->so_sndbuf,
1104 				cfg->ssl_port, reuseport,
1105 				cfg->ip_transparent)) {
1106 				listening_ports_free(list);
1107 				return NULL;
1108 			}
1109 		}
1110 	}
1111 	return list;
1112 }
1113 
1114 void listening_ports_free(struct listen_port* list)
1115 {
1116 	struct listen_port* nx;
1117 	while(list) {
1118 		nx = list->next;
1119 		if(list->fd != -1) {
1120 #ifndef USE_WINSOCK
1121 			close(list->fd);
1122 #else
1123 			closesocket(list->fd);
1124 #endif
1125 		}
1126 		free(list);
1127 		list = nx;
1128 	}
1129 }
1130 
1131 size_t listen_get_mem(struct listen_dnsport* listen)
1132 {
1133 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1134 		sizeof(*listen->udp_buff) +
1135 		sldns_buffer_capacity(listen->udp_buff);
1136 	struct listen_list* p;
1137 	for(p = listen->cps; p; p = p->next) {
1138 		s += sizeof(*p);
1139 		s += comm_point_get_mem(p->com);
1140 	}
1141 	return s;
1142 }
1143 
1144 void listen_stop_accept(struct listen_dnsport* listen)
1145 {
1146 	/* do not stop the ones that have no tcp_free list
1147 	 * (they have already stopped listening) */
1148 	struct listen_list* p;
1149 	for(p=listen->cps; p; p=p->next) {
1150 		if(p->com->type == comm_tcp_accept &&
1151 			p->com->tcp_free != NULL) {
1152 			comm_point_stop_listening(p->com);
1153 		}
1154 	}
1155 }
1156 
1157 void listen_start_accept(struct listen_dnsport* listen)
1158 {
1159 	/* do not start the ones that have no tcp_free list, it is no
1160 	 * use to listen to them because they have no free tcp handlers */
1161 	struct listen_list* p;
1162 	for(p=listen->cps; p; p=p->next) {
1163 		if(p->com->type == comm_tcp_accept &&
1164 			p->com->tcp_free != NULL) {
1165 			comm_point_start_listening(p->com, -1, -1);
1166 		}
1167 	}
1168 }
1169 
1170