1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include "services/listen_dnsport.h"
47 #include "services/outside_network.h"
48 #include "util/netevent.h"
49 #include "util/log.h"
50 #include "util/config_file.h"
51 #include "util/net_help.h"
52 #include "sldns/sbuffer.h"
53 
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57 #include <fcntl.h>
58 
59 #ifdef HAVE_SYS_UN_H
60 #include <sys/un.h>
61 #endif
62 
63 /** number of queued TCP connections for listen() */
64 #define TCP_BACKLOG 256
65 
66 /**
67  * Debug print of the getaddrinfo returned address.
68  * @param addr: the address returned.
69  */
70 static void
71 verbose_print_addr(struct addrinfo *addr)
72 {
73 	if(verbosity >= VERB_ALGO) {
74 		char buf[100];
75 		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76 #ifdef INET6
77 		if(addr->ai_family == AF_INET6)
78 			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79 				sin6_addr;
80 #endif /* INET6 */
81 		if(inet_ntop(addr->ai_family, sinaddr, buf,
82 			(socklen_t)sizeof(buf)) == 0) {
83 			(void)strlcpy(buf, "(null)", sizeof(buf));
84 		}
85 		buf[sizeof(buf)-1] = 0;
86 		verbose(VERB_ALGO, "creating %s%s socket %s %d",
87 			addr->ai_socktype==SOCK_DGRAM?"udp":
88 			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89 			addr->ai_family==AF_INET?"4":
90 			addr->ai_family==AF_INET6?"6":
91 			"_otherfam", buf,
92 			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93 	}
94 }
95 
96 int
97 create_udp_sock(int family, int socktype, struct sockaddr* addr,
98         socklen_t addrlen, int v6only, int* inuse, int* noproto,
99 	int rcv, int snd, int listen, int* reuseport, int transparent)
100 {
101 	int s;
102 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY)
103 	int on=1;
104 #endif
105 #ifdef IPV6_MTU
106 	int mtu = IPV6_MIN_MTU;
107 #endif
108 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109 	(void)rcv;
110 #endif
111 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112 	(void)snd;
113 #endif
114 #ifndef IPV6_V6ONLY
115 	(void)v6only;
116 #endif
117 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY)
118 	(void)transparent;
119 #endif
120 	if((s = socket(family, socktype, 0)) == -1) {
121 		*inuse = 0;
122 #ifndef USE_WINSOCK
123 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
124 			*noproto = 1;
125 			return -1;
126 		}
127 		log_err("can't create socket: %s", strerror(errno));
128 #else
129 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
130 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
131 			*noproto = 1;
132 			return -1;
133 		}
134 		log_err("can't create socket: %s",
135 			wsa_strerror(WSAGetLastError()));
136 #endif
137 		*noproto = 0;
138 		return -1;
139 	}
140 	if(listen) {
141 #ifdef SO_REUSEADDR
142 		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
143 			(socklen_t)sizeof(on)) < 0) {
144 #ifndef USE_WINSOCK
145 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
146 				strerror(errno));
147 			if(errno != ENOSYS) {
148 				close(s);
149 				*noproto = 0;
150 				*inuse = 0;
151 				return -1;
152 			}
153 #else
154 			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
155 				wsa_strerror(WSAGetLastError()));
156 			closesocket(s);
157 			*noproto = 0;
158 			*inuse = 0;
159 			return -1;
160 #endif
161 		}
162 #endif /* SO_REUSEADDR */
163 #ifdef SO_REUSEPORT
164 		/* try to set SO_REUSEPORT so that incoming
165 		 * queries are distributed evenly among the receiving threads.
166 		 * Each thread must have its own socket bound to the same port,
167 		 * with SO_REUSEPORT set on each socket.
168 		 */
169 		if (reuseport && *reuseport &&
170 		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
171 			(socklen_t)sizeof(on)) < 0) {
172 #ifdef ENOPROTOOPT
173 			if(errno != ENOPROTOOPT || verbosity >= 3)
174 				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
175 					strerror(errno));
176 #endif
177 			/* this option is not essential, we can continue */
178 			*reuseport = 0;
179 		}
180 #else
181 		(void)reuseport;
182 #endif /* defined(SO_REUSEPORT) */
183 #ifdef IP_TRANSPARENT
184 		if (transparent &&
185 		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
186 		    (socklen_t)sizeof(on)) < 0) {
187 			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
188 			strerror(errno));
189 		}
190 #elif defined(IP_BINDANY)
191 		if (transparent &&
192 		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
193 		    IP_BINDANY, (void*)&on, (socklen_t)sizeof(on)) < 0) {
194 			log_warn("setsockopt(.. IP_BINDANY ..) failed: %s",
195 			strerror(errno));
196 		}
197 #endif /* IP_TRANSPARENT || IP_BINDANY */
198 	}
199 	if(rcv) {
200 #ifdef SO_RCVBUF
201 		int got;
202 		socklen_t slen = (socklen_t)sizeof(got);
203 #  ifdef SO_RCVBUFFORCE
204 		/* Linux specific: try to use root permission to override
205 		 * system limits on rcvbuf. The limit is stored in
206 		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
207 		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
208 			(socklen_t)sizeof(rcv)) < 0) {
209 			if(errno != EPERM) {
210 #    ifndef USE_WINSOCK
211 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
212 					"...) failed: %s", strerror(errno));
213 				close(s);
214 #    else
215 				log_err("setsockopt(..., SO_RCVBUFFORCE, "
216 					"...) failed: %s",
217 					wsa_strerror(WSAGetLastError()));
218 				closesocket(s);
219 #    endif
220 				*noproto = 0;
221 				*inuse = 0;
222 				return -1;
223 			}
224 #  endif /* SO_RCVBUFFORCE */
225 			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
226 				(socklen_t)sizeof(rcv)) < 0) {
227 #  ifndef USE_WINSOCK
228 				log_err("setsockopt(..., SO_RCVBUF, "
229 					"...) failed: %s", strerror(errno));
230 				close(s);
231 #  else
232 				log_err("setsockopt(..., SO_RCVBUF, "
233 					"...) failed: %s",
234 					wsa_strerror(WSAGetLastError()));
235 				closesocket(s);
236 #  endif
237 				*noproto = 0;
238 				*inuse = 0;
239 				return -1;
240 			}
241 			/* check if we got the right thing or if system
242 			 * reduced to some system max.  Warn if so */
243 			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
244 				&slen) >= 0 && got < rcv/2) {
245 				log_warn("so-rcvbuf %u was not granted. "
246 					"Got %u. To fix: start with "
247 					"root permissions(linux) or sysctl "
248 					"bigger net.core.rmem_max(linux) or "
249 					"kern.ipc.maxsockbuf(bsd) values.",
250 					(unsigned)rcv, (unsigned)got);
251 			}
252 #  ifdef SO_RCVBUFFORCE
253 		}
254 #  endif
255 #endif /* SO_RCVBUF */
256 	}
257 	/* first do RCVBUF as the receive buffer is more important */
258 	if(snd) {
259 #ifdef SO_SNDBUF
260 		int got;
261 		socklen_t slen = (socklen_t)sizeof(got);
262 #  ifdef SO_SNDBUFFORCE
263 		/* Linux specific: try to use root permission to override
264 		 * system limits on sndbuf. The limit is stored in
265 		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
266 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
267 			(socklen_t)sizeof(snd)) < 0) {
268 			if(errno != EPERM) {
269 #    ifndef USE_WINSOCK
270 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
271 					"...) failed: %s", strerror(errno));
272 				close(s);
273 #    else
274 				log_err("setsockopt(..., SO_SNDBUFFORCE, "
275 					"...) failed: %s",
276 					wsa_strerror(WSAGetLastError()));
277 				closesocket(s);
278 #    endif
279 				*noproto = 0;
280 				*inuse = 0;
281 				return -1;
282 			}
283 #  endif /* SO_SNDBUFFORCE */
284 			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
285 				(socklen_t)sizeof(snd)) < 0) {
286 #  ifndef USE_WINSOCK
287 				log_err("setsockopt(..., SO_SNDBUF, "
288 					"...) failed: %s", strerror(errno));
289 				close(s);
290 #  else
291 				log_err("setsockopt(..., SO_SNDBUF, "
292 					"...) failed: %s",
293 					wsa_strerror(WSAGetLastError()));
294 				closesocket(s);
295 #  endif
296 				*noproto = 0;
297 				*inuse = 0;
298 				return -1;
299 			}
300 			/* check if we got the right thing or if system
301 			 * reduced to some system max.  Warn if so */
302 			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
303 				&slen) >= 0 && got < snd/2) {
304 				log_warn("so-sndbuf %u was not granted. "
305 					"Got %u. To fix: start with "
306 					"root permissions(linux) or sysctl "
307 					"bigger net.core.wmem_max(linux) or "
308 					"kern.ipc.maxsockbuf(bsd) values.",
309 					(unsigned)snd, (unsigned)got);
310 			}
311 #  ifdef SO_SNDBUFFORCE
312 		}
313 #  endif
314 #endif /* SO_SNDBUF */
315 	}
316 	if(family == AF_INET6) {
317 # if defined(IPV6_V6ONLY)
318 		if(v6only) {
319 			int val=(v6only==2)?0:1;
320 			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
321 				(void*)&val, (socklen_t)sizeof(val)) < 0) {
322 #ifndef USE_WINSOCK
323 				log_err("setsockopt(..., IPV6_V6ONLY"
324 					", ...) failed: %s", strerror(errno));
325 				close(s);
326 #else
327 				log_err("setsockopt(..., IPV6_V6ONLY"
328 					", ...) failed: %s",
329 					wsa_strerror(WSAGetLastError()));
330 				closesocket(s);
331 #endif
332 				*noproto = 0;
333 				*inuse = 0;
334 				return -1;
335 			}
336 		}
337 # endif
338 # if defined(IPV6_USE_MIN_MTU)
339 		/*
340 		 * There is no fragmentation of IPv6 datagrams
341 		 * during forwarding in the network. Therefore
342 		 * we do not send UDP datagrams larger than
343 		 * the minimum IPv6 MTU of 1280 octets. The
344 		 * EDNS0 message length can be larger if the
345 		 * network stack supports IPV6_USE_MIN_MTU.
346 		 */
347 		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
348 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
349 #  ifndef USE_WINSOCK
350 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
351 				"...) failed: %s", strerror(errno));
352 			close(s);
353 #  else
354 			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
355 				"...) failed: %s",
356 				wsa_strerror(WSAGetLastError()));
357 			closesocket(s);
358 #  endif
359 			*noproto = 0;
360 			*inuse = 0;
361 			return -1;
362 		}
363 # elif defined(IPV6_MTU)
364 		/*
365 		 * On Linux, to send no larger than 1280, the PMTUD is
366 		 * disabled by default for datagrams anyway, so we set
367 		 * the MTU to use.
368 		 */
369 		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
370 			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
371 #  ifndef USE_WINSOCK
372 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
373 				strerror(errno));
374 			close(s);
375 #  else
376 			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
377 				wsa_strerror(WSAGetLastError()));
378 			closesocket(s);
379 #  endif
380 			*noproto = 0;
381 			*inuse = 0;
382 			return -1;
383 		}
384 # endif /* IPv6 MTU */
385 	} else if(family == AF_INET) {
386 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
387 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
388  * PMTU information is not accepted, but fragmentation is allowed
389  * if and only if the packet size exceeds the outgoing interface MTU
390  * (and also uses the interface mtu to determine the size of the packets).
391  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
392  * FreeBSD already has same semantics without setting the option. */
393 		int omit_set = 0;
394 		int action;
395 #   if defined(IP_PMTUDISC_OMIT)
396 		action = IP_PMTUDISC_OMIT;
397 		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
398 			&action, (socklen_t)sizeof(action)) < 0) {
399 
400 			if (errno != EINVAL) {
401 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
402 					strerror(errno));
403 
404 #    ifndef USE_WINSOCK
405 				close(s);
406 #    else
407 				closesocket(s);
408 #    endif
409 				*noproto = 0;
410 				*inuse = 0;
411 				return -1;
412 			}
413 		}
414 		else
415 		{
416 		    omit_set = 1;
417 		}
418 #   endif
419 		if (omit_set == 0) {
420    			action = IP_PMTUDISC_DONT;
421 			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
422 				&action, (socklen_t)sizeof(action)) < 0) {
423 				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
424 					strerror(errno));
425 #    ifndef USE_WINSOCK
426 				close(s);
427 #    else
428 				closesocket(s);
429 #    endif
430 				*noproto = 0;
431 				*inuse = 0;
432 				return -1;
433 			}
434 		}
435 #  elif defined(IP_DONTFRAG)
436 		int off = 0;
437 		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
438 			&off, (socklen_t)sizeof(off)) < 0) {
439 			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
440 				strerror(errno));
441 #    ifndef USE_WINSOCK
442 			close(s);
443 #    else
444 			closesocket(s);
445 #    endif
446 			*noproto = 0;
447 			*inuse = 0;
448 			return -1;
449 		}
450 #  endif /* IPv4 MTU */
451 	}
452 	if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
453 		*noproto = 0;
454 		*inuse = 0;
455 #ifndef USE_WINSOCK
456 #ifdef EADDRINUSE
457 		*inuse = (errno == EADDRINUSE);
458 		/* detect freebsd jail with no ipv6 permission */
459 		if(family==AF_INET6 && errno==EINVAL)
460 			*noproto = 1;
461 		else if(errno != EADDRINUSE) {
462 			log_err_addr("can't bind socket", strerror(errno),
463 				(struct sockaddr_storage*)addr, addrlen);
464 		}
465 #endif /* EADDRINUSE */
466 		close(s);
467 #else /* USE_WINSOCK */
468 		if(WSAGetLastError() != WSAEADDRINUSE &&
469 			WSAGetLastError() != WSAEADDRNOTAVAIL) {
470 			log_err_addr("can't bind socket",
471 				wsa_strerror(WSAGetLastError()),
472 				(struct sockaddr_storage*)addr, addrlen);
473 		}
474 		closesocket(s);
475 #endif
476 		return -1;
477 	}
478 	if(!fd_set_nonblock(s)) {
479 		*noproto = 0;
480 		*inuse = 0;
481 #ifndef USE_WINSOCK
482 		close(s);
483 #else
484 		closesocket(s);
485 #endif
486 		return -1;
487 	}
488 	return s;
489 }
490 
491 int
492 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
493 	int* reuseport, int transparent, int mss)
494 {
495 	int s;
496 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT)
497 	int on = 1;
498 #endif
499 #ifndef IP_TRANSPARENT
500 	(void)transparent;
501 #endif
502 	verbose_print_addr(addr);
503 	*noproto = 0;
504 	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
505 #ifndef USE_WINSOCK
506 		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
507 			*noproto = 1;
508 			return -1;
509 		}
510 		log_err("can't create socket: %s", strerror(errno));
511 #else
512 		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
513 			WSAGetLastError() == WSAEPROTONOSUPPORT) {
514 			*noproto = 1;
515 			return -1;
516 		}
517 		log_err("can't create socket: %s",
518 			wsa_strerror(WSAGetLastError()));
519 #endif
520 		return -1;
521 	}
522 	if (mss > 0) {
523 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
524 		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
525 			(socklen_t)sizeof(mss)) < 0) {
526 			#ifndef USE_WINSOCK
527 			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
528 				strerror(errno));
529 			#else
530 			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
531 				wsa_strerror(WSAGetLastError()));
532 			#endif
533 		} else {
534 			verbose(VERB_ALGO,
535 				" tcp socket mss set to %d", mss);
536 		}
537 #else
538 		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
539 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
540 	}
541 #ifdef SO_REUSEADDR
542 	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
543 		(socklen_t)sizeof(on)) < 0) {
544 #ifndef USE_WINSOCK
545 		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
546 			strerror(errno));
547 		close(s);
548 #else
549 		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
550 			wsa_strerror(WSAGetLastError()));
551 		closesocket(s);
552 #endif
553 		return -1;
554 	}
555 #endif /* SO_REUSEADDR */
556 #ifdef SO_REUSEPORT
557 	/* try to set SO_REUSEPORT so that incoming
558 	 * connections are distributed evenly among the receiving threads.
559 	 * Each thread must have its own socket bound to the same port,
560 	 * with SO_REUSEPORT set on each socket.
561 	 */
562 	if (reuseport && *reuseport &&
563 		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
564 		(socklen_t)sizeof(on)) < 0) {
565 #ifdef ENOPROTOOPT
566 		if(errno != ENOPROTOOPT || verbosity >= 3)
567 			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
568 				strerror(errno));
569 #endif
570 		/* this option is not essential, we can continue */
571 		*reuseport = 0;
572 	}
573 #else
574 	(void)reuseport;
575 #endif /* defined(SO_REUSEPORT) */
576 #if defined(IPV6_V6ONLY)
577 	if(addr->ai_family == AF_INET6 && v6only) {
578 		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
579 			(void*)&on, (socklen_t)sizeof(on)) < 0) {
580 #ifndef USE_WINSOCK
581 			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
582 				strerror(errno));
583 			close(s);
584 #else
585 			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
586 				wsa_strerror(WSAGetLastError()));
587 			closesocket(s);
588 #endif
589 			return -1;
590 		}
591 	}
592 #else
593 	(void)v6only;
594 #endif /* IPV6_V6ONLY */
595 #ifdef IP_TRANSPARENT
596 	if (transparent &&
597 	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
598 	    (socklen_t)sizeof(on)) < 0) {
599 		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
600 			strerror(errno));
601 	}
602 #endif /* IP_TRANSPARENT */
603 	if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
604 #ifndef USE_WINSOCK
605 		/* detect freebsd jail with no ipv6 permission */
606 		if(addr->ai_family==AF_INET6 && errno==EINVAL)
607 			*noproto = 1;
608 		else {
609 			log_err_addr("can't bind socket", strerror(errno),
610 				(struct sockaddr_storage*)addr->ai_addr,
611 				addr->ai_addrlen);
612 		}
613 		close(s);
614 #else
615 		log_err_addr("can't bind socket",
616 			wsa_strerror(WSAGetLastError()),
617 			(struct sockaddr_storage*)addr->ai_addr,
618 			addr->ai_addrlen);
619 		closesocket(s);
620 #endif
621 		return -1;
622 	}
623 	if(!fd_set_nonblock(s)) {
624 #ifndef USE_WINSOCK
625 		close(s);
626 #else
627 		closesocket(s);
628 #endif
629 		return -1;
630 	}
631 	if(listen(s, TCP_BACKLOG) == -1) {
632 #ifndef USE_WINSOCK
633 		log_err("can't listen: %s", strerror(errno));
634 		close(s);
635 #else
636 		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
637 		closesocket(s);
638 #endif
639 		return -1;
640 	}
641 	return s;
642 }
643 
/**
 * Create a listening unix domain (AF_LOCAL) stream socket at a path.
 * An existing file at the path is removed before the socket is bound.
 * The socket is made nonblocking and listen() is called on it.
 * @param path: filesystem path for the socket. It is copied with strlcpy
 *	into sun_path, the return value of that copy is not checked.
 * @param noproto: set to 1 if local sockets are not supported by this
 *	build (no sys/un.h); not modified otherwise.
 * @return the listening socket file descriptor, or -1 on failure.
 *	On failure the created socket has been closed again.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* from here on every failure has to release the socket again,
	 * otherwise the file descriptor is leaked */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
699 
700 
/**
 * Create socket from getaddrinfo results.
 * Looks up ifname and port with getaddrinfo and creates a UDP socket
 * (for SOCK_DGRAM) or a listening TCP socket (otherwise) for the first
 * result.
 * @param stype: socket type, stored into hints->ai_socktype.
 * @param ifname: interface address to look up, NULL for the default.
 * @param port: port number as string.
 * @param hints: getaddrinfo hints, ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 1 if the failure was caused by IPv6 not being
 *	available, otherwise 0.
 * @param rcv: receive buffer size for UDP.
 * @param snd: send buffer size for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: maximum segment size for TCP sockets.
 * @return the socket file descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss)
{
	struct addrinfo *ai = NULL;
	int gai, fd, inuse, noproto;

	*noip6 = 0;
	hints->ai_socktype = stype;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		/* extra detail, only filled in for EAI_SYSTEM errors */
		const char* sysmsg = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		if(gai == EAI_SYSTEM)
			sysmsg = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			sysmsg);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent, tcp_mss);
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent);
	}
	if(fd == -1) {
		/* inuse is only provided by the UDP creation routine */
		if(stype == SOCK_DGRAM && inuse)
			log_err("bind: address already in use");
		else if(noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}
	freeaddrinfo(ai);
	return fd;
}
751 
/**
 * Make socket and first see if ifname contains port override info.
 * An ifname of the form address@port makes the part after the '@'
 * replace the port argument, and the part before it the interface.
 * The other arguments are handed to make_sock unchanged.
 * @return the socket file descriptor, or -1 on failure; when the
 *	interface or port part is too long noip6 is set to 0.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no override, use the arguments as they are */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* copy the string and cut it off where the '@' was */
	(void)strlcpy(ifbuf, ifname, sizeof(ifbuf));
	ifbuf[iflen] = 0;
	/* fits, the length was checked above */
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));
	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport, transparent, tcp_mss);
}
783 
784 /**
785  * Add port to open ports list.
786  * @param list: list head. changed.
787  * @param s: fd.
788  * @param ftype: if fd is UDP.
789  * @return false on failure. list in unchanged then.
790  */
791 static int
792 port_insert(struct listen_port** list, int s, enum listen_type ftype)
793 {
794 	struct listen_port* item = (struct listen_port*)malloc(
795 		sizeof(struct listen_port));
796 	if(!item)
797 		return 0;
798 	item->next = *list;
799 	item->fd = s;
800 	item->ftype = ftype;
801 	*list = item;
802 	return 1;
803 }
804 
/**
 * Set fd to receive source address packet info.
 * Which socket option is used depends on the address family and on
 * the options that are available at compile time.
 * @param s: the socket.
 * @param family: AF_INET6 or AF_INET; for other values nothing is set.
 * @return false if the option could not be set or if no suitable option
 *	exists on this platform. true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#endif /* IP_PKTINFO */

	default:
		/* other families: nothing to set */
		break;
	}
	return 1;
}
859 
860 /**
861  * Helper for ports_open. Creates one interface (or NULL for default).
862  * @param ifname: The interface ip address.
863  * @param do_auto: use automatic interface detection.
864  * 	If enabled, then ifname must be the wildcard name.
865  * @param do_udp: if udp should be used.
866  * @param do_tcp: if udp should be used.
867  * @param hints: for getaddrinfo. family and flags have to be set by caller.
868  * @param port: Port number to use (as string).
869  * @param list: list of open ports, appended to, changed to point to list head.
870  * @param rcv: receive buffer size for UDP
871  * @param snd: send buffer size for UDP
872  * @param ssl_port: ssl service port number
873  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
874  * 	set to false on exit if reuseport failed due to no kernel support.
875  * @param transparent: set IP_TRANSPARENT socket option.
876  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
877  * @return: returns false on error.
878  */
879 static int
880 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
881 	struct addrinfo *hints, const char* port, struct listen_port** list,
882 	size_t rcv, size_t snd, int ssl_port, int* reuseport, int transparent,
883 	int tcp_mss)
884 {
885 	int s, noip6=0;
886 	if(!do_udp && !do_tcp)
887 		return 0;
888 	if(do_auto) {
889 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
890 			&noip6, rcv, snd, reuseport, transparent,
891 			tcp_mss)) == -1) {
892 			if(noip6) {
893 				log_warn("IPv6 protocol not available");
894 				return 1;
895 			}
896 			return 0;
897 		}
898 		/* getting source addr packet info is highly non-portable */
899 		if(!set_recvpktinfo(s, hints->ai_family)) {
900 #ifndef USE_WINSOCK
901 			close(s);
902 #else
903 			closesocket(s);
904 #endif
905 			return 0;
906 		}
907 		if(!port_insert(list, s, listen_type_udpancil)) {
908 #ifndef USE_WINSOCK
909 			close(s);
910 #else
911 			closesocket(s);
912 #endif
913 			return 0;
914 		}
915 	} else if(do_udp) {
916 		/* regular udp socket */
917 		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
918 			&noip6, rcv, snd, reuseport, transparent,
919 			tcp_mss)) == -1) {
920 			if(noip6) {
921 				log_warn("IPv6 protocol not available");
922 				return 1;
923 			}
924 			return 0;
925 		}
926 		if(!port_insert(list, s, listen_type_udp)) {
927 #ifndef USE_WINSOCK
928 			close(s);
929 #else
930 			closesocket(s);
931 #endif
932 			return 0;
933 		}
934 	}
935 	if(do_tcp) {
936 		int is_ssl = ((strchr(ifname, '@') &&
937 			atoi(strchr(ifname, '@')+1) == ssl_port) ||
938 			(!strchr(ifname, '@') && atoi(port) == ssl_port));
939 		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
940 			&noip6, 0, 0, reuseport, transparent, tcp_mss)) == -1) {
941 			if(noip6) {
942 				/*log_warn("IPv6 protocol not available");*/
943 				return 1;
944 			}
945 			return 0;
946 		}
947 		if(is_ssl)
948 			verbose(VERB_ALGO, "setup TCP for SSL service");
949 		if(!port_insert(list, s, is_ssl?listen_type_ssl:
950 			listen_type_tcp)) {
951 #ifndef USE_WINSOCK
952 			close(s);
953 #else
954 			closesocket(s);
955 #endif
956 			return 0;
957 		}
958 	}
959 	return 1;
960 }
961 
962 /**
963  * Add items to commpoint list in front.
964  * @param c: commpoint to add.
965  * @param front: listen struct.
966  * @return: false on failure.
967  */
968 static int
969 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
970 {
971 	struct listen_list* item = (struct listen_list*)malloc(
972 		sizeof(struct listen_list));
973 	if(!item)
974 		return 0;
975 	item->com = c;
976 	item->next = front->cps;
977 	front->cps = item;
978 	return 1;
979 }
980 
981 struct listen_dnsport*
982 listen_create(struct comm_base* base, struct listen_port* ports,
983 	size_t bufsize, int tcp_accept_count, void* sslctx,
984 	struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
985 {
986 	struct listen_dnsport* front = (struct listen_dnsport*)
987 		malloc(sizeof(struct listen_dnsport));
988 	if(!front)
989 		return NULL;
990 	front->cps = NULL;
991 	front->udp_buff = sldns_buffer_new(bufsize);
992 	if(!front->udp_buff) {
993 		free(front);
994 		return NULL;
995 	}
996 
997 	/* create comm points as needed */
998 	while(ports) {
999 		struct comm_point* cp = NULL;
1000 		if(ports->ftype == listen_type_udp)
1001 			cp = comm_point_create_udp(base, ports->fd,
1002 				front->udp_buff, cb, cb_arg);
1003 		else if(ports->ftype == listen_type_tcp)
1004 			cp = comm_point_create_tcp(base, ports->fd,
1005 				tcp_accept_count, bufsize, cb, cb_arg);
1006 		else if(ports->ftype == listen_type_ssl) {
1007 			cp = comm_point_create_tcp(base, ports->fd,
1008 				tcp_accept_count, bufsize, cb, cb_arg);
1009 			cp->ssl = sslctx;
1010 		} else if(ports->ftype == listen_type_udpancil)
1011 			cp = comm_point_create_udp_ancil(base, ports->fd,
1012 				front->udp_buff, cb, cb_arg);
1013 		if(!cp) {
1014 			log_err("can't create commpoint");
1015 			listen_delete(front);
1016 			return NULL;
1017 		}
1018 		cp->dtenv = dtenv;
1019 		cp->do_not_close = 1;
1020 		if(!listen_cp_insert(cp, front)) {
1021 			log_err("malloc failed");
1022 			comm_point_delete(cp);
1023 			listen_delete(front);
1024 			return NULL;
1025 		}
1026 		ports = ports->next;
1027 	}
1028 	if(!front->cps) {
1029 		log_err("Could not open sockets to accept queries.");
1030 		listen_delete(front);
1031 		return NULL;
1032 	}
1033 
1034 	return front;
1035 }
1036 
1037 void
1038 listen_list_delete(struct listen_list* list)
1039 {
1040 	struct listen_list *p = list, *pn;
1041 	while(p) {
1042 		pn = p->next;
1043 		comm_point_delete(p->com);
1044 		free(p);
1045 		p = pn;
1046 	}
1047 }
1048 
1049 void
1050 listen_delete(struct listen_dnsport* front)
1051 {
1052 	if(!front)
1053 		return;
1054 	listen_list_delete(front->cps);
1055 	sldns_buffer_free(front->udp_buff);
1056 	free(front);
1057 }
1058 
1059 struct listen_port*
1060 listening_ports_open(struct config_file* cfg, int* reuseport)
1061 {
1062 	struct listen_port* list = NULL;
1063 	struct addrinfo hints;
1064 	int i, do_ip4, do_ip6;
1065 	int do_tcp, do_auto;
1066 	char portbuf[32];
1067 	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1068 	do_ip4 = cfg->do_ip4;
1069 	do_ip6 = cfg->do_ip6;
1070 	do_tcp = cfg->do_tcp;
1071 	do_auto = cfg->if_automatic && cfg->do_udp;
1072 	if(cfg->incoming_num_tcp == 0)
1073 		do_tcp = 0;
1074 
1075 	/* getaddrinfo */
1076 	memset(&hints, 0, sizeof(hints));
1077 	hints.ai_flags = AI_PASSIVE;
1078 	/* no name lookups on our listening ports */
1079 	if(cfg->num_ifs > 0)
1080 		hints.ai_flags |= AI_NUMERICHOST;
1081 	hints.ai_family = AF_UNSPEC;
1082 #ifndef INET6
1083 	do_ip6 = 0;
1084 #endif
1085 	if(!do_ip4 && !do_ip6) {
1086 		return NULL;
1087 	}
1088 	/* create ip4 and ip6 ports so that return addresses are nice. */
1089 	if(do_auto || cfg->num_ifs == 0) {
1090 		if(do_ip6) {
1091 			hints.ai_family = AF_INET6;
1092 			if(!ports_create_if(do_auto?"::0":"::1",
1093 				do_auto, cfg->do_udp, do_tcp,
1094 				&hints, portbuf, &list,
1095 				cfg->so_rcvbuf, cfg->so_sndbuf,
1096 				cfg->ssl_port, reuseport,
1097 				cfg->ip_transparent,
1098 				cfg->tcp_mss)) {
1099 				listening_ports_free(list);
1100 				return NULL;
1101 			}
1102 		}
1103 		if(do_ip4) {
1104 			hints.ai_family = AF_INET;
1105 			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1106 				do_auto, cfg->do_udp, do_tcp,
1107 				&hints, portbuf, &list,
1108 				cfg->so_rcvbuf, cfg->so_sndbuf,
1109 				cfg->ssl_port, reuseport,
1110 				cfg->ip_transparent,
1111 				cfg->tcp_mss)) {
1112 				listening_ports_free(list);
1113 				return NULL;
1114 			}
1115 		}
1116 	} else for(i = 0; i<cfg->num_ifs; i++) {
1117 		if(str_is_ip6(cfg->ifs[i])) {
1118 			if(!do_ip6)
1119 				continue;
1120 			hints.ai_family = AF_INET6;
1121 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1122 				do_tcp, &hints, portbuf, &list,
1123 				cfg->so_rcvbuf, cfg->so_sndbuf,
1124 				cfg->ssl_port, reuseport,
1125 				cfg->ip_transparent,
1126 				cfg->tcp_mss)) {
1127 				listening_ports_free(list);
1128 				return NULL;
1129 			}
1130 		} else {
1131 			if(!do_ip4)
1132 				continue;
1133 			hints.ai_family = AF_INET;
1134 			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1135 				do_tcp, &hints, portbuf, &list,
1136 				cfg->so_rcvbuf, cfg->so_sndbuf,
1137 				cfg->ssl_port, reuseport,
1138 				cfg->ip_transparent,
1139 				cfg->tcp_mss)) {
1140 				listening_ports_free(list);
1141 				return NULL;
1142 			}
1143 		}
1144 	}
1145 	return list;
1146 }
1147 
1148 void listening_ports_free(struct listen_port* list)
1149 {
1150 	struct listen_port* nx;
1151 	while(list) {
1152 		nx = list->next;
1153 		if(list->fd != -1) {
1154 #ifndef USE_WINSOCK
1155 			close(list->fd);
1156 #else
1157 			closesocket(list->fd);
1158 #endif
1159 		}
1160 		free(list);
1161 		list = nx;
1162 	}
1163 }
1164 
1165 size_t listen_get_mem(struct listen_dnsport* listen)
1166 {
1167 	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1168 		sizeof(*listen->udp_buff) +
1169 		sldns_buffer_capacity(listen->udp_buff);
1170 	struct listen_list* p;
1171 	for(p = listen->cps; p; p = p->next) {
1172 		s += sizeof(*p);
1173 		s += comm_point_get_mem(p->com);
1174 	}
1175 	return s;
1176 }
1177 
1178 void listen_stop_accept(struct listen_dnsport* listen)
1179 {
1180 	/* do not stop the ones that have no tcp_free list
1181 	 * (they have already stopped listening) */
1182 	struct listen_list* p;
1183 	for(p=listen->cps; p; p=p->next) {
1184 		if(p->com->type == comm_tcp_accept &&
1185 			p->com->tcp_free != NULL) {
1186 			comm_point_stop_listening(p->com);
1187 		}
1188 	}
1189 }
1190 
1191 void listen_start_accept(struct listen_dnsport* listen)
1192 {
1193 	/* do not start the ones that have no tcp_free list, it is no
1194 	 * use to listen to them because they have no free tcp handlers */
1195 	struct listen_list* p;
1196 	for(p=listen->cps; p; p=p->next) {
1197 		if(p->com->type == comm_tcp_accept &&
1198 			p->com->tcp_free != NULL) {
1199 			comm_point_start_listening(p->com, -1, -1);
1200 		}
1201 	}
1202 }
1203 
1204